diff --git a/docs/solver_guide.rst b/docs/solver_guide.rst index c86e240..b6cf02b 100644 --- a/docs/solver_guide.rst +++ b/docs/solver_guide.rst @@ -131,6 +131,92 @@ Key parameters: - ``fast_measure=False``: Required for real devices. Uses ``|alpha|^2 - |beta|^2`` (Born rule) instead of the quantum-inspired ``|alpha| - |beta|`` shortcut. - ``parallel_qubits``: Packs N independent single-qubit circuits onto N qubits of one multi-qubit job, reducing QPU submissions by ~Nx. - ``shots``: Number of measurement samples per circuit. More shots = less statistical noise. +- ``initial_layout``: Controls which physical qubits receive the packed circuit (see below). Defaults to ``None`` (let the transpiler choose). + + +Qubit-calibration layout +~~~~~~~~~~~~~~~~~~~~~~~~ + +Modern IBM devices have substantial per-qubit calibration variance — on +a snapshot of ``FakeSherbrooke`` the best qubit has a readout error of +1.1% and the worst 25.7%, a 23x spread. When the transpiler maps a +packed N-qubit circuit onto physical qubits ``0..N-1`` (a common +default), several poorly-calibrated qubits can end up in the mix. Per-edge +variance dominates the QKAN fast-path output, and because ``rel MSE`` +scales as ``σ²``, a 6x higher mean noise across the selected set +compounds to ~36x higher aggregate error. + +Fix: pin the packed circuit to the best-calibrated qubits via +``initial_layout`` in ``solver_kwargs``. + +.. code-block:: python + + from qkan.solver import best_qubits + + layout = best_qubits(backend, 20) + + model = QKAN( + [1, 2, 1], solver="qiskit", fast_measure=False, + solver_kwargs={ + "backend": backend, + "shots": 1024, + "parallel_qubits": 20, + "initial_layout": layout, + }, + ) + +Alternatively, pass ``"auto"`` to let ``qiskit_solver`` compute the +layout internally from the current backend calibration: + +.. code-block:: python + + solver_kwargs={ + "backend": backend, + "shots": 1024, + "parallel_qubits": 20, + "initial_layout": "auto", + } + +``best_qubits(backend, n)`` scores each physical qubit by + +.. math:: + + \mathrm{score}(q) = \mathrm{readout\_error}(q) + + \mathrm{sx\_err}(q) + + 10^{-4} / \max(T_2(q)\,[\mu s],\, 1) + +and returns the ``n`` lowest-scoring qubit indices. Readout error +dominates the sum; sx error breaks ties; short :math:`T_2` is penalised +only slightly because QKAN's shallow single-qubit circuits aren't +T2-sensitive. + +**Empirical impact.** Smoke test on ``FakeSherbrooke`` with a trained +single-sample forecast, ``parallel_qubits=20``, ``shots=1024``: + ++-----------------------------+--------------------------+ +| Layout | rel MSE vs noiseless ref | ++=============================+==========================+ +| ``parallel_qubits=1`` | 0.134% | +| (single best qubit baseline)| | ++-----------------------------+--------------------------+ +| naive ``0..19`` | 5.218% | ++-----------------------------+--------------------------+ +| ``best_qubits(backend, 20)``| 0.127% | ++-----------------------------+--------------------------+ + +The smart layout at ``parallel_qubits=20`` fully recovers the +``parallel_qubits=1`` fidelity (a ~40x improvement over the naive +layout) at identical QPU cost. + +**Caveats and scope.** + +- Real-backend calibration drifts over time; re-querying + ``best_qubits`` before each submission is cheap and recommended. +- The helper assumes independent single-qubit circuits (the QKAN + ``parallel_qubits`` packing pattern). If you add 2-qubit gates, you + also need connectivity-aware routing. +- Returning ``[]`` when ``backend.properties()`` is unavailable lets + callers keep ``initial_layout=None`` fallback semantics. Error Mitigation diff --git a/src/qkan/solver/__init__.py b/src/qkan/solver/__init__.py index 8e5ae96..40cb596 100644 --- a/src/qkan/solver/__init__.py +++ b/src/qkan/solver/__init__.py @@ -31,7 +31,7 @@ from .cutile import _CUTILE_AVAILABLE, cutile_flash_exact_solver from .cutn import cutn_solver from .flash import _FLASH_AVAILABLE, flash_exact_solver -from .qiskit_solver import qiskit_solver +from .qiskit_solver import best_qubits, qiskit_solver from .qml import qml_solver from .torch_exact import torch_exact_solver @@ -39,6 +39,7 @@ "_CUTE_AVAILABLE", "_CUTILE_AVAILABLE", "_FLASH_AVAILABLE", + "best_qubits", "cudaq_solver", "cute_exact_solver", "cutile_flash_exact_solver", diff --git a/src/qkan/solver/qiskit_solver.py b/src/qkan/solver/qiskit_solver.py index 86d3b49..6e7b716 100644 --- a/src/qkan/solver/qiskit_solver.py +++ b/src/qkan/solver/qiskit_solver.py @@ -384,6 +384,101 @@ def _submit_and_collect(est, all_pubs, all_chunk_sizes, max_pubs): _MAX_PUBS_CACHE: dict = {} +def best_qubits(backend, n: int) -> list: + """Return the ``n`` best-calibrated qubit indices on ``backend``. + + When packing ``n`` independent single-qubit QKAN circuits onto ``n`` + physical qubits of one multi-qubit job, the naive transpiler layout + (qubits ``0..n-1``) often includes poorly-calibrated qubits. Because + per-edge noise dominates the fast-programmer's output, and + ``rel MSE ∝ σ²``, a 6× higher mean readout error across the selected + set can compound to ~36× higher aggregate rel MSE vs. the best-qubit + serial baseline. + + This helper scores each physical qubit by + + .. math:: + + \\mathrm{score}(q) = \\mathrm{readout\\_error}(q) + + \\mathrm{sx\\_err}(q) + + 10^{-4} / \\max(T_2(q)\\,[\\mu s],\\, 1) + + (readout error dominates, :math:`sx` error is secondary, short + :math:`T_2` gets a small penalty) and returns the indices of the + ``n`` lowest-scoring qubits. Pass the result as ``initial_layout`` + via ``solver_kwargs`` to pin the packed circuit onto them. + + Empirically on ``FakeSherbrooke`` with ``parallel_qubits=20``, + ``shots=1024``: the smart layout recovers near-``parallel_qubits=1`` + fidelity (≈0.13% rel MSE vs flash) where the naive layout lands at + ≈5% rel MSE — a ~40× improvement at identical QPU cost. + + Parameters + ---------- + backend : qiskit Backend + Backend with a ``properties()`` method (FakeProvider or real IBM). + Returns an empty list if the backend exposes no calibration. + n : int + Number of qubits to select. Must not exceed ``backend.num_qubits``. + + Returns + ------- + list[int] + Top-``n`` physical qubit indices, sorted by ascending score. + + Examples + -------- + >>> from qiskit_ibm_runtime.fake_provider import FakeSherbrooke + >>> backend = FakeSherbrooke() + >>> layout = best_qubits(backend, 20) + >>> model = QKAN( + ... [1, 2, 1], solver="qiskit", fast_measure=False, + ... solver_kwargs={ + ... "backend": backend, + ... "shots": 1024, + ... "parallel_qubits": 20, + ... "initial_layout": layout, + ... }, + ... ) + + See Also + -------- + `initial_layout`, `parallel_qubits` in ``solver_kwargs``. + """ + try: + props = backend.properties() + except Exception: + return [] + if props is None: + return [] + try: + num_qubits = backend.num_qubits + except Exception: + return [] + if n > num_qubits: + raise ValueError( + f"best_qubits: requested n={n} exceeds backend.num_qubits={num_qubits}" + ) + scored = [] + for q in range(num_qubits): + try: + ro = float(props.readout_error(q)) + except Exception: + ro = 0.5 + try: + sx = float(props.gate_error("sx", [q])) + except Exception: + sx = 1e-2 + try: + t2_us = float(props.t2(q)) * 1e6 + except Exception: + t2_us = 50.0 + score = ro + sx + 1e-4 / max(t2_us, 1.0) + scored.append((score, q)) + scored.sort() + return [q for _score, q in scored[:n]] + + def _qiskit_run_parallel( circuits, n_qubits, @@ -391,6 +486,7 @@ def _qiskit_run_parallel( backend, optimization_level, shots, + initial_layout=None, max_pubs_per_job=0, resilience_level=None, twirling=None, @@ -406,6 +502,11 @@ def _qiskit_run_parallel( - If `max_pubs_per_job` == 0 (default), starts with all PUBs in one job. - On memory error (6073), automatically halves and retries. - The discovered working batch size is cached per backend. + + When `initial_layout` is a list of `n_qubits` physical qubit indices, + the packed circuit is pinned to those qubits during transpilation. + This is how :func:`best_qubits` gets applied — qkan does not auto- + select qubits unless the caller explicitly requests it. """ total = len(circuits) @@ -431,7 +532,9 @@ def _qiskit_run_parallel( elif backend is not None: pm = generate_preset_pass_manager( - backend=backend, optimization_level=optimization_level + backend=backend, + optimization_level=optimization_level, + initial_layout=initial_layout, ) rt_estimator = Estimator(mode=backend) _configure_estimator(rt_estimator, shots, resilience_level, twirling) @@ -485,6 +588,7 @@ def _qiskit_evaluate( shots = config["shots"] optimization_level = config.get("optimization_level", 1) parallel_qubits = config.get("parallel_qubits", None) + initial_layout = config.get("initial_layout", None) # Broadcast theta/preacts to (out_dim, in_dim, ...) if len(theta.shape) != 4: @@ -562,7 +666,9 @@ def _qiskit_evaluate( and not (parallel_qubits and parallel_qubits > 1) ): _pm = generate_preset_pass_manager( - backend=backend, optimization_level=optimization_level + backend=backend, + optimization_level=optimization_level, + initial_layout=initial_layout, ) _rt_est = Estimator(mode=backend) _configure_estimator(_rt_est, shots, _rl, _tw) @@ -581,6 +687,7 @@ def _run_qiskit(scale_factor=1): backend, optimization_level, shots, + initial_layout=initial_layout, max_pubs_per_job=max_pubs, resilience_level=_rl, twirling=_tw, @@ -688,6 +795,23 @@ def qiskit_solver( if parallel_qubits == "auto" and backend is not None: parallel_qubits = backend.num_qubits + # Resolve initial_layout: + # None (default) -> let the transpiler choose + # "auto" -> top-N best-calibrated qubits via best_qubits() + # list[int] -> user-supplied physical qubit indices (used as-is) + initial_layout = kwargs.get("initial_layout", None) + if initial_layout == "auto": + if backend is None: + raise ValueError( + "initial_layout='auto' requires a backend with properties() " + "to score qubit calibration." + ) + n_layout = parallel_qubits if (parallel_qubits and parallel_qubits > 1) else 1 + initial_layout = best_qubits(backend, n_layout) + if not initial_layout: + # No calibration available — fall back to transpiler default. + initial_layout = None + max_pubs_per_job = kwargs.get("max_pubs_per_job", 0) config = { @@ -699,6 +823,7 @@ def qiskit_solver( "shots": shots, "optimization_level": optimization_level, "parallel_qubits": parallel_qubits, + "initial_layout": initial_layout, "max_pubs_per_job": max_pubs_per_job, "resilience_level": kwargs.get("resilience_level", None), "twirling": kwargs.get("twirling", None),