Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions include/cudaq/Optimizer/CodeGen/Passes.td
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,16 @@ def QIRToQIRProfile : Pass<"convert-to-qir-profile"> {
let constructor = "cudaq::opt::createQIRToQIRProfilePass(\"qir-base\")";
}

// Declares the `qir-insert-array-record` pass, which is anchored on
// `mlir::ModuleOp`. The dialects listed in `dependentDialects` are the ones
// the pass declares it may create operations from (Quake, CC and LLVM).
// NOTE(review): no `constructor` field is given here; presumably the pass
// constructor is the one generated by mlir-tblgen -- confirm against the
// generated Passes.h.inc.
def QirInsertArrayRecord : Pass<"qir-insert-array-record", "mlir::ModuleOp"> {
let summary = "Analyze instruction patterns and insert new instructions for QIR";
let description = [{
This pass performs analysis on the instruction patterns between QIR prep and
conversion passes, then inserts array recording instruction based on the analysis.
}];
let dependentDialects = ["quake::QuakeDialect", "cudaq::cc::CCDialect",
"mlir::LLVM::LLVMDialect"];
}

def RemoveMeasurements : Pass<"remove-measurements"> {
let summary =
"Remove measurements and output recording calls from a QIR program";
Expand Down
1 change: 1 addition & 0 deletions lib/Optimizer/CodeGen/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ add_cudaq_library(OptCodeGen
OptUtils.cpp
Passes.cpp
Pipelines.cpp
QirInsertArrayRecord.cpp
QuakeToCodegen.cpp
QuakeToExecMgr.cpp
QuakeToLLVM.cpp
Expand Down
1 change: 1 addition & 0 deletions lib/Optimizer/CodeGen/ConvertToQIRAPI.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2326,6 +2326,7 @@ void cudaq::opt::addConvertToQIRAPIPipeline(OpPassManager &pm, StringRef api,
QuakeToQIRAPIPrepOptions prepApiOpt{.api = api.str(), .opaquePtr = opaquePtr};
pm.addPass(cudaq::opt::createQuakeToQIRAPIPrep(prepApiOpt));
pm.addPass(cudaq::opt::createLowerToCG());
pm.addPass(cudaq::opt::createQirInsertArrayRecord());
QuakeToQIRAPIOptions apiOpt{.api = api.str(), .opaquePtr = opaquePtr};
pm.addPass(cudaq::opt::createQuakeToQIRAPI(apiOpt));
pm.addPass(createCanonicalizerPass());
Expand Down
213 changes: 213 additions & 0 deletions lib/Optimizer/CodeGen/QirInsertArrayRecord.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,213 @@
/*******************************************************************************
* Copyright (c) 2025 NVIDIA Corporation & Affiliates. *
* All rights reserved. *
* *
* This source code and the accompanying materials are made available under *
* the terms of the Apache License 2.0 which accompanies this distribution. *
******************************************************************************/

#include "PassDetails.h"
#include "cudaq/Optimizer/Builder/Intrinsics.h"
#include "cudaq/Optimizer/Builder/Runtime.h"
#include "cudaq/Optimizer/CodeGen/Passes.h"
#include "cudaq/Optimizer/CodeGen/QIRAttributeNames.h"
#include "cudaq/Optimizer/CodeGen/QIRFunctionNames.h"
#include "cudaq/Optimizer/Dialect/Quake/QuakeOps.h"
#include "llvm/ADT/SmallSet.h"
#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
#include "mlir/Transforms/Passes.h"

namespace cudaq::opt {
#define GEN_PASS_DEF_QIRINSERTARRAYRECORD
#include "cudaq/Optimizer/CodeGen/Passes.h.inc"
} // namespace cudaq::opt

#define DEBUG_TYPE "qir-insert-array-record"

using namespace mlir;

namespace {

// Trace a pointer value back to the `AllocaOp` it was derived from.
//
// The def-use chain is followed backwards through `cc::CastOp` (via its
// value operand) and `cc::ComputePtrOp` (via its base operand). A null
// `AllocaOp` is returned when the chain reaches a value with no defining
// operation, an operation of any other kind, or a value that was already
// visited.
static cudaq::cc::AllocaOp tracePointerToAlloca(Value ptr) {
  llvm::DenseSet<Value> seen;
  for (Value current = ptr; current;) {
    // Seeing the same value twice means the chain is cyclic; give up.
    const bool firstVisit = seen.insert(current).second;
    if (!firstVisit)
      return {};

    // Values without a defining operation cannot be traced any further.
    Operation *producer = current.getDefiningOp();
    if (!producer)
      return {};

    if (auto alloca = dyn_cast<cudaq::cc::AllocaOp>(producer))
      return alloca;

    // Step through the pointer-forwarding operations; anything else ends
    // the search unsuccessfully.
    if (auto cast = dyn_cast<cudaq::cc::CastOp>(producer))
      current = cast.getValue();
    else if (auto computePtr = dyn_cast<cudaq::cc::ComputePtrOp>(producer))
      current = computePtr.getBase();
    else
      return {};
  }
  return {};
}

// Analysis over a single function that determines how many measurement
// results it produces and where they are stored.
//
// The function is walked to find measure -> discriminate/cast -> store chains.
// For every store of a measurement-derived value, the destination pointer is
// traced back to its `AllocaOp`; each such alloca is recorded once, even if
// several measurements are stored into it. When no such alloca is found, the
// analysis falls back to the number of measurement operations that carry the
// result-index attribute and remembers the first of them.
struct AllocaMeasureStoreAnalysis {
  AllocaMeasureStoreAnalysis() = default;

  explicit AllocaMeasureStoreAnalysis(func::FuncOp funcOp) {
    size_t indexedMeasurements = 0;
    Operation *earliestMeasure = nullptr;
    // Maps a value to the measurement operation it originates from.
    DenseMap<Value, Operation *> measureOrigin;
    // Insertion-ordered set so each alloca is counted exactly once.
    llvm::SetVector<cudaq::cc::AllocaOp> storageAllocas;

    // Step 1: record measurement results and forward the association through
    // discriminate and cast operations.
    funcOp.walk([&](Operation *op) {
      if (op->hasTrait<cudaq::QuantumMeasure>()) {
        // Only measurements tagged with a result index contribute to the
        // fallback count.
        if (op->hasAttr(cudaq::opt::ResultIndexAttrName)) {
          ++indexedMeasurements;
          if (!earliestMeasure)
            earliestMeasure = op;
        }
        for (Value res : op->getResults())
          measureOrigin[res] = op;
        return;
      }

      // TODO: Check if more operations need to be added here.
      if (!isa<quake::DiscriminateOp, cudaq::cc::CastOp>(op))
        return;

      // Only the first operand is inspected.
      if (op->getNumOperands() == 0)
        return;
      Value source = op->getOperand(0);
      if (measureOrigin.count(source) == 0)
        return;
      // Copy the origin out before inserting new keys into the map.
      Operation *origin = measureOrigin[source];
      for (Value res : op->getResults())
        measureOrigin[res] = origin;
    });

    // Step 2: for every store of a measurement-derived value, trace the
    // destination pointer back to an alloca.
    funcOp.walk([&](cudaq::cc::StoreOp store) {
      if (measureOrigin.count(store.getValue()) == 0)
        return;
      if (auto alloca = tracePointerToAlloca(store.getPtrvalue()))
        storageAllocas.insert(alloca);
    });

    // No explicit storage: fall back to counting individual measurements.
    if (storageAllocas.empty()) {
      if (indexedMeasurements > 0) {
        arraySize = indexedMeasurements;
        firstMeasurementOp = earliestMeasure;
      }
      return;
    }

    // Explicit storage: an array-typed alloca contributes its array size,
    // any other alloca contributes a single element.
    for (cudaq::cc::AllocaOp alloca : storageAllocas) {
      auto arrayTy = alloca.getElementType().dyn_cast<cudaq::cc::ArrayType>();
      if (!arrayTy) {
        arraySize += 1;
        continue;
      }
      arraySize += arrayTy.getSize();
    }
    allocaOps.append(storageAllocas.begin(), storageAllocas.end());
  }

  // Unique allocas that receive measurement results (empty if none found).
  SmallVector<cudaq::cc::AllocaOp> allocaOps;
  // Total number of result slots computed by the analysis.
  size_t arraySize = 0;
  // First result-indexed measurement; set only when `allocaOps` is empty.
  Operation *firstMeasurementOp = nullptr;
};

// Inserts a single call to the QIR array-record-output function
// (`cudaq::opt::QIRArrayRecordOutput`) into `funcOp`, declaring the size and
// type label of the measurement result array. This is needed for the `sample`
// API, which always returns a vector of measurement results.
//
// Insertion point:
//   1. right after the first entry of `allocaOps`, if there is one;
//   2. otherwise right before `firstMeasureOp`.
//
// The label is the string "array<i1 x N>" with N = `resultCount`, and the
// emitted call has the form
//   `__quantum__rt__array_record_output(N, label);`
//
// Returns success without touching the IR when `resultCount` is zero. Returns
// failure when no insertion point is available or when the function
// declaration cannot be loaded into the parent module.
LogicalResult
insertArrayRecordingCalls(func::FuncOp funcOp, size_t resultCount,
                          const SmallVector<cudaq::cc::AllocaOp> &allocaOps,
                          Operation *firstMeasureOp) {
  if (resultCount == 0)
    return success();

  // Without an alloca or a measurement there is nowhere to anchor the call.
  if (allocaOps.empty() && !firstMeasureOp)
    return failure();

  // We insert only one array record call.
  OpBuilder builder(funcOp.getContext());
  if (!allocaOps.empty())
    builder.setInsertionPointAfter(allocaOps.front());
  else
    builder.setInsertionPoint(firstMeasureOp);
  const Location loc = funcOp.getLoc();

  // Build the label text: "array<i1 x N>".
  std::string labelText("array<i1 x ");
  labelText += std::to_string(resultCount);
  labelText += '>';

  // The literal is typed as a pointer to an i8 array with one extra element
  // beyond the label's characters.
  Type i8Ty = builder.getI8Type();
  auto literalArrayTy = cudaq::cc::ArrayType::get(builder.getContext(), i8Ty,
                                                  labelText.size() + 1);
  auto literalPtrTy = cudaq::cc::PointerType::get(literalArrayTy);
  Value literal = builder.create<cudaq::cc::CreateStringLiteralOp>(
      loc, literalPtrTy, builder.getStringAttr(labelText));

  // Cast the literal to a plain i8 pointer and emit the call with a 64-bit
  // element count.
  Value label = builder.create<cudaq::cc::CastOp>(
      loc, cudaq::cc::PointerType::get(i8Ty), literal);
  Value count = builder.create<arith::ConstantIntOp>(loc, resultCount, 64);
  builder.create<func::CallOp>(loc, TypeRange{},
                               cudaq::opt::QIRArrayRecordOutput,
                               ValueRange{count, label});

  // Make sure the callee is declared in the enclosing module.
  auto parentModule = funcOp->getParentOfType<ModuleOp>();
  if (parentModule.lookupSymbol(cudaq::opt::QIRArrayRecordOutput))
    return success();
  auto irBuilder = cudaq::IRBuilder::atBlockEnd(parentModule.getBody());
  return irBuilder.loadIntrinsic(parentModule,
                                 cudaq::opt::QIRArrayRecordOutput);
}

// Module-level pass that inserts one array-record-output call into each
// qualifying function of the module.
//
// A function is processed only if it is non-empty, carries the
// `cudaq::entryPointAttrName` attribute, and does not carry the
// `cudaq::runtime::enableCudaqRun` attribute. For each such function the
// `AllocaMeasureStoreAnalysis` is run; functions for which it reports an
// array size of zero are left untouched.
struct QirInsertArrayRecordPass
: public cudaq::opt::impl::QirInsertArrayRecordBase<
QirInsertArrayRecordPass> {

// Inherit the constructors of the generated base class.
using QirInsertArrayRecordBase::QirInsertArrayRecordBase;

// Iterates over the `func::FuncOp`s of the module and signals pass failure
// as soon as the insertion fails for one of them.
void runOnOperation() override {
ModuleOp module = getOperation();
for (auto funcOp : module.getOps<func::FuncOp>()) {
// Skip functions that are empty, are not entry points, or are marked with
// the `enableCudaqRun` attribute.
if (!funcOp || funcOp.empty() ||
!funcOp->hasAttr(cudaq::entryPointAttrName) ||
funcOp->hasAttr(cudaq::runtime::enableCudaqRun))
continue;

AllocaMeasureStoreAnalysis analysis(funcOp);
// Nothing to record for this function.
if (analysis.arraySize == 0)
continue;

LLVM_DEBUG(llvm::dbgs() << "Before adding array recording call:\n"
<< *funcOp);
// NOTE(review): the non-C++ lines embedded in the call below appear to be
// code-review page text captured together with this listing -- confirm and
// strip them before compiling.
if (failed(insertArrayRecordingCalls(funcOp, analysis.arraySize,
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should use the intrinsic loading instead of rolling your own. Aren't these already loaded by the QIR API Prep pass, though?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wanted to make this pass self-contained, since we cannot guarantee that the prep pass has run before this one.
Should I move the loadIntrinsic call from line#176 here?

analysis.allocaOps,
analysis.firstMeasurementOp)))
return signalPassFailure();
LLVM_DEBUG(llvm::dbgs() << "After adding array recording call:\n"
<< *funcOp);
}
}
};
} // namespace
11 changes: 8 additions & 3 deletions runtime/common/RecordLogParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -119,9 +119,14 @@ void cudaq::RecordLogParser::handleOutput(
(containerMeta.m_type == ContainerType::ARRAY &&
containerMeta.elementCount == 0);
if (isUninitializedContainer) {
// Currently, our QIR for sampled kernel only has a sequence of RESULT
// records, not wrapped in an ARRAY. Hence, we treat it as an array of
// results.
// NOTE: This is a temporary workaround until all backends consistently
// use the new transformation pass that wraps result records inside an
// array record output. For now, we permit "naked" RESULT records, i.e.,
// if the QIR produced by a sampled kernel emits a sequence of RESULT
// records without enclosing them in an ARRAY, we interpret them
// collectively as an array of results.
// NOTE: This assumption prevents us from correctly supporting `run` with
// `qir-base` profile.
containerMeta.m_type = ContainerType::ARRAY;
containerMeta.elementCount =
std::stoul(metadata[ResultCountMetadataName]);
Expand Down
Loading
Loading