diff --git a/runtime/cudaq/builder/kernel_builder.cpp b/runtime/cudaq/builder/kernel_builder.cpp
index ee7202db46f..4002e57397f 100644
--- a/runtime/cudaq/builder/kernel_builder.cpp
+++ b/runtime/cudaq/builder/kernel_builder.cpp
@@ -17,6 +17,7 @@
 #include "cudaq/Optimizer/Dialect/Quake/QuakeDialect.h"
 #include "cudaq/Optimizer/Dialect/Quake/QuakeOps.h"
 #include "cudaq/Optimizer/Transforms/Passes.h"
+#include "cudaq/platform/nvqpp_interface.h"
 #include "mlir/Dialect/LLVMIR/LLVMDialect.h"
 #include "mlir/Dialect/Math/IR/Math.h"
 #include "mlir/ExecutionEngine/ExecutionEngine.h"
@@ -30,16 +31,10 @@
 #include "mlir/Support/LogicalResult.h"
 #include "mlir/Target/LLVMIR/ModuleTranslation.h"
 #include "mlir/Transforms/Passes.h"
-
 #include <numeric>
 
 using namespace mlir;
 
-extern "C" {
-void altLaunchKernel(const char *kernelName, void (*kernelFunc)(void *),
-                     void *kernelArgs, std::uint64_t argsSize);
-}
-
 namespace cudaq::details {
 
 /// @brief Track unique measurement register names.
@@ -1105,9 +1100,20 @@ void invokeCode(ImplicitLocOpBuilder &builder, ExecutionEngine *jit,
   }
 
   // Invoke and free the args memory.
-  auto thunk = reinterpret_cast<void (*)(void *)>(*thunkPtr);
+  auto thunk = reinterpret_cast<KernelThunkType>(*thunkPtr);
+
+  // Look up the result offset symbol, named `<properName>.returnOffset`.
+  auto roName = properName + ".returnOffset";
+  auto roPtr = jit->lookup(roName);
+  if (!roPtr)
+    throw std::runtime_error(
+        "cudaq::builder failed to get result offset function");
+
+  // NOTE(review): casts the looked-up address itself to the offset -- confirm.
+  auto resultOffset = reinterpret_cast<std::uint64_t>(*roPtr);
 
-  altLaunchKernel(properName.data(), thunk, rawArgs, size);
+  [[maybe_unused]] auto uncheckedResult =
+      altLaunchKernel(properName.data(), thunk, rawArgs, size, resultOffset);
   std::free(rawArgs);
   // TODO: any return values are dropped on the floor here.
 }
diff --git a/runtime/cudaq/platform/nvqpp_interface.h b/runtime/cudaq/platform/nvqpp_interface.h
new file mode 100644
index 00000000000..d7ed15dda9d
--- /dev/null
+++ b/runtime/cudaq/platform/nvqpp_interface.h
@@ -0,0 +1,40 @@
+/****************************************************************-*- C++ -*-****
+ * Copyright (c) 2025 NVIDIA Corporation & Affiliates.                         *
+ * All rights reserved.                                                        *
+ *                                                                             *
+ * This source code and the accompanying materials are made available under    *
+ * the terms of the Apache License 2.0 which accompanies this distribution.    *
+ ******************************************************************************/
+
+#pragma once
+
+#include "common/ThunkInterface.h"
+#include <cstdint>
+#include <vector>
+
+namespace cudaq {
+
+/// Entry points for the auto-generated kernel execution path. TODO: These need
+/// to be tied to the quantum platform instance somehow. Note that the compiler
+/// cannot provide that information.
+extern "C" {
+/// Client-server (legacy) launch interface.
+[[nodiscard]] KernelThunkResultType
+altLaunchKernel(const char *kernelName, KernelThunkType kernel, void *args,
+                std::uint64_t argsSize, std::uint64_t resultOffset);
+
+/// Streamlined interface for launching kernels. Argument synthesis and JIT
+/// compilation *must* happen on the local machine.
+[[nodiscard]] KernelThunkResultType
+streamlinedLaunchKernel(const char *kernelName,
+                        const std::vector<void *> &rawArgs);
+
+/// Hybrid of the client-server and streamlined approaches. JIT compilation may
+/// happen either early or late, and return values from each kernel launch can
+/// be handled.
+[[nodiscard]] KernelThunkResultType
+hybridLaunchKernel(const char *kernelName, KernelThunkType kernel, void *args,
+                   std::uint64_t argsSize, std::uint64_t resultOffset,
+                   const std::vector<void *> &rawArgs);
+} // extern "C"
+} // namespace cudaq
diff --git a/runtime/cudaq/platform/quantum_platform.h b/runtime/cudaq/platform/quantum_platform.h
index 3bfa43ac152..c52e7065ec7 100644
--- a/runtime/cudaq/platform/quantum_platform.h
+++ b/runtime/cudaq/platform/quantum_platform.h
@@ -15,6 +15,7 @@
 #include "common/ThunkInterface.h"
 #include "cudaq/remote_capabilities.h"
 #include "cudaq/utils/cudaq_utils.h"
+#include "nvqpp_interface.h"
 #include <cstring>
 #include <cxxabi.h>
 #include <functional>
@@ -22,7 +23,6 @@
 #include <memory>
 #include <optional>
 #include <string>
-#include <vector>
 
 namespace cudaq {