diff --git a/azureml/components/jsonl_gsm8k_fetch_component.yaml b/azureml/components/jsonl_gsm8k_fetch_component.yaml
new file mode 100644
index 0000000..5d2fdeb
--- /dev/null
+++ b/azureml/components/jsonl_gsm8k_fetch_component.yaml
@@ -0,0 +1,32 @@
+$schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json
+
+name: jsonl_gsm8k_fetch
+version: 0.0.1pre1
+display_name: JSONL GSM8K Fetcher
+type: command
+description: Fetches the GSM8K dataset and formats it into JSONL
+is_deterministic: true
+
+inputs:
+  output_encoding:
+    type: string
+    optional: false
+    default: utf-8-sig
+    description: Encoding format of the output datasets
+
+outputs:
+  output_dataset:
+    type: uri_folder
+    description: |
+      Folder which will contain 'train.jsonl' and 'test.jsonl'
+
+code: ./src/
+
+command: >-
+  python ./jsonl_gsm8k_fetch.py
+  --output_encoding ${{ inputs.output_encoding }}
+  --output_dataset ${{ outputs.output_dataset }}
+
+environment:
+  # Will be updated when component uploads
+  image: azureml:promptbase_aml@latest
\ No newline at end of file
diff --git a/azureml/components/jsonl_guidance_mistral7b_component.yaml b/azureml/components/jsonl_guidance_mistral7b_component.yaml
new file mode 100644
index 0000000..c451731
--- /dev/null
+++ b/azureml/components/jsonl_guidance_mistral7b_component.yaml
@@ -0,0 +1,92 @@
+$schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json
+
+name: jsonl_guidance_mistral7b
+version: 0.0.1pre1
+display_name: JSONL Guidance Mistral7B
+type: command
+description: Runs a supplied Guidance program on every line of a JSONL file via Mistral7B
+is_deterministic: false
+
+inputs:
+  guidance_program:
+    type: uri_file
+    optional: false
+    description: Python file containing the guidance program
+  input_dataset:
+    type: uri_file
+    optional: false
+    description: Dataset containing JSONL input
+  input_encoding:
+    type: string
+    optional: false
+    default: utf-8-sig
+    description: Encoding format of the input dataset
+  common_dataset:
+    type: uri_file
+    optional: true
+    description: Dataset containing data to be shared with all rows in input
+  common_encoding:
+    type: string
+    optional: true
+    default: utf-8-sig
+    description: Encoding format of the common dataset
+  output_encoding:
+    type: string
+    optional: false
+    default: utf-8-sig
+    description: Encoding format of the output dataset
+  error_encoding:
+    type: string
+    optional: false
+    default: utf-8-sig
+    description: Encoding format of the error dataset
+
+outputs:
+  output_dataset:
+    type: uri_file
+    description: JSONL file
+  error_dataset:
+    type: uri_file
+    description: JSONL file containing failed lines
+
+code: ./src/
+
+command: |
+  # Install Rust toolchain
+  #apt update
+  #apt upgrade -y
+  #apt install -y rustc build-essential
+  #pip install setup-rust
+  # Download the zip
+  wget https://github.com/guidance-ai/guidance/archive/refs/heads/main.zip
+  echo
+  ls
+  echo
+  # Unzip
+  unzip ./main.zip
+  echo
+  ls -p
+  echo
+  # Install from download
+  pip install --upgrade ./guidance-main/
+  echo
+  # Install LlamaCpp
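+  # The LLAMA_CUBLAS flag builds llama-cpp-python against CUDA so the
+  # GGUF model can be offloaded to the GPU at inference time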
+  CMAKE_ARGS="-DLLAMA_CUBLAS=on" pip install "llama-cpp-python<0.2.58"
+  echo
+  # Run the script
+  python ./jsonl_guidance_mistral7b.py \
+    --guidance_program ${{ inputs.guidance_program }} \
+    --input_dataset ${{ inputs.input_dataset }} \
+    --input_encoding ${{ inputs.input_encoding }} \
+    $[[--common_dataset ${{ inputs.common_dataset }} ]] \
+    $[[--common_encoding ${{ inputs.common_encoding }} ]] \
+    --output_dataset ${{ outputs.output_dataset }} \
+    --output_encoding ${{ inputs.output_encoding }} \
+    --error_dataset ${{ outputs.error_dataset }} \
+    --error_encoding ${{ inputs.error_encoding }}
+
+environment:
+  # Will be updated when component uploads
+  image: azureml:guidance_phi2_env@latest
\ No newline at end of file
diff --git a/azureml/components/jsonl_sample_lines_component.yaml b/azureml/components/jsonl_sample_lines_component.yaml
new file mode 100644
index 0000000..5c23a68
--- /dev/null
+++ b/azureml/components/jsonl_sample_lines_component.yaml
@@ -0,0 +1,53 @@
+$schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json
+
+name: jsonl_sample_lines
+version: 0.0.1pre1
+display_name: 'JSONL Sample Lines'
+type: command
+description: |
+  Samples lines (without replacement) from a JSONL file
+is_deterministic: true
+
+inputs:
+  input_dataset:
+    type: uri_file
+    optional: false
+    description: Dataset containing JSONL input
+  input_encoding:
+    type: string
+    optional: false
+    default: utf-8-sig
+    description: Encoding format of the input dataset
+  n_samples:
+    type: integer
+    optional: false
+    description: Number of samples required
+  random_seed:
+    type: integer
+    optional: false
+    description: Seed for Python's PRNG
+  output_encoding:
+    type: string
+    optional: false
+    default: utf-8-sig
+    description: Encoding format of the output dataset
+
+outputs:
+  output_dataset:
+    type: uri_file
+    description: Dataset containing sampled JSONL
+
+code: ./src
+
+command: >-
+  python ./jsonl_sample_lines.py
+  --input_dataset ${{ inputs.input_dataset }}
+  --input_encoding ${{ inputs.input_encoding }}
+  --n_samples ${{ inputs.n_samples }}
+  --random_seed ${{ inputs.random_seed }}
+  --output_dataset ${{ outputs.output_dataset }}
+  --output_encoding ${{ inputs.output_encoding }}
+
+environment:
+  # Will be updated when component uploads
+  image: azureml:promptbase_aml@latest
\ No newline at end of file
diff --git a/azureml/components/jsonl_score_numeric_component.yaml b/azureml/components/jsonl_score_numeric_component.yaml
new file mode 100644
index 0000000..6098006
--- /dev/null
+++ b/azureml/components/jsonl_score_numeric_component.yaml
@@ -0,0 +1,56 @@
+$schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json
+
+name: jsonl_score_numeric
+version: 0.0.1pre1
+display_name: JSONL Numeric Scorer
+type: command
+description: |
+  Takes a JSONL file of numeric questions and correct answers and responses
+  from a model, and produces the overall score.
+  Results are stored in JSON.
+is_deterministic: true
+
+inputs:
+  input_dataset:
+    type: uri_file
+    optional: false
+    description: Dataset containing JSONL input
+  input_encoding:
+    type: string
+    optional: false
+    default: utf-8-sig
+    description: Encoding format of the input dataset
+  correct_key:
+    type: string
+    optional: false
+    description: Which key contains the correct answer
+  response_key:
+    type: string
+    optional: false
+    description: Which key contains the answer produced by the model
+  output_encoding:
+    type: string
+    optional: false
+    default: utf-8-sig
+    description: Encoding format of the output dataset
+
+outputs:
+  output_dataset:
+    type: uri_file
+    description: JSON file containing score summary
+
+
+code: ./src/
+
+command: >-
+  python ./jsonl_score_numeric.py
+  --input_dataset ${{ inputs.input_dataset }}
+  --input_encoding ${{ inputs.input_encoding }}
+  --output_dataset ${{ outputs.output_dataset }}
+  --output_encoding ${{ inputs.output_encoding }}
+  --correct_key ${{ inputs.correct_key }}
+  --response_key ${{ inputs.response_key }}
+
+environment:
+  # Will be updated when component uploads
+  image: azureml:promptbase_aml@latest
\ No newline at end of file
diff --git a/azureml/components/src/jsonl_gsm8k_fetch.py b/azureml/components/src/jsonl_gsm8k_fetch.py
new file mode 100644
index 0000000..01cb15d
--- /dev/null
+++ b/azureml/components/src/jsonl_gsm8k_fetch.py
@@ -0,0 +1,99 @@
+import argparse
+import json
+import pathlib
+import re
+
+from typing import Any, Dict
+
+import requests
+
+
+from aether_utils.jsonl_file_utils import JSONLWriter
+from aether_utils.logging_utils import get_standard_logger_for_file
+
+_logger = get_standard_logger_for_file(__file__)
+
+BASE_DATA_URL = "https://raw.githubusercontent.com/openai/grade-school-math/master/grade_school_math/data/"
+
+SPLITS = ["train", "test"]
+
+
+def parse_args():
+    parser = argparse.ArgumentParser(add_help=True)
+
+    # Information about the ports
+    ports_group = parser.add_argument_group("Ports")
+    ports_group.add_argument("--output_dataset", type=pathlib.Path, required=True)
+    ports_group.add_argument("--output_encoding", type=str, required=True)
+
+    args = parser.parse_args()
+    return args
+
+
+def extract_thought_parts(thought: str) -> Dict[str, Any]:
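+    # GSM8K reasoning steps embed calculator annotations of the form
+    # 'step text<<expression=value>>result text'; split a step into
+    # those three pieces so the guidance programs can reuse them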
+    thought_re = r"(.*)<<(.*=.*)>>(.*)"
+    match = re.match(thought_re, thought)
+
+    result = dict()
+    if match:
+        result["step"] = match.group(1)
+        result["calculation"] = match.group(2)
+        result["result"] = match.group(3)
+    else:
+        result["step"] = thought
+    return result
+
+
+def process_line(item: Dict[str, Any]) -> Dict[str, Any]:
+    result = dict()
+    _logger.debug(f"Processing {item}")
+
+    result["question"] = item["question"]
+
+    # The answer embeds a chain of thought and the
+    # numeric result
+    split_answer = item["answer"].split("####")
+
+    result["thoughts"] = []
+    for thought in split_answer[0].splitlines():
+        result["thoughts"].append(extract_thought_parts(thought))
+
+    # The following is not how you're supposed to handle
+    # numbers with thousand separators.
+    # This is a workaround, pending three-way negotiations
+    # with locale.atof() and the AzureML compute nodes
+    result["answer"] = float(split_answer[1].replace(",", ""))
+
+    return result
+
+
+def main():
+    args = parse_args()
+
+    for split in SPLITS:
+        _logger.info(f"Starting split {split}")
+        line_count = 0
+        target_url = f"{BASE_DATA_URL}{split}.jsonl"
+
+        _logger.info(f"Fetching {target_url}")
+        response = requests.get(target_url)
+        assert response.status_code == 200, f"Got response {response}"
+
+        with JSONLWriter(
+            args.output_dataset / f"{split}.jsonl", args.output_encoding
+        ) as jlw:
+            for line in response.text.splitlines():
+                nxt_item = json.loads(line)
+                output_item = process_line(nxt_item)
+                jlw.write_line(output_item)
+                line_count += 1
+        _logger.info(f"Completed split {split} ({line_count} lines)")
+
+    _logger.info("Complete")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/azureml/components/src/jsonl_guidance_mistral7b.py b/azureml/components/src/jsonl_guidance_mistral7b.py
new file mode 100644
index 0000000..7fa5063
--- /dev/null
+++ b/azureml/components/src/jsonl_guidance_mistral7b.py
@@ -0,0 +1,136 @@
+import argparse
+import importlib.util
+import json
+import pathlib
+import time
+
+from typing import Any, Callable, Dict
+
+import guidance
+
+from huggingface_hub import hf_hub_download
+
+import mlflow
+
+from aether_utils.jsonl_utils import line_map
+from aether_utils.logging_utils import get_standard_logger_for_file
+
+
+_logger = get_standard_logger_for_file(__file__)
+
+USER_MODULE = "user_module"
+GUIDANCE_FUNCTION = "guidance_generation"
+
+
+def parse_args():
+    parser = argparse.ArgumentParser(add_help=True)
+
+    # Information about the datasets
+    datasets_group = parser.add_argument_group("Datasets")
+    datasets_group.add_argument("--input_dataset", type=pathlib.Path, required=True)
+    datasets_group.add_argument("--input_encoding", type=str, required=True)
+    datasets_group.add_argument("--output_dataset", type=pathlib.Path, required=True)
+    datasets_group.add_argument("--output_encoding", type=str, required=True)
+    datasets_group.add_argument("--error_dataset", type=pathlib.Path, required=True)
+    datasets_group.add_argument("--error_encoding", type=str, required=True)
+    datasets_group.add_argument(
+        "--common_dataset", type=pathlib.Path, required=False, default=None
+    )
+    datasets_group.add_argument("--common_encoding", type=str, required=False)
+
+    # Information about the guidance program
+    parser.add_argument("--guidance_program", type=pathlib.Path, required=True)
+
+    args = parser.parse_args()
+    return args
+
+
+class LLMProcessor:
+    def __init__(
+        self,
+        program_path,
+        model: guidance.models.Model,
+        common_data: dict[str, Any] | None,
+    ):
+        self._program_path = program_path
+        self._model = model
+        self._guidance_function = self._get_guidance_function()
+        self._common_data = common_data
+        self._step = 0
+
+    def __call__(self, item: Dict[str, Any]) -> Dict[str, Any]:
+        _logger.debug(f"__call__: {item}")
+        start = time.time()
+        result = self._guidance_function(self._model, item, common=self._common_data)
+        stop = time.time()
+        mlflow.log_metric("time_taken", value=stop - start, step=self._step)
+        _logger.debug("Checking keys")
+        for k in result.keys():
+            assert k not in item, f"Duplicate key: {k}"
+
+        _logger.debug("Updating item")
+        item.update(**result)
+        self._step += 1
+
+        return item
+
+    def _get_guidance_function(
+        self,
+    ) -> Callable[..., Dict[str, Any]]:
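+        # Load the user-supplied guidance program as a Python module and
+        # pull out the entry point it must define (guidance_generation)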
+        _logger.debug("Importing guidance file")
+        spec = importlib.util.spec_from_file_location(USER_MODULE, self._program_path)
+        module_definition = importlib.util.module_from_spec(spec)
+        spec.loader.exec_module(module_definition)
+
+        guidance_func = getattr(module_definition, GUIDANCE_FUNCTION)
+
+        return guidance_func
+
+
+def main():
+    args = parse_args()
+
+    # Load the common data (if required)
+    common_data = None
+    if args.common_dataset is not None:
+        _logger.info("Loading common dataset")
+        with open(args.common_dataset, "r", encoding=args.common_encoding) as jf:
+            common_data = json.load(jf)
+    else:
+        _logger.info("No common dataset present")
+
+    repo_id = "TheBloke/Mistral-7B-Instruct-v0.2-GGUF"
+    filename = "mistral-7b-instruct-v0.2.Q8_0.gguf"
+    downloaded_file = hf_hub_download(repo_id=repo_id, filename=filename)
+
+    guidance_model = guidance.models.LlamaCpp(
+        downloaded_file, verbose=True, n_gpu_layers=-1, n_ctx=4096
+    )
+    # _logger.info(f"guidance_model.device: {guidance_model.engine.device}")
+
+    processor = LLMProcessor(
+        program_path=args.guidance_program,
+        model=guidance_model,
+        common_data=common_data,
+    )
+
+    _logger.info("Starting to process input")
+    s, f = line_map(
+        map_func=processor,
+        source_file=args.input_dataset,
+        dest_file=args.output_dataset,
+        source_encoding=args.input_encoding,
+        dest_encoding=args.output_encoding,
+        # Route failed lines to the error output declared by the component;
+        # this assumes line_map accepts error_file/error_encoding arguments
+        error_file=args.error_dataset,
+        error_encoding=args.error_encoding,
+    )
+
+    _logger.info(f"Complete with {s} successes and {f} failures")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/azureml/components/src/jsonl_sample_lines.py b/azureml/components/src/jsonl_sample_lines.py
new file mode 100644
index 0000000..224c877
--- /dev/null
+++ b/azureml/components/src/jsonl_sample_lines.py
@@ -0,0 +1,50 @@
+import argparse
+import pathlib
+import random
+
+from aether_utils.jsonl_file_utils import load_jsonl, save_jsonl
+from aether_utils.logging_utils import get_standard_logger_for_file
+
+_logger = get_standard_logger_for_file(__file__)
+
+
+def parse_args():
+    parser = argparse.ArgumentParser(add_help=True)
+
+    # Information about the datasets
+    datasets_group = parser.add_argument_group("Datasets")
+    datasets_group.add_argument("--input_dataset", type=pathlib.Path, required=True)
+    datasets_group.add_argument("--input_encoding", type=str, required=True)
+    datasets_group.add_argument("--output_dataset", type=pathlib.Path, required=True)
+    datasets_group.add_argument("--output_encoding", type=str, required=True)
+
+    # Information about the sampling
+    sampling_group = parser.add_argument_group("Sampling")
+    sampling_group.add_argument("--n_samples", type=int, required=True)
+    sampling_group.add_argument("--random_seed", type=int, required=True)
+
+    args = parser.parse_args()
+    return args
+
+
+def main():
+    args = parse_args()
+
+    _logger.info("Loading input")
+    all_data = load_jsonl(args.input_dataset, args.input_encoding)
+    _logger.info(f"Loaded {len(all_data)} items")
+
+    random.seed(args.random_seed)
+    sampled_data = random.sample(all_data, k=args.n_samples)
+
+    _logger.info("Saving output")
+    save_jsonl(
+        file_path=args.output_dataset,
+        data=sampled_data,
+        destination_encoding=args.output_encoding,
+    )
+    _logger.info("Done")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/azureml/components/src/jsonl_score_numeric.py b/azureml/components/src/jsonl_score_numeric.py
new file mode 100644
index 0000000..c5dd211
--- /dev/null
+++ b/azureml/components/src/jsonl_score_numeric.py
@@ -0,0 +1,87 @@
+import argparse
+import json
+import pathlib
+
+from typing import Any
+
+import mlflow
+
+from aether_utils.jsonl_utils import line_reduce
+from aether_utils.logging_utils import get_standard_logger_for_file
+
+_logger = get_standard_logger_for_file(__file__)
+
+
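+# Accumulates (correct, response) pairs one JSONL line at a time, so
+# line_reduce can stream the whole file through a single Scorer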
+class Scorer:
+    def __init__(self, correct_key: str, response_key: str):
+        self.y_true = []
+        self.y_pred = []
+        self.correct_key = correct_key
+        self.response_key = response_key
+
+    def __call__(self, line: dict[str, Any]):
+        correct_answer = line[self.correct_key]
+        response_answer = line[self.response_key]
+        self.y_true.append(correct_answer)
+        self.y_pred.append(response_answer)
+
+    def generate_summary(self) -> dict[str, Any]:
+        result = dict()
+
+        result["n_answers"] = len(self.y_true)
+        n_correct = 0
+        for y_t, y_p in zip(self.y_true, self.y_pred):
+            if y_t == y_p:
+                n_correct += 1
+        result["n_correct"] = n_correct
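+        # Guard against an empty input file when computing accuracy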
+        result["accuracy"] = 0
+        if len(self.y_true) > 0:
+            result["accuracy"] = float(n_correct) / len(self.y_true)
+
+        return result
+
+
+def parse_args():
+    parser = argparse.ArgumentParser(add_help=True)
+
+    # Information about the ports
+    ports_group = parser.add_argument_group("Ports")
+    ports_group.add_argument("--input_dataset", type=pathlib.Path, required=True)
+    ports_group.add_argument("--input_encoding", type=str, required=True)
+    ports_group.add_argument("--output_dataset", type=pathlib.Path, required=True)
+    ports_group.add_argument("--output_encoding", type=str, required=True)
+
+    # Information about the keys
+    keys_group = parser.add_argument_group("Keys")
+    keys_group.add_argument("--correct_key", type=str, required=True)
+    keys_group.add_argument("--response_key", type=str, required=True)
+
+    args = parser.parse_args()
+
+    return args
+
+
+def main():
+    args = parse_args()
+
+    scorer = Scorer(correct_key=args.correct_key, response_key=args.response_key)
+    line_reduce(
+        reducer=scorer,
+        source_file=args.input_dataset,
+        source_encoding=args.input_encoding,
+    )
+    summary = scorer.generate_summary()
+
+    _logger.info("Logging with mlflow")
+    mlflow.log_metrics(summary)
+
+    _logger.info("Writing output file")
+    with open(args.output_dataset, encoding=args.output_encoding, mode="w") as jf:
+        json.dump(summary, jf, indent=4)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/azureml/environments/phi2transformer-env.yaml b/azureml/environments/phi2transformer-env.yaml
index f31b249..fe96e88 100644
--- a/azureml/environments/phi2transformer-env.yaml
+++ b/azureml/environments/phi2transformer-env.yaml
@@ -8,8 +8,11 @@ image: mcr.microsoft.com/azureml/minimal-ubuntu22.04-py39-cuda11.8-gpu-inference
 conda_file:
   channels:
     - defaults
+    - conda-forge
   dependencies:
     - python=3.11
+    # Rust is now part of building the guidance wheel
+    - rust
    - pip
    - pip:
      # Note that we have to force torch to install from this index
@@ -19,6 +22,9 @@ conda_file:
      # ... so we have to add PyPI back in as an alternative index
      - --extra-index-url https://pypi.org/simple
      - accelerate
+      - azureml-mlflow
      - aether-utils==0.0.1.dev1
      - guidance>=0.1.13
+      - jsonschema
+      - pydantic
      - transformers
\ No newline at end of file
diff --git a/azureml/pipelines/azureml_utils.py b/azureml/pipelines/azureml_utils.py
index 5483953..9dfce0d 100644
--- a/azureml/pipelines/azureml_utils.py
+++ b/azureml/pipelines/azureml_utils.py
@@ -15,15 +15,18 @@
 ALL_COMPONENTS = dict(
     jsonl_embeddings="jsonl_embeddings_aoai_component.yaml",
     jsonl_filter_correct_multiplechoice="jsonl_filter_correct_multiplechoice_component.yaml",
+    jsonl_gsm8k_fetch="jsonl_gsm8k_fetch_component.yaml",
     jsonl_guidance="jsonl_guidance_component.yaml",
     jsonl_key_filter="jsonl_key_filter_component.yaml",
     jsonl_key_rename="jsonl_key_rename_component.yaml",
     jsonl_knn_cosine_similarity="jsonl_knn_cosine_similarity_component.yaml",
     jsonl_mmlu_fetch="jsonl_mmlu_fetch_component.yaml",
     jsonl_random_examples="jsonl_random_examples_component.yaml",
+    jsonl_sample_lines="jsonl_sample_lines_component.yaml",
     jsonl_schema_checker="jsonl_schema_checker_component.yaml",
     jsonl_score_biosbias_json="jsonl_score_biosbias_json_component.yaml",
     jsonl_score_multiplechoice="jsonl_score_multiplechoice_component.yaml",
+    jsonl_score_numeric="jsonl_score_numeric_component.yaml",
     jsonl_to_json="jsonl_to_json_component.yaml",
     uri_folder_to_file="uri_folder_to_file_component.yaml",
 )
@@ -99,6 +102,12 @@ def prepare(self):
             environment=phi2_environment,
             version_string=self._version_string,
         )
+        self.jsonl_guidance_mistral7b = create_component_from_yaml(
+            self._client,
+            self._base_dir / "jsonl_guidance_mistral7b_component.yaml",
+            environment=phi2_environment,
+            version_string=self._version_string,
+        )
 
 
         _logger.info("Added all components")
diff --git a/azureml/pipelines/configs.py b/azureml/pipelines/configs.py
index 11b0e2a..7fadb45 100644
--- a/azureml/pipelines/configs.py
+++ b/azureml/pipelines/configs.py
@@ -31,6 +31,11 @@ class Phi2Config:
     compute_target: str = str()
 
 
+@dataclass
+class LlamaCppConfig:
+    compute_target: str = str()
+
+
 @dataclass
 class ZeroShotRunConfig:
     pipeline: PipelineConfig = field(default_factory=PipelineConfig)
@@ -129,3 +134,14 @@ class Phi2BiosBiasJSONPipelineConfig:
     biosbias_dataset: str = str()
     json_guidance_programs: list[str] = field(default_factory=list)
     phi2_config: Phi2Config = field(default_factory=Phi2Config)
+
+
+@dataclass
+class GSM8KZeroOrFewShotConfig:
+    pipeline: PipelineConfig = field(default_factory=PipelineConfig)
+    json_guidance_programs: list[str] = field(default_factory=list)
+    llamacpp_config: LlamaCppConfig = field(default_factory=LlamaCppConfig)
+    fewshot_random_seed: int = int()
+    n_fewshot: int = int()
+    sample_random_seed: int = int()
+    n_samples: int = int()
diff --git a/azureml/pipelines/configs/gsm8k_zeroorfewshot_config.yaml b/azureml/pipelines/configs/gsm8k_zeroorfewshot_config.yaml
new file mode 100644
index 0000000..20c1e59
--- /dev/null
+++ b/azureml/pipelines/configs/gsm8k_zeroorfewshot_config.yaml
@@ -0,0 +1,23 @@
+defaults:
+  - _self_
+  - aml_config
+  - aoai_config
+
+zeroorfewshot_config:
+  pipeline:
+    base_experiment_name: gsm8k_zeroorfewshot_debugging
+    tags:
+    default_compute_target: isolatedcompute
+  json_guidance_programs:
+    - gsm8k_zero_or_few_shot_plain.py
+    - gsm8k_zero_or_few_shot_regex_number.py
+    - gsm8k_zero_or_few_shot_basic_json.py
+    - gsm8k_zero_or_few_shot_json_response.py
+  llamacpp_config:
+    compute_target: gput4
+  fewshot_random_seed: 4521
+  n_fewshot: 5
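+  # Sub-sample the split so a debugging run stays cheap; the full
+  # GSM8K train split is roughly 7.5k questions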
+  sample_random_seed: 234891
+  n_samples: 400
\ No newline at end of file
diff --git a/azureml/pipelines/submit_gsm8k_zeroorfewshot.py b/azureml/pipelines/submit_gsm8k_zeroorfewshot.py
new file mode 100644
index 0000000..99a4569
--- /dev/null
+++ b/azureml/pipelines/submit_gsm8k_zeroorfewshot.py
@@ -0,0 +1,138 @@
+# Submit a run using:
+# python .\submit_gsm8k_zeroorfewshot.py -cn gsm8k_zeroorfewshot_config
+
+import time
+
+from dataclasses import dataclass
+
+import hydra
+from hydra.core.config_store import ConfigStore
+
+import omegaconf
+
+from azure.identity import DefaultAzureCredential
+
+from azure.ai.ml import dsl, Input, MLClient
+from azure.ai.ml.entities import Pipeline
+
+from azureml_utils import get_component_collector
+from configs import AMLConfig, GSM8KZeroOrFewShotConfig
+from constants import GUIDANCE_PROGRAMS_DIR
+from logging_utils import get_standard_logger_for_file
+
+_logger = get_standard_logger_for_file(__file__)
+
+
+@dataclass
+class PipelineConfig:
+    zeroorfewshot_config: GSM8KZeroOrFewShotConfig = omegaconf.MISSING
+    azureml_config: AMLConfig = omegaconf.MISSING
+
+
+cs = ConfigStore.instance()
+cs.store(name="config", node=PipelineConfig)
+
+
+def create_gsm8k_zeroshot_pipeline(
+    ml_client: MLClient, run_config: GSM8KZeroOrFewShotConfig, version_string: str
+):
+    components = get_component_collector(ml_client, version_string)
+
+    guidance_inputs = dict()
+    for prog_filename in run_config.json_guidance_programs:
+        k = prog_filename[0:-3]
+        v = Input(
+            type="uri_file",
+            path=GUIDANCE_PROGRAMS_DIR / prog_filename,
+            mode="download",
+        )
+        guidance_inputs[k] = v
+    _logger.info(f"Found {len(guidance_inputs)} guidance programs")
+
+    @dsl.pipeline()
+    def basic_pipeline() -> Pipeline:
+        gsm8k_fetch_job = components.jsonl_gsm8k_fetch()
+        gsm8k_fetch_job.name = "fetch_gsm8k"
+
+        split_outputs = dict()
+        for s in ["train", "test"]:
+            get_split_job = components.uri_folder_to_file(
+                input_dataset=gsm8k_fetch_job.outputs.output_dataset,
+                filename_pattern=f"{s}.jsonl",
+            )
+            get_split_job.name = f"extract_split_{s}"
+            split_outputs[s] = get_split_job.outputs.output_dataset
+
+        sample_lines_job = components.jsonl_sample_lines(
+            input_dataset=split_outputs["train"],
+            n_samples=run_config.n_samples,
+            random_seed=run_config.sample_random_seed,
+        )
+        sample_lines_job.name = f"sample_{run_config.n_samples}_lines"
+
+        random_examples_job = components.jsonl_random_examples(
+            input_dataset=sample_lines_job.outputs.output_dataset,
+            example_dataset=split_outputs["test"],
+            output_key="examples",
+            num_examples=run_config.n_fewshot,
+            random_seed=run_config.fewshot_random_seed,
+        )
+        random_examples_job.name = "add_random_examples"
+
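+        # Fan out: one guidance job plus one scoring job per guidance
+        # program, all consuming the same few-shot-augmented sample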
+        for progname, prog_input in guidance_inputs.items():
+            guidance_job = components.jsonl_guidance_mistral7b(
+                guidance_program=prog_input,
+                input_dataset=random_examples_job.outputs.output_dataset,
+            )
+            guidance_job.compute = run_config.llamacpp_config.compute_target
+            guidance_job.name = f"guidance_mistral7b_{progname}"
+
+            score_job = components.jsonl_score_numeric(
+                input_dataset=guidance_job.outputs.output_dataset,
+                correct_key="answer",
+                response_key="zero_or_few_shot_answer",
+            )
+            score_job.name = f"score_{progname}"
+
+    pipeline = basic_pipeline()
+    pipeline.experiment_name = run_config.pipeline.base_experiment_name
+    pipeline.display_name = None
+    pipeline.compute = run_config.pipeline.default_compute_target
+    if run_config.pipeline.tags:
+        pipeline.tags.update(run_config.pipeline.tags)
+    _logger.info("Pipeline created")
+
+    return pipeline
+
+
+@hydra.main(config_path="configs", version_base="1.1")
+def main(config: PipelineConfig):
+    version_string = str(int(time.time()))
+    _logger.info(f"AzureML object version for this run: {version_string}")
+
+    _logger.info(f"Azure Subscription: {config.azureml_config.subscription_id}")
+    _logger.info(f"Resource Group: {config.azureml_config.resource_group}")
+    _logger.info(f"Workspace : {config.azureml_config.workspace_name}")
+
+    credential = DefaultAzureCredential(exclude_shared_token_cache_credential=True)
+
+    ws_client = MLClient(
+        credential=credential,
+        subscription_id=config.azureml_config.subscription_id,
+        resource_group_name=config.azureml_config.resource_group,
+        workspace_name=config.azureml_config.workspace_name,
+        logging_enable=False,
+    )
+
+    pipeline = create_gsm8k_zeroshot_pipeline(
+        ws_client, config.zeroorfewshot_config, version_string
+    )
+    _logger.info("Submitting pipeline")
+    submitted_job = ws_client.jobs.create_or_update(pipeline)
+    _logger.info(f"Submitted: {submitted_job.name}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/guidance_programs/gsm8k_zero_or_few_shot_basic_json.py b/guidance_programs/gsm8k_zero_or_few_shot_basic_json.py
new file mode 100644
index 0000000..499a15e
--- /dev/null
+++ b/guidance_programs/gsm8k_zero_or_few_shot_basic_json.py
@@ -0,0 +1,72 @@
+# This is a very naive guidance program for GSM8K
+
+import logging
+import sys
+
+from typing import Any, Dict
+
+import guidance
+
+
+_logger = logging.getLogger(__file__)
+_logger.setLevel(logging.INFO)
+_logger.addHandler(logging.StreamHandler(stream=sys.stdout))
+
+
+@guidance
+def zero_shot_gsm8k(
+    lm: guidance.models.Instruct,
+    question: str,
+    examples: list[dict[str, Any]] | None,
+):
+    # Some general instruction to the model
+    lm += """You are taking a maths test\n\n"""
+
+    # Show the few shots
+    for e in examples:
+        lm += f"Question: {e['question']}\n"
+        lm += f"Reasoning:\n"
+        for i, t in enumerate(e["thoughts"]):
+            lm += f"{i+1}. {t['step']}"
+            if "result" in t:
+                lm += " "
+                lm += t["calculation"]
+                lm += t["result"]
+            lm += "\n"
+        lm += f"Answer: {e['answer']}"
+        lm += "\n"
+        lm += "\n"
+
+    # Now ask the question
+    lm += f"Question: {question}\n"
+    lm += f"Reasoning:"
+    lm += guidance.gen("reasons", max_tokens=100)
+    lm += "\n"
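+    # guidance.json constrains decoding to a value matching the schema,
+    # so the model can only emit a bare JSON number after 'Answer: '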
+    lm += f"Answer: " + guidance.json(
+        name="result_string", schema=dict(type="number")
+    )
+
+    return lm
+
+
+def guidance_generation(
+    lm: guidance.models.Chat,
+    input: Dict[str, Any],
+    common: list[dict[str, Any]] | None = None,
+) -> Dict[str, Any]:
+    _logger.debug("Starting guidance_generation")
+    if common:
+        raise ValueError("Common Data not supported!")
+
+    result = lm + zero_shot_gsm8k(
+        question=input["question"], examples=input["examples"]
+    )
+
+    _logger.info(f"result_string: {result['result_string']}")
+
+    float_result = float(result["result_string"])
+
+    result = dict(zero_or_few_shot_answer=float_result, final_lm=str(result))
+    return result
diff --git a/guidance_programs/gsm8k_zero_or_few_shot_json_response.py b/guidance_programs/gsm8k_zero_or_few_shot_json_response.py
new file mode 100644
index 0000000..2d58c30
--- /dev/null
+++ b/guidance_programs/gsm8k_zero_or_few_shot_json_response.py
@@ -0,0 +1,94 @@
+# This is a very naive guidance program for GSM8K
+
+import json
+import logging
+import sys
+
+from typing import Any, Dict
+
+from jsonschema import validate
+
+import guidance
+
+
+_logger = logging.getLogger(__file__)
+_logger.setLevel(logging.INFO)
+_logger.addHandler(logging.StreamHandler(stream=sys.stdout))
+
+
+@guidance
+def zero_shot_gsm8k(
+    lm: guidance.models.Instruct,
+    question: str,
+    examples: list[dict[str, Any]] | None,
+):
+    # Some general instruction to the model
+    lm += """You are taking a maths test\n\n"""
+
+    response_schema = dict(
+        type="object",
+        properties=dict(
+            thoughts=dict(
+                type="array",
+                items=dict(
+                    type="object",
+                    properties=dict(
+                        step=dict(type="string"),
+                        calculation=dict(type="string"),
+                        result=dict(type="string"),
+                    ),
+                ),
+            ),
+            result=dict(type="number"),
+        ),
+    )
+
+    # Show the few shots
+    for e in examples:
+        lm += f"Question: {e['question']}\n"
+
+        nxt_obj = dict(thoughts=[])
+        for t in e["thoughts"]:
+            nxt_thought = dict(step=t["step"], calculation="", result="")
+            if "result" in t:
+                nxt_thought["calculation"] = t["calculation"]
+                nxt_thought["result"] = t["result"]
+            nxt_obj["thoughts"].append(nxt_thought)
+        nxt_obj["result"] = e["answer"]
+
+        validate(nxt_obj, schema=response_schema)
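+        # NOTE: _to_compact_json is a private guidance helper, used so the
+        # examples match the compact layout guidance.json() will generate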
+        lm += guidance.library._json._to_compact_json(nxt_obj)
+        lm += "\n\n"
+
+    # Now ask the question
+    lm += f"Question: {question}\n"
+    lm += guidance.json(name="response_json", schema=response_schema)
+
+    return lm
+
+
+def guidance_generation(
+    lm: guidance.models.Chat,
+    input: Dict[str, Any],
+    common: list[dict[str, Any]] | None = None,
+) -> Dict[str, Any]:
+    _logger.debug("Starting guidance_generation")
+    if common:
+        raise ValueError("Common Data not supported!")
+
+    llm_result = lm + zero_shot_gsm8k(
+        question=input["question"], examples=input["examples"]
+    )
+
+    _logger.info(f"result_string: {llm_result['response_json']}")
+
+    loaded_obj = json.loads(llm_result["response_json"])
+
+    result = dict(
+        zero_or_few_shot_answer=loaded_obj["result"],
+        zero_or_few_shot_thoughts=loaded_obj["thoughts"],
+        final_lm=str(llm_result),
+    )
+    return result
diff --git a/guidance_programs/gsm8k_zero_or_few_shot_plain.py b/guidance_programs/gsm8k_zero_or_few_shot_plain.py
new file mode 100644
index 0000000..aa0bd90
--- /dev/null
+++ b/guidance_programs/gsm8k_zero_or_few_shot_plain.py
@@ -0,0 +1,68 @@
+# This is a very naive guidance program for GSM8K
+
+import logging
+import sys
+
+from typing import Any, Dict
+
+import guidance
+
+
+_logger = logging.getLogger(__file__)
+_logger.setLevel(logging.INFO)
+_logger.addHandler(logging.StreamHandler(stream=sys.stdout))
+
+
+@guidance
+def zero_shot_gsm8k(
+    lm: guidance.models.Instruct,
+    question: str,
+    examples: list[dict[str, Any]] | None,
+):
+    # Some general instruction to the model
+    lm += """You are taking a maths test\n\n"""
+
+    # Show the few shots
+    for e in examples:
+        lm += f"Question: {e['question']}\n"
+        lm += f"Reasoning:\n"
+        for i, t in enumerate(e["thoughts"]):
+            lm += f"{i+1}. {t['step']}"
+            if "result" in t:
+                lm += " "
+                lm += t["calculation"]
+                lm += t["result"]
+            lm += "\n"
+        lm += f"Answer: {e['answer']}\n"
+        lm += "\n"
+        lm += "\n"
+
+    # Now ask the question
+    lm += f"Question: {question}\n"
+    lm += f"Reasoning:\n"
+    lm += guidance.gen("reasons", max_tokens=100, stop="\n")
+    lm += "\n"
+    lm += f"Answer: " + guidance.gen(name="result_string", max_tokens=10, stop="\n")
+
+    return lm
+
+
+def guidance_generation(
+    lm: guidance.models.Chat,
+    input: Dict[str, Any],
+    common: list[dict[str, Any]] | None = None,
+) -> Dict[str, Any]:
+    _logger.debug("Starting guidance_generation")
+    if common:
+        raise ValueError("Common Data not supported!")
+
+    result = lm + zero_shot_gsm8k(
+        question=input["question"], examples=input["examples"]
+    )
+
+    _logger.info(f"result_string: {result['result_string']}")
+
+    float_result = float(result["result_string"])
+
+    result = dict(zero_or_few_shot_answer=float_result, final_lm=str(result))
+    return result
diff --git a/guidance_programs/gsm8k_zero_or_few_shot_regex_number.py b/guidance_programs/gsm8k_zero_or_few_shot_regex_number.py
new file mode 100644
index 0000000..ad07bca
--- /dev/null
+++ b/guidance_programs/gsm8k_zero_or_few_shot_regex_number.py
@@ -0,0 +1,72 @@
+# This is a very naive guidance program for GSM8K
+
+import logging
+import sys
+
+from typing import Any, Dict
+
+import guidance
+
+
+_logger = logging.getLogger(__file__)
+_logger.setLevel(logging.INFO)
+_logger.addHandler(logging.StreamHandler(stream=sys.stdout))
+
+
+@guidance
+def zero_shot_gsm8k(
+    lm: guidance.models.Instruct,
+    question: str,
+    examples: list[dict[str, Any]] | None,
+):
+    # Some general instruction to the model
+    lm += """You are taking a maths test\n\n"""
+
+    # Show the few shots
+    for e in examples:
+        lm += f"Question: {e['question']}\n"
+        lm += f"Reasoning:\n"
+        for i, t in enumerate(e["thoughts"]):
+            lm += f"{i+1}. {t['step']}"
+            if "result" in t:
+                lm += " "
+                lm += t["calculation"]
+                lm += t["result"]
+            lm += "\n"
+        lm += f"Answer: {e['answer']}"
+        lm += "\n"
+        lm += "\n"
+
+    # Now ask the question
+    lm += f"Question: {question}\n"
+    lm += f"Reasoning:"
+    lm += guidance.gen("reasons", max_tokens=100)
+    lm += "\n"
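+    # Constrain decoding with a regex so the answer is a plain decimal
+    # number (optionally negative) that float() below can parse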
+    lm += f"Answer: " + guidance.gen(
+        name="result_string", regex=r"-?\d+\.?\d*", stop="\n"
+    )
+
+    return lm
+
+
+def guidance_generation(
+    lm: guidance.models.Chat,
+    input: Dict[str, Any],
+    common: list[dict[str, Any]] | None = None,
+) -> Dict[str, Any]:
+    _logger.debug("Starting guidance_generation")
+    if common:
+        raise ValueError("Common Data not supported!")
+
+    result = lm + zero_shot_gsm8k(
+        question=input["question"], examples=input["examples"]
+    )
+
+    _logger.info(f"result_string: {result['result_string']}")
+
+    float_result = float(result["result_string"])
+
+    result = dict(zero_or_few_shot_answer=float_result, final_lm=str(result))
+    return result