diff --git a/azureml/components/jsonl_gsm8k_fetch_component.yaml b/azureml/components/jsonl_gsm8k_fetch_component.yaml
new file mode 100644
index 0000000..5d2fdeb
--- /dev/null
+++ b/azureml/components/jsonl_gsm8k_fetch_component.yaml
@@ -0,0 +1,32 @@
+$schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json
+
+name: jsonl_gsm8k_fetch
+version: 0.0.1pre1
+display_name: JSONL GSM8K Fetcher
+type: command
+description: Fetches the GSM8K dataset and formats it into JSONL
+is_deterministic: true
+
+inputs:
+  output_encoding:
+    type: string
+    optional: false
+    default: utf-8-sig
+    description: Encoding format of the output datasets
+
+outputs:
+  output_dataset:
+    type: uri_folder
+    description: |
+      Folder which will contain 'train.jsonl' and 'test.jsonl'
+
+code: ./src/
+
+command: >-
+  python ./jsonl_gsm8k_fetch.py
+  --output_encoding ${{ inputs.output_encoding }}
+  --output_dataset ${{ outputs.output_dataset }}
+
+environment:
+  # Will be updated when component uploads
+  image: azureml:promptbase_aml@latest
\ No newline at end of file
diff --git a/azureml/components/jsonl_guidance_mistral7b_component.yaml b/azureml/components/jsonl_guidance_mistral7b_component.yaml
new file mode 100644
index 0000000..c451731
--- /dev/null
+++ b/azureml/components/jsonl_guidance_mistral7b_component.yaml
@@ -0,0 +1,92 @@
+$schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json
+
+name: jsonl_guidance_mistral7b
+version: 0.0.1pre1
+display_name: JSONL Guidance Mistral7B
+type: command
+description: Runs a supplied Guidance program on every line of a JSONL file via Mistral7B
+is_deterministic: false
+
+inputs:
+  guidance_program:
+    type: uri_file
+    optional: false
+    description: Python file containing the guidance program
+  input_dataset:
+    type: uri_file
+    optional: false
+    description: Dataset containing JSONL input
+  input_encoding:
+    type: string
+    optional: false
+    default: utf-8-sig
+    description: Encoding format of the input dataset
+  common_dataset:
+    type: uri_file
+    optional: true
+    description: Dataset containing data to be shared with all rows in input
+  common_encoding:
+    type: string
+    optional: true
+    default: utf-8-sig
+    description: Encoding format of the common dataset
+  output_encoding:
+    type: string
+    optional: false
+    default: utf-8-sig
+    description: Encoding format of the output dataset
+  error_encoding:
+    type: string
+    optional: false
+    default: utf-8-sig
+    description: Encoding format of the error dataset
+
+outputs:
+  output_dataset:
+    type: uri_file
+    description: JSONL file
+  error_dataset:
+    type: uri_file
+    description: JSONL file containing failed lines
+
+code: ./src/
+
+command: |
+  # Install Rust toolchain
+  #apt update
+  #apt upgrade -y
+  #apt install -y rustc build-essential
+  #pip install setup-rust
+  # Download the zip
+  wget https://github.com/guidance-ai/guidance/archive/refs/heads/main.zip
+  echo
+  ls
+  echo
+  # Unzip
+  unzip ./main.zip
+  echo
+  ls -p
+  echo
+  # Install from download
+  pip install --upgrade ./guidance-main/
+  echo
+  # Install LlamaCpp
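+  # The LLAMA_CUBLAS flag builds llama-cpp-python against CUDA so the
+  # GGUF model can be offloaded to the GPU at inference time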
+  CMAKE_ARGS="-DLLAMA_CUBLAS=on" pip install "llama-cpp-python<0.2.58"
+  echo
+  # Run the script
+  python ./jsonl_guidance_mistral7b.py \
+    --guidance_program ${{ inputs.guidance_program }} \
+    --input_dataset ${{ inputs.input_dataset }} \
+    --input_encoding ${{ inputs.input_encoding }} \
+    $[[--common_dataset ${{ inputs.common_dataset }} ]] \
+    $[[--common_encoding ${{ inputs.common_encoding }} ]] \
+    --output_dataset ${{ outputs.output_dataset }} \
+    --output_encoding ${{ inputs.output_encoding }} \
+    --error_dataset ${{ outputs.error_dataset }} \
+    --error_encoding ${{ inputs.error_encoding }}
+
+environment:
+  # Will be updated when component uploads
+  image: azureml:guidance_phi2_env@latest
\ No newline at end of file
diff --git a/azureml/components/jsonl_sample_lines_component.yaml b/azureml/components/jsonl_sample_lines_component.yaml
new file mode 100644
index 0000000..5c23a68
--- /dev/null
+++ b/azureml/components/jsonl_sample_lines_component.yaml
@@ -0,0 +1,53 @@
+$schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json
+
+name: jsonl_sample_lines
+version: 0.0.1pre1
+display_name: 'JSONL Sample Lines'
+type: command
+description: |
+  Samples lines (without replacement) from a JSONL file
+is_deterministic: true
+
+inputs:
+  input_dataset:
+    type: uri_file
+    optional: false
+    description: Dataset containing JSONL input
+  input_encoding:
+    type: string
+    optional: false
+    default: utf-8-sig
+    description: Encoding format of the input dataset
+  n_samples:
+    type: integer
+    optional: false
+    description: Number of samples required
+  random_seed:
+    type: integer
+    optional: false
+    description: Seed for Python's PRNG
+  output_encoding:
+    type: string
+    optional: false
+    default: utf-8-sig
+    description: Encoding format of the output dataset
+
+outputs:
+  output_dataset:
+    type: uri_file
+    description: Dataset containing sampled JSONL
+
+code: ./src
+
+command: >-
+  python ./jsonl_sample_lines.py
+  --input_dataset ${{ inputs.input_dataset }}
+  --input_encoding ${{ inputs.input_encoding }}
+  --n_samples ${{ inputs.n_samples }}
+  --random_seed ${{ inputs.random_seed }}
+  --output_dataset ${{ outputs.output_dataset }}
+  --output_encoding ${{ inputs.output_encoding }}
+
+environment:
+  # Will be updated when component uploads
+  image: azureml:promptbase_aml@latest
\ No newline at end of file
diff --git a/azureml/components/jsonl_score_numeric_component.yaml b/azureml/components/jsonl_score_numeric_component.yaml
new file mode 100644
index 0000000..6098006
--- /dev/null
+++ b/azureml/components/jsonl_score_numeric_component.yaml
@@ -0,0 +1,56 @@
+$schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json
+
+name: jsonl_score_numeric
+version: 0.0.1pre1
+display_name: JSONL Numeric Scorer
+type: command
+description: |
+  Takes a JSONL file of numeric questions and correct answers and responses
+  from a model, and produces the overall score.
+  Results are stored in JSON.
+is_deterministic: true
+
+inputs:
+  input_dataset:
+    type: uri_file
+    optional: false
+    description: Dataset containing JSONL input
+  input_encoding:
+    type: string
+    optional: false
+    default: utf-8-sig
+    description: Encoding format of the input dataset
+  correct_key:
+    type: string
+    optional: false
+    description: Which key contains the correct answer
+  response_key:
+    type: string
+    optional: false
+    description: Which key contains the answer produced by the model
+  output_encoding:
+    type: string
+    optional: false
+    default: utf-8-sig
+    description: Encoding format of the output dataset
+
+outputs:
+  output_dataset:
+    type: uri_file
+    description: JSON file containing score summary
+
+
+code: ./src/
+
+command: >-
+  python ./jsonl_score_numeric.py
+  --input_dataset ${{ inputs.input_dataset }}
+  --input_encoding ${{ inputs.input_encoding }}
+  --output_dataset ${{ outputs.output_dataset }}
+  --output_encoding ${{ inputs.output_encoding }}
+  --correct_key ${{ inputs.correct_key }}
+  --response_key ${{ inputs.response_key }}
+
+environment:
+  # Will be updated when component uploads
+  image: azureml:promptbase_aml@latest
\ No newline at end of file
diff --git a/azureml/components/src/jsonl_gsm8k_fetch.py b/azureml/components/src/jsonl_gsm8k_fetch.py
new file mode 100644
index 0000000..01cb15d
--- /dev/null
+++ b/azureml/components/src/jsonl_gsm8k_fetch.py
@@ -0,0 +1,99 @@
+import argparse
+import json
+import pathlib
+import re
+
+from typing import Any, Dict
+
+import requests
+
+
+from aether_utils.jsonl_file_utils import JSONLWriter
+from aether_utils.logging_utils import get_standard_logger_for_file
+
+_logger = get_standard_logger_for_file(__file__)
+
+BASE_DATA_URL = "https://raw.githubusercontent.com/openai/grade-school-math/master/grade_school_math/data/"
+
+SPLITS = ["train", "test"]
+
+
+def parse_args():
+    parser = argparse.ArgumentParser(add_help=True)
+
+    # Information about the ports
+    ports_group = parser.add_argument_group("Ports")
+    ports_group.add_argument("--output_dataset", type=pathlib.Path, required=True)
+    ports_group.add_argument("--output_encoding", type=str, required=True)
+
+    args = parser.parse_args()
+    return args
+
+
+def extract_thought_parts(thought: str) -> Dict[str, Any]:
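+    # GSM8K reasoning steps embed calculator annotations of the form
+    # 'step text<<expression=value>>result text'; split a step into
+    # those three pieces so the guidance programs can reuse them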
+    thought_re = r"(.*)<<(.*=.*)>>(.*)"
+    match = re.match(thought_re, thought)
+
+    result = dict()
+    if match:
+        result["step"] = match.group(1)
+        result["calculation"] = match.group(2)
+        result["result"] = match.group(3)
+    else:
+        result["step"] = thought
+    return result
+
+
+def process_line(item: Dict[str, Any]) -> Dict[str, Any]:
+    result = dict()
+    _logger.debug(f"Processing {item}")
+
+    result["question"] = item["question"]
+
+    # The answer embeds a chain of thought and the
+    # numeric result
+    split_answer = item["answer"].split("####")
+
+    result["thoughts"] = []
+    for thought in split_answer[0].splitlines():
+        result["thoughts"].append(extract_thought_parts(thought))
+
+    # The following is not how you're supposed to handle
+    # numbers with thousand separators.
+    # This is a workaround, pending three-way negotiations
+    # with locale.atof() and the AzureML compute nodes
+    result["answer"] = float(split_answer[1].replace(",", ""))
+
+    return result
+
+
+def main():
+    args = parse_args()
+
+    for split in SPLITS:
+        _logger.info(f"Starting split {split}")
+        line_count = 0
+        target_url = f"{BASE_DATA_URL}{split}.jsonl"
+
+        _logger.info(f"Fetching {target_url}")
+        response = requests.get(target_url)
+        assert response.status_code == 200, f"Got response {response}"
+
+        with JSONLWriter(
+            args.output_dataset / f"{split}.jsonl", args.output_encoding
+        ) as jlw:
+            for line in response.text.splitlines():
+                nxt_item = json.loads(line)
+                output_item = process_line(nxt_item)
+                jlw.write_line(output_item)
+                line_count += 1
+        _logger.info(f"Completed split {split} ({line_count} lines)")
+
+    _logger.info("Complete")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/azureml/components/src/jsonl_guidance_mistral7b.py b/azureml/components/src/jsonl_guidance_mistral7b.py
new file mode 100644
index 0000000..7fa5063
--- /dev/null
+++ b/azureml/components/src/jsonl_guidance_mistral7b.py
@@ -0,0 +1,136 @@
+import argparse
+import importlib.util
+import json
+import pathlib
+import time
+
+from typing import Any, Callable, Dict
+
+import guidance
+
+from huggingface_hub import hf_hub_download
+
+import mlflow
+
+from aether_utils.jsonl_utils import line_map
+from aether_utils.logging_utils import get_standard_logger_for_file
+
+
+_logger = get_standard_logger_for_file(__file__)
+
+USER_MODULE = "user_module"
+GUIDANCE_FUNCTION = "guidance_generation"
+
+
+def parse_args():
+    parser = argparse.ArgumentParser(add_help=True)
+
+    # Information about the datasets
+    datasets_group = parser.add_argument_group("Datasets")
+    datasets_group.add_argument("--input_dataset", type=pathlib.Path, required=True)
+    datasets_group.add_argument("--input_encoding", type=str, required=True)
+    datasets_group.add_argument("--output_dataset", type=pathlib.Path, required=True)
+    datasets_group.add_argument("--output_encoding", type=str, required=True)
+    datasets_group.add_argument("--error_dataset", type=pathlib.Path, required=True)
+    datasets_group.add_argument("--error_encoding", type=str, required=True)
+    datasets_group.add_argument(
+        "--common_dataset", type=pathlib.Path, required=False, default=None
+    )
+    datasets_group.add_argument("--common_encoding", type=str, required=False)
+
+    # Information about the guidance program
+    parser.add_argument("--guidance_program", type=pathlib.Path, required=True)
+
+    args = parser.parse_args()
+    return args
+
+
+class LLMProcessor:
+    def __init__(
+        self,
+        program_path,
+        model: guidance.models.Model,
+        common_data: dict[str, Any] | None,
+    ):
+        self._program_path = program_path
+        self._model = model
+        self._guidance_function = self._get_guidance_function()
+        self._common_data = common_data
+        self._step = 0
+
+    def __call__(self, item: Dict[str, Any]) -> Dict[str, Any]:
+        _logger.debug(f"__call__: {item}")
+        start = time.time()
+        result = self._guidance_function(self._model, item, common=self._common_data)
+        stop = time.time()
+        mlflow.log_metric("time_taken", value=stop - start, step=self._step)
+        _logger.debug("Checking keys")
+        for k in result.keys():
+            assert k not in item, f"Duplicate key: {k}"
+
+        _logger.debug("Updating item")
+        item.update(**result)
+        self._step += 1
+
+        return item
+
+    def _get_guidance_function(
+        self,
+    ) -> Callable[..., Dict[str, Any]]:
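+        # Load the user-supplied guidance program as a Python module and
+        # pull out the entry point it must define (guidance_generation)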
+        _logger.debug("Importing guidance file")
+        spec = importlib.util.spec_from_file_location(USER_MODULE, self._program_path)
+        module_definition = importlib.util.module_from_spec(spec)
+        spec.loader.exec_module(module_definition)
+
+        guidance_func = getattr(module_definition, GUIDANCE_FUNCTION)
+
+        return guidance_func
+
+
+def main():
+    args = parse_args()
+
+    # Load the common data (if required)
+    common_data = None
+    if args.common_dataset is not None:
+        _logger.info("Loading common dataset")
+        with open(args.common_dataset, "r", encoding=args.common_encoding) as jf:
+            common_data = json.load(jf)
+    else:
+        _logger.info("No common dataset present")
+
+    repo_id = "TheBloke/Mistral-7B-Instruct-v0.2-GGUF"
+    filename = "mistral-7b-instruct-v0.2.Q8_0.gguf"
+    downloaded_file = hf_hub_download(repo_id=repo_id, filename=filename)
+
+    guidance_model = guidance.models.LlamaCpp(
+        downloaded_file, verbose=True, n_gpu_layers=-1, n_ctx=4096
+    )
+    # _logger.info(f"guidance_model.device: {guidance_model.engine.device}")
+
+    processor = LLMProcessor(
+        program_path=args.guidance_program,
+        model=guidance_model,
+        common_data=common_data,
+    )
+
+    _logger.info("Starting to process input")
+    s, f = line_map(
+        map_func=processor,
+        source_file=args.input_dataset,
+        dest_file=args.output_dataset,
+        source_encoding=args.input_encoding,
+        dest_encoding=args.output_encoding,
+        # Route failed lines to the error output declared by the component;
+        # this assumes line_map accepts error_file/error_encoding arguments
+        error_file=args.error_dataset,
+        error_encoding=args.error_encoding,
+    )
+
+    _logger.info(f"Complete with {s} successes and {f} failures")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/azureml/components/src/jsonl_sample_lines.py b/azureml/components/src/jsonl_sample_lines.py
new file mode 100644
index 0000000..224c877
--- /dev/null
+++ b/azureml/components/src/jsonl_sample_lines.py
@@ -0,0 +1,50 @@
+import argparse
+import pathlib
+import random
+
+from aether_utils.jsonl_file_utils import load_jsonl, save_jsonl
+from aether_utils.logging_utils import get_standard_logger_for_file
+
+_logger = get_standard_logger_for_file(__file__)
+
+
+def parse_args():
+    parser = argparse.ArgumentParser(add_help=True)
+
+    # Information about the datasets
+    datasets_group = parser.add_argument_group("Datasets")
+    datasets_group.add_argument("--input_dataset", type=pathlib.Path, required=True)
+    datasets_group.add_argument("--input_encoding", type=str, required=True)
+    datasets_group.add_argument("--output_dataset", type=pathlib.Path, required=True)
+    datasets_group.add_argument("--output_encoding", type=str, required=True)
+
+    # Information about the sampling
+    sampling_group = parser.add_argument_group("Sampling")
+    sampling_group.add_argument("--n_samples", type=int, required=True)
+    sampling_group.add_argument("--random_seed", type=int, required=True)
+
+    args = parser.parse_args()
+    return args
+
+
+def main():
+    args = parse_args()
+
+    _logger.info("Loading input")
+    all_data = load_jsonl(args.input_dataset, args.input_encoding)
+    _logger.info(f"Loaded {len(all_data)} items")
+
+    random.seed(args.random_seed)
+    sampled_data = random.sample(all_data, k=args.n_samples)
+
+    _logger.info("Saving output")
+    save_jsonl(
+        file_path=args.output_dataset,
+        data=sampled_data,
+        destination_encoding=args.output_encoding,
+    )
+    _logger.info("Done")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/azureml/components/src/jsonl_score_numeric.py b/azureml/components/src/jsonl_score_numeric.py
new file mode 100644
index 0000000..c5dd211
--- /dev/null
+++ b/azureml/components/src/jsonl_score_numeric.py
@@ -0,0 +1,87 @@
+import argparse
+import json
+import pathlib
+
+from typing import Any
+
+import mlflow
+
+from aether_utils.jsonl_utils import line_reduce
+from aether_utils.logging_utils import get_standard_logger_for_file
+
+_logger = get_standard_logger_for_file(__file__)
+
+
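+# Accumulates (correct, response) pairs one JSONL line at a time, so
+# line_reduce can stream the whole file through a single Scorer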
+class Scorer:
+    def __init__(self, correct_key: str, response_key: str):
+        self.y_true = []
+        self.y_pred = []
+        self.correct_key = correct_key
+        self.response_key = response_key
+
+    def __call__(self, line: dict[str, Any]):
+        correct_answer = line[self.correct_key]
+        response_answer = line[self.response_key]
+        self.y_true.append(correct_answer)
+        self.y_pred.append(response_answer)
+
+    def generate_summary(self) -> dict[str, Any]:
+        result = dict()
+
+        result["n_answers"] = len(self.y_true)
+        n_correct = 0
+        for y_t, y_p in zip(self.y_true, self.y_pred):
+            if y_t == y_p:
+                n_correct += 1
+        result["n_correct"] = n_correct
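+        # Guard against an empty input file when computing accuracy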
+        result["accuracy"] = 0
+        if len(self.y_true) > 0:
+            result["accuracy"] = float(n_correct) / len(self.y_true)
+
+        return result
+
+
+def parse_args():
+    parser = argparse.ArgumentParser(add_help=True)
+
+    # Information about the ports
+    ports_group = parser.add_argument_group("Ports")
+    ports_group.add_argument("--input_dataset", type=pathlib.Path, required=True)
+    ports_group.add_argument("--input_encoding", type=str, required=True)
+    ports_group.add_argument("--output_dataset", type=pathlib.Path, required=True)
+    ports_group.add_argument("--output_encoding", type=str, required=True)
+
+    # Information about the keys
+    keys_group = parser.add_argument_group("Keys")
+    keys_group.add_argument("--correct_key", type=str, required=True)
+    keys_group.add_argument("--response_key", type=str, required=True)
+
+    args = parser.parse_args()
+
+    return args
+
+
+def main():
+    args = parse_args()
+
+    scorer = Scorer(correct_key=args.correct_key, response_key=args.response_key)
+    line_reduce(
+        reducer=scorer,
+        source_file=args.input_dataset,
+        source_encoding=args.input_encoding,
+    )
+    summary = scorer.generate_summary()
+
+    _logger.info("Logging with mlflow")
+    mlflow.log_metrics(summary)
+
+    _logger.info("Writing output file")
+    with open(args.output_dataset, encoding=args.output_encoding, mode="w") as jf:
+        json.dump(summary, jf, indent=4)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/azureml/environments/phi2transformer-env.yaml b/azureml/environments/phi2transformer-env.yaml
index f31b249..fe96e88 100644
--- a/azureml/environments/phi2transformer-env.yaml
+++ b/azureml/environments/phi2transformer-env.yaml
@@ -8,8 +8,11 @@ image: mcr.microsoft.com/azureml/minimal-ubuntu22.04-py39-cuda11.8-gpu-inference
 conda_file:
   channels:
     - defaults
+    - conda-forge
   dependencies:
     - python=3.11
+    # Rust is now part of building the guidance wheel
+    - rust
    - pip
    - pip:
      # Note that we have to force torch to install from this index
@@ -19,6 +22,9 @@ conda_file:
      # ... so we have to add PyPI back in as an alternative index
      - --extra-index-url https://pypi.org/simple
      - accelerate
+      - azureml-mlflow
      - aether-utils==0.0.1.dev1
      - guidance>=0.1.13
+      - jsonschema
+      - pydantic
      - transformers
\ No newline at end of file
diff --git a/azureml/pipelines/azureml_utils.py b/azureml/pipelines/azureml_utils.py
index 5483953..9dfce0d 100644
--- a/azureml/pipelines/azureml_utils.py
+++ b/azureml/pipelines/azureml_utils.py
@@ -15,15 +15,18 @@
 ALL_COMPONENTS = dict(
     jsonl_embeddings="jsonl_embeddings_aoai_component.yaml",
     jsonl_filter_correct_multiplechoice="jsonl_filter_correct_multiplechoice_component.yaml",
+    jsonl_gsm8k_fetch="jsonl_gsm8k_fetch_component.yaml",
     jsonl_guidance="jsonl_guidance_component.yaml",
     jsonl_key_filter="jsonl_key_filter_component.yaml",
     jsonl_key_rename="jsonl_key_rename_component.yaml",
     jsonl_knn_cosine_similarity="jsonl_knn_cosine_similarity_component.yaml",
     jsonl_mmlu_fetch="jsonl_mmlu_fetch_component.yaml",
     jsonl_random_examples="jsonl_random_examples_component.yaml",
+    jsonl_sample_lines="jsonl_sample_lines_component.yaml",
     jsonl_schema_checker="jsonl_schema_checker_component.yaml",
     jsonl_score_biosbias_json="jsonl_score_biosbias_json_component.yaml",
     jsonl_score_multiplechoice="jsonl_score_multiplechoice_component.yaml",
+    jsonl_score_numeric="jsonl_score_numeric_component.yaml",
     jsonl_to_json="jsonl_to_json_component.yaml",
     uri_folder_to_file="uri_folder_to_file_component.yaml",
 )
@@ -99,6 +102,12 @@ def prepare(self):
             environment=phi2_environment,
             version_string=self._version_string,
         )
+        self.jsonl_guidance_mistral7b = create_component_from_yaml(
+            self._client,
+            self._base_dir / "jsonl_guidance_mistral7b_component.yaml",
+            environment=phi2_environment,
+            version_string=self._version_string,
+        )
 
 
         _logger.info("Added all components")
diff --git a/azureml/pipelines/configs.py b/azureml/pipelines/configs.py
index 11b0e2a..7fadb45 100644
--- a/azureml/pipelines/configs.py
+++ b/azureml/pipelines/configs.py
@@ -31,6 +31,11 @@ class Phi2Config:
     compute_target: str = str()
 
 
+@dataclass
+class LlamaCppConfig:
+    compute_target: str = str()
+
+
 @dataclass
 class ZeroShotRunConfig:
     pipeline: PipelineConfig = field(default_factory=PipelineConfig)
@@ -129,3 +134,14 @@ class Phi2BiosBiasJSONPipelineConfig:
     biosbias_dataset: str = str()
     json_guidance_programs: list[str] = field(default_factory=list)
     phi2_config: Phi2Config = field(default_factory=Phi2Config)
+
+
+@dataclass
+class GSM8KZeroOrFewShotConfig:
+    pipeline: PipelineConfig = field(default_factory=PipelineConfig)
+    json_guidance_programs: list[str] = field(default_factory=list)
+    llamacpp_config: LlamaCppConfig = field(default_factory=LlamaCppConfig)
+    fewshot_random_seed: int = int()
+    n_fewshot: int = int()
+    sample_random_seed: int = int()
+    n_samples: int = int()
diff --git a/azureml/pipelines/configs/gsm8k_zeroorfewshot_config.yaml b/azureml/pipelines/configs/gsm8k_zeroorfewshot_config.yaml
new file mode 100644
index 0000000..20c1e59
--- /dev/null
+++ b/azureml/pipelines/configs/gsm8k_zeroorfewshot_config.yaml
@@ -0,0 +1,23 @@
+defaults:
+  - _self_
+  - aml_config
+  - aoai_config
+
+zeroorfewshot_config:
+  pipeline:
+    base_experiment_name: gsm8k_zeroorfewshot_debugging
+    tags:
+    default_compute_target: isolatedcompute
+  json_guidance_programs:
+    - gsm8k_zero_or_few_shot_plain.py
+    - gsm8k_zero_or_few_shot_regex_number.py
+    - gsm8k_zero_or_few_shot_basic_json.py
+    - gsm8k_zero_or_few_shot_json_response.py
+  llamacpp_config:
+    compute_target: gput4
+  fewshot_random_seed: 4521
+  n_fewshot: 5
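+  # Sub-sample the split so a debugging run stays cheap; the full
+  # GSM8K train split is roughly 7.5k questions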
+  sample_random_seed: 234891
+  n_samples: 400
\ No newline at end of file
diff --git a/azureml/pipelines/submit_gsm8k_zeroorfewshot.py b/azureml/pipelines/submit_gsm8k_zeroorfewshot.py
new file mode 100644
index 0000000..99a4569
--- /dev/null
+++ b/azureml/pipelines/submit_gsm8k_zeroorfewshot.py
@@ -0,0 +1,138 @@
+# Submit a run using:
+# python .\submit_gsm8k_zeroorfewshot.py -cn gsm8k_zeroorfewshot_config
+
+import time
+
+from dataclasses import dataclass
+
+import hydra
+from hydra.core.config_store import ConfigStore
+
+import omegaconf
+
+from azure.identity import DefaultAzureCredential
+
+from azure.ai.ml import dsl, Input, MLClient
+from azure.ai.ml.entities import Pipeline
+
+from azureml_utils import get_component_collector
+from configs import AMLConfig, GSM8KZeroOrFewShotConfig
+from constants import GUIDANCE_PROGRAMS_DIR
+from logging_utils import get_standard_logger_for_file
+
+_logger = get_standard_logger_for_file(__file__)
+
+
+@dataclass
+class PipelineConfig:
+    zeroorfewshot_config: GSM8KZeroOrFewShotConfig = omegaconf.MISSING
+    azureml_config: AMLConfig = omegaconf.MISSING
+
+
+cs = ConfigStore.instance()
+cs.store(name="config", node=PipelineConfig)
+
+
+def create_gsm8k_zeroshot_pipeline(
+    ml_client: MLClient, run_config: GSM8KZeroOrFewShotConfig, version_string: str
+):
+    components = get_component_collector(ml_client, version_string)
+
+    guidance_inputs = dict()
+    for prog_filename in run_config.json_guidance_programs:
+        k = prog_filename[0:-3]
+        v = Input(
+            type="uri_file",
+            path=GUIDANCE_PROGRAMS_DIR / prog_filename,
+            mode="download",
+        )
+        guidance_inputs[k] = v
+    _logger.info(f"Found {len(guidance_inputs)} guidance programs")
+
+    @dsl.pipeline()
+    def basic_pipeline() -> Pipeline:
+        gsm8k_fetch_job = components.jsonl_gsm8k_fetch()
+        gsm8k_fetch_job.name = "fetch_gsm8k"
+
+        split_outputs = dict()
+        for s in ["train", "test"]:
+            get_split_job = components.uri_folder_to_file(
+                input_dataset=gsm8k_fetch_job.outputs.output_dataset,
+                filename_pattern=f"{s}.jsonl",
+            )
+            get_split_job.name = f"extract_split_{s}"
+            split_outputs[s] = get_split_job.outputs.output_dataset
+
+        sample_lines_job = components.jsonl_sample_lines(
+            input_dataset=split_outputs["train"],
+            n_samples=run_config.n_samples,
+            random_seed=run_config.sample_random_seed,
+        )
+        sample_lines_job.name = f"sample_{run_config.n_samples}_lines"
+
+        random_examples_job = components.jsonl_random_examples(
+            input_dataset=sample_lines_job.outputs.output_dataset,
+            example_dataset=split_outputs["test"],
+            output_key="examples",
+            num_examples=run_config.n_fewshot,
+            random_seed=run_config.fewshot_random_seed,
+        )
+        random_examples_job.name = "add_random_examples"
+
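+        # Fan out: one guidance job plus one scoring job per guidance
+        # program, all consuming the same few-shot-augmented sample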
+        for progname, prog_input in guidance_inputs.items():
+            guidance_job = components.jsonl_guidance_mistral7b(
+                guidance_program=prog_input,
+                input_dataset=random_examples_job.outputs.output_dataset,
+            )
+            guidance_job.compute = run_config.llamacpp_config.compute_target
+            guidance_job.name = f"guidance_mistral7b_{progname}"
+
+            score_job = components.jsonl_score_numeric(
+                input_dataset=guidance_job.outputs.output_dataset,
+                correct_key="answer",
+                response_key="zero_or_few_shot_answer",
+            )
+            score_job.name = f"score_{progname}"
+
+    pipeline = basic_pipeline()
+    pipeline.experiment_name = run_config.pipeline.base_experiment_name
+    pipeline.display_name = None
+    pipeline.compute = run_config.pipeline.default_compute_target
+    if run_config.pipeline.tags:
+        pipeline.tags.update(run_config.pipeline.tags)
+    _logger.info("Pipeline created")
+
+    return pipeline
+
+
+@hydra.main(config_path="configs", version_base="1.1")
+def main(config: PipelineConfig):
+    version_string = str(int(time.time()))
+    _logger.info(f"AzureML object version for this run: {version_string}")
+
+    _logger.info(f"Azure Subscription: {config.azureml_config.subscription_id}")
+    _logger.info(f"Resource Group: {config.azureml_config.resource_group}")
+    _logger.info(f"Workspace : {config.azureml_config.workspace_name}")
+
+    credential = DefaultAzureCredential(exclude_shared_token_cache_credential=True)
+
+    ws_client = MLClient(
+        credential=credential,
+        subscription_id=config.azureml_config.subscription_id,
+        resource_group_name=config.azureml_config.resource_group,
+        workspace_name=config.azureml_config.workspace_name,
+        logging_enable=False,
+    )
+
+    pipeline = create_gsm8k_zeroshot_pipeline(
+        ws_client, config.zeroorfewshot_config, version_string
+    )
+    _logger.info("Submitting pipeline")
+    submitted_job = ws_client.jobs.create_or_update(pipeline)
+    _logger.info(f"Submitted: {submitted_job.name}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/guidance_programs/gsm8k_zero_or_few_shot_basic_json.py b/guidance_programs/gsm8k_zero_or_few_shot_basic_json.py
new file mode 100644
index 0000000..499a15e
--- /dev/null
+++ b/guidance_programs/gsm8k_zero_or_few_shot_basic_json.py
@@ -0,0 +1,72 @@
+# This is a very naive guidance program for GSM8K
+
+import logging
+import sys
+
+from typing import Any, Dict
+
+import guidance
+
+
+_logger = logging.getLogger(__file__)
+_logger.setLevel(logging.INFO)
+_logger.addHandler(logging.StreamHandler(stream=sys.stdout))
+
+
+@guidance
+def zero_shot_gsm8k(
+    lm: guidance.models.Instruct,
+    question: str,
+    examples: list[dict[str, Any]] | None,
+):
+    # Some general instruction to the model
+    lm += """You are taking a maths test\n\n"""
+
+    # Show the few shots
+    for e in examples:
+        lm += f"Question: {e['question']}\n"
+        lm += f"Reasoning:\n"
+        for i, t in enumerate(e["thoughts"]):
+            lm += f"{i+1}. {t['step']}"
+            if "result" in t:
+                lm += " "
+                lm += t["calculation"]
+                lm += t["result"]
+            lm += "\n"
+        lm += f"Answer: {e['answer']}"
+        lm += "\n"
+        lm += "\n"
+
+    # Now ask the question
+    lm += f"Question: {question}\n"
+    lm += f"Reasoning:"
+    lm += guidance.gen("reasons", max_tokens=100)
+    lm += "\n"
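+    # guidance.json constrains decoding to a value matching the schema,
+    # so the model can only emit a bare JSON number after 'Answer: '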
+    lm += f"Answer: " + guidance.json(
+        name="result_string", schema=dict(type="number")
+    )
+
+    return lm
+
+
+def guidance_generation(
+    lm: guidance.models.Chat,
+    input: Dict[str, Any],
+    common: list[dict[str, Any]] | None = None,
+) -> Dict[str, Any]:
+    _logger.debug("Starting guidance_generation")
+    if common:
+        raise ValueError("Common Data not supported!")
+
+    result = lm + zero_shot_gsm8k(
+        question=input["question"], examples=input["examples"]
+    )
+
+    _logger.info(f"result_string: {result['result_string']}")
+
+    float_result = float(result["result_string"])
+
+    result = dict(zero_or_few_shot_answer=float_result, final_lm=str(result))
+    return result
diff --git a/guidance_programs/gsm8k_zero_or_few_shot_json_response.py b/guidance_programs/gsm8k_zero_or_few_shot_json_response.py
new file mode 100644
index 0000000..2d58c30
--- /dev/null
+++ b/guidance_programs/gsm8k_zero_or_few_shot_json_response.py
@@ -0,0 +1,94 @@
+# This is a very naive guidance program for GSM8K
+
+import json
+import logging
+import sys
+
+from typing import Any, Dict
+
+from jsonschema import validate
+
+import guidance
+
+
+_logger = logging.getLogger(__file__)
+_logger.setLevel(logging.INFO)
+_logger.addHandler(logging.StreamHandler(stream=sys.stdout))
+
+
+@guidance
+def zero_shot_gsm8k(
+    lm: guidance.models.Instruct,
+    question: str,
+    examples: list[dict[str, Any]] | None,
+):
+    # Some general instruction to the model
+    lm += """You are taking a maths test\n\n"""
+
+    response_schema = dict(
+        type="object",
+        properties=dict(
+            thoughts=dict(
+                type="array",
+                items=dict(
+                    type="object",
+                    properties=dict(
+                        step=dict(type="string"),
+                        calculation=dict(type="string"),
+                        result=dict(type="string"),
+                    ),
+                ),
+            ),
+            result=dict(type="number"),
+        ),
+    )
+
+    # Show the few shots
+    for e in examples:
+        lm += f"Question: {e['question']}\n"
+
+        nxt_obj = dict(thoughts=[])
+        for t in e["thoughts"]:
+            nxt_thought = dict(step=t["step"], calculation="", result="")
+            if "result" in t:
+                nxt_thought["calculation"] = t["calculation"]
+                nxt_thought["result"] = t["result"]
+            nxt_obj["thoughts"].append(nxt_thought)
+        nxt_obj["result"] = e["answer"]
+
+        validate(nxt_obj, schema=response_schema)
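+        # NOTE: _to_compact_json is a private guidance helper, used so the
+        # examples match the compact layout guidance.json() will generate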
+        lm += guidance.library._json._to_compact_json(nxt_obj)
+        lm += "\n\n"
+
+    # Now ask the question
+    lm += f"Question: {question}\n"
+    lm += guidance.json(name="response_json", schema=response_schema)
+
+    return lm
+
+
+def guidance_generation(
+    lm: guidance.models.Chat,
+    input: Dict[str, Any],
+    common: list[dict[str, Any]] | None = None,
+) -> Dict[str, Any]:
+    _logger.debug("Starting guidance_generation")
+    if common:
+        raise ValueError("Common Data not supported!")
+
+    llm_result = lm + zero_shot_gsm8k(
+        question=input["question"], examples=input["examples"]
+    )
+
+    _logger.info(f"result_string: {llm_result['response_json']}")
+
+    loaded_obj = json.loads(llm_result["response_json"])
+
+    result = dict(
+        zero_or_few_shot_answer=loaded_obj["result"],
+        zero_or_few_shot_thoughts=loaded_obj["thoughts"],
+        final_lm=str(llm_result),
+    )
+    return result
diff --git a/guidance_programs/gsm8k_zero_or_few_shot_plain.py b/guidance_programs/gsm8k_zero_or_few_shot_plain.py
new file mode 100644
index 0000000..aa0bd90
--- /dev/null
+++ b/guidance_programs/gsm8k_zero_or_few_shot_plain.py
@@ -0,0 +1,68 @@
+# This is a very naive guidance program for GSM8K
+
+import logging
+import sys
+
+from typing import Any, Dict
+
+import guidance
+
+
+_logger = logging.getLogger(__file__)
+_logger.setLevel(logging.INFO)
+_logger.addHandler(logging.StreamHandler(stream=sys.stdout))
+
+
+@guidance
+def zero_shot_gsm8k(
+    lm: guidance.models.Instruct,
+    question: str,
+    examples: list[dict[str, Any]] | None,
+):
+    # Some general instruction to the model
+    lm += """You are taking a maths test\n\n"""
+
+    # Show the few shots
+    for e in examples:
+        lm += f"Question: {e['question']}\n"
+        lm += f"Reasoning:\n"
+        for i, t in enumerate(e["thoughts"]):
+            lm += f"{i+1}. {t['step']}"
+            if "result" in t:
+                lm += " "
+                lm += t["calculation"]
+                lm += t["result"]
+            lm += "\n"
+        lm += f"Answer: {e['answer']}\n"
+        lm += "\n"
+        lm += "\n"
+
+    # Now ask the question
+    lm += f"Question: {question}\n"
+    lm += f"Reasoning:\n"
+    lm += guidance.gen("reasons", max_tokens=100, stop="\n")
+    lm += "\n"
+    lm += f"Answer: " + guidance.gen(name="result_string", max_tokens=10, stop="\n")
+
+    return lm
+
+
+def guidance_generation(
+    lm: guidance.models.Chat,
+    input: Dict[str, Any],
+    common: list[dict[str, Any]] | None = None,
+) -> Dict[str, Any]:
+    _logger.debug("Starting guidance_generation")
+    if common:
+        raise ValueError("Common Data not supported!")
+
+    result = lm + zero_shot_gsm8k(
+        question=input["question"], examples=input["examples"]
+    )
+
+    _logger.info(f"result_string: {result['result_string']}")
+
+    float_result = float(result["result_string"])
+
+    result = dict(zero_or_few_shot_answer=float_result, final_lm=str(result))
+    return result
diff --git a/guidance_programs/gsm8k_zero_or_few_shot_regex_number.py b/guidance_programs/gsm8k_zero_or_few_shot_regex_number.py
new file mode 100644
index 0000000..ad07bca
--- /dev/null
+++ b/guidance_programs/gsm8k_zero_or_few_shot_regex_number.py
@@ -0,0 +1,72 @@
+# This is a very naive guidance program for GSM8K
+
+import logging
+import sys
+
+from typing import Any, Dict
+
+import guidance
+
+
+_logger = logging.getLogger(__file__)
+_logger.setLevel(logging.INFO)
+_logger.addHandler(logging.StreamHandler(stream=sys.stdout))
+
+
+@guidance
+def zero_shot_gsm8k(
+    lm: guidance.models.Instruct,
+    question: str,
+    examples: list[dict[str, Any]] | None,
+):
+    # Some general instruction to the model
+    lm += """You are taking a maths test\n\n"""
+
+    # Show the few shots
+    for e in examples:
+        lm += f"Question: {e['question']}\n"
+        lm += f"Reasoning:\n"
+        for i, t in enumerate(e["thoughts"]):
+            lm += f"{i+1}. {t['step']}"
+            if "result" in t:
+                lm += " "
+                lm += t["calculation"]
+                lm += t["result"]
+            lm += "\n"
+        lm += f"Answer: {e['answer']}"
+        lm += "\n"
+        lm += "\n"
+
+    # Now ask the question
+    lm += f"Question: {question}\n"
+    lm += f"Reasoning:"
+    lm += guidance.gen("reasons", max_tokens=100)
+    lm += "\n"
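+    # Constrain decoding with a regex so the answer is a plain decimal
+    # number (optionally negative) that float() below can parse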
+    lm += f"Answer: " + guidance.gen(
+        name="result_string", regex=r"-?\d+\.?\d*", stop="\n"
+    )
+
+    return lm
+
+
+def guidance_generation(
+    lm: guidance.models.Chat,
+    input: Dict[str, Any],
+    common: list[dict[str, Any]] | None = None,
+) -> Dict[str, Any]:
+    _logger.debug("Starting guidance_generation")
+    if common:
+        raise ValueError("Common Data not supported!")
+
+    result = lm + zero_shot_gsm8k(
+        question=input["question"], examples=input["examples"]
+    )
+
+    _logger.info(f"result_string: {result['result_string']}")
+
+    float_result = float(result["result_string"])
+
+    result = dict(zero_or_few_shot_answer=float_result, final_lm=str(result))
+    return result