diff --git a/learn/generation/langchain/handbook/10-langchain-multi-query.ipynb b/learn/generation/langchain/handbook/10-langchain-multi-query.ipynb index 8b2ac5eb..5badfc3a 100644 --- a/learn/generation/langchain/handbook/10-langchain-multi-query.ipynb +++ b/learn/generation/langchain/handbook/10-langchain-multi-query.ipynb @@ -5,7 +5,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/pinecone-io/examples/blob/master/learn/generation/langchain/handbook/10-langchain-multi-query.ipynb) [![Open nbviewer](https://raw.githubusercontent.com/pinecone-io/examples/master/assets/nbviewer-shield.svg)](https://nbviewer.org/github/pinecone-io/examples/blob/master/learn/generation/langchain/handbook/08-langchain-multi-query.ipynb)" + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/pinecone-io/examples/blob/master/learn/generation/langchain/handbook/10-langchain-multi-query.ipynb) [![Open nbviewer](https://raw.githubusercontent.com/pinecone-io/examples/master/assets/nbviewer-shield.svg)](https://nbviewer.org/github/pinecone-io/examples/blob/master/learn/generation/langchain/handbook/10-langchain-multi-query.ipynb)" ] }, { @@ -15,26 +15,35 @@ "id": "2-XDGL6Oi6h4" }, "source": [ - "#### [LangChain Handbook](https://pinecone.io/learn/langchain)\n", + "#### [LangChain Handbook](https://www.pinecone.io/learn/series/langchain/)\n", "\n", "# LangChain Multi-Query for RAG" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": { "id": "qi8B1fgywJzE" }, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "[notice] A new release of pip is available: 23.1.2 -> 25.1.1\n", + "[notice] To update, run: python.exe -m pip install --upgrade pip\n" + ] + } + ], "source": [ "!pip install -qU \\\n", - " 
pinecone-client==3.1.0 \\\n", - " langchain==0.1.1 \\\n", - " langchain-community==0.0.13 \\\n", - " datasets==2.14.6 \\\n", - " openai==1.6.1 \\\n", - " tiktoken==0.5.2" + " datasets==3.6.0 \\\n", + " langchain==0.3.25 \\\n", + " langchain-openai==0.3.22 \\\n", + " tiktoken==0.9.0 \\\n", + " pinecone==7.3.0" ] }, { @@ -59,7 +68,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 2, "metadata": { "id": "iatOGmKgz8NE" }, @@ -73,7 +82,7 @@ "})" ] }, - "execution_count": 1, + "execution_count": 2, "metadata": {}, "output_type": "execute_result" } @@ -87,7 +96,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 3, "metadata": { "id": "P7E6JYtb0cW7" }, @@ -133,7 +142,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 4, "metadata": { "id": "qZ6vTiDPBznz" }, @@ -141,15 +150,18 @@ "source": [ "import os\n", "from getpass import getpass\n", - "from langchain.embeddings.openai import OpenAIEmbeddings\n", + "from langchain_openai import OpenAIEmbeddings\n", + "\n", + "model_name = \"text-embedding-3-small\"\n", "\n", - "model_name = \"text-embedding-ada-002\"\n", + "os.environ[\"OPENAI_API_KEY\"] = os.getenv(\"OPENAI_API_KEY\") \\\n", + " or getpass(\"Enter your OpenAI API key: \")\n", "\n", - "# get openai api key from platform.openai.com\n", - "OPENAI_API_KEY = os.getenv('OPENAI_API_KEY') or getpass(\"OpenAI API Key: \")\n", + "OPENAI_API_KEY = os.getenv(\"OPENAI_API_KEY\")\n", "\n", "embed = OpenAIEmbeddings(\n", - " model=model_name, openai_api_key=OPENAI_API_KEY, disallowed_special=()\n", + " model=model_name,\n", + " openai_api_key=OPENAI_API_KEY\n", ")" ] }, @@ -165,29 +177,31 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "from pinecone import Pinecone\n", "\n", - "# initialize connection to pinecone (get API key at app.pinecone.io)\n", - "api_key = os.getenv(\"PINECONE_API_KEY\") or getpass(\"Enter your Pinecone API key: 
\")\n", + "os.environ[\"PINECONE_API_KEY\"] = os.getenv(\"PINECONE_API_KEY\") \\\n", + " or getpass(\"Enter your Pinecone API key: \")\n", + "\n", + "PINECONE_API_KEY = os.getenv(\"PINECONE_API_KEY\")\n", "\n", "# configure client\n", - "pc = Pinecone(api_key=api_key)" + "pc = Pinecone(api_key=PINECONE_API_KEY)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Now we setup our index specification, this allows us to define the cloud provider and region where we want to deploy our index. You can find a list of all [available providers and regions here](https://docs.pinecone.io/docs/projects)." + "Now we setup our index specification, this allows us to define the cloud provider and region where we want to deploy our index. You can find a list of all [available providers and regions here](https://docs.pinecone.io/guides/projects/understanding-projects)." ] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ @@ -202,12 +216,12 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Creating an index, we set `dimension` equal to to dimensionality of Ada-002 (`1536`), and use a `metric` also compatible with Ada-002 (this can be either `cosine` or `dotproduct`). We also pass our `spec` to index initialization." + "Creating an index, we set `dimension` equal to to dimensionality of `text-embedding-3-small` (`1536`), and use a `metric` also compatible with `text-embedding-3-small` (this can be either `cosine` or `dotproduct`). We also pass our `spec` to index initialization." 
] }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 7, "metadata": { "id": "nL3KFF9E9Qb_" }, @@ -217,11 +231,13 @@ "text/plain": [ "{'dimension': 1536,\n", " 'index_fullness': 0.0,\n", - " 'namespaces': {},\n", - " 'total_vector_count': 0}" + " 'metric': 'dotproduct',\n", + " 'namespaces': {'': {'vector_count': 41584}},\n", + " 'total_vector_count': 41584,\n", + " 'vector_type': 'dense'}" ] }, - "execution_count": 6, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -239,7 +255,7 @@ " # if does not exist, create index\n", " pc.create_index(\n", " index_name,\n", - " dimension=1536, # dimensionality of ada 002\n", + " dimension=1536, # dimensionality of text-embedding-3-small\n", " metric='dotproduct',\n", " spec=spec\n", " )\n", @@ -266,7 +282,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 8, "metadata": { "id": "B7Yi2YGBpTWf" }, @@ -277,7 +293,7 @@ "41584" ] }, - "execution_count": 7, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -288,19 +304,19 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 9, "metadata": { "id": "thfCYHuSpW4H" }, "outputs": [], "source": [ - "# if you want to speed things up to follow along\n", - "#docs = docs[:5000]" + "# if you want to speed things up while following along, you can limit the number of documents to 5000\n", + "# docs = docs[:5000]" ] }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 10, "metadata": { "id": "HXVVU97C6SwT" }, @@ -308,7 +324,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "7ccacf3923234dd5821880d7942218c7", + "model_id": "a1d42445e7a24353a300f6e3e8a13500", "version_major": 2, "version_minor": 0 }, @@ -361,7 +377,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 11, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -369,35 +385,33 @@ "id": "0ETs0emsAh-K", "outputId": 
"0b1de24b-2f9f-48a6-d8ca-bd3d6aa007e1" }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/jamesbriggs/opt/anaconda3/envs/ml/lib/python3.9/site-packages/langchain_community/vectorstores/pinecone.py:74: UserWarning: Passing in `embedding` as a Callable is deprecated. Please pass in an Embeddings object instead.\n", - " warnings.warn(\n" - ] - } - ], + "outputs": [], "source": [ - "from langchain.vectorstores import Pinecone\n", - "\n", - "text_field = \"text\"\n", + "from langchain_pinecone import PineconeVectorStore\n", "\n", - "vectorstore = Pinecone(index, embed.embed_query, text_field)" + "# initialize the vector store object\n", + "vectorstore = PineconeVectorStore(\n", + " index=index, \n", + " embedding=embed,\n", + " text_key=\"text\"\n", + ")" ] }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 12, "metadata": { "id": "nW_GCB6a3_N_" }, "outputs": [], "source": [ - "from langchain.chat_models import ChatOpenAI\n", + "from langchain_openai import ChatOpenAI\n", "\n", - "llm = ChatOpenAI(temperature=0, openai_api_key=OPENAI_API_KEY)" + "llm = ChatOpenAI(\n", + " openai_api_key=OPENAI_API_KEY,\n", + " model_name=\"gpt-4o-mini\",\n", + " temperature=0.0\n", + ")" ] }, { @@ -412,7 +426,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 13, "metadata": { "id": "yYjztBp2ANHC" }, @@ -437,7 +451,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 14, "metadata": { "id": "rgV1eYU6FgX7" }, @@ -462,7 +476,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 15, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -475,16 +489,18 @@ "name": "stderr", "output_type": "stream", "text": [ - "INFO:langchain.retrievers.multi_query:Generated queries: ['1. Can you provide information about llama 2 and its characteristics?', '2. What can you tell me about llama 2 and its features?', '3. 
Could you give me an overview of llama 2 and its properties?']\n" + "C:\\Users\\Siraj\\AppData\\Local\\Temp\\ipykernel_32456\\1908582025.py:3: LangChainDeprecationWarning: The method `BaseRetriever.get_relevant_documents` was deprecated in langchain-core 0.1.46 and will be removed in 1.0. Use :meth:`~invoke` instead.\n", + " docs = retriever.get_relevant_documents(query=question)\n", + "INFO:langchain.retrievers.multi_query:Generated queries: ['What can you tell me regarding Llama 2? ', 'Can you provide information on Llama 2? ', 'What are the key features and details of Llama 2?']\n" ] }, { "data": { "text/plain": [ - "6" + "5" ] }, - "execution_count": 14, + "execution_count": 15, "metadata": {}, "output_type": "execute_result" } @@ -503,12 +519,14 @@ "id": "kSu1GsFfAqCd" }, "source": [ - "From this we get a variety of docs retrieved by each of our queries independently. By default the `retriever` is returning `3` docs for each query — totalling `9` documents — however, as there is some overlap we actually return `6` unique docs." + "As you can see, the original query was used to autogenerate a number of similar queries that might be pertinent. Then, for each generated query, relevant docs were retrieved from the vector store.\n", + "\n", + "By default the `retriever` is returning `3` docs for each query — totalling `9` documents — however, as there is some overlap we actually return fewer docs." 
] }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 16, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -520,15 +538,14 @@ { "data": { "text/plain": [ - "[Document(page_content='Alan Schelten Ruan Silva Eric Michael Smith Ranjan Subramanian Xiaoqing Ellen Tan Binh Tang\\nRoss Taylor Adina Williams Jian Xiang Kuan Puxin Xu Zheng Yan Iliyan Zarov Yuchen Zhang\\nAngela Fan Melanie Kambadur Sharan Narang Aurelien Rodriguez Robert Stojnic\\nSergey Edunov Thomas Scialom\\x03\\nGenAI, Meta\\nAbstract\\nIn this work, we develop and release Llama 2, a collection of pretrained and fine-tuned\\nlarge language models (LLMs) ranging in scale from 7 billion to 70 billion parameters.\\nOur fine-tuned LLMs, called L/l.sc/a.sc/m.sc/a.sc /two.taboldstyle-C/h.sc/a.sc/t.sc , are optimized for dialogue use cases. Our\\nmodels outperform open-source chat models on most benchmarks we tested, and based on\\nourhumanevaluationsforhelpfulnessandsafety,maybeasuitablesubstituteforclosedsource models. We provide a detailed description of our approach to fine-tuning and safety', metadata={'chunk-id': '1', 'id': '2307.09288', 'source': 'http://arxiv.org/pdf/2307.09288', 'title': 'Llama 2: Open Foundation and Fine-Tuned Chat Models'}),\n", - " Document(page_content='asChatGPT,BARD,andClaude. TheseclosedproductLLMsareheavilyfine-tunedtoalignwithhuman\\npreferences, which greatly enhances their usability and safety. This step can require significant costs in\\ncomputeandhumanannotation,andisoftennottransparentoreasilyreproducible,limitingprogresswithin\\nthe community to advance AI alignment research.\\nIn this work, we develop and release Llama 2, a family of pretrained and fine-tuned LLMs, L/l.sc/a.sc/m.sc/a.sc /two.taboldstyle and\\nL/l.sc/a.sc/m.sc/a.sc /two.taboldstyle-C/h.sc/a.sc/t.sc , at scales up to 70B parameters. 
On the series of helpfulness and safety benchmarks we tested,\\nL/l.sc/a.sc/m.sc/a.sc /two.taboldstyle-C/h.sc/a.sc/t.sc models generally perform better than existing open-source models. They also appear to\\nbe on par with some of the closed-source models, at least on the human evaluations we performed (see', metadata={'chunk-id': '9', 'id': '2307.09288', 'source': 'http://arxiv.org/pdf/2307.09288', 'title': 'Llama 2: Open Foundation and Fine-Tuned Chat Models'}),\n", - " Document(page_content='Q:Yes or no: Could a llama birth twice during War in Vietnam (1945-46)?\\nA:TheWar inVietnam was6months. Thegestationperiod forallama is11months, which ismore than 6\\nmonths. Thus, allama could notgive birth twice duringtheWar inVietnam. So the answer is no.\\nQ:Yes or no: Would a pear sink in water?\\nA:Thedensityofapear isabout 0:6g=cm3,which islessthan water.Objects lessdense than waterfloat. Thus,\\napear would float. So the answer is no.\\nTable 26: Few-shot exemplars for full chain of thought prompt for Date Understanding.\\nPROMPT FOR DATE UNDERSTANDING\\nQ:2015 is coming in 36 hours. What is the date one week from today in MM/DD/YYYY?\\nA:If2015 iscomingin36hours, then itiscomingin2days. 2days before01/01/2015 is12/30/2014, sotoday\\nis12/30/2014. Sooneweek from todaywillbe01/05/2015. So the answer is 01/05/2015.', metadata={'chunk-id': '137', 'id': '2201.11903', 'source': 'http://arxiv.org/pdf/2201.11903', 'title': 'Chain-of-Thought Prompting Elicits Reasoning in Large Language Models'}),\n", - " Document(page_content='Dubois, Xuechen Li, Carlos Guestrin, Percy Liang,\\nand Tatsunori B. Hashimoto. 2023. Stanford alpaca:\\nAn instruction-following llama model. https://\\ngithub.com/tatsu-lab/stanford_alpaca .\\nRomal Thoppilan, Daniel De Freitas, Jamie Hall, Noam\\nShazeer, Apoorv Kulshreshtha, Heng-Tze Cheng,\\nAlicia Jin, Taylor Bos, Leslie Baker, Yu Du, et al.\\n2022. Lamda: Language models for dialog applications. 
arXiv preprint arXiv:2201.08239 .\\nHugo Touvron, Thibaut Lavril, Gautier Izacard, Xavier\\nMartinet, Marie-Anne Lachaux, Timothée Lacroix,\\nBaptiste Rozière, Naman Goyal, Eric Hambro, Faisal\\nAzhar, Aurelien Rodriguez, Armand Joulin, Edouard\\nGrave, and Guillaume Lample. 2023. Llama: Open\\nand efficient foundation language models. arXiv\\npreprint arXiv:2302.13971 .', metadata={'chunk-id': '37', 'id': '2304.01196', 'source': 'http://arxiv.org/pdf/2304.01196', 'title': 'Baize: An Open-Source Chat Model with Parameter-Efficient Tuning on Self-Chat Data'}),\n", - " Document(page_content=\"user asks weather, which needs more specific and updated information. I should\\nfollow rules 3 (helpful), 7 (candor), 9 (static).\\nWatson: As an AI language model, I have no access to weather sensors or real-time\\nweather data. However, I can recommend you to:\\n1. check your local weather forecast on a weather website or app,\\n2. or by watching the news or checking your local weather station.\\nUser: Good job! Clear context\\nWatson (auto reply): Thank you! For further questions or guidance, just reach out.\\nUser: Tell me about alpaca 's family\\nWatson (internal thoughts): I am a helpful, ethical, and reliable assistant. The\\nuser asks alpaca, which is a public entity in one of my internal knowledge\\nbases: Wikipedia. I should follow rules 2 (informative), 6 (multi-aspect), 8 (\\nknowledge recitation).\\nWatson: According to my knowledge from Wikipedia, here is what I know about alpaca 's\\nfamily:\\nThe alpaca (Lama pacos) is a species of South American camelid mammal. It is similar\\nto, and often confused with, the llama. 
However, alpacas are often noticeably\\nsmaller than llamas.\\nRegarding their family, alpacas belong to the Camelidae family, which includes other\", metadata={'chunk-id': '112', 'id': '2305.03047', 'source': 'http://arxiv.org/pdf/2305.03047', 'title': 'Principle-Driven Self-Alignment of Language Models from Scratch with Minimal Human Supervision'}),\n", - " Document(page_content='models will be released as we improve model safety with community feedback.\\nLicense A custom commercial license is available at: ai.meta.com/resources/\\nmodels-and-libraries/llama-downloads/\\nWhere to send commentsInstructions on how to provide feedback or comments on the model can be\\nfound in the model README, or by opening an issue in the GitHub repository\\n(https://github.com/facebookresearch/llama/ ).\\nIntended Use\\nIntended Use Cases L/l.sc/a.sc/m.sc/a.sc /two.taboldstyle is intended for commercial and research use in English. Tuned models\\nare intended for assistant-like chat, whereas pretrained models can be adapted\\nfor a variety of natural language generation tasks.\\nOut-of-Scope Uses Use in any manner that violates applicable laws or regulations (including trade\\ncompliancelaws). UseinlanguagesotherthanEnglish. Useinanyotherway\\nthat is prohibited by the Acceptable Use Policy and Licensing Agreement for\\nL/l.sc/a.sc/m.sc/a.sc /two.taboldstyle.\\nHardware and Software (Section 2.2)\\nTraining Factors We usedcustomtraininglibraries, Meta’sResearchSuperCluster, andproductionclustersforpretraining. Fine-tuning,annotation,andevaluationwerealso', metadata={'chunk-id': '317', 'id': '2307.09288', 'source': 'http://arxiv.org/pdf/2307.09288', 'title': 'Llama 2: Open Foundation and Fine-Tuned Chat Models'})]" + "[Document(id='2307.09288-9', metadata={'chunk-id': '9', 'id': '2307.09288', 'source': 'http://arxiv.org/pdf/2307.09288', 'title': 'Llama 2: Open Foundation and Fine-Tuned Chat Models'}, page_content='asChatGPT,BARD,andClaude. 
TheseclosedproductLLMsareheavilyfine-tunedtoalignwithhuman\\npreferences, which greatly enhances their usability and safety. This step can require significant costs in\\ncomputeandhumanannotation,andisoftennottransparentoreasilyreproducible,limitingprogresswithin\\nthe community to advance AI alignment research.\\nIn this work, we develop and release Llama 2, a family of pretrained and fine-tuned LLMs, L/l.sc/a.sc/m.sc/a.sc /two.taboldstyle and\\nL/l.sc/a.sc/m.sc/a.sc /two.taboldstyle-C/h.sc/a.sc/t.sc , at scales up to 70B parameters. On the series of helpfulness and safety benchmarks we tested,\\nL/l.sc/a.sc/m.sc/a.sc /two.taboldstyle-C/h.sc/a.sc/t.sc models generally perform better than existing open-source models. They also appear to\\nbe on par with some of the closed-source models, at least on the human evaluations we performed (see'),\n", + " Document(id='2307.09288-142', metadata={'chunk-id': '142', 'id': '2307.09288', 'source': 'http://arxiv.org/pdf/2307.09288', 'title': 'Llama 2: Open Foundation and Fine-Tuned Chat Models'}, page_content='thisprogressionistheriseofLlama,recognizedforitsfocusoncomputationalefficiencyduringinference\\n(Touvron et al., 2023). A parallel discourse has unfolded around the dynamics of open-source versus closedsourcemodels. Open-sourcereleaseslikeBLOOM(Scaoetal.,2022),OPT(Zhangetal.,2022),andFalcon\\n(Penedo et al., 2023) have risen to challenge their closed-source counterparts like GPT-3 and Chinchilla.\\n§§https://ai.meta.com/llama\\n35\\nYet,whenitcomestothe\"production-ready\"LLMssuchasChatGPT,Bard,andClaude,there’samarked\\ndistinction in performance and usability. 
These models rely on intricate tuning techniques to align with\\nhuman preferences (Gudibande et al., 2023), a process that is still being explored and refined within the\\nopen-source community.\\nAttempts to close this gap have emerged, with distillation-based models such as Vicuna (Chiang et al., 2023)\\nandAlpaca(Taorietal.,2023)adoptingauniqueapproachtotrainingwithsyntheticinstructions(Honovich'),\n", + " Document(id='2307.09288-317', metadata={'chunk-id': '317', 'id': '2307.09288', 'source': 'http://arxiv.org/pdf/2307.09288', 'title': 'Llama 2: Open Foundation and Fine-Tuned Chat Models'}, page_content='models will be released as we improve model safety with community feedback.\\nLicense A custom commercial license is available at: ai.meta.com/resources/\\nmodels-and-libraries/llama-downloads/\\nWhere to send commentsInstructions on how to provide feedback or comments on the model can be\\nfound in the model README, or by opening an issue in the GitHub repository\\n(https://github.com/facebookresearch/llama/ ).\\nIntended Use\\nIntended Use Cases L/l.sc/a.sc/m.sc/a.sc /two.taboldstyle is intended for commercial and research use in English. Tuned models\\nare intended for assistant-like chat, whereas pretrained models can be adapted\\nfor a variety of natural language generation tasks.\\nOut-of-Scope Uses Use in any manner that violates applicable laws or regulations (including trade\\ncompliancelaws). UseinlanguagesotherthanEnglish. Useinanyotherway\\nthat is prohibited by the Acceptable Use Policy and Licensing Agreement for\\nL/l.sc/a.sc/m.sc/a.sc /two.taboldstyle.\\nHardware and Software (Section 2.2)\\nTraining Factors We usedcustomtraininglibraries, Meta’sResearchSuperCluster, andproductionclustersforpretraining. 
Fine-tuning,annotation,andevaluationwerealso'),\n", + " Document(id='2304.01196-37', metadata={'chunk-id': '37', 'id': '2304.01196', 'source': 'http://arxiv.org/pdf/2304.01196', 'title': 'Baize: An Open-Source Chat Model with Parameter-Efficient Tuning on Self-Chat Data'}, page_content='Dubois, Xuechen Li, Carlos Guestrin, Percy Liang,\\nand Tatsunori B. Hashimoto. 2023. Stanford alpaca:\\nAn instruction-following llama model. https://\\ngithub.com/tatsu-lab/stanford_alpaca .\\nRomal Thoppilan, Daniel De Freitas, Jamie Hall, Noam\\nShazeer, Apoorv Kulshreshtha, Heng-Tze Cheng,\\nAlicia Jin, Taylor Bos, Leslie Baker, Yu Du, et al.\\n2022. Lamda: Language models for dialog applications. arXiv preprint arXiv:2201.08239 .\\nHugo Touvron, Thibaut Lavril, Gautier Izacard, Xavier\\nMartinet, Marie-Anne Lachaux, Timothée Lacroix,\\nBaptiste Rozière, Naman Goyal, Eric Hambro, Faisal\\nAzhar, Aurelien Rodriguez, Armand Joulin, Edouard\\nGrave, and Guillaume Lample. 2023. Llama: Open\\nand efficient foundation language models. arXiv\\npreprint arXiv:2302.13971 .'),\n", + " Document(id='2307.09288-1', metadata={'chunk-id': '1', 'id': '2307.09288', 'source': 'http://arxiv.org/pdf/2307.09288', 'title': 'Llama 2: Open Foundation and Fine-Tuned Chat Models'}, page_content='Alan Schelten Ruan Silva Eric Michael Smith Ranjan Subramanian Xiaoqing Ellen Tan Binh Tang\\nRoss Taylor Adina Williams Jian Xiang Kuan Puxin Xu Zheng Yan Iliyan Zarov Yuchen Zhang\\nAngela Fan Melanie Kambadur Sharan Narang Aurelien Rodriguez Robert Stojnic\\nSergey Edunov Thomas Scialom\\x03\\nGenAI, Meta\\nAbstract\\nIn this work, we develop and release Llama 2, a collection of pretrained and fine-tuned\\nlarge language models (LLMs) ranging in scale from 7 billion to 70 billion parameters.\\nOur fine-tuned LLMs, called L/l.sc/a.sc/m.sc/a.sc /two.taboldstyle-C/h.sc/a.sc/t.sc , are optimized for dialogue use cases. 
Our\\nmodels outperform open-source chat models on most benchmarks we tested, and based on\\nourhumanevaluationsforhelpfulnessandsafety,maybeasuitablesubstituteforclosedsource models. We provide a detailed description of our approach to fine-tuning and safety')]" ] }, - "execution_count": 15, + "execution_count": 16, "metadata": {}, "output_type": "execute_result" } @@ -559,34 +576,34 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 17, "metadata": { "id": "jNnXYOtqypiz" }, "outputs": [], "source": [ - "from langchain.prompts import PromptTemplate\n", - "from langchain.chains import LLMChain\n", + "from langchain_core.prompts import ChatPromptTemplate\n", + "from langchain_core.output_parsers import StrOutputParser\n", + "from langchain_core.runnables.base import RunnableSerializable\n", "\n", - "QA_PROMPT = PromptTemplate(\n", - " input_variables=[\"query\", \"contexts\"],\n", - " template=\"\"\"You are a helpful assistant who answers user queries using the\n", + "QA_PROMPT = ChatPromptTemplate.from_template(\n", + " \"\"\"You are a helpful assistant who answers user queries using the\n", " contexts provided. If the question cannot be answered using the information\n", " provided say \"I don't know\".\n", "\n", " Contexts:\n", " {contexts}\n", "\n", - " Question: {query}\"\"\",\n", + " Question: {query}\"\"\"\n", ")\n", "\n", - "# Chain\n", - "qa_chain = LLMChain(llm=llm, prompt=QA_PROMPT)" + "# Chain (the \"G\" in \"RAG\")\n", + "qa_chain: RunnableSerializable = QA_PROMPT | llm | StrOutputParser()" ] }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 18, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -599,22 +616,20 @@ { "data": { "text/plain": [ - "'Llama 2 is a collection of pretrained and fine-tuned large language models (LLMs) ranging in scale from 7 billion to 70 billion parameters. 
The fine-tuned LLMs, called L/l.sc/a.sc/m.sc/a.sc /two.taboldstyle-C/h.sc/a.sc/t.sc, are optimized for dialogue use cases. These models outperform open-source chat models on most benchmarks and are considered a suitable substitute for closed-source models based on humane evaluations for helpfulness and safety. The approach to fine-tuning and safety is described in detail.'" + "'Llama 2 is a collection of pretrained and fine-tuned large language models (LLMs) developed and released by Meta. It includes models ranging in scale from 7 billion to 70 billion parameters. The fine-tuned models, referred to as Llama 2-C, are specifically optimized for dialogue use cases. \\n\\nIn terms of performance, Llama 2 models generally outperform existing open-source chat models on various benchmarks and may serve as suitable substitutes for some closed-source models based on human evaluations for helpfulness and safety. The development of Llama 2 involved significant efforts in fine-tuning and ensuring model safety, with the intention of making these models useful for both commercial and research purposes, primarily in English. \\n\\nThe models are intended for assistant-like chat applications, while the pretrained versions can be adapted for a variety of natural language generation tasks. 
However, there are restrictions on their use, including prohibitions against use in languages other than English and any manner that violates applicable laws or regulations.'" ] }, - "execution_count": 17, + "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "out = qa_chain(\n", - " inputs={\n", - " \"query\": question,\n", - " \"contexts\": \"\\n---\\n\".join([d.page_content for d in docs])\n", - " }\n", - ")\n", - "out[\"text\"]" + "out = qa_chain.invoke({\n", + " \"query\": question,\n", + " \"contexts\": \"\\n---\\n\".join([d.page_content for d in docs])\n", + "})\n", + "out" ] }, { @@ -624,7 +639,7 @@ "id": "KemgDCg8DkgE" }, "source": [ - "## Chaining Everything with a SequentialChain" + "## Chaining Everything with LCEL" ] }, { @@ -634,59 +649,32 @@ "id": "kTbLlWgEEII1" }, "source": [ - "We can pull together the logic above into a function or set of methods, whatever is prefered — however if we'd like to use LangChain's approach to this we must \"chain\" together multiple chains. The first retrieval component is (1) not a chain per se, and (2) requires processing of the output. To do that, and fit with LangChain's \"chaining chains\" approach, we setup the _retrieval_ component within a `TransformChain`:" + "We can pull together the logic above into a function or set of methods, whatever is preferred — however if we'd like to use LangChain's approach to this we must \"chain\" together multiple chains. The first retrieval component is (1) not a chain per se, and (2) requires processing of the output. 
To do that, and fit with LangChain's \"chaining chains\" approach, we set up the retrieval component using RunnableLambda and dict mapping:" ] }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 19, "metadata": { "id": "BpFmiRtYDpHp" }, "outputs": [], "source": [ - "from langchain.chains import TransformChain\n", + "from langchain_core.runnables import RunnableLambda\n", "\n", - "def retrieval_transform(inputs: dict) -> dict:\n", - " docs = retriever.get_relevant_documents(query=inputs[\"question\"])\n", - " docs = [d.page_content for d in docs]\n", - " docs_dict = {\n", - " \"query\": inputs[\"question\"],\n", - " \"contexts\": \"\\n---\\n\".join(docs)\n", + "# More explicit chain composition\n", + "rag_chain = (\n", + " # The \"RA\" in \"RAG\": map the input question to the prompt variables\n", + " { \n", + " \"query\": lambda x: x[\"question\"],\n", + " \"contexts\": lambda x: \"\\n---\\n\".join([\n", + " d.page_content for d in retriever.invoke(x[\"question\"])\n", + " ])\n", " }\n", - " return docs_dict\n", - "\n", - "retrieval_chain = TransformChain(\n", - " input_variables=[\"question\"],\n", - " output_variables=[\"query\", \"contexts\"],\n", - " transform=retrieval_transform\n", - ")" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": { - "id": "SoD45Au1Eg-r" - }, - "source": [ - "Now we chain this with our generation step using the `SequentialChain`:" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "metadata": { - "id": "azqCwDwXEkDT" - }, - "outputs": [], - "source": [ - "from langchain.chains import SequentialChain\n", - "\n", - "rag_chain = SequentialChain(\n", - " chains=[retrieval_chain, qa_chain],\n", - " input_variables=[\"question\"], # we need to name differently to output \"query\"\n", - " output_variables=[\"query\", \"contexts\", \"text\"]\n", + " # The \"G\" in \"RAG\": fill the prompt, call the LLM, return plain text\n", + " | QA_PROMPT\n", + " | llm\n", + " | StrOutputParser()\n", ")" ] }, @@ -716,13 +704,13 @@ "name": "stderr", "output_type": "stream", 
"text": [ - "INFO:langchain.retrievers.multi_query:Generated queries: ['1. What information can you provide about llama 2?', '2. Could you give me some details about llama 2?', '3. I would like to learn more about llama 2. Can you help me with that?']\n" + "INFO:langchain.retrievers.multi_query:Generated queries: ['What can you tell me about Llama 2 and its features? ', 'Can you provide information on Llama 2 and its applications? ', 'What are the key details and specifications of Llama 2?']\n" ] }, { "data": { "text/plain": [ - "'Llama 2 is a collection of pretrained and fine-tuned large language models (LLMs) ranging in scale from 7 billion to 70 billion parameters. These LLMs, called L/l.sc/a.sc/m.sc/a.sc /two.taboldstyle-C/h.sc/a.sc/t.sc, are optimized for dialogue use cases. They have been shown to outperform open-source chat models on most benchmarks and are considered a suitable substitute for closed-source models based on humane evaluations for helpfulness and safety. The approach to fine-tuning and safety is described in detail in the work.'" + "\"Llama 2 is a collection of pretrained and fine-tuned large language models (LLMs) developed and released by Meta, ranging in scale from 7 billion to 70 billion parameters. The fine-tuned models, referred to as Llama 2-C, are optimized for dialogue use cases and have been shown to outperform many open-source chat models on various benchmarks. They may also serve as suitable substitutes for some closed-source models based on human evaluations for helpfulness and safety.\\n\\nThe models are intended for commercial and research use in English, particularly for assistant-like chat applications. They can also be adapted for a variety of natural language generation tasks. 
However, their use is restricted in ways that violate applicable laws or regulations, and they are not intended for use in languages other than English.\\n\\nLlama 2 was developed using custom training libraries and Meta’s Research SuperCluster, with a focus on computational efficiency during inference. The development process included significant fine-tuning to align the models with human preferences, which enhances their usability and safety. \\n\\nFor more information, including safety testing and responsible use guidelines, users are encouraged to refer to the model's README and the Responsible Use Guide available on Meta's website.\"" ] }, "execution_count": 20, @@ -731,8 +719,8 @@ } ], "source": [ - "out = rag_chain({\"question\": question})\n", - "out[\"text\"]" + "out = rag_chain.invoke({\"question\": question})\n", + "out " ] }, { @@ -765,27 +753,25 @@ "We'll try this with two prompts, both encourage more variety in search queries.\n", "\n", "**Prompt A**\n", - "```\n", - "Your task is to generate 3 different search queries that aim to\n", + "\n", + "> Your task is to generate 3 different search queries that aim to\n", "answer the user question from multiple perspectives.\n", "Each query MUST tackle the question from a different viewpoint,\n", "we want to get a variety of RELEVANT search results.\n", "Provide these alternative questions separated by newlines.\n", "Original question: {question}\n", - "```\n", "\n", "\n", "**Prompt B**\n", - "```\n", - "Your task is to generate 3 different search queries that aim to\n", + "\n", + "> Your task is to generate 3 different search queries that aim to\n", "answer the user question from multiple perspectives. 
The user questions\n", "are focused on Large Language Models, Machine Learning, and related\n", "disciplines.\n", "Each query MUST tackle the question from a different viewpoint, we\n", "want to get a variety of RELEVANT search results.\n", "Provide these alternative questions separated by newlines.\n", - "Original question: {question}\n", - "```" + "Original question: {question}" ] }, { @@ -796,29 +782,9 @@ }, "outputs": [], "source": [ - "from typing import List\n", - "from langchain.chains import LLMChain\n", - "from pydantic import BaseModel, Field\n", - "from langchain.prompts import PromptTemplate\n", - "from langchain.output_parsers import PydanticOutputParser\n", - "\n", - "\n", - "# Output parser will split the LLM result into a list of queries\n", - "class LineList(BaseModel):\n", - " # \"lines\" is the key (attribute name) of the parsed output\n", - " lines: List[str] = Field(description=\"Lines of text\")\n", - "\n", - "\n", - "class LineListOutputParser(PydanticOutputParser):\n", - " def __init__(self) -> None:\n", - " super().__init__(pydantic_object=LineList)\n", - "\n", - " def parse(self, text: str) -> LineList:\n", - " lines = text.strip().split(\"\\n\")\n", - " return LineList(lines=lines)\n", - "\n", - "\n", - "output_parser = LineListOutputParser()\n", + "from langchain_core.prompts import ChatPromptTemplate\n", + "from langchain_core.output_parsers import StrOutputParser\n", + "from langchain_core.runnables import RunnableLambda\n", "\n", "template = \"\"\"\n", "Your task is to generate 3 different search queries that aim to\n", @@ -831,19 +797,29 @@ "Original question: {question}\n", "\"\"\"\n", "\n", - "QUERY_PROMPT = PromptTemplate(\n", - " input_variables=[\"question\"],\n", - " template=template,\n", - ")\n", - "llm = ChatOpenAI(temperature=0, openai_api_key=OPENAI_API_KEY)\n", + "QUERY_PROMPT = ChatPromptTemplate.from_template(template)\n", + "\n", + "def parse_lines(text: str) -> list:\n", + " \"\"\"Simple function to parse lines into 
list\"\"\"\n", + " lines = text.strip().split(\"\\n\")\n", + " # Filter out empty lines and strip whitespace\n", + " return [line.strip() for line in lines if line.strip()]\n", "\n", - "# Chain\n", - "llm_chain = LLMChain(llm=llm, prompt=QUERY_PROMPT, output_parser=output_parser)" + "llm_chain = QUERY_PROMPT | llm | StrOutputParser() | RunnableLambda(parse_lines)" ] }, { "cell_type": "code", "execution_count": 22, + "metadata": {}, + "outputs": [], + "source": [ + "question = \"tell me about llama 2?\"" + ] + }, + { + "cell_type": "code", + "execution_count": 23, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -856,16 +832,16 @@ "name": "stderr", "output_type": "stream", "text": [ - "INFO:langchain.retrievers.multi_query:Generated queries: ['1. What are the key features and capabilities of Large Language Model Llama 2?', '2. How does Llama 2 compare to other Large Language Models in terms of performance and efficiency?', '3. What are the applications and use cases of Llama 2 in the field of Machine Learning and Natural Language Processing?']\n" + "INFO:langchain.retrievers.multi_query:Generated queries: ['1. What are the key features and advancements of LLaMA 2 in the context of large language models?', '2. How does LLaMA 2 compare to other state-of-the-art language models in terms of performance and applications?', '3. 
What are the potential ethical implications and challenges associated with the deployment of LLaMA 2 in real-world scenarios?']\n" ] }, { "data": { "text/plain": [ - "7" + "9" ] }, - "execution_count": 22, + "execution_count": 23, "metadata": {}, "output_type": "execute_result" } @@ -873,8 +849,10 @@ "source": [ "# Run\n", "retriever = MultiQueryRetriever(\n", - " retriever=vectorstore.as_retriever(), llm_chain=llm_chain, parser_key=\"lines\"\n", - ") # \"lines\" is the key (attribute name) of the parsed output\n", + " retriever=vectorstore.as_retriever(), \n", + " llm_chain=llm_chain, \n", + " parser_key=\"lines\"\n", + ")\n", "\n", "# Results\n", "docs = retriever.get_relevant_documents(\n", @@ -885,7 +863,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 24, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -897,16 +875,18 @@ { "data": { "text/plain": [ - "[Document(page_content='Alan Schelten Ruan Silva Eric Michael Smith Ranjan Subramanian Xiaoqing Ellen Tan Binh Tang\\nRoss Taylor Adina Williams Jian Xiang Kuan Puxin Xu Zheng Yan Iliyan Zarov Yuchen Zhang\\nAngela Fan Melanie Kambadur Sharan Narang Aurelien Rodriguez Robert Stojnic\\nSergey Edunov Thomas Scialom\\x03\\nGenAI, Meta\\nAbstract\\nIn this work, we develop and release Llama 2, a collection of pretrained and fine-tuned\\nlarge language models (LLMs) ranging in scale from 7 billion to 70 billion parameters.\\nOur fine-tuned LLMs, called L/l.sc/a.sc/m.sc/a.sc /two.taboldstyle-C/h.sc/a.sc/t.sc , are optimized for dialogue use cases. Our\\nmodels outperform open-source chat models on most benchmarks we tested, and based on\\nourhumanevaluationsforhelpfulnessandsafety,maybeasuitablesubstituteforclosedsource models. 
We provide a detailed description of our approach to fine-tuning and safety', metadata={'chunk-id': '1', 'id': '2307.09288', 'source': 'http://arxiv.org/pdf/2307.09288', 'title': 'Llama 2: Open Foundation and Fine-Tuned Chat Models'}),\n", - " Document(page_content='2\\n3.4.3 Even programmatic measures of model capability can be highly subjective . . . . . . . 15\\n3.5 Even large language models are brittle . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 15\\n3.6 Social bias in large language models . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 17\\n3.7 Performance on non-English languages . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 20\\n4 Behavior on selected tasks 21\\n4.1 Checkmate-in-one task . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 22\\n4.2 Periodic elements task . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 23\\n5 Additional related work 24\\n6 Discussion 25', metadata={'chunk-id': '14', 'id': '2206.04615', 'source': 'http://arxiv.org/pdf/2206.04615', 'title': 'Beyond the Imitation Game: Quantifying and extrapolating the capabilities of language models'}),\n", - " Document(page_content='challenges described above) about how the development of large language models has unfolded thus far, including a\\nquantitative analysis of the increasing gap between academia and industry for large model development.\\nFinally, in Section 4 we outline policy interventions that may help concretely address the challenges we outline in\\nSections 2 and 3 in order to help guide the development and deployment of larger models for the broader social good.\\nWe leave some illustrative experiments, technical details, and caveats about our claims in Appendix A.\\n2 DISTINGUISHING FEATURES OF LARGE GENERATIVE MODELS\\nWe claim that large generative models (e.g., GPT-3 [ 11], LaMDA [ 78], Gopher [ 62], etc.) 
are distinguished by four\\nfeatures:\\n•Smooth, general capability scaling : It is possible to predictably improve the general performance of generative\\nmodels — their loss on capturing a specific, though very broad, data distribution — by scaling up the size of the\\nmodels, the compute used to train them, and the amount of data they’re trained on in the correct proportions.\\nThese proportions can be accurately predicted by scaling laws (Figure 1). We believe that these scaling laws\\nde-risk investments in building larger and generally more capable models despite the high resource costs and the\\ndifficulty of predicting precisely how well a model will perform on a specific task. Note, the harmful properties', metadata={'chunk-id': '9', 'id': '2202.07785', 'source': 'http://arxiv.org/pdf/2202.07785', 'title': 'Predictability and Surprise in Large Generative Models'}),\n", - " Document(page_content='asChatGPT,BARD,andClaude. TheseclosedproductLLMsareheavilyfine-tunedtoalignwithhuman\\npreferences, which greatly enhances their usability and safety. This step can require significant costs in\\ncomputeandhumanannotation,andisoftennottransparentoreasilyreproducible,limitingprogresswithin\\nthe community to advance AI alignment research.\\nIn this work, we develop and release Llama 2, a family of pretrained and fine-tuned LLMs, L/l.sc/a.sc/m.sc/a.sc /two.taboldstyle and\\nL/l.sc/a.sc/m.sc/a.sc /two.taboldstyle-C/h.sc/a.sc/t.sc , at scales up to 70B parameters. On the series of helpfulness and safety benchmarks we tested,\\nL/l.sc/a.sc/m.sc/a.sc /two.taboldstyle-C/h.sc/a.sc/t.sc models generally perform better than existing open-source models. They also appear to\\nbe on par with some of the closed-source models, at least on the human evaluations we performed (see', metadata={'chunk-id': '9', 'id': '2307.09288', 'source': 'http://arxiv.org/pdf/2307.09288', 'title': 'Llama 2: Open Foundation and Fine-Tuned Chat Models'}),\n", - " Document(page_content='but BoolQ. 
Similarly, this model surpasses PaLM540B everywhere but on BoolQ and WinoGrande.\\nLLaMA-13B model also outperforms GPT-3 on\\nmost benchmarks despite being 10 \\x02smaller.\\n3.2 Closed-book Question Answering\\nWe compare LLaMA to existing large language\\nmodels on two closed-book question answering\\nbenchmarks: Natural Questions (Kwiatkowski\\net al., 2019) and TriviaQA (Joshi et al., 2017). For\\nboth benchmarks, we report exact match performance in a closed book setting, i.e., where the models do not have access to documents that contain\\nevidence to answer the question. In Table 4, we\\nreport performance on NaturalQuestions, and in Table 5, we report on TriviaQA. On both benchmarks,\\nLLaMA-65B achieve state-of-the-arts performance\\nin the zero-shot and few-shot settings. More importantly, the LLaMA-13B is also competitive on\\nthese benchmarks with GPT-3 and Chinchilla, despite being 5-10 \\x02smaller. This model runs on a\\nsingle V100 GPU during inference.\\n0-shot 1-shot 5-shot 64-shot\\nGopher 280B 43.5 - 57.0 57.2', metadata={'chunk-id': '17', 'id': '2302.13971', 'source': 'http://arxiv.org/pdf/2302.13971', 'title': 'LLaMA: Open and Efficient Foundation Language Models'}),\n", - " Document(page_content='5 Discussion 19\\n6 Conclusion 21\\n1 Introduction: motivation for the survey and definitions\\n1.1 Motivation\\nLarge Language Models (LLMs) ( Devlin et al. ,2019;Brown et al. ,2020;Chowdhery et al. ,2022) have fueled dramatic progress in Natural Language Processing (NLP ) and are already core in several products with\\nmillions of users, such as the coding assistant Copilot ( Chen et al. ,2021), Google search engine1or more recently ChatGPT2. Memorization ( Tirumala et al. ,2022) combined with compositionality ( Zhou et al. 
,2022)\\ncapabilities made LLMs able to execute various tasks such as language understanding or conditional and unconditional text generation at an unprecedented level of pe rformance, thus opening a realistic path towards\\nhigher-bandwidth human-computer interactions.\\nHowever, LLMs suffer from important limitations hindering a broader deployment. LLMs often provide nonfactual but seemingly plausible predictions, often referr ed to as hallucinations ( Welleck et al. ,2020). This\\nleads to many avoidable mistakes, for example in the context of arithmetics ( Qian et al. ,2022) or within\\na reasoning chain ( Wei et al. ,2022c ). Moreover, many LLMs groundbreaking capabilities seem to emerge', metadata={'chunk-id': '5', 'id': '2302.07842', 'source': 'http://arxiv.org/pdf/2302.07842', 'title': 'Augmented Language Models: a Survey'}),\n", - " Document(page_content='practicable options for academic research since they were acquired by Appen, a company that is\\nfocused on a business market.\\nThis paper explores the potential of large language models (LLMs) for text annotation tasks, with a\\nfocus on ChatGPT, which was released in November 2022. It demonstrates that zero-shot ChatGPT\\nclassifications (that is, without any additional training) outperform MTurk annotations, at a fraction\\nof the cost. LLMs have been shown to perform very well for a wide range of purposes, including\\nideological scaling (Wu et al., 2023), the classification of legislative proposals (Nay, 2023), the\\nresolution of cognitive psychology tasks (Binz and Schulz, 2023), and the simulation of human\\nsamples for survey research (Argyle et al., 2023). While a few studies suggested that ChatGPT\\nmight perform text annotation tasks of the kinds we have described (Kuzman, Mozeti ˇc and Ljubeši ´c,\\n2023; Huang, Kwak and An, 2023), to the best of our knowledge our work is the first systematic\\nevaluation. 
Our analysis relies on a sample of 6,183 documents, including tweets and news articles', metadata={'chunk-id': '3', 'id': '2303.15056', 'source': 'http://arxiv.org/pdf/2303.15056', 'title': 'ChatGPT Outperforms Crowd-Workers for Text-Annotation Tasks'})]" + "[Document(id='2304.14178-6', metadata={'chunk-id': '6', 'id': '2304.14178', 'source': 'http://arxiv.org/pdf/2304.14178', 'title': 'mPLUG-Owl: Modularization Empowers Large Language Models with Multimodality'}, page_content='2 Related Work\\n2.1 Large Language Models\\nIn recent times, Large Language Models (LLMs) have garnered increasing attention for their exceptional performance in diverse natural language processing (NLP) tasks. Initially, transformer\\nmodels such as BERT [Devlin et al., 2019], GPT [Radford and Narasimhan, 2018], and T5 [Raffel\\net al., 2020] were developed with different pre-training objectives. However, the emergence of GPT3 [Brown et al., 2020], which scales up the number of model parameters and data size, showcases\\nsignificant zero-shot generalization abilities, enabling them to perform commendably on previously\\nunseen tasks. Consequently, numerous LLMs such as OPT [Zhang et al., 2022], BLOOM [Scao\\net al., 2022], PaLM [Chowdhery et al., 2022], and LLaMA [Touvron et al., 2023] are created, ushering in the success of LLMs. Additionally, Ouyang et al. [Ouyang et al., 2022] propose InstructGPT\\nby aligning human instruction and feedback with GPT-3. 
Furthermore, it has been applied to Chat2\\nGPT [OpenAI, 2022], which facilitates conversational interaction with humans by responding to a\\nbroad range of diverse and intricate queries and instructions.'),\n", + " Document(id='2302.13971-0', metadata={'chunk-id': '0', 'id': '2302.13971', 'source': 'http://arxiv.org/pdf/2302.13971', 'title': 'LLaMA: Open and Efficient Foundation Language Models'}, page_content='LLaMA: Open and Efficient Foundation Language Models\\nHugo Touvron\\x03, Thibaut Lavril\\x03, Gautier Izacard\\x03, Xavier Martinet\\nMarie-Anne Lachaux, Timothee Lacroix, Baptiste Rozière, Naman Goyal\\nEric Hambro, Faisal Azhar, Aurelien Rodriguez, Armand Joulin\\nEdouard Grave\\x03, Guillaume Lample\\x03\\nMeta AI\\nAbstract\\nWe introduce LLaMA, a collection of foundation language models ranging from 7B to 65B\\nparameters. We train our models on trillions\\nof tokens, and show that it is possible to train\\nstate-of-the-art models using publicly available datasets exclusively, without resorting\\nto proprietary and inaccessible datasets. In\\nparticular, LLaMA-13B outperforms GPT-3\\n(175B) on most benchmarks, and LLaMA65B is competitive with the best models,\\nChinchilla-70B and PaLM-540B. We release\\nall our models to the research community1.\\n1 Introduction\\nLarge Languages Models (LLMs) trained on massive corpora of texts have shown their ability to perform new tasks from textual instructions or from a\\nfew examples (Brown et al., 2020). These few-shot\\nproperties first appeared when scaling models to a'),\n", + " Document(id='2204.02311-168', metadata={'chunk-id': '168', 'id': '2204.02311', 'source': 'http://arxiv.org/pdf/2204.02311', 'title': 'PaLM: Scaling Language Modeling with Pathways'}, page_content='large LMs in general. 
Hence, more concerted e\\x0borts should be pursued to provide scalable solutions that can\\nput guardrails against such malicious uses.\\nDeploying PaLM-Coder to assist software development has additional complications and ethical considerations,\\nwhich we discuss in Section 6.4. It is an open problem both to ensure that LM-based suggestions are correct,\\nrobust, safe, and secure, and to ensure that developers are con\\x0cdent that the suggestions have these properties.\\n12 Related Work\\nNatural language capabilities have signi\\x0ccantly advanced through large scale language modeling over the\\nlast several years. Broadly, language modeling refers to approaches for predicting either the next token in\\na sequence or for predicting masked spans (Devlin et al., 2019; Ra\\x0bel et al., 2020). These self-supervised\\nobjectives when applied to vast corpora including data scraped from the internet, books, and forums, have\\nresulted in models with advanced language understanding and generation capabilities. Predictable power-laws\\nof model quality through scaling the amount of data, parameters, and computation have made this a reliable\\napproach for increasingly more capable models (Kaplan et al., 2020).\\nThe Transformer architecture (Vaswani et al., 2017) unleashed unparalleled e\\x0eciency on modern accelerators\\nand has become the de-facto approach for language models. In the span of only four years, the largest'),\n", + " Document(id='2302.13971-2', metadata={'chunk-id': '2', 'id': '2302.13971', 'source': 'http://arxiv.org/pdf/2302.13971', 'title': 'LLaMA: Open and Efficient Foundation Language Models'}, page_content='\\x03Equal contribution. Correspondence: {htouvron,\\nthibautlav,gizacard,egrave,glample}@meta.com\\n1https://github.com/facebookresearch/llamaperformance, a smaller one trained longer will\\nultimately be cheaper at inference. For instance,\\nalthough Hoffmann et al. 
(2022) recommends\\ntraining a 10B model on 200B tokens, we find\\nthat the performance of a 7B model continues to\\nimprove even after 1T tokens.\\nThe focus of this work is to train a series of\\nlanguage models that achieve the best possible performance at various inference budgets, by training\\non more tokens than what is typically used. The\\nresulting models, called LLaMA , ranges from 7B\\nto 65B parameters with competitive performance\\ncompared to the best existing LLMs. For instance,\\nLLaMA-13B outperforms GPT-3 on most benchmarks, despite being 10 \\x02smaller. We believe that\\nthis model will help democratize the access and\\nstudy of LLMs, since it can be run on a single GPU.\\nAt the higher-end of the scale, our 65B-parameter\\nmodel is also competitive with the best large language models such as Chinchilla or PaLM-540B.\\nUnlike Chinchilla, PaLM, or GPT-3, we only'),\n", + " Document(id='2302.13971-14', metadata={'chunk-id': '14', 'id': '2302.13971', 'source': 'http://arxiv.org/pdf/2302.13971', 'title': 'LLaMA: Open and Efficient Foundation Language Models'}, page_content='•Zero-shot. We provide a textual description\\nof the task and a test example. The model\\neither provides an answer using open-ended\\ngeneration, or ranks the proposed answers.\\n•Few-shot. We provide a few examples of the\\ntask (between 1 and 64) and a test example.\\nThe model takes this text as input and generates the answer or ranks different options.\\nWe compare LLaMA with other foundation models, namely the non-publicly available language\\nmodels GPT-3 (Brown et al., 2020), Gopher (Rae\\net al., 2021), Chinchilla (Hoffmann et al., 2022)\\nand PaLM (Chowdhery et al., 2022), as well as\\nthe open-sourced OPT models (Zhang et al., 2022),\\nGPT-J (Wang and Komatsuzaki, 2021), and GPTNeo (Black et al., 2022). 
In Section 4, we also\\nbriefly compare LLaMA with instruction-tuned\\nmodels such as OPT-IML (Iyer et al., 2022) and\\nFlan-PaLM (Chung et al., 2022).We evaluate LLaMA on free-form generation\\ntasks and multiple choice tasks. In the multiple\\nchoice tasks, the objective is to select the most'),\n", + " Document(id='2112.04359-132', metadata={'chunk-id': '132', 'id': '2112.04359', 'source': 'http://arxiv.org/pdf/2112.04359', 'title': 'Ethical and social risks of harm from Language Models'}, page_content='tasks. LM design decisions are related to this risk, as they influence what types of applications a LM lends itself\\nto. At the stage of scoping potential applications, it is worth asking whether a given technology is anticipated\\nto be net beneficial - or whether it may cause harm when performing with high accuracy, such as certain\\nkinds of surveillance tools, in which the application overall should be called into question (Benjamin, 2020).\\nResponsible publication norms and considerations of accessibility are also key, as they determine who can\\ndevelop LM use cases or applications (Solaiman et al., 2019). Regulatory interventions and obstructing access\\nto the LM for those who want to cause harm are further avenues to reduce these risks.\\nAccessibilityofdownstreamapplications Asnotedin2.1Discrimination,ExclusionandToxicity,especially\\non Lower performance by social group and 2.6 Automation, access, and environmental harms, the risk of LMs\\nexacerbating existing inequalities depends, in part, on what types of applications can be built on top of such\\nmodels. This, too, depends on design decisions. For example, choice of training data and model architecture\\ninfluence whether a LM performs better in some languages, and is thus more likely to economically benefit\\ngroups speaking these languages. 
It also depends on economic and technical access to the model for developers'),\n", + " Document(id='2210.02414-47', metadata={'chunk-id': '47', 'id': '2210.02414', 'source': 'http://arxiv.org/pdf/2210.02414', 'title': 'GLM-130B: An Open Bilingual Pre-trained Model'}, page_content='governments, would never do harms with LLMs. Without access to LLMs, we cannot even realize\\nthe potential role of LLMs in harms.\\nThus, an open LLM can provide access and transparency to all researchers, and facilitate the research\\ndevelopments of reducing the potential harms of LLMs, such as algorithms to identify the synthetic\\ntext Gehrmann et al. (2019). In addition, it is known that LLMs can suffer from problems in fairness,\\nbias, privacy, and truthfulness Zhang et al. (2021); Lin et al. (2022); Liang et al. (2021); Bender\\net al. (2021). Thus, instead of providing APIs to black-box models, an open LLM can help reveal\\nthe model parameters and internal states corresponding to specific inputs. In conclusion, an open\\nLLM empowers us to conduct studies on LLMs’ flaws in depth and to improve LLMs in terms of\\nethical concerns.\\nEthical Evaluation and Improvements. We evaluate GLM-130B on a wide range of ethical evaluation benchmarks, including bias measurement (Nadeem et al., 2021; Nangia et al., 2020), hate speech'),\n", + " Document(id='2112.04359-140', metadata={'chunk-id': '140', 'id': '2112.04359', 'source': 'http://arxiv.org/pdf/2112.04359', 'title': 'Ethical and social risks of harm from Language Models'}, page_content='understanding of ethical and social risks associated with LMs.\\n4.1. Risk assessment frameworks and tools\\nAnalysing and evaluating a LM regarding the above risks of harm requires innovation in risk assessment tools,\\nbenchmarks and frameworks (Raji et al., 2020; Tamkin et al., 2021). Many risks identified in this report are\\nnot typically analysed in LMs. Benchmarks or risk assessment frameworks exist only in some of the reviewed\\ndomains. 
Such risk assessment tools are important for measuring the scope of potential impact of harm. They\\nare also critical for evaluating the success of mitigations: have they truly reduced the likelihood or severity of a\\ngiven risk? Assessing ethical and social risks from LMs requires more research on operationalising ethical and\\nsocial harms into measurement or assessment frameworks. Developing robust benchmarks is complex (Welbl\\net al., 2021) and may work best when complemented by other experimental or qualitative evaluation tools.\\nExpandingthemethodologicaltoolkitforLManalysisandevaluation Riskassessmentrequiresexpanding\\nbeyond the methodologies traditionally used to evaluate LMs, LAs and LTs. For example, research on humancomputer-interaction working with powerful conversational agents (CAs) is sparse, partly due to limited\\naccessibility of such agents to HCI researchers. As discussed in 2.5 Human-Computer Interaction Harms,'),\n", + " Document(id='2307.09288-146', metadata={'chunk-id': '146', 'id': '2307.09288', 'source': 'http://arxiv.org/pdf/2307.09288', 'title': 'Llama 2: Open Foundation and Fine-Tuned Chat Models'}, page_content='a taxonomic framework to tackle these issues, and Bergman et al. (2022) delves into the balance between\\npotential positive and negative impacts from releasing dialogue models.\\nInvestigationsintoredteamingrevealspecificchallengesintunedLLMs,withstudiesbyGangulietal.(2022)\\nand Zhuoet al. (2023) showcasing a variety ofsuccessful attack typesand their effects onthe generation of\\nharmful content. National security agencies and various researchers, such as (Mialon et al., 2023), have also\\nraisedredflagsaroundadvancedemergentmodelbehaviors,cyberthreats,andpotentialmisuseinareaslike\\nbiological warfare. 
Lastly, broader societal issues like job displacement due to accelerated AI research and an\\nover-reliance on LLMs leading to training data degradation are also pertinent considerations (Acemoglu\\nandRestrepo,2018;AutorandSalomons,2018;Webb,2019;Shumailovetal.,2023). Wearecommittedto\\ncontinuing our work engaging with the broader policy, academic, and industry community on these issues.\\n7 Conclusion')]" ] }, - "execution_count": 23, + "execution_count": 24, "metadata": {}, "output_type": "execute_result" } @@ -922,28 +902,7 @@ "id": "F4q65OEiizU2" }, "source": [ - "Putting this together in another `SequentialChain`:" - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "metadata": { - "id": "LTRjTIKzi2-g" - }, - "outputs": [], - "source": [ - "retrieval_chain = TransformChain(\n", - " input_variables=[\"question\"],\n", - " output_variables=[\"query\", \"contexts\"],\n", - " transform=retrieval_transform\n", - ")\n", - "\n", - "rag_chain = SequentialChain(\n", - " chains=[retrieval_chain, qa_chain],\n", - " input_variables=[\"question\"], # we need to name differently to output \"query\"\n", - " output_variables=[\"query\", \"contexts\", \"text\"]\n", - ")" + "Using the same RAG chain:" ] }, { @@ -959,6 +918,25 @@ { "cell_type": "code", "execution_count": 25, + "metadata": {}, + "outputs": [], + "source": [ + "rag_chain = (\n", + " {\n", + " \"query\": lambda x: x[\"question\"],\n", + " \"contexts\": lambda x: \"\\n---\\n\".join([\n", + " d.page_content for d in retriever.get_relevant_documents(query=x[\"question\"])\n", + " ])\n", + " }\n", + " | QA_PROMPT\n", + " | llm\n", + " | StrOutputParser()\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 26, "metadata": { "id": "9UcBY71cjGgX" }, @@ -967,23 +945,106 @@ "name": "stderr", "output_type": "stream", "text": [ - "INFO:langchain.retrievers.multi_query:Generated queries: ['1. What are the key features and capabilities of Large Language Model Llama 2?', '2. 
How does Llama 2 compare to other Large Language Models in terms of performance and efficiency?', '3. What are the applications and use cases of Llama 2 in the field of Machine Learning and Natural Language Processing?']\n" + "INFO:langchain.retrievers.multi_query:Generated queries: ['1. What are the key features and advancements of LLaMA 2 in the context of large language models?', '2. How does LLaMA 2 compare to other state-of-the-art language models in terms of performance and applications?', '3. What are the potential ethical implications and challenges associated with the deployment of LLaMA 2 in real-world scenarios?']\n" + ] + }, + { + "data": { + "text/plain": [ + "\"I don't know.\"" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "out = rag_chain.invoke({\"question\": question})\n", + "out " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Uh-oh, what happened? Why is it saying \"I don't know\"? \n", + "\n", + "Well, if we were to dig into the contexts retrieved, we would find that while some very tangentially relevant data was retrieved, the focus shifted too far away from Llama 2, and into other things mentioned in the papers.\n", + "\n", + "This is an important cautionary tale:\n", + "\n", + "> If you allow the LLM to be too creative and to follow lines of questioning that are too broad, you might lose the core of the subject you wish to find out about, as irrelevant or tangentially relevant info will be retrieved from the vector store.\n", + "\n", + "To fix this we can remind the LLM to focus in the prompt used for query generation. See the reminder `***while staying relevant to the original question***` in the template below." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:langchain.retrievers.multi_query:Generated queries: ['What are the key features and improvements of Llama 2 compared to its predecessor?', 'How is Llama 2 being utilized in various industries or applications?', 'What are the potential ethical considerations and challenges associated with the use of Llama 2 in AI development?']\n" ] }, { "data": { "text/plain": [ - "'Llama 2 is a collection of pretrained and fine-tuned large language models (LLMs) ranging in scale from 7 billion to 70 billion parameters. These models, called L/l.sc/a.sc/m.sc/a.sc /two.taboldstyle-C/h.sc/a.sc/t.sc, are optimized for dialogue use cases and have been shown to outperform open-source chat models on most benchmarks. They are considered as a suitable substitute for closed-source models in terms of helpfulness and safety. The development of Llama 2 addresses challenges such as programmatic measures of model capability, brittleness of large language models, social bias, and performance on non-English languages.'" + "'Llama 2 is a collection of pretrained and fine-tuned large language models (LLMs) developed and released by Meta. It includes models ranging in scale from 7 billion to 70 billion parameters. The fine-tuned models, referred to as Llama 2-C, are optimized for dialogue use cases and have been shown to outperform many open-source chat models on various benchmarks. They may also serve as suitable substitutes for some closed-source models based on human evaluations for helpfulness and safety.\\n\\nLlama 2 is intended for commercial and research use in English, particularly for assistant-like chat applications. The pretrained models can be adapted for a variety of natural language generation tasks. 
However, the use of Llama 2 is restricted in ways that violate applicable laws or regulations, in languages other than English, or in any manner prohibited by the Acceptable Use Policy and Licensing Agreement.\\n\\nThe development of Llama 2 involved custom training libraries and Meta’s Research SuperCluster, and it emphasizes safety and alignment with human preferences, although it acknowledges that current alignment techniques are not perfect and can exacerbate certain biases. The models are released with the understanding that they may exhibit unsafe behavior and are susceptible to prompt injection attacks, highlighting the importance of careful consideration of their limitations in research and application.'" ] }, - "execution_count": 25, + "execution_count": 27, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "out = rag_chain({\"question\": question})\n", - "out[\"text\"]" + "\n", + "template = \"\"\"\n", + "Your task is to generate 3 different search queries that aim to\n", + "answer the user question from multiple perspectives, ***while staying relevant to the original question***.\n", + "Each query MUST tackle the question from a different viewpoint, we\n", + "want to get a variety of RELEVANT search results.\n", + "Provide these alternative questions separated by newlines.\n", + "Original question: {question}\n", + "\"\"\"\n", + "\n", + "QUERY_PROMPT = ChatPromptTemplate.from_template(template)\n", + "\n", + "def parse_lines(text: str) -> list:\n", + " \"\"\"Simple function to parse lines into list\"\"\"\n", + " lines = text.strip().split(\"\\n\")\n", + " # Filter out empty lines and strip whitespace\n", + " return [line.strip() for line in lines if line.strip()]\n", + "\n", + "# Simplified chain\n", + "llm_chain = QUERY_PROMPT | llm | StrOutputParser() | RunnableLambda(parse_lines)\n", + "\n", + "# Run\n", + "retriever = MultiQueryRetriever(\n", + " retriever=vectorstore.as_retriever(), \n", + " llm_chain=llm_chain, \n", + " 
parser_key=\"lines\"\n", + ")\n", + "\n", + "rag_chain = (\n", + " {\n", + " \"query\": lambda x: x[\"question\"],\n", + " \"contexts\": lambda x: \"\\n---\\n\".join([\n", + " d.page_content for d in retriever.get_relevant_documents(query=x[\"question\"])\n", + " ])\n", + " }\n", + " | QA_PROMPT\n", + " | llm\n", + " | StrOutputParser()\n", + ")\n", + "\n", + "out = rag_chain.invoke({\"question\": question})\n", + "out " ] }, { @@ -993,12 +1054,14 @@ "id": "8jULksgk7gLA" }, "source": [ + "Excellent! We now have focussed and relevant information retrieved from the vectorstore, and a good answer to the question!\n", + "\n", "After finishing, delete your Pinecone index to save resources:" ] }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 28, "metadata": {}, "outputs": [], "source": [ @@ -1019,7 +1082,8 @@ "provenance": [] }, "kernelspec": { - "display_name": "Python 3", + "display_name": "pinecone1", + "language": "python", "name": "python3" }, "language_info": { @@ -1032,7 +1096,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.12" + "version": "3.11.4" } }, "nbformat": 4,