|
1 | | -# from langchain_community.tools import DuckDuckGoSearchResults |
2 | | -# import asyncio |
3 | | -# async def get_search_results(question): |
4 | | -# search = DuckDuckGoSearchResults() |
5 | | -# results=search.run(question) |
6 | | -# return results |
7 | | - |
8 | | -# async def duckduck_search(question): |
9 | | -# ddgs_results = await get_search_results(question) |
10 | | -# return ddgs_results |
11 | | - |
12 | | - |
13 | | -# from duckduckgo_search import ddg |
14 | | -# r = ddg("the latest cnn news", max_results=5) |
15 | | -# for page in r: |
16 | | -# print(page) |
17 | | -# question='the latest cnn news' |
18 | | -# search_result=asyncio.run(duckduck_search(question)) |
19 | | -# print(search_result) |
| 1 | +import random |
| 2 | +import time |
20 | 3 | from duckduckgo_search import DDGS |
| 4 | +from langchain_community.document_loaders import WebBaseLoader |
| 5 | +import re |
| 6 | +from langchain_groq import ChatGroq |
| 7 | +import requests |
| 8 | +import streamlit as st |
| 9 | +from langchain_core.messages import AIMessage, HumanMessage |
| 10 | +from langchain_core.prompts import ChatPromptTemplate |
| 11 | +from langchain_core.output_parsers import StrOutputParser |
| 12 | +from langchain_groq import ChatGroq |
| 13 | +from langchain_core.prompts import PromptTemplate |
| 14 | +from langchain_google_genai import ChatGoogleGenerativeAI |
| 15 | +from langchain_cohere import ChatCohere |
| 16 | +from langchain_community.chat_models import QianfanChatEndpoint |
| 17 | +from langchain_community.tools import DuckDuckGoSearchResults |
| 18 | +from dotenv import find_dotenv, load_dotenv |
| 19 | + |
def format_text(text):
    """Normalize whitespace in *text*.

    Collapses runs of tabs and spaces to a single character each, then
    squeezes blank lines and indented line breaks down to one bare newline.
    """
    # (pattern, replacement) pairs applied in order; the order matters:
    # horizontal whitespace is collapsed before newline handling.
    cleanup_rules = (
        (r'\t+', '\t'),       # runs of tabs      -> one tab
        (r' +', ' '),         # runs of spaces    -> one space
        (r'\n\s*\n+', '\n'),  # blank-line runs   -> one newline
        (r'\n\s+', '\n'),     # newline + indent  -> bare newline
    )
    for pattern, replacement in cleanup_rules:
        text = re.sub(pattern, replacement, text)
    return text
| 30 | + |
| 31 | + |
def duckduck_search(question):
    """Search DuckDuckGo for *question* and fetch the top result pages.

    Returns a list whose first element is the raw search-result string and
    whose remaining elements are cleaned, truncated page texts from at most
    three of the linked result pages. Unreachable pages are skipped.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.93 Safari/537.36',
    }
    search = DuckDuckGoSearchResults()
    results = search.run(question)
    # Brief pause before the follow-up page fetches to avoid rate limiting.
    time.sleep(2)
    content = [results]
    # Pull the "link: <url>" entries out of the formatted result string.
    links = re.findall(r'link: (https?://[^\s\]]+)', results)
    fetched = 0
    for url in links:
        try:
            # timeout prevents one dead site from hanging the whole search
            response = requests.get(url, headers=headers, timeout=10)
        except requests.RequestException:
            # Best-effort: skip pages that error out instead of crashing.
            continue
        if response.status_code == 200:
            loader = WebBaseLoader(url)
            docs = loader.load()
            for doc in docs:
                page_text = format_text(doc.page_content)
                # Cap each page so the LLM context stays small.
                content.append(page_text[:2000])
            fetched += 1
            if fetched >= 3:
                break
    return content
| 56 | + |
| 57 | + |
| 58 | + |
| 59 | + |
| 60 | + |
def judge_search(question, chat_history, llm):
    """Ask *llm* whether answering *question* requires real-time information.

    Returns either "[search]: <standalone query>" when the model decides a
    web search is needed, or the model's direct answer otherwise. The caller
    detects the "[search]" marker to decide whether to run a search.
    """
    # NOTE: original prompt read "new conceptsnames,non-existent concept" —
    # fixed the garbled wording so the model gets a clear instruction.
    prompt_template = """
    Give you a question, you need to judge whether you need real-time information to answer.\n
    If you think you need more real-time information (It may include today, weather, location, new concepts, names, non-existent concepts etc.) to answer the question better,
    you need to output "[search]" with a standalone query. The query can be understood without the Chat History.\n
    If you can answer the question using the chat history without needing real-time information, just output your answer.\n
    Do not explain your decision process.
    Output format: "[search]: your query" or your answer.
    User Question: {question}.
    Chat History: {chat_history}.
    """
    prompt = PromptTemplate.from_template(prompt_template)
    chain = prompt | llm | StrOutputParser()
    response = chain.invoke({
        "chat_history": chat_history,
        "question": question,
    })
    return response
| 79 | + |
| 80 | + |
| 81 | + |
def generate_based_history_query(question, chat_history, llm):
    """Rewrite the latest *question* into a standalone search query.

    Uses *llm* to fold the chat history into the question so the resulting
    query can be understood (and searched) without any prior context.
    Returns the rewritten query string.
    """
    # "sutiable" typo fixed to "suitable" so the instruction reads correctly.
    based_history_prompt = """
    Use the following latest User Question to formulate a standalone query.
    The query can be understood without the Chat History.
    The output should just be the sentence suitable for query.
    If you feel confused, just output the latest User Question.
    Do not provide any answer.
    User Question: '''{question}'''
    Chat History: '''{chat_history}'''
    query:
    """
    rag_chain = PromptTemplate.from_template(based_history_prompt) | llm | StrOutputParser()
    result = rag_chain.invoke(
        {
            "chat_history": chat_history,
            "question": question,
        }
    )
    return result
| 101 | + |
def chat_response(question, chat_history, llm, content):
    """Answer *question* with *llm*, grounded in search *content*.

    The prompt instructs the model to prefer the Search Context over its own
    knowledge, mirror the user's language (Chinese in, Chinese out), and
    surface useful links from the context when relevant.

    Returns the model's answer string, or a Chinese fallback message when
    the model call fails for any reason (rate limit, network, bad key …) —
    the broad catch is a deliberate user-facing boundary, not error hiding.
    """
    try:
        chatBot_template_prompt = """
        You are a chat assistant. Please answer User Questions to the best of your ability.
        If the User Questions are asked in Chinese, then your answers must also be in Chinese.
        You can use the context of the Chat History to help you understand the user's question.
        If your understanding conflicts with the Search Context, please use the Search Context first to answer the question.
        If you think the Search Context is not helpful, please answer based on your understanding.
        If necessary, please output useful links from the Search Context at the end.
        User Questions: {question}.
        Chat History:{chat_history}.
        Search Context:{content}.
        """
        prompt = ChatPromptTemplate.from_template(chatBot_template_prompt)
        chain = prompt | llm | StrOutputParser()
        result = chain.invoke({
            "chat_history": chat_history,
            "question": question,
            "content": content
        })
        return result
    except Exception:
        # "The current model is unavailable, please try again later."
        return "当前模型暂不可用,请稍后尝试。"
| 142 | + |
| 143 | + |
if __name__ == "__main__":
    # Simple REPL: judge whether each question needs a live web search,
    # search + answer with a stronger model if so, otherwise answer directly.
    load_dotenv(find_dotenv())
    question = input("请输入问题:")
    chat_history = []
    while True:
        llm = QianfanChatEndpoint(model='ERNIE-Lite-8K')
        judge_result = judge_search(question, chat_history, llm)
        if '[search]' in judge_result:
            # Split only on the FIRST ":" so queries that themselves contain
            # colons are not truncated; fall back to the raw question if the
            # model omitted the colon entirely (avoids IndexError).
            if ":" in judge_result:
                query = judge_result.split(":", 1)[1].strip()
            else:
                query = question
            content = duckduck_search(query)
            llm = ChatGroq(model_name='mixtral-8x7b-32768', temperature=0.1)
            response = chat_response(question, chat_history, llm, content)
        else:
            response = judge_result
        # Store the assistant turn as an AIMessage so the history is typed
        # consistently with the HumanMessage user turns.
        chat_history.extend([HumanMessage(content=question), AIMessage(content=response)])
        question = input("请输入问题:")
| 160 | + |
| 161 | + |
21 | 162 |
|
22 | | -# results = DDGS().text("德国时间", max_results=5) |
23 | | -# print(results) |
24 | | -results = DDGS().text("NBA比赛") |
25 | | -print(results) |
|
0 commit comments