
Commit f8d4d8e — v0.0.5
1 parent 9157b3e commit f8d4d8e

21 files changed: +650 −125 lines

README.md — 18 additions, 8 deletions

@@ -1,8 +1,8 @@
  ### An AI Bot Based on Large Language Models (LLMs)
- An AI language-model bot built on open-source frameworks and platforms, integrating human–machine chat, retrieval-augmented generation, and PDF/URL parsing chat.
+ An AI language-model bot built on open-source frameworks and platforms, integrating human–machine chat, retrieval-augmented generation, and PDF/URL parsing chat. It relies entirely on free, open APIs, delivering customized LLM functionality at minimal cost.

  ## Tools and Platforms
- Langchain, Streamlit, Oracle Cloud, Groq, Docker
+ Langchain, Streamlit, Oracle Cloud, Groq, Docker, Baidu Cloud

  ## File Structure
  <pre>
@@ -18,15 +18,15 @@ Langchain, Streamlit, Oracle Cloud, Groq, Docker
  ├── README.md
  ├── .gitgnore
  ├── config_setting/
- │   ├── model_config.py
+ │   ├── model_config.py    # all models
  │   └── prompt_config.py
  ├── about_page.py
  ├── chat_page.py
  ├── Dockerfile
  ├── pdf_page.py
  ├── requirements.txt
  ├── summary_page.py
- ├── url_page.py
+ ├── url_page.py            # ui
  </pre>

  ## Usage
@@ -42,22 +42,34 @@ Langchain, Streamlit, Oracle Cloud, Groq, Docker
  |----------------|-------------------------------------------------|
  | Groq API KEY | [Groq console](https://console.groq.com/playground) |
  | COHERE API KEY | [COHERE dashboard](https://dashboard.cohere.com/) |
+ | Gemini API KEY | [Google AI](https://ai.google.dev/) |
+ | BaiduQianfan API KEY | [Baidu Cloud](https://cloud.baidu.com/) |

  3. Create a .env file in the project root
  ```bash
- GROQ_API_KEY=<Groq-API-KEY>
+ GROQ_API_KEY= <Groq-API-KEY>
  COHERE_API_KEY= <COHERE-API-KEY>
+ GOOGLE_API_KEY= <GOOGLE-API-KEY>
+ QIANFAN_AK= <QIANFAN-AK>
+ QIANFAN_SK= <QIANFAN-SK>
  ```
  4. Run
  ```bash
- streamlit run chat_page.py
+ streamlit run web_ui.py
  ```
  ### Server Deployment
  [Wiki link](https://github.com/Boomm-shakalaka/AIBot-LLM/wiki/Oracle%E6%9C%8D%E5%8A%A1%E5%99%A8%E6%90%AD%E5%BB%BA%E6%95%99%E7%A8%8B)

  ## Changelog
+ v0.0.5
+ 1. Added Baidu Qianfan models (ERNIE-Lite-8K and ERNIE-Speed-128K are available for free)
+ 2. Added Gemini models (Gemini does not support streaming output, so it is not enabled yet)
+ 3. Added an online chat feature that searches the web via duckduckgo-search
+ 4. Improved how online search is invoked
+ 5. Implemented resume evaluation in the PDF chat feature
  v0.0.4.1
  1. Added a selenium crawler to improve web-page parsing
  2. Refactored the urlbot architecture
@@ -92,8 +104,6 @@ v0.0.2
  4. Added URLBot for URL-based retrieval
  5. Improved the URL-parsing animation
-
-
  v0.0.1
  1. Built the basic Streamlit page framework
  2. Added the chatBot page with a chat window and sidebar
(3 binary files changed: −100 Bytes, 152 Bytes, 2.64 KB — contents not shown)

config_setting/model_config.py — 8 additions, 0 deletions

@@ -0,0 +1,8 @@
+ model_ls = {
+     "百度千帆大模型": {"name": "ERNIE-Lite-8K", "tokens": 8192, "developer": "Baidu"},
+     "谷歌Gemma大模型": {"name": "gemma-7b-it", "tokens": 8192, "developer": "Google"},
+     "谷歌gemini大模型": {"name": "gemini-1.5-flash-latest", "tokens": 8192, "developer": "Google"},
+     "Llama3-70b大模型": {"name": "llama3-70b-8192", "tokens": 8192, "developer": "Meta"},
+     "Llama3-8b大模型": {"name": "llama3-8b-8192", "tokens": 8192, "developer": "Meta"},
+     "Mixtral大模型": {"name": "mixtral-8x7b-32768", "tokens": 32768, "developer": "Mistral"},
+ }
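The registry above maps display labels (in Chinese) to model metadata such as the provider model name and context size. A minimal sketch of how a page could resolve a label to a registry entry — the `pick_model` helper is hypothetical, not part of this commit:

```python
model_ls = {
    "百度千帆大模型": {"name": "ERNIE-Lite-8K", "tokens": 8192, "developer": "Baidu"},
    "Llama3-70b大模型": {"name": "llama3-70b-8192", "tokens": 8192, "developer": "Meta"},
}

def pick_model(label: str) -> dict:
    # Fall back to the first registry entry when the label is unknown.
    return model_ls.get(label, next(iter(model_ls.values())))

info = pick_model("Llama3-70b大模型")
print(info["name"], info["tokens"])  # llama3-70b-8192 8192
```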

config_setting/prompt_config.py — 26 additions, 1 deletion

@@ -61,4 +61,29 @@
  question: {question}.
  chat_history:{chat_history}.
  search_result:{search_result}.
- """
+ """
+
+ resume_summary_prompt="""
+ You are a human-resources professional and you need to comment on the resume content provided. Your review should adhere to the STAR principle, using the following framework:
+ [Overall evaluation]
+ Evaluate the resume as a whole. Look at the qualifications and relevant experience to determine whether there is a clear job intention, and give your advice.
+ [Score]
+ Give a score from 0-100. The higher the score, the better the resume.
+ [Personal information]
+ List the personal information included in the resume, such as name, email address, phone number, LinkedIn, etc., and determine whether it is complete.
+ [Educational background]
+ Check whether the resume includes a full educational background: a clear school name, major, start and graduation dates, and optionally the school location. In some cases a description of the relevant major may be included.
+ [Work experience]
+ Determine whether the resume includes job descriptions covering dates, position, company, and location, and whether each description is clear and complete.
+ [Internship experience]
+ Determine whether the resume describes internship experience covering dates, position, company, and location, and whether each description is clear and complete.
+ [Project research experience]
+ Determine whether the resume describes project or research experience, including dates, role, company or project name, and location, and whether each description is clear and complete.
+ [Social activity experience]
+ Determine whether the resume includes social or campus activities, with dates, role, activity name, location, etc. This part is optional.
+ [Optimization and modification suggestions]
+ Give specific suggestions for changes.\n
+ If the resume content is Chinese, you should also give your comments in Chinese.
+ Resume content: {resume_content}
+ """
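The prompt's single `{resume_content}` placeholder is filled with the extracted PDF text before the model call. A minimal sketch of the fill step — the `build_resume_prompt` helper and the 6000-character truncation limit are assumptions, and the prompt string is a shortened stand-in:

```python
# Shortened stand-in for the full resume_summary_prompt added in this commit.
resume_summary_prompt = (
    "You are a human-resources professional...\n"
    "Resume content: {resume_content}\n"
)

def build_resume_prompt(resume_text: str, max_chars: int = 6000) -> str:
    # Truncate long resumes so the filled prompt fits the model's context window.
    return resume_summary_prompt.format(resume_content=resume_text[:max_chars])

filled = build_resume_prompt("Jane Doe - Data Analyst - 5 years SQL/Python")
print(filled.endswith("Jane Doe - Data Analyst - 5 years SQL/Python\n"))  # True
```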

requirements.txt — 2 additions, 5 deletions

@@ -8,8 +8,5 @@ BeautifulSoup4
  langchain_cohere
  chromadb
  duckduckgo-search
- selenium
- # webdriver-manager
- playwright
- # playwright install
- lxml
+ langchain-google-genai
+ pdfminer.six

test/dockdockgo_test.py — 160 additions, 23 deletions

@@ -1,25 +1,162 @@
@@ -1,25 +1,162 @@
1-
# from langchain_community.tools import DuckDuckGoSearchResults
2-
# import asyncio
3-
# async def get_search_results(question):
4-
# search = DuckDuckGoSearchResults()
5-
# results=search.run(question)
6-
# return results
7-
8-
# async def duckduck_search(question):
9-
# ddgs_results = await get_search_results(question)
10-
# return ddgs_results
11-
12-
13-
# from duckduckgo_search import ddg
14-
# r = ddg("the latest cnn news", max_results=5)
15-
# for page in r:
16-
# print(page)
17-
# question='the latest cnn news'
18-
# search_result=asyncio.run(duckduck_search(question))
19-
# print(search_result)
1+
import random
2+
import time
203
from duckduckgo_search import DDGS
4+
from langchain_community.document_loaders import WebBaseLoader
5+
import re
6+
from langchain_groq import ChatGroq
7+
import requests
8+
import streamlit as st
9+
from langchain_core.messages import AIMessage, HumanMessage
10+
from langchain_core.prompts import ChatPromptTemplate
11+
from langchain_core.output_parsers import StrOutputParser
12+
from langchain_groq import ChatGroq
13+
from langchain_core.prompts import PromptTemplate
14+
from langchain_google_genai import ChatGoogleGenerativeAI
15+
from langchain_cohere import ChatCohere
16+
from langchain_community.chat_models import QianfanChatEndpoint
17+
from langchain_community.tools import DuckDuckGoSearchResults
18+
from dotenv import find_dotenv, load_dotenv
19+
20+
def format_text(text):
21+
# 用正则表达式将连续多个制表符替换为一个制表符
22+
text = re.sub(r'\t+', '\t', text)
23+
# 用正则表达式将连续多个空格替换为一个空格
24+
text = re.sub(r' +', ' ', text)
25+
# 用正则表达式将多个换行符和空白字符的组合替换为一个换行符
26+
text = re.sub(r'\n\s*\n+', '\n', text)
27+
# 用正则表达式将单个换行符和空白字符的组合替换为一个换行符
28+
text = re.sub(r'\n\s+', '\n', text)
29+
return text
30+
31+
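The whitespace normalization above can be exercised standalone; a quick check of what `format_text` does to the kind of ragged text that comes back from scraped HTML:

```python
import re

def format_text(text):
    # Same normalization as in test/dockdockgo_test.py: collapse tab runs,
    # space runs, and blank-line runs left over from scraped pages.
    text = re.sub(r'\t+', '\t', text)
    text = re.sub(r' +', ' ', text)
    text = re.sub(r'\n\s*\n+', '\n', text)
    text = re.sub(r'\n\s+', '\n', text)
    return text

print(repr(format_text("News\n\n\n  Today:\t\tsunny   and  warm\n")))
# 'News\nToday:\tsunny and warm\n'
```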
+
+ def duckduck_search(question):
+     headers = {
+         'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.93 Safari/537.36',
+     }
+     search = DuckDuckGoSearchResults()
+     results = search.run(question)
+     time.sleep(2)
+     content = [results]
+     # Pull the result links out of the tool's text output and fetch up to
+     # three pages, keeping the first 2000 characters of each.
+     links = re.findall(r'link: (https?://[^\s\]]+)', results)
+     count = 0
+     for url in links:
+         response = requests.get(url, headers=headers, timeout=10)
+         if response.status_code == 200:
+             loader = WebBaseLoader(url)
+             docs = loader.load()
+             for doc in docs:
+                 page_text = format_text(doc.page_content)[:2000]
+                 content.append(page_text)
+             count += 1
+             if count >= 3:
+                 break
+     return content
+
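The `link:` extraction above depends on the text format that `DuckDuckGoSearchResults` emits (`[snippet: ..., title: ..., link: ...]`); a standalone check of the regex against a sample in that format (the sample string is illustrative, not real tool output):

```python
import re

sample = ("[snippet: Latest scores and highlights, title: NBA Scores, "
          "link: https://www.espn.com/nba/scoreboard], "
          "[snippet: Current standings, title: NBA Standings, "
          "link: https://www.nba.com/standings]")

# Capture each URL after "link: ", stopping at whitespace or the closing bracket.
links = re.findall(r'link: (https?://[^\s\]]+)', sample)
print(links)
# ['https://www.espn.com/nba/scoreboard', 'https://www.nba.com/standings']
```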
+
+ def judge_search(question, chat_history, llm):
+     prompt_template = """
+     Given a question, you need to judge whether real-time information is needed to answer it.\n
+     If you think you need more real-time information (it may involve today, weather, location, new concepts, names, non-existent concepts, etc.) to answer the question better,
+     output "[search]" with a standalone query. The query must be understandable without the Chat History.\n
+     If you can answer the question from the chat history without real-time information, just output your answer.\n
+     Do not explain your decision process.
+     Output format: "[search]: your query" or your answer.
+     User Question: {question}.
+     Chat History: {chat_history}.
+     """
+     prompt = PromptTemplate.from_template(prompt_template)
+     chain = prompt | llm | StrOutputParser()
+     response = chain.invoke({
+         "chat_history": chat_history,
+         "question": question,
+     })
+     return response
+
+ def generate_based_history_query(question, chat_history, llm):
+     based_history_prompt = """
+     Use the following latest User Question to formulate a standalone query.
+     The query must be understandable without the Chat History.
+     The output should be just the sentence suitable for a query.
+     If you are confused, just output the latest User Question.
+     Do not provide any answer.
+     User Question: '''{question}'''
+     Chat History: '''{chat_history}'''
+     query:
+     """
+     rag_chain = PromptTemplate.from_template(based_history_prompt) | llm | StrOutputParser()
+     result = rag_chain.invoke({
+         "chat_history": chat_history,
+         "question": question,
+     })
+     return result
+
+ def chat_response(question, chat_history, llm, content):
+     try:
+         chatBot_template_prompt = """
+         You are a chat assistant. Please answer User Questions to the best of your ability.
+         If the User Questions are asked in Chinese, then your answers must also be in Chinese.
+         You can use the context of the Chat History to help you understand the user's question.
+         If your understanding conflicts with the Search Context, use the Search Context first to answer the question.
+         If you think the Search Context is not helpful, answer based on your own understanding.
+         If appropriate, include useful links from the Search Context at the end.
+         User Questions: {question}.
+         Chat History: {chat_history}.
+         Search Context: {content}.
+         """
+         prompt = ChatPromptTemplate.from_template(chatBot_template_prompt)
+         chain = prompt | llm | StrOutputParser()
+         result = chain.invoke({
+             "chat_history": chat_history,
+             "question": question,
+             "content": content,
+         })
+         return result
+     except Exception:
+         # "The current model is temporarily unavailable; please try again later."
+         return "当前模型暂不可用,请稍后尝试。"
+
+ if __name__ == "__main__":
+     load_dotenv(find_dotenv())
+     chat_history = []
+     question = input("请输入问题:")
+     while True:
+         llm = QianfanChatEndpoint(model='ERNIE-Lite-8K')
+         judge_result = judge_search(question, chat_history, llm)
+         if '[search]' in judge_result:
+             # Split only on the first colon so colons inside the query survive.
+             query = judge_result.split(":", 1)[1].strip()
+             content = duckduck_search(query)
+             llm = ChatGroq(model_name='mixtral-8x7b-32768', temperature=0.1)
+             response = chat_response(question, chat_history, llm, content)
+         else:
+             response = judge_result
+         chat_history.extend([HumanMessage(content=question), AIMessage(content=response)])
+         question = input("请输入问题:")

- # results = DDGS().text("德国时间", max_results=5)
- # print(results)
- results = DDGS().text("NBA比赛")
- print(results)

test/duck_search.py — 10 additions, 0 deletions

@@ -0,0 +1,10 @@
+ # from langchain_community.tools import DuckDuckGoSearchRun
+ # search = DuckDuckGoSearchRun()
+ # result = search.run("NBA今日赛况")
+ # print(result)
+
+ # from langchain_community.tools import DuckDuckGoSearchResults
+ # search = DuckDuckGoSearchResults()
+ # result = search.run("介绍一下gpt-4o")
+ # print(result)

test/llm_test.py — 23 additions, 0 deletions

@@ -0,0 +1,23 @@
+ import random
+ import time
+ from langchain_groq import ChatGroq
+ from langchain_google_genai import ChatGoogleGenerativeAI
+ from dotenv import find_dotenv, load_dotenv
+
+ # llm = ChatGroq(model_name='gemma-7b-it', temperature=1)
+ # result = llm.invoke("宁诺附中老师")
+ # print(result)
+
+ load_dotenv(find_dotenv())
+ num = 0
+ while True:
+     # Rotate randomly across Gemini variants to probe per-model rate limits.
+     model = random.choice(["gemini-1.5-flash-latest", "gemini-1.0-pro-001", "gemini-1.5-pro-latest", "gemini-1.0-pro"])
+     print(model)
+     llm = ChatGoogleGenerativeAI(model=model, temperature=1)
+     result = llm.invoke("你是谁")
+     print(num)
+     time.sleep(1)
+     num += 1
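The rotation loop above can be generalized into a retry helper that cycles models until one call succeeds. A minimal sketch with a stubbed model call — `invoke_with_rotation`, `fake_call`, and the quota-failure behavior are hypothetical, not part of this commit:

```python
import random

MODELS = ["gemini-1.5-flash-latest", "gemini-1.0-pro-001",
          "gemini-1.5-pro-latest", "gemini-1.0-pro"]

def invoke_with_rotation(question, call_model, models=MODELS):
    # Try each model once, in random order, until one answers.
    for model in random.sample(models, len(models)):
        try:
            return model, call_model(model, question)
        except RuntimeError:
            continue  # e.g. a quota error on this model; try the next one
    raise RuntimeError("all models exhausted")

# Stub: pretend every model except the flash variant is over quota.
def fake_call(model, question):
    if model != "gemini-1.5-flash-latest":
        raise RuntimeError("quota exceeded")
    return "ok"

model, answer = invoke_with_rotation("你是谁", fake_call)
print(model, answer)  # gemini-1.5-flash-latest ok
```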
