This repository was archived by the owner on Jan 5, 2025. It is now read-only.

Commit b0b49eb

Merge pull request #239 from lvalics/main

Ollama LLM and conversational retrieval...

2 parents cc35064 + 6ef2df6

11 files changed (+92, -101 lines)

.gitignore

Lines changed: 2 additions & 0 deletions
@@ -8,3 +8,5 @@ dj_backend_server/nginx/nginx.conf
 dj_backend_server.code-workspace
 .aider*
 .aiderignore
+dj_backend_server/.vscode/settings.json
+

dj_backend_server/.gitignore

Lines changed: 2 additions & 1 deletion
@@ -37,4 +37,5 @@ pip-delete-this-directory.txt
 website_data_sources/*
 venv
 open-llama-7B-open-instruct.ggmlv3.q4_K_M.bin
-llama-2-7b-chat.ggmlv3.q4_K_M.bin
+llama-2-7b-chat.ggmlv3.q4_K_M.bin
+.vscode/

dj_backend_server/CHANGELOG.MD

Lines changed: 4 additions & 0 deletions
@@ -1,3 +1,7 @@
+2.18.2024
+- The conversational retrieval functionality now works as expected: it sends the conversation history to the language model, so context from previous interactions is used effectively.
+- Added support for Ollama as the Language Model (LLM). Ensure Ollama is specified in the .env configuration and the model is preloaded on the server.
+
 2.17.2024
 - Incorporate 'Ollama' into your example.env configuration and make sure to reflect these changes in your .env file for compatibility.
 - We've expanded the logging capabilities within settings.py by deploying logging.debug for more detailed insights, although it remains inactive when DEBUG mode is off.
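The new Ollama path assumes the model has already been pulled on the server. A quick preflight check can catch a missing model early; the sketch below is not part of this commit and assumes a stock Ollama server, whose REST endpoint GET /api/tags lists locally available models, plus the same OLLAMA_URL and OLLAMA_MODEL_NAME variables used elsewhere in this changeset.

import os

import requests


def ollama_model_available() -> bool:
    """Return True if OLLAMA_MODEL_NAME is already pulled on the Ollama server."""
    base_url = os.environ.get("OLLAMA_URL", "http://localhost:11434")
    model = os.environ.get("OLLAMA_MODEL_NAME", "llama2")
    resp = requests.get(f"{base_url}/api/tags", timeout=10)
    resp.raise_for_status()
    names = [m.get("name", "") for m in resp.json().get("models", [])]
    # Models are listed as "name:tag" (e.g. "openchat:latest"), so match the bare name too.
    return any(n == model or n.split(":")[0] == model for n in names)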

dj_backend_server/api/utils/get_embeddings.py

Lines changed: 10 additions & 8 deletions
@@ -18,23 +18,23 @@ def get_azure_embedding():
     deployment = os.environ.get("AZURE_OPENAI_EMBEDDING_MODEL_NAME")
     openai_api_key = os.environ.get("AZURE_OPENAI_API_KEY")
     client = os.environ.get("AZURE_OPENAI_API_TYPE")
-    openai_api_base = os.environ['AZURE_OPENAI_API_BASE']
-    openai_api_version = os.environ['AZURE_OPENAI_API_VERSION']
+    openai_api_base = os.environ["AZURE_OPENAI_API_BASE"]
+    openai_api_version = os.environ["AZURE_OPENAI_API_VERSION"]

     return OpenAIEmbeddings(
         openai_api_key=openai_api_key,
         deployment=deployment,
         client=client,
         chunk_size=8,
         openai_api_base=openai_api_base,
-        openai_api_version=openai_api_version
+        openai_api_version=openai_api_version,
     )


 def get_openai_embedding():
     """Gets embeddings using the OpenAI embedding provider."""
     openai_api_key = os.environ.get("OPENAI_API_KEY")
-    return OpenAIEmbeddings(openai_api_key=openai_api_key, chunk_size=1)
+    return OpenAIEmbeddings(openai_api_key=openai_api_key, chunk_size=1)


 def get_llama2_embedding():

@@ -48,15 +48,17 @@ def choose_embedding_provider():

     if embedding_provider == EmbeddingProvider.azure.value:
         return get_azure_embedding()
-
+
     elif embedding_provider == EmbeddingProvider.OPENAI.value:
         return get_openai_embedding()
-
+
     elif embedding_provider == EmbeddingProvider.llama2.value:
         return get_llama2_embedding()

     else:
-        available_providers = ", ".join([service.value for service in EmbeddingProvider])
+        available_providers = ", ".join(
+            [service.value for service in EmbeddingProvider]
+        )
         raise ValueError(
             f"Embedding service '{embedding_provider}' is not currently available. "
             f"Available services: {available_providers}"

@@ -66,4 +68,4 @@ def choose_embedding_provider():
 # Main function to get embeddings
 def get_embeddings() -> Embeddings:
     """Gets embeddings using the chosen embedding provider."""
-    return choose_embedding_provider()
+    return choose_embedding_provider()
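For reference, provider selection in this module is driven entirely by the EMBEDDING_PROVIDER variable. A minimal usage sketch, assuming the import path mirrors the repo layout and that OPENAI_API_KEY is set; embed_query is the standard LangChain Embeddings method:

import os

from api.utils.get_embeddings import get_embeddings

os.environ.setdefault("EMBEDDING_PROVIDER", "openai")  # azure | openai | llama2

try:
    embeddings = get_embeddings()
    vector = embeddings.embed_query("hello world")
    print(f"embedding dimension: {len(vector)}")
except ValueError as err:
    # Raised by choose_embedding_provider() with the list of available services.
    print(err)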

dj_backend_server/api/utils/get_openai_llm.py

Lines changed: 23 additions & 39 deletions
@@ -5,15 +5,11 @@
 from django.utils.timezone import make_aware
 from datetime import datetime, timezone
 from uuid import uuid4
-from ollama import Client
-from openai import OpenAI
 from django.conf import settings
 from langchain_openai.chat_models import ChatOpenAI
-from langchain_community.llms import Ollama
+from langchain_community.chat_models import ChatOllama
 from langchain_community.llms import AzureOpenAI
 from langchain_community.llms import LlamaCpp
-from langchain.prompts import PromptTemplate
-from langchain.chains import LLMChain
 from langchain.callbacks.manager import CallbackManager
 from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
 from web.models.failed_jobs import FailedJob

@@ -62,12 +58,7 @@ def get_llama_llm():
 def get_azure_openai_llm():
     """Returns AzureOpenAI instance configured from environment variables"""
     try:
-        if settings.DEBUG:
-            openai_api_type = "openai"  # JUST FOR DEVELOPMENT
-            logging.debug(f"DEVELOPMENT Using API Type: {openai_api_type}")
-        else:
-            openai_api_type = os.environ["AZURE_OPENAI_API_TYPE"]
-
+        openai_api_type = os.environ["AZURE_OPENAI_API_TYPE"]
         openai_api_key = os.environ["AZURE_OPENAI_API_KEY"]
         openai_deployment_name = os.environ["AZURE_OPENAI_DEPLOYMENT_NAME"]
         openai_model_name = os.environ["AZURE_OPENAI_COMPLETION_MODEL"]

@@ -134,30 +125,26 @@ def get_openai_llm():
         traceback.print_exc()


-def get_ollama_llm(sanitized_question):
-    """Returns an Ollama Server instance configured from environment variables"""
-    llm = Client(host=os.environ.get("OLLAMA_URL"))
-    # Use the client to make a request
+def get_ollama_llm():
+    """Returns an Ollama instance configured from environment variables"""
     try:
-        if sanitized_question:
-            response = llm.chat(
-                model=os.environ.get("OLLAMA_MODEL_NAME"),
-                messages=[{"role": "user", "content": sanitized_question}],
-            )
-        else:
-            raise ValueError("Question cannot be None.")
-        if response:
-            return response
-        else:
-            raise ValueError("Invalid response from Ollama.")
+        base_url = os.environ.get("OLLAMA_URL")
+        model = os.environ.get("OLLAMA_MODEL_NAME", "llama2")
+
+        llm = ChatOllama(
+            base_url=base_url,
+            model=model,
+            callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]),
+        )
+        return llm

     except Exception as e:
         logger.debug(f"Exception in get_ollama_llm: {e}")
         failed_job = FailedJob(
             uuid=str(uuid4()),
             connection="default",
             queue="default",
-            payload="get_openai_llm",
+            payload="get_ollama_llm",
             exception=str(e),
             failed_at=make_aware(datetime.now(), timezone.utc),
         )

@@ -176,29 +163,26 @@ def get_llm():
             "ollama": lambda: get_ollama_llm(),
         }

+        # DEVENV
+        # if settings.DEBUG:
+        #     api_type = "ollama"
         api_type = os.environ.get("OPENAI_API_TYPE", "openai")
+
         if api_type not in clients:
             raise ValueError(f"Invalid OPENAI_API_TYPE: {api_type}")

         logging.debug(f"Using LLM: {api_type}")

         if api_type in clients:
-            if api_type == "ollama":
-                return clients[api_type]()
-            elif api_type != "ollama":
-                return clients[api_type]()
+            llm_instance = clients[api_type]()
+            if llm_instance is None:
+                logger.error(f"LLM instance for {api_type} could not be created.")
+                return None
+            return llm_instance
         else:
             raise ValueError(f"Invalid OPENAI_API_TYPE: {api_type}")

     except Exception as e:
-        failed_job = FailedJob(
-            uuid=str(uuid4()),
-            connection="default",
-            queue="default",
-            payload="get_llm",
-            exception=str(e),
-            failed_at=datetime.now(),
-        )
         failed_job = FailedJob(
             uuid=str(uuid4()),
             connection="default",
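The net effect of this refactor: get_ollama_llm no longer performs a one-shot Client.chat call but returns a reusable LangChain chat model that the retrieval chains can drive. A standalone sketch of the configured object, assuming a local Ollama server and the langchain_community version pinned in requirements.txt:

import os

from langchain_community.chat_models import ChatOllama
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler

llm = ChatOllama(
    base_url=os.environ.get("OLLAMA_URL", "http://localhost:11434"),
    model=os.environ.get("OLLAMA_MODEL_NAME", "llama2"),
    callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]),
)

# Unlike the old Client.chat() call, the model object is reusable and can be
# handed to a chain; tokens stream to stdout as the model generates them.
print(llm.invoke("Say hello in one short sentence.").content)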

dj_backend_server/api/utils/make_chain.py

Lines changed: 1 addition & 0 deletions
@@ -98,6 +98,7 @@ def getConversationRetrievalChain(
         retriever=vector_store.as_retriever(),
         verbose=True,
         combine_docs_chain_kwargs={"prompt": prompt},
+        return_source_documents=True,
     )
     logger.debug(f"ConversationalRetrievalChain {llm}, created: {chain}")
     return chain
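With return_source_documents=True, the chain's output dict carries the retrieved documents alongside the answer, so callers can see which chunks grounded the response. A hypothetical invocation (the question is illustrative; the keys follow ConversationalRetrievalChain's documented interface):

result = chain.invoke({"question": "What does the product do?", "chat_history": []})
print(result["answer"])
for doc in result["source_documents"]:
    # Each entry is a LangChain Document carrying the original chunk and its metadata.
    print(doc.metadata.get("source"), "-", doc.page_content[:80])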

dj_backend_server/api/views/views_chat.py

Lines changed: 4 additions & 10 deletions
@@ -165,21 +165,15 @@ def get_completion_response(
     elif chain_type == "conversation_retrieval":
         chain = getConversationRetrievalChain(vector_store, mode, initial_prompt)
         logger.debug("getConversationRetrievalChain")
-        chat_history_json = json.dumps(
-            get_chat_history_for_retrieval_chain(
-                session_id, limit=20, initial_prompt=initial_prompt
-            ),
-            ensure_ascii=False,
+        chat_history = get_chat_history_for_retrieval_chain(
+            session_id, limit=20, initial_prompt=initial_prompt
         )
-        chat_history_json = ""
-        logger.debug(f"Formatted Chat_history {chat_history_json}")
+        logger.debug(f"Formatted Chat_history {chat_history}")

         response = chain.invoke(
-            {"question": sanitized_question, "chat_history": chat_history_json}
+            {"question": sanitized_question, "chat_history": chat_history},
         )
-        logger.debug(f"response from chain.invoke: {response}")
         response_text = response.get("answer")
-        logger.debug(f"response_text : {response_text}")
         try:
             # Attempt to parse the response_text as JSON
             response_text = json.loads(response_text)
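The fix above stops serializing the history to JSON (and then accidentally blanking the variable) and passes the list straight through. ConversationalRetrievalChain expects chat_history as a sequence of (human, ai) message pairs, which is presumably what get_chat_history_for_retrieval_chain now returns; an illustrative shape:

# Hypothetical history in the pair format the chain consumes.
chat_history = [
    ("What models do you support?", "OpenAI, Azure OpenAI, LlamaCpp and Ollama."),
    ("Which one runs locally?", "Ollama serves models on your own server."),
]

response = chain.invoke({"question": "How do I enable it?", "chat_history": chat_history})
print(response["answer"])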

dj_backend_server/api/views/views_message.py

Lines changed: 13 additions & 19 deletions
@@ -170,26 +170,23 @@ def send_chat(request):
     """
     try:

-        if settings.DEBUG:
-            logger.debug("Entering send_chat function")
+        logger.debug("Entering send_chat function")
         # You can add additional validation for 'history' and 'content_type' if needed.

         bot_token = request.headers.get("X-Bot-Token")
         bot = get_object_or_404(Chatbot, token=bot_token)

         data = json.loads(request.body)
-        if settings.DEBUG:
-            logger.debug(
-                f"Request data: {data}"
-            )  # {'from': 'user', 'type': 'text', 'content': 'input text from chat'}
+        logger.debug(
+            f"Request data: {data}"
+        )  # {'from': 'user', 'type': 'text', 'content': 'input text from chat'}
         # Validate the request data
         content = data.get("content")
         history = data.get("history")
-        if settings.DEBUG:
-            logger.debug(f"Content: {content}")
-            logger.debug(
-                f"History: {history}"
-            )  # history is a list of chat history - None????
+        logger.debug(f"Content: {content}")
+        logger.debug(
+            f"History: {history}"
+        )  # history is a list of chat history - None????
         content_type = data.get("type")

         session_id = get_session_id(request=request, bot_id=bot.id)

@@ -198,10 +195,9 @@ def send_chat(request):
             {"message": entry.message, "from_user": entry.from_user}
             for entry in history
         ]
-        if settings.DEBUG:
-            logger.debug(
-                f"History entries in JSON: {history_entries} - and history in text from DB: {history}"
-            )
+        logger.debug(
+            f"History entries in JSON: {history_entries} - and history in text from DB: {history}"
+        )

         # Implement the equivalent logic for validation
         if not content:

@@ -211,8 +207,7 @@ def send_chat(request):
             )

         # Implement the equivalent logic to send the HTTP request to the external API
-        if settings.DEBUG:
-            logger.debug(f"External API response START")
+        logger.debug(f"External API response START")
         response = requests.post(
             os.getenv("APP_URL") + "/api/chat/",
             json={

@@ -226,8 +221,7 @@ def send_chat(request):
             },
             timeout=200,
         )
-        if settings.DEBUG:
-            logger.debug(f"External API response: {response.text} and {response}")
+        logger.debug(f"External API response: {response.text} and {response}")

         """
         This block will first check if the response content is not empty. If it is empty,
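Dropping the `if settings.DEBUG:` wrappers is safe because Python's logging module already gates output by level: when the configured level is above DEBUG, logger.debug(...) is a no-op. A small sketch of that behavior (note the f-strings are still evaluated either way, so very hot paths may prefer lazy %-style arguments):

import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

logger.debug("Entering send_chat function")   # suppressed at INFO level

logging.getLogger().setLevel(logging.DEBUG)
logger.debug("Entering send_chat function")   # now emitted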

dj_backend_server/example.env

Lines changed: 4 additions & 7 deletions
@@ -19,7 +19,7 @@ OPENAI_API_TYPE=openai
 OPENAI_API_MODEL=gpt-4-1106-preview
 OPENAI_API_TEMPERATURE=1

-# azure | openai | llama2 | ollama
+# azure | openai | llama2 - change only if you know what you are doing
 EMBEDDING_PROVIDER=openai

 # If using azure

@@ -30,22 +30,20 @@ EMBEDDING_PROVIDER=openai
 # AZURE_OPENAI_DEPLOYMENT_NAME=
 # AZURE_OPENAI_COMPLETION_MODEL=gpt-35-turbo

-
+# OLLAMA_URL="" # no trailing slash at the end or it will not work
+# OLLAMA_MODEL_NAME="" # e.g. openchat, llama2 - make sure the model is downloaded on the server: "ollama pull openchat"

 # Vector Store, PINECONE|QDRANT
 STORE=QDRANT

-
 # if using pinecone
 # PINECONE_API_KEY=
 # PINECONE_ENV=
 # VECTOR_STORE_INDEX_NAME=

-
 # if using qdrant
 QDRANT_URL=http://qdrant:6333

-
 # optional, defaults to 15
 MAX_PAGES_CRAWL=150

@@ -73,5 +71,4 @@ OCR_LLM = '1'

 # retrieval_qa | conversation_retrieval, retrieval_qa works better with azure openai
 # if you want to use the conversation_retrieval | retrieval_qa chain
-CHAIN_TYPE=conversation_retrieval
-
+CHAIN_TYPE=conversation_retrieval
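Since the OLLAMA_URL comment warns that a trailing slash breaks requests, a defensive normalization at read time (hypothetical, not in this commit) would make the setting forgiving:

import os

# rstrip("/") tolerates both "http://host:11434" and "http://host:11434/".
base_url = os.environ.get("OLLAMA_URL", "http://localhost:11434").rstrip("/")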

dj_backend_server/requirements.txt

Lines changed: 6 additions & 1 deletion
@@ -25,6 +25,9 @@ drf-spectacular==0.27.1
 drf_spectacular.extensions==0.0.2
 exceptiongroup==1.1.2
 frozenlist==1.4.0
+filelock==3.13.1
+fsspec==2024.2.0
+huggingface-hub==0.20.3
 grpcio==1.56.2
 grpcio-tools==1.56.2
 h11==0.14.0

@@ -71,6 +74,7 @@ qdrant-client==1.7.0
 redis==4.6.0
 regex==2023.6.3
 requests==2.31.0
+safetensors==0.4.2
 six==1.16.0
 sniffio==1.3.0
 soupsieve==2.4.1

@@ -79,6 +83,8 @@ sqlparse==0.4.4
 tenacity==8.2.2
 tiktoken==0.6.0
 tqdm==4.65.0
+tokenizers==0.15.2
+transformers==4.37.2
 typing-inspect==0.9.0
 typing_extensions==4.7.1
 tzdata==2023.3

@@ -88,4 +94,3 @@ wcwidth==0.2.6
 yarl==1.9.2
 django-cors-headers==4.3.1

-
