From d789f1bd315e21d9ae5fe89bc873e7f804638826 Mon Sep 17 00:00:00 2001 From: azambrano Date: Wed, 26 Nov 2025 13:53:02 +0100 Subject: [PATCH] RAG build parallelization, Mongo connection, alternative UI and knowledge domain management --- .env.example | 20 + .gitignore | 13 + Dockerfile | 37 + README.md | 534 ++++++++++-- document_manager_ui.py | 777 ++++++++++++++++++ energy_bench.py | 9 +- kb_creator.py | 12 +- kb_creator_Europe.py | 8 +- kb_creator_Italy.py | 8 +- kb_creator_Switzerland.py | 8 +- .../data_level0.bin | Bin .../header.bin | Bin .../length.bin | Bin .../link_lists.bin | 0 .../data_level0.bin | Bin .../header.bin | Bin .../length.bin | Bin .../link_lists.bin | 0 .../data_level0.bin | Bin .../header.bin | Bin .../length.bin | Bin .../link_lists.bin | 0 .../files_Europe/chroma_db/chroma.sqlite3 | Bin .../data_level0.bin | Bin .../header.bin | Bin .../length.bin | Bin .../link_lists.bin | 0 .../files_Europe/graph_documents.joblib | Bin .../files_Europe/preprocessed_chunks.joblib | Bin .../{ => kbs}/files_Europe/raw_docs.joblib | Bin .../{ => kbs}/files_Europe/rdf_graph.ttl | 0 .../data_level0.bin | Bin .../header.bin | Bin .../index_metadata.pickle | Bin .../length.bin | Bin .../link_lists.bin | Bin .../data_level0.bin | Bin .../header.bin | Bin .../length.bin | Bin .../link_lists.bin | 0 .../data_level0.bin | Bin .../header.bin | Bin .../length.bin | Bin .../link_lists.bin | 0 .../data_level0.bin | Bin .../header.bin | Bin .../length.bin | Bin .../link_lists.bin | 0 .../files_Generic/chroma_db/chroma.sqlite3 | Bin .../files_Generic/graph_documents.joblib | Bin .../files_Generic/preprocessed_chunks.joblib | Bin .../{ => kbs}/files_Generic/raw_docs.joblib | Bin .../{ => kbs}/files_Generic/rdf_graph.ttl | 0 .../data_level0.bin | Bin .../header.bin | Bin .../index_metadata.pickle | Bin .../length.bin | Bin .../link_lists.bin | Bin .../data_level0.bin | Bin .../header.bin | Bin .../index_metadata.pickle | Bin .../length.bin | Bin .../link_lists.bin | Bin .../data_level0.bin | Bin .../header.bin | Bin .../index_metadata.pickle | Bin .../length.bin | Bin .../link_lists.bin | Bin .../files_Italy/chroma_db/chroma.sqlite3 | Bin .../data_level0.bin | Bin .../header.bin | Bin .../length.bin | Bin .../link_lists.bin | 0 .../files_Italy/graph_documents.joblib | Bin .../files_Italy/preprocessed_chunks.joblib | Bin .../{ => kbs}/files_Italy/raw_docs.joblib | Bin .../{ => kbs}/files_Italy/rdf_graph.ttl | 0 .../data_level0.bin | Bin .../header.bin | Bin .../index_metadata.pickle | Bin .../length.bin | Bin .../link_lists.bin | Bin .../data_level0.bin | Bin .../header.bin | Bin .../length.bin | Bin .../link_lists.bin | 0 .../chroma_db/chroma.sqlite3 | Bin .../data_level0.bin | Bin .../header.bin | Bin .../length.bin | Bin .../link_lists.bin | 0 .../data_level0.bin | Bin .../header.bin | Bin .../length.bin | Bin .../link_lists.bin | 0 .../files_Switzerland/graph_documents.joblib | Bin .../preprocessed_chunks.joblib | Bin .../files_Switzerland/raw_docs.joblib | Bin .../{ => kbs}/files_Switzerland/rdf_graph.ttl | 0 knowledge_base/knowledge_extractor.py | 539 +++++++++--- knowledge_base/knowledge_manager.py | 22 +- knowledge_base/utils/graph_helpers.py | 1 - llm/__init__.py | 2 +- llm/langchain.py | 101 ++- requirements.txt | 3 + run_document_manager.sh | 3 + src/__init__.py | 3 + src/chat_manager.py | 70 ++ src/database.py | 32 + src/document_manager.py | 143 ++++ src/domain_manager.py | 34 + src/kb_builder.py | 428 ++++++++++ src/orchestrator_manager.py | 34 + src/utils.py | 8 + streamlit_ui.py | 
42 +- 115 files changed, 2701 insertions(+), 190 deletions(-) create mode 100644 .env.example create mode 100644 Dockerfile create mode 100644 document_manager_ui.py rename knowledge_base/{ => kbs}/files_Europe/chroma_db/35d3a63a-a234-48ab-bda8-430f4bd7c0f7/data_level0.bin (100%) rename knowledge_base/{ => kbs}/files_Europe/chroma_db/35d3a63a-a234-48ab-bda8-430f4bd7c0f7/header.bin (100%) rename knowledge_base/{ => kbs}/files_Europe/chroma_db/35d3a63a-a234-48ab-bda8-430f4bd7c0f7/length.bin (100%) rename knowledge_base/{ => kbs}/files_Europe/chroma_db/35d3a63a-a234-48ab-bda8-430f4bd7c0f7/link_lists.bin (100%) rename knowledge_base/{ => kbs}/files_Europe/chroma_db/52d84313-b357-4a79-8d9e-978cf10dd750/data_level0.bin (100%) rename knowledge_base/{ => kbs}/files_Europe/chroma_db/52d84313-b357-4a79-8d9e-978cf10dd750/header.bin (100%) rename knowledge_base/{ => kbs}/files_Europe/chroma_db/52d84313-b357-4a79-8d9e-978cf10dd750/length.bin (100%) rename knowledge_base/{ => kbs}/files_Europe/chroma_db/52d84313-b357-4a79-8d9e-978cf10dd750/link_lists.bin (100%) rename knowledge_base/{ => kbs}/files_Europe/chroma_db/59f240be-98ad-4310-bd23-aa8c690b72e2/data_level0.bin (100%) rename knowledge_base/{ => kbs}/files_Europe/chroma_db/59f240be-98ad-4310-bd23-aa8c690b72e2/header.bin (100%) rename knowledge_base/{ => kbs}/files_Europe/chroma_db/59f240be-98ad-4310-bd23-aa8c690b72e2/length.bin (100%) rename knowledge_base/{ => kbs}/files_Europe/chroma_db/59f240be-98ad-4310-bd23-aa8c690b72e2/link_lists.bin (100%) rename knowledge_base/{ => kbs}/files_Europe/chroma_db/chroma.sqlite3 (100%) rename knowledge_base/{ => kbs}/files_Europe/chroma_db/f3c723d9-c9da-4bab-8e7f-f22c845a9c85/data_level0.bin (100%) rename knowledge_base/{ => kbs}/files_Europe/chroma_db/f3c723d9-c9da-4bab-8e7f-f22c845a9c85/header.bin (100%) rename knowledge_base/{ => kbs}/files_Europe/chroma_db/f3c723d9-c9da-4bab-8e7f-f22c845a9c85/length.bin (100%) rename knowledge_base/{ => kbs}/files_Europe/chroma_db/f3c723d9-c9da-4bab-8e7f-f22c845a9c85/link_lists.bin (100%) rename knowledge_base/{ => kbs}/files_Europe/graph_documents.joblib (100%) rename knowledge_base/{ => kbs}/files_Europe/preprocessed_chunks.joblib (100%) rename knowledge_base/{ => kbs}/files_Europe/raw_docs.joblib (100%) rename knowledge_base/{ => kbs}/files_Europe/rdf_graph.ttl (100%) rename knowledge_base/{ => kbs}/files_Generic/chroma_db/5c997f27-f387-4015-a27c-27b8cf5ea877/data_level0.bin (100%) rename knowledge_base/{ => kbs}/files_Generic/chroma_db/5c997f27-f387-4015-a27c-27b8cf5ea877/header.bin (100%) rename knowledge_base/{ => kbs}/files_Generic/chroma_db/5c997f27-f387-4015-a27c-27b8cf5ea877/index_metadata.pickle (100%) rename knowledge_base/{ => kbs}/files_Generic/chroma_db/5c997f27-f387-4015-a27c-27b8cf5ea877/length.bin (100%) rename knowledge_base/{ => kbs}/files_Generic/chroma_db/5c997f27-f387-4015-a27c-27b8cf5ea877/link_lists.bin (100%) rename knowledge_base/{ => kbs}/files_Generic/chroma_db/72e31084-777b-4f13-b5ec-59a858438f8a/data_level0.bin (100%) rename knowledge_base/{ => kbs}/files_Generic/chroma_db/72e31084-777b-4f13-b5ec-59a858438f8a/header.bin (100%) rename knowledge_base/{ => kbs}/files_Generic/chroma_db/72e31084-777b-4f13-b5ec-59a858438f8a/length.bin (100%) rename knowledge_base/{ => kbs}/files_Generic/chroma_db/72e31084-777b-4f13-b5ec-59a858438f8a/link_lists.bin (100%) rename knowledge_base/{ => kbs}/files_Generic/chroma_db/a034a1de-d422-487f-8a96-2fb9ff1f64a4/data_level0.bin (100%) rename knowledge_base/{ => 
kbs}/files_Generic/chroma_db/a034a1de-d422-487f-8a96-2fb9ff1f64a4/header.bin (100%) rename knowledge_base/{ => kbs}/files_Generic/chroma_db/a034a1de-d422-487f-8a96-2fb9ff1f64a4/length.bin (100%) rename knowledge_base/{ => kbs}/files_Generic/chroma_db/a034a1de-d422-487f-8a96-2fb9ff1f64a4/link_lists.bin (100%) rename knowledge_base/{ => kbs}/files_Generic/chroma_db/af18c57f-035c-4ea1-bb05-e707a5567325/data_level0.bin (100%) rename knowledge_base/{ => kbs}/files_Generic/chroma_db/af18c57f-035c-4ea1-bb05-e707a5567325/header.bin (100%) rename knowledge_base/{ => kbs}/files_Generic/chroma_db/af18c57f-035c-4ea1-bb05-e707a5567325/length.bin (100%) rename knowledge_base/{ => kbs}/files_Generic/chroma_db/af18c57f-035c-4ea1-bb05-e707a5567325/link_lists.bin (100%) rename knowledge_base/{ => kbs}/files_Generic/chroma_db/chroma.sqlite3 (100%) rename knowledge_base/{ => kbs}/files_Generic/graph_documents.joblib (100%) rename knowledge_base/{ => kbs}/files_Generic/preprocessed_chunks.joblib (100%) rename knowledge_base/{ => kbs}/files_Generic/raw_docs.joblib (100%) rename knowledge_base/{ => kbs}/files_Generic/rdf_graph.ttl (100%) rename knowledge_base/{ => kbs}/files_Italy/chroma_db/125b5628-3aad-4821-9cf0-4d625fde6b6d/data_level0.bin (100%) rename knowledge_base/{ => kbs}/files_Italy/chroma_db/125b5628-3aad-4821-9cf0-4d625fde6b6d/header.bin (100%) rename knowledge_base/{ => kbs}/files_Italy/chroma_db/125b5628-3aad-4821-9cf0-4d625fde6b6d/index_metadata.pickle (100%) rename knowledge_base/{ => kbs}/files_Italy/chroma_db/125b5628-3aad-4821-9cf0-4d625fde6b6d/length.bin (100%) rename knowledge_base/{ => kbs}/files_Italy/chroma_db/125b5628-3aad-4821-9cf0-4d625fde6b6d/link_lists.bin (100%) rename knowledge_base/{ => kbs}/files_Italy/chroma_db/313a7e81-1680-4717-b906-a80ff3f06b3d/data_level0.bin (100%) rename knowledge_base/{ => kbs}/files_Italy/chroma_db/313a7e81-1680-4717-b906-a80ff3f06b3d/header.bin (100%) rename knowledge_base/{ => kbs}/files_Italy/chroma_db/313a7e81-1680-4717-b906-a80ff3f06b3d/index_metadata.pickle (100%) rename knowledge_base/{ => kbs}/files_Italy/chroma_db/313a7e81-1680-4717-b906-a80ff3f06b3d/length.bin (100%) rename knowledge_base/{ => kbs}/files_Italy/chroma_db/313a7e81-1680-4717-b906-a80ff3f06b3d/link_lists.bin (100%) rename knowledge_base/{ => kbs}/files_Italy/chroma_db/38827c57-f211-4721-b00d-bca74efe69ac/data_level0.bin (100%) rename knowledge_base/{ => kbs}/files_Italy/chroma_db/38827c57-f211-4721-b00d-bca74efe69ac/header.bin (100%) rename knowledge_base/{ => kbs}/files_Italy/chroma_db/38827c57-f211-4721-b00d-bca74efe69ac/index_metadata.pickle (100%) rename knowledge_base/{ => kbs}/files_Italy/chroma_db/38827c57-f211-4721-b00d-bca74efe69ac/length.bin (100%) rename knowledge_base/{ => kbs}/files_Italy/chroma_db/38827c57-f211-4721-b00d-bca74efe69ac/link_lists.bin (100%) rename knowledge_base/{ => kbs}/files_Italy/chroma_db/chroma.sqlite3 (100%) rename knowledge_base/{ => kbs}/files_Italy/chroma_db/eb1d395e-7e32-42ec-907d-732e365f958d/data_level0.bin (100%) rename knowledge_base/{ => kbs}/files_Italy/chroma_db/eb1d395e-7e32-42ec-907d-732e365f958d/header.bin (100%) rename knowledge_base/{ => kbs}/files_Italy/chroma_db/eb1d395e-7e32-42ec-907d-732e365f958d/length.bin (100%) rename knowledge_base/{ => kbs}/files_Italy/chroma_db/eb1d395e-7e32-42ec-907d-732e365f958d/link_lists.bin (100%) rename knowledge_base/{ => kbs}/files_Italy/graph_documents.joblib (100%) rename knowledge_base/{ => kbs}/files_Italy/preprocessed_chunks.joblib (100%) rename knowledge_base/{ => 
kbs}/files_Italy/raw_docs.joblib (100%) rename knowledge_base/{ => kbs}/files_Italy/rdf_graph.ttl (100%) rename knowledge_base/{ => kbs}/files_Switzerland/chroma_db/0f03843a-fd43-4d27-8b2d-5a2bd81df494/data_level0.bin (100%) rename knowledge_base/{ => kbs}/files_Switzerland/chroma_db/0f03843a-fd43-4d27-8b2d-5a2bd81df494/header.bin (100%) rename knowledge_base/{ => kbs}/files_Switzerland/chroma_db/0f03843a-fd43-4d27-8b2d-5a2bd81df494/index_metadata.pickle (100%) rename knowledge_base/{ => kbs}/files_Switzerland/chroma_db/0f03843a-fd43-4d27-8b2d-5a2bd81df494/length.bin (100%) rename knowledge_base/{ => kbs}/files_Switzerland/chroma_db/0f03843a-fd43-4d27-8b2d-5a2bd81df494/link_lists.bin (100%) rename knowledge_base/{ => kbs}/files_Switzerland/chroma_db/984bbfa9-2b07-4d94-a385-ace07e7e2ebd/data_level0.bin (100%) rename knowledge_base/{ => kbs}/files_Switzerland/chroma_db/984bbfa9-2b07-4d94-a385-ace07e7e2ebd/header.bin (100%) rename knowledge_base/{ => kbs}/files_Switzerland/chroma_db/984bbfa9-2b07-4d94-a385-ace07e7e2ebd/length.bin (100%) rename knowledge_base/{ => kbs}/files_Switzerland/chroma_db/984bbfa9-2b07-4d94-a385-ace07e7e2ebd/link_lists.bin (100%) rename knowledge_base/{ => kbs}/files_Switzerland/chroma_db/chroma.sqlite3 (100%) rename knowledge_base/{ => kbs}/files_Switzerland/chroma_db/e2420058-0eed-403c-970c-9ca2d361f757/data_level0.bin (100%) rename knowledge_base/{ => kbs}/files_Switzerland/chroma_db/e2420058-0eed-403c-970c-9ca2d361f757/header.bin (100%) rename knowledge_base/{ => kbs}/files_Switzerland/chroma_db/e2420058-0eed-403c-970c-9ca2d361f757/length.bin (100%) rename knowledge_base/{ => kbs}/files_Switzerland/chroma_db/e2420058-0eed-403c-970c-9ca2d361f757/link_lists.bin (100%) rename knowledge_base/{ => kbs}/files_Switzerland/chroma_db/e9ff614d-0eb3-4162-8ebf-5d9f8e021cab/data_level0.bin (100%) rename knowledge_base/{ => kbs}/files_Switzerland/chroma_db/e9ff614d-0eb3-4162-8ebf-5d9f8e021cab/header.bin (100%) rename knowledge_base/{ => kbs}/files_Switzerland/chroma_db/e9ff614d-0eb3-4162-8ebf-5d9f8e021cab/length.bin (100%) rename knowledge_base/{ => kbs}/files_Switzerland/chroma_db/e9ff614d-0eb3-4162-8ebf-5d9f8e021cab/link_lists.bin (100%) rename knowledge_base/{ => kbs}/files_Switzerland/graph_documents.joblib (100%) rename knowledge_base/{ => kbs}/files_Switzerland/preprocessed_chunks.joblib (100%) rename knowledge_base/{ => kbs}/files_Switzerland/raw_docs.joblib (100%) rename knowledge_base/{ => kbs}/files_Switzerland/rdf_graph.ttl (100%) create mode 100755 run_document_manager.sh create mode 100644 src/__init__.py create mode 100644 src/chat_manager.py create mode 100644 src/database.py create mode 100644 src/document_manager.py create mode 100644 src/domain_manager.py create mode 100644 src/kb_builder.py create mode 100644 src/orchestrator_manager.py create mode 100644 src/utils.py diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..fca89a7 --- /dev/null +++ b/.env.example @@ -0,0 +1,20 @@ +# LLM Configuration +# Set to "true" for local LLM (e.g., Ollama), "false" for remote LLM (e.g., OpenAI) +LLM_LOCAL=false + +# Base URL for local LLM API (only used when LLM_LOCAL=true) +LLM_BASE_URL=http://llmserver:11434/ + +# API Keys for different LLM providers +OPENAI_API_KEY=your-openai-api-key-here +OLLAMA_API_KEY= +ANTHROPIC_API_KEY= +DEEPSEEK_API_KEY= + +# MongoDB Configuration +MONGODB_URI=mongodb://localhost:27017/ +MONGODB_DATABASE=energenius +MONGODB_COLLECTION=documents +# Application Settings +STREAMLIT_SERVER_PORT=8501 
+STREAMLIT_SERVER_ADDRESS=0.0.0.0 diff --git a/.gitignore b/.gitignore index e5bb792..2298f5c 100644 --- a/.gitignore +++ b/.gitignore @@ -176,5 +176,18 @@ private_settings.py # Mac .DS_Store +# Temporary files +*.tmp +*.bak +*.log # Traing data /knowledge_base/files/ + + +*/.streamlit/ +*/.vscode/ +*/prompt.txt + +# Project-specific CI/CD and analysis files +sonar-project.properties +.gitlab-ci.yml \ No newline at end of file diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..9c6bbf4 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,37 @@ +FROM python:3.13-slim + +# Set working directory +WORKDIR /app + +# Install system dependencies +RUN apt-get update && apt-get install -y \ + build-essential \ + curl \ + && rm -rf /var/lib/apt/lists/* + +# Copy requirements files +COPY requirements.txt /app/ + +# Install Python dependencies +RUN pip install --no-cache-dir -r requirements.txt + +# Copy application code +COPY . /app/ + +# Create directories for knowledge base storage +RUN mkdir -p /app/knowledge_base/temp_pdfs + +# Expose Streamlit default port +EXPOSE 8501 + +# Set environment variables +ENV PYTHONUNBUFFERED=1 +ENV STREAMLIT_SERVER_PORT=8501 +ENV STREAMLIT_SERVER_ADDRESS=0.0.0.0 + +# Health check +HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \ + CMD curl --fail http://localhost:8501/_stcore/health || exit 1 + +# Run the Streamlit app +CMD ["streamlit", "run", "document_manager_ui.py", "--server.port=8501", "--server.address=0.0.0.0"] diff --git a/README.md b/README.md index e6cc8f2..bac0408 100644 --- a/README.md +++ b/README.md @@ -1,89 +1,511 @@ -# EnergeniusRAG +# EnergeniusRAG Platform -# Docs and Diagram +[![Python Version](https://img.shields.io/badge/python-3.13-blue.svg)](https://www.python.org/downloads/) +[![Streamlit](https://img.shields.io/badge/streamlit-1.45.0-FF4B4B.svg)](https://streamlit.io) +[![License](https://img.shields.io/badge/license-MIT-green.svg)](LICENSE) -The folder `docs` is prepared to include relevant documentation and diagrams for the project. +A comprehensive Retrieval-Augmented Generation (RAG) platform for energy efficiency consultation. EnergeniusRAG combines a document management system with an AI-powered conversational agent (GURU) to provide personalized energy efficiency recommendations. -Right now it contains the current _architecture diagram_. +## πŸ“‹ Table of Contents -## Setup +- [Overview](#overview) +- [Features](#features) +- [Architecture](#architecture) +- [Installation](#installation) +- [Configuration](#configuration) +- [Usage](#usage) +- [Knowledge Base Creation](#knowledge-base-creation) +- [API & Integration](#api--integration) +- [Development](#development) +- [Docker Deployment](#docker-deployment) +- [Troubleshooting](#troubleshooting) +- [Citation](#citation) +- [License](#license) -Install [conda](https://docs.conda.io/projects/conda/en/23.1.x/user-guide/install/) and [homebrew](https://brew.sh/) if needed. +## 🎯 Overview -_Note to self_: conda is only needed to use the same version of python as the server. Is that necessary? +GURU is an advanced platform designed to enhance human-AI collaboration for energy efficiency consulting. 
It combines: -create a conda environment using +- **Document Management**: Upload, organize, and manage energy-related documents (PDFs, URLs) by domain +- **Knowledge Base Creation**: Automatically build vector databases from documents using RAG techniques +- **AI Conversational Agent (GURU)**: Chat with an intelligent assistant powered by LLMs and your knowledge base +- **Multi-Provider Support**: Works with both OpenAI and local Ollama models +- **Domain-Based Organization**: Separate knowledge bases for different regions or topics (Europe, Italy, Switzerland, etc.) -```shell -conda create -n energenius python=3.13 +## ✨ Features + +### Document Manager +- πŸ“€ **Upload PDFs and URLs**: Add documents from multiple sources +- πŸ—‚οΈ **Domain Management**: Organize documents into logical domains (regions, topics) +- 🏷️ **Tagging System**: Tag and categorize documents for easy retrieval +- πŸ” **Search & Filter**: Find documents by name, type, or tags +- πŸ‘οΈ **PDF Preview**: In-browser PDF viewer +- ⚠️ **Duplicate Detection**: Prevents uploading duplicate files (by name or content) +- πŸ“Š **Statistics Dashboard**: View document counts and domain statistics + +### Knowledge Base Builder +- 🧠 **Automated KB Creation**: Build vector databases from domain documents +- ⚑ **Parallel Processing**: Optional multi-threaded processing for faster KB creation +- πŸ”„ **Rebuild & Update**: Refresh knowledge bases when documents change +- πŸ“ˆ **Progress Tracking**: Real-time logs and status updates +- 🎯 **Multi-Provider Support**: Compatible with OpenAI and Ollama embeddings +- πŸ’Ύ **Persistent Storage**: Knowledge bases stored in ChromaDB with RDF graph support + +### Energenius GURU (Chat Interface) +- πŸ’¬ **Conversational AI**: Natural language interaction with energy efficiency expert +- 🌐 **Multi-Language Support**: English and Spanish +- πŸ”§ **Configurable Models**: Choose from various LLM models and embeddings +- 🎚️ **Temperature Control**: Adjust response creativity +- πŸ“š **Knowledge-Enhanced Responses**: Leverage domain-specific knowledge bases +- πŸ’Ύ **Chat History**: Download and upload conversation histories +- πŸ”„ **Session Management**: Clear and restart conversations + +## πŸ—οΈ Architecture + +``` +EnergeniusRAG-shared/ +β”œβ”€β”€ document_manager_ui.py # Main Streamlit application +β”œβ”€β”€ src/ # Core application modules +β”‚ β”œβ”€β”€ database.py # MongoDB connection +β”‚ β”œβ”€β”€ domain_manager.py # Domain CRUD operations +β”‚ β”œβ”€β”€ document_manager.py # Document upload/management +β”‚ β”œβ”€β”€ kb_builder.py # Knowledge base creation +β”‚ β”œβ”€β”€ chat_manager.py # Chat session management +β”‚ └── orchestrator_manager.py # LLM orchestrator wrapper +β”œβ”€β”€ knowledge_base/ # KB extraction & management +β”‚ β”œβ”€β”€ knowledge_extractor.py +β”‚ β”œβ”€β”€ knowledge_manager.py +β”‚ └── kbs/ # Knowledge base storage +β”œβ”€β”€ orchestrator/ # Chat orchestration +β”œβ”€β”€ abstract_orchestrator.py +β”œβ”€β”€ live_orchestrator.py +β”œβ”€β”€ guru.py +β”œβ”€β”€ llm/ # LLM provider interfaces +β”œβ”€β”€ benchmark/ # Evaluation tools +β”œβ”€β”€ requirements.txt # Root dependencies +└── Dockerfile # Docker containerization +``` + +### Technology Stack + +- **Frontend**: Streamlit 1.45.0 +- **Database**: MongoDB (document storage) +- **Vector Store**: ChromaDB (embeddings) +- **LLM Providers**: OpenAI, Ollama +- **Knowledge Graph**: RDFLib +- **Document Processing**: pdfminer.six, pypdf +- **Web Scraping**: BeautifulSoup4, html2text +- **Orchestration**: LangChain 0.3.25 
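+For reference, below is a minimal sketch of how `src/database.py` could expose the MongoDB connection used by the other modules. It assumes `pymongo`; the helper names `get_mongodb_client` and `get_collection` match the imports used by `document_manager_ui.py`, and the defaults mirror `.env.example`, but the actual implementation in this patch may differ.
+
+```python
+"""Illustrative sketch only -- not necessarily the shipped src/database.py."""
+import os
+
+from dotenv import load_dotenv
+from pymongo import MongoClient
+
+load_dotenv()
+
+_client = None  # cache a single client per process
+
+
+def get_mongodb_client() -> MongoClient:
+    """Return a cached MongoClient built from MONGODB_URI."""
+    global _client
+    if _client is None:
+        _client = MongoClient(os.getenv("MONGODB_URI", "mongodb://localhost:27017/"))
+    return _client
+
+
+def get_collection():
+    """Return the documents collection named by MONGODB_DATABASE / MONGODB_COLLECTION."""
+    db = get_mongodb_client()[os.getenv("MONGODB_DATABASE", "energenius")]
+    return db[os.getenv("MONGODB_COLLECTION", "documents")]
+```
+
+With a helper like this, the UI layer can simply call `get_collection().find({"type": "document", "domain": "Italy"})` to list a domain's documents, keeping all connection details in one place.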
+ +## πŸš€ Installation + +### Prerequisites + +- Python 3.13 +- MongoDB (local or remote instance) +- Ollama (optional, for local LLM models) +- Conda (recommended for environment management) + +### Step 1: Clone Repository + +```bash +git clone https://github.com/DataSciencePolimi/EnergeniusRAG-shared.git +cd EnergeniusRAG-shared ``` -activate it using +### Step 2: Create Python Environment -```shell +Using Conda (recommended): + +```bash +conda create -n energenius python=3.13 conda activate energenius ``` -install the pip packages from the _requirements_ in the env +Or using venv: + +```bash +python3.13 -m venv venv +source venv/bin/activate # On Windows: venv\Scripts\activate +``` + +### Step 3: Install Dependencies -```shell +```bash +# Install root dependencies pip install -r requirements.txt + +``` + +### Step 4: Setup MongoDB + +**Local MongoDB**: +```bash +# Install MongoDB (Ubuntu/Debian) +sudo apt-get install -y mongodb + +# Start MongoDB service +sudo systemctl start mongodb +``` + +**Or use MongoDB Atlas** (cloud): +- Sign up at [MongoDB Atlas](https://www.mongodb.com/cloud/atlas) +- Create a cluster and get your connection string + +### Step 5: Configure Environment + +Create a `.env` file in the root directory: + +```bash +# MongoDB Configuration +MONGODB_URI=mongodb://localhost:27017 +MONGODB_DATABASE=energenius +MONGODB_COLLECTION=documents + +# OpenAI Configuration (optional) +OPENAI_API_KEY=your_openai_api_key_here +OPENAI_BASE_URL=https://api.openai.com/v1 + +# Ollama Configuration (optional, for local models) +OLLAMA_BASE_URL=http://localhost:11434 +``` + +### Step 6: Setup Ollama (Optional) + +For local LLM models: + +```bash +# Install Ollama +curl -fsSL https://ollama.com/install.sh | sh + +# Pull recommended models +ollama pull llama3.2 +ollama pull mistral +ollama pull mxbai-embed-large +ollama pull nomic-embed-text +``` + +## βš™οΈ Configuration + +### Environment Variables + +| Variable | Description | Default | +|----------|-------------|---------| +| `MONGODB_URI` | MongoDB connection string | `mongodb://localhost:27017` | +| `MONGODB_DATABASE` | Database name | `energenius` | +| `MONGODB_COLLECTION` | Collection name | `documents` | +| `OPENAI_API_KEY` | OpenAI API key | - | +| `OPENAI_BASE_URL` | OpenAI API endpoint | `https://api.openai.com/v1` | +| `OLLAMA_BASE_URL` | Ollama server URL | `http://localhost:11434` | + +### Recommended Models + +**LLMs**: +- OpenAI: `gpt-4.1-nano`, `gpt-5-nano`, `gpt-4o-mini-2024-07-18` +- Ollama: `llama3.2`, `mistral`, `gpt-oss` + +**Embeddings**: +- OpenAI: `text-embedding-3-small`, `text-embedding-3-large` +- Ollama: `mxbai-embed-large`, `nomic-embed-text` + +## πŸ“– Usage + +### Starting the Application + +```bash +# Using the shell script +./run_document_manager.sh + +# Or directly with Streamlit +streamlit run document_manager_ui.py --server.port 8501 +``` + +Access the application at: `http://localhost:8501` + +### 1. Document Management + +#### Create a Domain +1. Navigate to **Document Manager** tab +2. In the sidebar, expand "βž• Create New Domain" +3. Enter domain name (e.g., "Italy", "Europe", "Switzerland") +4. Add optional description +5. Click "Create Domain" + +#### Upload Documents +1. Select **Upload Documents** tab +2. Choose your domain from dropdown +3. Upload PDF files or add URLs: + - **PDF**: Select file(s), add description/tags, click "Upload" + - **URL**: Enter URL, title, description, and tags, click "Add URL" + +#### Manage Documents +1. Select **Manage Documents** tab +2. 
Choose domain to view +3. Use search and filters to find documents +4. Actions available: + - πŸ‘οΈ Preview PDFs + - ⬇️ Download PDFs + - πŸ—‘οΈ Delete documents + +### 2. Knowledge Base Creation + +#### Build a Knowledge Base +1. Navigate to **Knowledge Base** tab +2. Select domain +3. Configure provider and models: + - **Provider**: OpenAI or Ollama + - **Model**: Choose LLM model + - **Embedding**: Select embedding model +4. Optionally configure performance settings: + - Enable parallel processing (recommended) + - Adjust max workers (4-8 recommended) +5. Click "πŸš€ Create Knowledge Base" + +#### Monitor Progress +- Real-time logs show progress +- Process runs in background (you can switch tabs) +- Check status anytime in the Knowledge Base tab + +#### Rebuild/Delete KB +- **Rebuild**: Updates KB with latest documents +- **Delete**: Removes KB but keeps documents + +### 3. Chat with GURU + +#### Start a Conversation +1. Navigate to **GURU Chat** tab +2. Configure in sidebar: + - **Domain**: Select knowledge base to use + - **Provider**: OpenAI or Ollama + - **Model**: Choose LLM + - **Language**: English or EspaΓ±ol + - **Temperature**: Adjust creativity (0.0-1.0) + - **Use Knowledge Base**: Toggle to use domain KB +3. Type your question in chat input +4. Receive streaming AI responses + +#### Chat Management +- **Clear Chat**: Reset conversation +- **Download Chat**: Save history as JSON +- **Upload Chat**: Continue previous conversation + +## 🧠 Knowledge Base Creation + +### Process Overview + +1. **Document Collection**: Fetches all documents from selected domain +2. **URL Processing**: Downloads and extracts text from web pages +3. **PDF Processing**: Extracts text from PDF files +4. **Text Chunking**: Splits content into manageable chunks +5. **Embedding Generation**: Creates vector embeddings +6. **Vector Storage**: Stores in ChromaDB +7. **Graph Creation**: Builds RDF knowledge graph + +### Performance Optimization + +**Parallel Processing** (Recommended): +- Processes multiple documents simultaneously +- 4-8 workers optimal for most systems +- Significantly faster KB creation + +**Sequential Processing**: +- Lower resource usage +- Better for limited hardware +- More stable for very large documents + +### Storage Structure + +``` +knowledge_base/kbs/ +└── files_{domain}/ + β”œβ”€β”€ raw_docs.joblib # Original documents + β”œβ”€β”€ preprocessed_chunks.joblib # Text chunks + β”œβ”€β”€ graph_documents.joblib # Graph structure + β”œβ”€β”€ rdf_graph.ttl # RDF knowledge graph + └── chroma_db/ # Vector embeddings + └── ... ``` -## Private Settings +## πŸ”Œ API & Integration -In order to run the server, you need to create a file called private_settings.py in the same directory as settings.py. 
This file should contain the following variables: +### MongoDB Schema -```python -PRIVATE_SETINGS = { - "LLM_LOCAL": True, # Set to True if you are using a local LLM or False if you are using a remote LLM - "LLM_KEY": { - "openai": "" # OpenAI API key - "ollama": "", # ollama API key - "anthropic": "", # Anthropic API key - "deepseek": "", # DeepSeeker API key - }, - "LLM_BASE_URL": "", # Base URL for the LLM local API +**Domain Document**: +```json +{ + "type": "domain", + "name": "Italy", + "description": "Italian energy regulations", + "created_at": "2025-01-01T12:00:00" } ``` -You can use standard urls for local deployment: +**Document Entry**: +```json +{ + "type": "document", + "domain": "Italy", + "doc_type": "pdf", // or "url" + "filename": "energy_guide.pdf", + "content": "", // For PDFs + "url": "https://...", // For URLs + "size": 1048576, + "description": "Energy efficiency guide", + "tags": ["solar", "residential"], + "uploaded_at": "2025-01-01T12:00:00" +} +``` + +**Knowledge Base Metadata**: +```json +{ + "type": "knowledge_base", + "domain": "Italy", + "folder": "files_Italy", + "provider": "ollama", + "model": "llama3.2", + "embedding": "mxbai-embed-large", + "status": "completed", // or "creating", "error" + "document_count": 15, + "pdf_count": 10, + "url_count": 5, + "created_at": "2025-01-01T12:00:00", + "completed_at": "2025-01-01T12:30:00", + "logs": ["Started...", "Processing..."], + "error_message": null +} +``` +## 🐳 Docker Deployment + +### Build Image + +```bash +docker build -t energenius-rag . +``` + +### Run Container + +```bash +docker run -d \ + -p 8501:8501 \ + -e MONGODB_URI=mongodb://host.docker.internal:27017 \ + -e OPENAI_API_KEY=your_key_here \ + -v $(pwd)/knowledge_base/kbs:/app/knowledge_base/kbs \ + --name energenius \ + energenius-rag +``` + +### Docker Compose -- Ollama: `"LLM_BASE_URL": "http://localhost:11434"` -- LM Studio: `"LLM_BASE_URL": "http://localhost:1234/v1"` +Create `docker-compose.yml`: -## Local LLMs +```yaml +version: '3.8' -Right now, test locally with [Ollama](https://ollama.com/) +services: + mongodb: + image: mongo:latest + ports: + - "27017:27017" + volumes: + - mongodb_data:/data/db -Models tried: + energenius: + build: . 
+ ports: + - "8501:8501" + environment: + - MONGODB_URI=mongodb://mongodb:27017 + - MONGODB_DATABASE=energenius + - OPENAI_API_KEY=${OPENAI_API_KEY} + volumes: + - ./knowledge_base/kbs:/app/knowledge_base/kbs + depends_on: + - mongodb -- gpt-oss -- llama3.2 -- mistral +volumes: + mongodb_data: +``` + +Run with: +```bash +docker-compose up -d +``` -Embeddings: +## πŸ”§ Troubleshooting -- mxbai-embed-large -- nomic-embed-text +### Common Issues -In order to run Ollama, launch the Ollama server in a separate terminal: +**MongoDB Connection Failed** +```bash +# Check if MongoDB is running +sudo systemctl status mongodb -```shell -ollama run gpt-oss #llama3.2 or mistral +# Test connection +mongosh mongodb://localhost:27017 ``` -## UI +**Ollama Models Not Loading** +```bash +# Check Ollama service +ollama list -To run the UI +# Restart Ollama +systemctl restart ollama -```shell -streamlit run streamlit_ui.py +# Pull model again +ollama pull llama3.2 ``` -# Citation +**Knowledge Base Creation Stuck** +- Check logs in the UI +- Ensure no other process is using ChromaDB +- Try rebuilding with parallel processing disabled +- Check available disk space +**PDF Upload Fails** +- Verify PDF is not corrupted +- Check file size (very large files may timeout) +- Ensure MongoDB has sufficient storage + +**Chat Not Using Knowledge Base** +- Verify KB status is "completed" in Knowledge Base tab +- Check embedding model matches KB embedding +- Ensure "Use Knowledge Base" toggle is enabled + +### Debug Mode + +Enable detailed logging: + +```bash +export STREAMLIT_LOG_LEVEL=debug +streamlit run document_manager_ui.py ``` + +### Performance Tuning + +**For Large Knowledge Bases**: +- Increase parallel workers: 6-8 +- Use faster embedding models +- Consider SSD storage for ChromaDB + +**For Limited Resources**: +- Disable parallel processing +- Reduce max workers to 2-3 +- Use smaller embedding models + +## πŸ“š Documentation + +Additional documentation available in: +- `README.md` - Components overview +- `README2.md` - Extended documentation +- `docs/` - Architecture diagrams and specifications + +## πŸ“ Citation + +If you use GURU in your research, please cite: + +```bibtex @article{Campi_Giudici_Pinciroli_Vago_Brambilla_Fraternali_2025, title={Enhancing Human-AI Collaboration through a Conversational Agent for Energy Efficiency}, author={Campi, Riccardo and Giudici, Mathyas and Pinciroli Vago, NicolΓ² Oreste and Brambilla, Marco and Fraternali, Piero}, @@ -96,3 +518,23 @@ streamlit run streamlit_ui.py DOI={10.1609/aaaiss.v5i1.35554} } ``` + +## πŸ“„ License + +This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details. + +## 🀝 Contributing + +Contributions are welcome! Please: + +1. Fork the repository +2. Create a feature branch (`git checkout -b feature/amazing-feature`) +3. Commit your changes (`git commit -m 'Add amazing feature'`) +4. Push to the branch (`git push origin feature/amazing-feature`) +5. 
Open a Pull Request + +## πŸ“§ Contact + +For questions or support: +- **Repository**: [DataSciencePolimi/EnergeniusRAG-shared](https://github.com/DataSciencePolimi/EnergeniusRAG-shared) +- **Issues**: [GitHub Issues](https://github.com/DataSciencePolimi/EnergeniusRAG-shared/issues) \ No newline at end of file diff --git a/document_manager_ui.py b/document_manager_ui.py new file mode 100644 index 0000000..54a3198 --- /dev/null +++ b/document_manager_ui.py @@ -0,0 +1,777 @@ +"""Integrated UI - Document Manager and Energenius GURU.""" + +import streamlit as st +from datetime import datetime +import time + +# Import from src modules +from src.database import get_mongodb_client, get_collection +from src.domain_manager import create_domain, get_all_domains, delete_domain +from src.document_manager import ( + upload_pdf_to_mongodb, + add_url_to_mongodb, + get_documents_by_domain, + delete_document, + download_pdf_from_mongodb, + check_duplicate_pdf, + check_duplicate_url, + get_document_stats +) +from src.chat_manager import ( + initialize_chat_messages, + clear_chat_messages, + add_message, + get_chat_messages, + prepare_chat_download_data, + process_chat_upload +) +from src.orchestrator_manager import ( + cached_get_ollama_models, + cached_get_ollama_embeddings, + initialize_orchestrator +) +from src.kb_builder import ( + build_knowledge_base_for_domain, + get_knowledge_base_info, + delete_knowledge_base +) + +# Page Functions +def render_guru_page(): + """Render the Energenius GURU chat interface.""" + st.title("Energenius GURU") + + # Initialize chat messages + initialize_chat_messages() + + # Sidebar for model parameters + st.sidebar.title("Model Parameters") + + # First, get available domains and let user select + domains = get_all_domains() + domain_names = [d["name"] for d in domains] + default_domain_index = 0 + + region = st.sidebar.selectbox( + "Domain", domain_names, index=default_domain_index + ) + + # Get KB info for the selected domain to auto-select embedding model + kb_info = get_knowledge_base_info(region) + + # Provider selection + provider = st.sidebar.selectbox("Provider", ["openai", "ollama"], index=0) + + if provider == "ollama": + # Add refresh button for model list + col1, col2 = st.sidebar.columns([4, 1]) + with col1: + st.write("") # Spacing + with col2: + if st.button("πŸ”„", help="Refresh model list"): + st.cache_data.clear() + st.rerun() + + # Get available models dynamically + available_models = cached_get_ollama_models() + available_embeddings = cached_get_ollama_embeddings() + + # Try to find a default model, otherwise use the first one + default_model_index = 0 + + model = st.sidebar.selectbox("Model", available_models, index=default_model_index) + + # Auto-select embedding based on KB info if available + if kb_info and kb_info.get("embedding"): + embedding = kb_info.get("embedding") + if embedding not in available_embeddings: + st.sidebar.warning(f"⚠️ KB uses '{embedding}' which is not available") + else: + # Try to find a default embedding + if "mxbai-embed-large" in available_embeddings: + embedding = "mxbai-embed-large" + else: + embedding = available_embeddings[0] if available_embeddings else "mxbai-embed-large" + elif provider == "openai": + model = st.sidebar.selectbox("Model", ["gpt-5-nano", "gpt-4.1-nano"], index=1) + + # Auto-select embedding based on KB info if available + openai_embeddings = ["text-embedding-3-small", "text-embedding-3-large"] + if kb_info and kb_info.get("embedding"): + embedding = kb_info.get("embedding") + if embedding not in 
openai_embeddings: + st.sidebar.warning(f"⚠️ KB uses '{embedding}' which is not available for OpenAI") + embedding = openai_embeddings[0] # Fallback to default + else: + embedding = openai_embeddings[0] # Default to first embedding + else: + model = "None" + embedding = "None" + + language = st.sidebar.selectbox( + "Language", ["English", "EspaΓ±ol"], index=0 + ) + + # Fix temperature to 1.0 for gpt-5 models + if model.startswith("gpt-5"): + temperature = 1.0 + st.sidebar.slider( + "Temperature", min_value=0.0, max_value=1.0, value=1.0, step=0.1, disabled=True, + help="Temperature is fixed to 1.0 for gpt-5 models" + ) + else: + temperature = st.sidebar.slider( + "Temperature", min_value=0.0, max_value=1.0, value=0.75, step=0.1 + ) + + use_knowledge_base = st.sidebar.toggle( + "Use Knowledge Base", value=True, help="Use the knowledge base to answer questions." + ) + + # Show KB status for selected domain + if use_knowledge_base: + if kb_info: + kb_status = kb_info.get("status", "completed") + if kb_status == "completed": + st.sidebar.info(f"βœ… KB available for '{region}'") + elif kb_status == "creating": + st.sidebar.warning(f"⚠️ KB for '{region}' is still being created. Please wait or check Document Manager.") + elif kb_status == "error": + error_msg = kb_info.get("error_message", "Unknown error") + st.sidebar.error(f"❌ KB creation failed: {error_msg}") + else: + st.sidebar.info(f"βœ… KB available for '{region}'") + else: + st.sidebar.warning(f"⚠️ No KB found for '{region}'. Create one in Document Manager.") + + # Orchestrator initialization + orchestrator = initialize_orchestrator( + provider=provider, + model=model, + embedding=embedding, + language=language, + temperature=temperature, + user_type="Medium income", + house_type="Apartment", + region=region, + use_knowledge_base=use_knowledge_base, + ) + + # Support functions + st.sidebar.button( + "Clear chat", + icon=":material/delete:", + on_click=clear_chat_messages, + ) + + st.sidebar.download_button( + label="Download chat", + help="Download the chat history as a JSON file.", + icon=":material/download:", + file_name="chat.json", + mime="application/json", + data=prepare_chat_download_data(get_chat_messages()), + ) + + chat_upload = st.sidebar.file_uploader( + label="Upload chat", + help="Upload a chat file to continue from a conversation.", + type=["json"], + ) + + if chat_upload is not None: + messages, error = process_chat_upload(chat_upload) + + if error: + st.error(error) + elif messages: + # Erase the current chat and load uploaded messages + clear_chat_messages() + st.session_state["messages"] = messages + + # Call to the orchestrator to load the messages + orchestrator.load_past_messages(messages) + st.rerun() + + # Update the interface with the previous messages + for message in get_chat_messages(): + with st.chat_message(message["role"]): + st.markdown(message["content"]) + + # Create the chat interface + if prompt := st.chat_input("Enter your query"): + # Store and display the current prompt + add_message("user", prompt) + st.chat_message("user").markdown(prompt) + + # Streaming: create an empty placeholder for assistant message + with st.chat_message("assistant"): + response_placeholder = st.empty() + full_response = "" + + # Show spinner while streaming response + with st.spinner(""): + for chunk in orchestrator.user_message(prompt): + full_response += chunk + response_placeholder.markdown(full_response) + + response_placeholder.markdown(full_response) + + # Save final assistant message to history + 
add_message("assistant", full_response) + + +def render_document_manager_page(): + """Render the Document Manager interface.""" + # Display connection status + try: + get_mongodb_client().server_info() + except Exception as e: + st.error(f"❌ Failed to connect to MongoDB: {str(e)}") + return + + # Sidebar for domain management + with st.sidebar: + st.header("πŸ—‚οΈ Domain Management") + + # Statistics button - toggle on/off + if st.button("πŸ“Š View Statistics", use_container_width=True): + st.session_state["show_stats"] = not st.session_state.get("show_stats", False) + st.rerun() + + # Show statistics in a dialog/popup + if st.session_state.get("show_stats", False): + with st.container(): + stats = get_document_stats() + st.metric("Total Domains", stats["total_domains"]) + st.metric("Total Documents", stats["total_docs"]) + st.metric("PDFs", stats["total_pdfs"]) + st.metric("URLs", stats["total_urls"]) + + st.divider() + + # Create new domain + with st.expander("βž• Create New Domain", expanded=False): + new_domain_name = st.text_input("Domain Name", key="new_domain_name") + new_domain_desc = st.text_area("Description (optional)", key="new_domain_desc") + + if st.button("Create Domain", type="primary"): + if new_domain_name: + try: + # Check if domain already exists + existing = get_collection().find_one({"type": "domain", "name": new_domain_name}) + if existing: + st.error(f"Domain '{new_domain_name}' already exists!") + else: + create_domain(new_domain_name, new_domain_desc) + st.success(f"Domain '{new_domain_name}' created successfully!") + st.rerun() + except Exception as e: + st.error(f"Error creating domain: {str(e)}") + else: + st.warning("Please enter a domain name") + + # List all domains + st.subheader("πŸ“ Existing Domains") + domains = get_all_domains() + + if not domains: + st.info("No domains found. 
Create one to get started!") + else: + for domain in domains: + doc_count = get_collection().count_documents({"type": "document", "domain": domain["name"]}) + st.write(f"**{domain['name']}** ({doc_count} docs)") + + # Main content area - tabs for different operations + tab1, tab2, tab3, tab4 = st.tabs(["πŸ“€ Upload Documents", "πŸ“‹ Manage Documents", "🧠 Knowledge Base", "πŸ—‘οΈ Delete Domain"]) + + # Tab 1: Upload Documents + with tab1: + domains = get_all_domains() + if not domains: + st.warning("⚠️ Please create a domain first before uploading documents!") + else: + domain_names = [d["name"] for d in domains] + selected_domain = st.selectbox("Select Domain", domain_names, key="upload_domain") + + doc_type = st.radio("Document Type", ["PDF File", "URL"], horizontal=True) + + if doc_type == "PDF File": + st.subheader("πŸ“„ Upload PDF") + uploaded_files = st.file_uploader( + "Choose PDF file(s)", + type=["pdf"], + accept_multiple_files=True, + key="pdf_uploader" + ) + + pdf_description = st.text_area("Description (optional)", key="pdf_desc") + pdf_tags = st.text_input("Tags (comma-separated)", key="pdf_tags") + + if st.button("Upload PDF(s)", type="primary"): + if uploaded_files: + tags_list = [tag.strip() for tag in pdf_tags.split(",")] if pdf_tags else [] + + # Check for duplicates before uploading + duplicates = [] + files_to_upload = [] + + for pdf_file in uploaded_files: + pdf_content = pdf_file.read() + pdf_file.seek(0) # Reset file pointer for actual upload + + dup_check = check_duplicate_pdf(selected_domain, pdf_file.name, pdf_content) + + if dup_check["is_duplicate"]: + duplicates.append({ + "name": pdf_file.name, + "type": dup_check["duplicate_type"], + "existing": dup_check["existing_doc"] + }) + else: + files_to_upload.append(pdf_file) + + # Show duplicate warnings + if duplicates: + st.warning(f"⚠️ Found {len(duplicates)} duplicate(s):") + for dup in duplicates: + if dup["type"] == "filename": + st.write(f"- **{dup['name']}**: File with same name already exists") + else: + st.write(f"- **{dup['name']}**: File with identical content already exists as '{dup['existing']['filename']}'") + + # Upload non-duplicate files + if files_to_upload: + progress_bar = st.progress(0) + uploaded_count = 0 + + for idx, pdf_file in enumerate(files_to_upload): + try: + upload_pdf_to_mongodb(selected_domain, pdf_file, pdf_description, tags_list) + uploaded_count += 1 + progress_bar.progress((idx + 1) / len(files_to_upload)) + except Exception as e: + st.error(f"Error uploading {pdf_file.name}: {str(e)}") + + if uploaded_count > 0: + st.success(f"βœ… Successfully uploaded {uploaded_count} PDF(s) to domain '{selected_domain}'!") + time.sleep(2) + st.rerun() + elif not files_to_upload and duplicates: + st.info("No new files to upload. 
All files are duplicates.") + else: + st.warning("Please select at least one PDF file") + + else: # URL + st.subheader("πŸ”— Add URL") + url_input = st.text_input("URL", key="url_input") + url_title = st.text_input("Title (optional)", key="url_title") + url_description = st.text_area("Description (optional)", key="url_desc") + url_tags = st.text_input("Tags (comma-separated)", key="url_tags") + + if st.button("Add URL", type="primary"): + if url_input: + # Check for duplicate URL + dup_check = check_duplicate_url(selected_domain, url_input) + + if dup_check["is_duplicate"]: + existing_doc = dup_check["existing_doc"] + st.warning(f"⚠️ This URL already exists in domain '{selected_domain}'") + st.write(f"**Title:** {existing_doc.get('title', 'N/A')}") + st.write(f"**Added:** {existing_doc.get('uploaded_at').strftime('%Y-%m-%d %H:%M')}") + st.info("URL was not added again.") + else: + try: + tags_list = [tag.strip() for tag in url_tags.split(",")] if url_tags else [] + add_url_to_mongodb(selected_domain, url_input, url_title, url_description, tags_list) + st.success(f"βœ… URL added successfully to domain '{selected_domain}'!") + time.sleep(2) + st.rerun() + except Exception as e: + st.error(f"Error adding URL: {str(e)}") + else: + st.warning("Please enter a URL") + + # Tab 2: View & Manage + with tab2: + domains = get_all_domains() + if not domains: + st.info("No domains found.") + else: + domain_names = [d["name"] for d in domains] + view_domain = st.selectbox("Select Domain to View", domain_names, key="view_domain") + + if view_domain: + documents = get_documents_by_domain(view_domain) + + if not documents: + st.info(f"No documents found in domain '{view_domain}'") + else: + st.write(f"**{len(documents)} document(s) in '{view_domain}'**") + + # Filter options + col1, col2, col3 = st.columns([2, 1, 1]) + with col1: + search_query = st.text_input("πŸ” Search documents", key="search_docs") + with col2: + filter_type = st.selectbox("Filter by type", ["All", "PDF", "URL"], key="filter_type") + with col3: + # Get all unique tags + all_tags = set() + for doc in documents: + if doc.get("tags"): + all_tags.update(doc.get("tags")) + tag_options = ["All"] + sorted(list(all_tags)) + filter_tag = st.selectbox("Filter by tag", tag_options, key="filter_tag") + + # Apply filters + filtered_docs = documents + if search_query: + filtered_docs = [ + doc for doc in filtered_docs + if search_query.lower() in doc.get("filename", "").lower() + or search_query.lower() in doc.get("title", "").lower() + or search_query.lower() in doc.get("description", "").lower() + ] + + if filter_type != "All": + filtered_docs = [doc for doc in filtered_docs if doc.get("doc_type") == filter_type.lower()] + + if filter_tag != "All": + filtered_docs = [doc for doc in filtered_docs if doc.get("tags") and filter_tag in doc.get("tags")] + + st.write(f"Showing {len(filtered_docs)} document(s)") + + # Display documents + for doc in filtered_docs: + with st.container(): + col1, col2, col3 = st.columns([5, 2, 2]) + + with col1: + if doc.get("doc_type") == "pdf": + st.write(f"πŸ“„ **{doc.get('filename')}**") + size_mb = doc.get('size', 0) / (1024 * 1024) + st.caption(f"Size: {size_mb:.2f} MB") + else: + st.write(f"πŸ”— **{doc.get('title', 'Untitled')}**") + st.caption(f"URL: {doc.get('url')}") + + if doc.get("description"): + st.write(doc.get("description")) + + if doc.get("tags"): + tags_html = " ".join([f"`{tag}`" for tag in doc.get("tags")]) + st.markdown(tags_html) + + with col2: + st.caption(f"Uploaded: 
{doc.get('uploaded_at').strftime('%Y-%m-%d %H:%M')}") + + with col3: + # For PDFs: Preview, Download, Delete buttons + if doc.get("doc_type") == "pdf": + button_col1, button_col2, button_col3 = st.columns(3) + + with button_col1: + if st.button("πŸ‘οΈ", key=f"preview_{doc['_id']}", help="Preview PDF"): + st.session_state[f"show_preview_{doc['_id']}"] = True + st.rerun() + + with button_col2: + pdf_data = download_pdf_from_mongodb(str(doc['_id'])) + if pdf_data: + st.download_button( + label="⬇️", + data=pdf_data["content"], + file_name=pdf_data["filename"], + mime="application/pdf", + key=f"download_{doc['_id']}", + help="Download PDF" + ) + + with button_col3: + if st.button("πŸ—‘οΈ", key=f"delete_{doc['_id']}", help="Delete document"): + try: + delete_document(str(doc['_id'])) + st.success("Deleted!") + st.rerun() + except Exception as e: + st.error(f"Error: {str(e)}") + else: + # For URLs: Only delete button + if st.button("πŸ—‘οΈ", key=f"delete_{doc['_id']}", help="Delete document"): + try: + delete_document(str(doc['_id'])) + st.success("Deleted!") + st.rerun() + except Exception as e: + st.error(f"Error: {str(e)}") + + st.divider() + + # Handle PDF preview dialogs + for doc in filtered_docs: + if doc.get("doc_type") == "pdf" and st.session_state.get(f"show_preview_{doc['_id']}", False): + @st.dialog(f"Preview: {doc.get('filename')}", width="large") + def show_pdf_preview(): + import base64 + + pdf_data = download_pdf_from_mongodb(str(doc['_id'])) + if pdf_data: + # Add download button at the top + st.download_button( + label="⬇️ Download PDF", + data=pdf_data["content"], + file_name=pdf_data["filename"], + mime="application/pdf", + key=f"dialog_download_{doc['_id']}", + use_container_width=True + ) + + # Encode PDF to base64 for embedding + base64_pdf = base64.b64encode(pdf_data["content"]).decode('utf-8') + + # Display PDF using iframe with base64 data (reduced height) + pdf_display = f''' + + ''' + st.markdown(pdf_display, unsafe_allow_html=True) + else: + st.error("Failed to load PDF") + + show_pdf_preview() + + # Tab 3: Knowledge Base + with tab3: + domains = get_all_domains() + if not domains: + st.warning("No domains found. Create a domain and add documents first.") + else: + domain_names = [d["name"] for d in domains] + kb_domain = st.selectbox("Select Domain", domain_names, key="kb_domain") + + if kb_domain: + # Check if KB already exists + kb_info = get_knowledge_base_info(kb_domain) + + if kb_info: + kb_status = kb_info.get("status", "completed") + + if kb_status == "completed": + st.success(f"βœ… Knowledge base exists for domain '{kb_domain}'") + elif kb_status == "creating": + st.warning(f"⏳ Knowledge base is being created for domain '{kb_domain}'. 
This may take several minutes...") + elif kb_status == "error": + error_msg = kb_info.get("error_message", "Unknown error") + st.error(f"❌ Knowledge base creation failed: {error_msg}") + + col1, col2 = st.columns(2) + with col1: + st.metric("Created", kb_info.get("created_at").strftime("%Y-%m-%d %H:%M")) + st.metric("Documents Processed", kb_info.get("document_count", 0)) + if kb_status == "completed" and kb_info.get("completed_at"): + st.metric("Completed", kb_info.get("completed_at").strftime("%Y-%m-%d %H:%M")) + with col2: + st.metric("Provider", kb_info.get("provider", "N/A")) + st.metric("Model", kb_info.get("model", "N/A")) + st.metric("Status", kb_status.upper()) + + st.write(f"**URLs:** {kb_info.get('url_count', 0)}") + st.write(f"**PDFs:** {kb_info.get('pdf_count', 0)}") + st.write(f"**Embedding:** {kb_info.get('embedding', 'N/A')}") + st.write(f"**Folder:** `{kb_info.get('folder', 'N/A')}`") + + st.divider() + + col1, col2 = st.columns(2) + with col1: + if st.button("πŸ”„ Rebuild Knowledge Base", type="secondary", use_container_width=True): + st.session_state["rebuild_kb"] = True + st.rerun() + with col2: + if st.button("πŸ—‘οΈ Delete Knowledge Base", type="secondary", use_container_width=True): + try: + delete_knowledge_base(kb_domain) + st.success("Knowledge base deleted!") + st.rerun() + except Exception as e: + st.error(f"Error deleting KB: {str(e)}") + else: + st.warning(f"No knowledge base found for domain '{kb_domain}'") + + # Show KB creation form + kb_is_creating = kb_info and kb_info.get("status") == "creating" + + if kb_is_creating and not st.session_state.get("rebuild_kb", False): + st.divider() + st.info("⏳ A knowledge base is currently being created. Please wait for it to complete.") + + # Display logs from database + if kb_info.get("logs"): + st.text_area("Progress Log", "\n".join(kb_info.get("logs")), height=300, key="kb_progress_log", disabled=True) + + # Auto-refresh every 5 seconds if creating + time.sleep(5) + st.rerun() + + if st.button("πŸ”„ Refresh Status", key="refresh_kb_status"): + st.rerun() + elif not kb_info or st.session_state.get("rebuild_kb", False): + st.divider() + st.subheader("Create Knowledge Base") + + # Count documents + documents = get_documents_by_domain(kb_domain) + url_count = sum(1 for doc in documents if doc.get("doc_type") == "url") + pdf_count = sum(1 for doc in documents if doc.get("doc_type") == "pdf") + + if not documents: + st.error("No documents found in this domain. 
Add some documents first!") + else: + st.info(f"This domain contains {len(documents)} document(s): {url_count} URL(s) and {pdf_count} PDF(s)") + + # Configuration + col1, col2 = st.columns(2) + with col1: + kb_provider = st.selectbox("Provider", ["openai", "ollama"], key="kb_provider") + with col2: + if kb_provider == "ollama": + available_models = cached_get_ollama_models() + kb_model = st.selectbox("Model", available_models, key="kb_model") + available_embeddings = cached_get_ollama_embeddings() + default_emb_idx = available_embeddings.index("mxbai-embed-large") if "mxbai-embed-large" in available_embeddings else 0 + kb_embedding = st.selectbox("Embedding", available_embeddings, index=default_emb_idx, key="kb_embedding") + else: + kb_model = st.selectbox("Model", ["gpt-5-nano", "gpt-4.1-nano", "gpt-4o-mini-2024-07-18"], key="kb_model", index=1) + kb_embedding = st.selectbox("Embedding", ["text-embedding-3-small", "text-embedding-3-large"], key="kb_embedding") + + # Performance options + with st.expander("⚑ Performance Options", expanded=False): + enable_parallel = st.checkbox( + "Enable Parallel Processing", + value=True, + help="Process multiple LLM calls and embeddings in parallel for faster KB creation. Recommended for most cases." + ) + max_workers = st.slider( + "Max Workers", + min_value=1, + max_value=8, + value=4, + help="Number of parallel workers. Higher values = faster but more resource intensive. Recommended: 4-8" + ) + if not enable_parallel: + st.info("πŸ’‘ Parallel processing is disabled. KB creation will be slower but use less resources.") + + if st.button("πŸš€ Create Knowledge Base", type="primary", use_container_width=True): + # Create a progress container + log_placeholder = st.empty() + + logs = [] + def log_progress(message): + logs.append(f"[{datetime.now().strftime('%H:%M:%S')}] {message}") + log_placeholder.text_area("Progress Log", "\n".join(logs), height=200) + + # Use background thread so user can switch tabs + result = build_knowledge_base_for_domain( + kb_domain, + provider=kb_provider, + model=kb_model, + embedding=kb_embedding, + progress_callback=log_progress, + use_background_thread=True, + max_workers=max_workers, + enable_parallel=enable_parallel + ) + + + if result["status"] == "success": + st.success(result["message"]) + st.info("πŸ’‘ The knowledge base is being created in the background. 
You can safely switch tabs and check back later.") + st.session_state["rebuild_kb"] = False + st.rerun() + else: + st.error(result["message"]) + + # Tab 4: Delete Domain + with tab4: + st.warning("⚠️ **Warning:** Deleting a domain will permanently remove it and all its documents!") + + domains = get_all_domains() + if not domains: + st.info("No domains to delete.") + else: + domain_names = [d["name"] for d in domains] + delete_domain_name = st.selectbox("Select Domain to Delete", domain_names, key="delete_domain") + + if delete_domain_name: + doc_count = get_collection().count_documents({"type": "document", "domain": delete_domain_name}) + st.error(f"This will delete domain '{delete_domain_name}' and its {doc_count} document(s).") + + confirm_text = st.text_input(f"Type '{delete_domain_name}' to confirm deletion:", key="confirm_delete") + + if st.button("Delete Domain Permanently", type="primary"): + if confirm_text == delete_domain_name: + try: + delete_domain(delete_domain_name) + st.success(f"Domain '{delete_domain_name}' deleted successfully!") + st.rerun() + except Exception as e: + st.error(f"Error deleting domain: {str(e)}") + else: + st.error("Domain name doesn't match. Deletion cancelled.") + + +# Main application with navigation +def main(): + """Main application entry point with page navigation.""" + st.set_page_config( + page_title="Energenius Platform", + page_icon="⚑", + layout="wide" + ) + + # Hide Streamlit menu and reduce top padding + hide_streamlit_style = """ + + """ + st.markdown(hide_streamlit_style, unsafe_allow_html=True) + + # Create navigation buttons + col1, col2, col3 = st.columns([1, 1, 4]) + + with col1: + if st.button("πŸ’¬ GURU Chat", use_container_width=True, type="primary" if st.session_state.get("page", "guru") == "guru" else "secondary"): + st.session_state["page"] = "guru" + st.rerun() + + with col2: + if st.button("πŸ“š Document Manager", use_container_width=True, type="primary" if st.session_state.get("page", "guru") == "documents" else "secondary"): + st.session_state["page"] = "documents" + st.rerun() + + st.divider() + + # Initialize page selection + if "page" not in st.session_state: + st.session_state["page"] = "guru" + + # Render the selected page + if st.session_state["page"] == "guru": + render_guru_page() + else: + render_document_manager_page() + + +if __name__ == "__main__": + main() diff --git a/energy_bench.py b/energy_bench.py index 995319f..96b8f24 100644 --- a/energy_bench.py +++ b/energy_bench.py @@ -1,10 +1,14 @@ """Module to benchmark the energy consumption of the Guru orchestrator.""" +import os import pandas as pd +from dotenv import load_dotenv from benchmark import Benchmark from orchestrator import Guru -from private_settings import PRIVATE_SETINGS + +# Load environment variables from .env file +load_dotenv() if __name__ == "__main__": @@ -12,7 +16,8 @@ dataset = pd.read_csv("benchmark/backup/DatasetQA.csv") # Create the Guru instance - if PRIVATE_SETINGS["LLM_LOCAL"]: + llm_local = os.getenv("LLM_LOCAL", "false").lower() == "true" + if llm_local: guru = Guru("ollama", "gpt-oss:120b", "mxbai-embed-large", "english", 0, "Italy") else: guru = Guru("openai", "gpt-4", "text-embedding-3-small", "english", 0, "Italy") diff --git a/kb_creator.py b/kb_creator.py index 7ec13db..78fe57a 100644 --- a/kb_creator.py +++ b/kb_creator.py @@ -1,12 +1,16 @@ """Knowledge Base Creator""" +import os +from dotenv import load_dotenv from knowledge_base import KnowledgeExtractor -from private_settings import PRIVATE_SETINGS +# Load environment 
variables from .env file +load_dotenv() # Creating and running the knowledge base class based on the environment -if PRIVATE_SETINGS["LLM_LOCAL"]: - ke = KnowledgeExtractor("ollama", "gpt-oss", "mxbai-embed-large") +llm_local = os.getenv("LLM_LOCAL", "false").lower() == "true" +if llm_local: + ke = KnowledgeExtractor("ollama", "gemma3:12b-it-qat", "mxbai-embed-large") else: # Online ke = KnowledgeExtractor("openai", "gpt-4", "text-embedding-3-small") @@ -18,4 +22,6 @@ "https://www.agenziaentrate.gov.it/portale/web/guest/aree-tematiche/casa/agevolazioni/bonus-mobili-ed-elettrodomestici", "https://italiainclassea.enea.it/le-tecnologie/", ], + load_cached_docs=True, + load_cached_preprocessed_chunks=True ) diff --git a/kb_creator_Europe.py b/kb_creator_Europe.py index f04e6f8..50e06d5 100644 --- a/kb_creator_Europe.py +++ b/kb_creator_Europe.py @@ -1,11 +1,15 @@ """Knowledge Base Creator""" +import os +from dotenv import load_dotenv from knowledge_base import KnowledgeExtractor -from private_settings import PRIVATE_SETINGS +# Load environment variables from .env file +load_dotenv() # Creating and running the knowledge base class based on the environment -if PRIVATE_SETINGS["LLM_LOCAL"]: +llm_local = os.getenv("LLM_LOCAL", "false").lower() == "true" +if llm_local: ke = KnowledgeExtractor("ollama", "gpt-oss:120b", "mxbai-embed-large") else: # Online diff --git a/kb_creator_Italy.py b/kb_creator_Italy.py index 392c4f0..a8c6903 100644 --- a/kb_creator_Italy.py +++ b/kb_creator_Italy.py @@ -1,11 +1,15 @@ """Knowledge Base Creator""" +import os +from dotenv import load_dotenv from knowledge_base import KnowledgeExtractor -from private_settings import PRIVATE_SETINGS +# Load environment variables from .env file +load_dotenv() # Creating and running the knowledge base class based on the environment -if PRIVATE_SETINGS["LLM_LOCAL"]: +llm_local = os.getenv("LLM_LOCAL", "false").lower() == "true" +if llm_local: ke = KnowledgeExtractor("ollama", "gpt-oss:120b", "mxbai-embed-large") else: # Online diff --git a/kb_creator_Switzerland.py b/kb_creator_Switzerland.py index 627bd90..c83be84 100644 --- a/kb_creator_Switzerland.py +++ b/kb_creator_Switzerland.py @@ -1,11 +1,15 @@ """Knowledge Base Creator""" +import os +from dotenv import load_dotenv from knowledge_base import KnowledgeExtractor -from private_settings import PRIVATE_SETINGS +# Load environment variables from .env file +load_dotenv() # Creating and running the knowledge base class based on the environment -if PRIVATE_SETINGS["LLM_LOCAL"]: +llm_local = os.getenv("LLM_LOCAL", "false").lower() == "true" +if llm_local: ke = KnowledgeExtractor("ollama", "gpt-oss:120b", "mxbai-embed-large") else: # Online diff --git a/knowledge_base/files_Europe/chroma_db/35d3a63a-a234-48ab-bda8-430f4bd7c0f7/data_level0.bin b/knowledge_base/kbs/files_Europe/chroma_db/35d3a63a-a234-48ab-bda8-430f4bd7c0f7/data_level0.bin similarity index 100% rename from knowledge_base/files_Europe/chroma_db/35d3a63a-a234-48ab-bda8-430f4bd7c0f7/data_level0.bin rename to knowledge_base/kbs/files_Europe/chroma_db/35d3a63a-a234-48ab-bda8-430f4bd7c0f7/data_level0.bin diff --git a/knowledge_base/files_Europe/chroma_db/35d3a63a-a234-48ab-bda8-430f4bd7c0f7/header.bin b/knowledge_base/kbs/files_Europe/chroma_db/35d3a63a-a234-48ab-bda8-430f4bd7c0f7/header.bin similarity index 100% rename from knowledge_base/files_Europe/chroma_db/35d3a63a-a234-48ab-bda8-430f4bd7c0f7/header.bin rename to knowledge_base/kbs/files_Europe/chroma_db/35d3a63a-a234-48ab-bda8-430f4bd7c0f7/header.bin diff --git 
a/knowledge_base/files_Europe/chroma_db/35d3a63a-a234-48ab-bda8-430f4bd7c0f7/length.bin b/knowledge_base/kbs/files_Europe/chroma_db/35d3a63a-a234-48ab-bda8-430f4bd7c0f7/length.bin similarity index 100% rename from knowledge_base/files_Europe/chroma_db/35d3a63a-a234-48ab-bda8-430f4bd7c0f7/length.bin rename to knowledge_base/kbs/files_Europe/chroma_db/35d3a63a-a234-48ab-bda8-430f4bd7c0f7/length.bin diff --git a/knowledge_base/files_Europe/chroma_db/35d3a63a-a234-48ab-bda8-430f4bd7c0f7/link_lists.bin b/knowledge_base/kbs/files_Europe/chroma_db/35d3a63a-a234-48ab-bda8-430f4bd7c0f7/link_lists.bin similarity index 100% rename from knowledge_base/files_Europe/chroma_db/35d3a63a-a234-48ab-bda8-430f4bd7c0f7/link_lists.bin rename to knowledge_base/kbs/files_Europe/chroma_db/35d3a63a-a234-48ab-bda8-430f4bd7c0f7/link_lists.bin diff --git a/knowledge_base/files_Europe/chroma_db/52d84313-b357-4a79-8d9e-978cf10dd750/data_level0.bin b/knowledge_base/kbs/files_Europe/chroma_db/52d84313-b357-4a79-8d9e-978cf10dd750/data_level0.bin similarity index 100% rename from knowledge_base/files_Europe/chroma_db/52d84313-b357-4a79-8d9e-978cf10dd750/data_level0.bin rename to knowledge_base/kbs/files_Europe/chroma_db/52d84313-b357-4a79-8d9e-978cf10dd750/data_level0.bin diff --git a/knowledge_base/files_Europe/chroma_db/52d84313-b357-4a79-8d9e-978cf10dd750/header.bin b/knowledge_base/kbs/files_Europe/chroma_db/52d84313-b357-4a79-8d9e-978cf10dd750/header.bin similarity index 100% rename from knowledge_base/files_Europe/chroma_db/52d84313-b357-4a79-8d9e-978cf10dd750/header.bin rename to knowledge_base/kbs/files_Europe/chroma_db/52d84313-b357-4a79-8d9e-978cf10dd750/header.bin diff --git a/knowledge_base/files_Europe/chroma_db/52d84313-b357-4a79-8d9e-978cf10dd750/length.bin b/knowledge_base/kbs/files_Europe/chroma_db/52d84313-b357-4a79-8d9e-978cf10dd750/length.bin similarity index 100% rename from knowledge_base/files_Europe/chroma_db/52d84313-b357-4a79-8d9e-978cf10dd750/length.bin rename to knowledge_base/kbs/files_Europe/chroma_db/52d84313-b357-4a79-8d9e-978cf10dd750/length.bin diff --git a/knowledge_base/files_Europe/chroma_db/52d84313-b357-4a79-8d9e-978cf10dd750/link_lists.bin b/knowledge_base/kbs/files_Europe/chroma_db/52d84313-b357-4a79-8d9e-978cf10dd750/link_lists.bin similarity index 100% rename from knowledge_base/files_Europe/chroma_db/52d84313-b357-4a79-8d9e-978cf10dd750/link_lists.bin rename to knowledge_base/kbs/files_Europe/chroma_db/52d84313-b357-4a79-8d9e-978cf10dd750/link_lists.bin diff --git a/knowledge_base/files_Europe/chroma_db/59f240be-98ad-4310-bd23-aa8c690b72e2/data_level0.bin b/knowledge_base/kbs/files_Europe/chroma_db/59f240be-98ad-4310-bd23-aa8c690b72e2/data_level0.bin similarity index 100% rename from knowledge_base/files_Europe/chroma_db/59f240be-98ad-4310-bd23-aa8c690b72e2/data_level0.bin rename to knowledge_base/kbs/files_Europe/chroma_db/59f240be-98ad-4310-bd23-aa8c690b72e2/data_level0.bin diff --git a/knowledge_base/files_Europe/chroma_db/59f240be-98ad-4310-bd23-aa8c690b72e2/header.bin b/knowledge_base/kbs/files_Europe/chroma_db/59f240be-98ad-4310-bd23-aa8c690b72e2/header.bin similarity index 100% rename from knowledge_base/files_Europe/chroma_db/59f240be-98ad-4310-bd23-aa8c690b72e2/header.bin rename to knowledge_base/kbs/files_Europe/chroma_db/59f240be-98ad-4310-bd23-aa8c690b72e2/header.bin diff --git a/knowledge_base/files_Europe/chroma_db/59f240be-98ad-4310-bd23-aa8c690b72e2/length.bin b/knowledge_base/kbs/files_Europe/chroma_db/59f240be-98ad-4310-bd23-aa8c690b72e2/length.bin similarity 
index 100% rename from knowledge_base/files_Europe/chroma_db/59f240be-98ad-4310-bd23-aa8c690b72e2/length.bin rename to knowledge_base/kbs/files_Europe/chroma_db/59f240be-98ad-4310-bd23-aa8c690b72e2/length.bin diff --git a/knowledge_base/files_Europe/chroma_db/59f240be-98ad-4310-bd23-aa8c690b72e2/link_lists.bin b/knowledge_base/kbs/files_Europe/chroma_db/59f240be-98ad-4310-bd23-aa8c690b72e2/link_lists.bin similarity index 100% rename from knowledge_base/files_Europe/chroma_db/59f240be-98ad-4310-bd23-aa8c690b72e2/link_lists.bin rename to knowledge_base/kbs/files_Europe/chroma_db/59f240be-98ad-4310-bd23-aa8c690b72e2/link_lists.bin diff --git a/knowledge_base/files_Europe/chroma_db/chroma.sqlite3 b/knowledge_base/kbs/files_Europe/chroma_db/chroma.sqlite3 similarity index 100% rename from knowledge_base/files_Europe/chroma_db/chroma.sqlite3 rename to knowledge_base/kbs/files_Europe/chroma_db/chroma.sqlite3 diff --git a/knowledge_base/files_Europe/chroma_db/f3c723d9-c9da-4bab-8e7f-f22c845a9c85/data_level0.bin b/knowledge_base/kbs/files_Europe/chroma_db/f3c723d9-c9da-4bab-8e7f-f22c845a9c85/data_level0.bin similarity index 100% rename from knowledge_base/files_Europe/chroma_db/f3c723d9-c9da-4bab-8e7f-f22c845a9c85/data_level0.bin rename to knowledge_base/kbs/files_Europe/chroma_db/f3c723d9-c9da-4bab-8e7f-f22c845a9c85/data_level0.bin diff --git a/knowledge_base/files_Europe/chroma_db/f3c723d9-c9da-4bab-8e7f-f22c845a9c85/header.bin b/knowledge_base/kbs/files_Europe/chroma_db/f3c723d9-c9da-4bab-8e7f-f22c845a9c85/header.bin similarity index 100% rename from knowledge_base/files_Europe/chroma_db/f3c723d9-c9da-4bab-8e7f-f22c845a9c85/header.bin rename to knowledge_base/kbs/files_Europe/chroma_db/f3c723d9-c9da-4bab-8e7f-f22c845a9c85/header.bin diff --git a/knowledge_base/files_Europe/chroma_db/f3c723d9-c9da-4bab-8e7f-f22c845a9c85/length.bin b/knowledge_base/kbs/files_Europe/chroma_db/f3c723d9-c9da-4bab-8e7f-f22c845a9c85/length.bin similarity index 100% rename from knowledge_base/files_Europe/chroma_db/f3c723d9-c9da-4bab-8e7f-f22c845a9c85/length.bin rename to knowledge_base/kbs/files_Europe/chroma_db/f3c723d9-c9da-4bab-8e7f-f22c845a9c85/length.bin diff --git a/knowledge_base/files_Europe/chroma_db/f3c723d9-c9da-4bab-8e7f-f22c845a9c85/link_lists.bin b/knowledge_base/kbs/files_Europe/chroma_db/f3c723d9-c9da-4bab-8e7f-f22c845a9c85/link_lists.bin similarity index 100% rename from knowledge_base/files_Europe/chroma_db/f3c723d9-c9da-4bab-8e7f-f22c845a9c85/link_lists.bin rename to knowledge_base/kbs/files_Europe/chroma_db/f3c723d9-c9da-4bab-8e7f-f22c845a9c85/link_lists.bin diff --git a/knowledge_base/files_Europe/graph_documents.joblib b/knowledge_base/kbs/files_Europe/graph_documents.joblib similarity index 100% rename from knowledge_base/files_Europe/graph_documents.joblib rename to knowledge_base/kbs/files_Europe/graph_documents.joblib diff --git a/knowledge_base/files_Europe/preprocessed_chunks.joblib b/knowledge_base/kbs/files_Europe/preprocessed_chunks.joblib similarity index 100% rename from knowledge_base/files_Europe/preprocessed_chunks.joblib rename to knowledge_base/kbs/files_Europe/preprocessed_chunks.joblib diff --git a/knowledge_base/files_Europe/raw_docs.joblib b/knowledge_base/kbs/files_Europe/raw_docs.joblib similarity index 100% rename from knowledge_base/files_Europe/raw_docs.joblib rename to knowledge_base/kbs/files_Europe/raw_docs.joblib diff --git a/knowledge_base/files_Europe/rdf_graph.ttl b/knowledge_base/kbs/files_Europe/rdf_graph.ttl similarity index 100% rename from 
knowledge_base/files_Europe/rdf_graph.ttl rename to knowledge_base/kbs/files_Europe/rdf_graph.ttl diff --git a/knowledge_base/files_Generic/chroma_db/5c997f27-f387-4015-a27c-27b8cf5ea877/data_level0.bin b/knowledge_base/kbs/files_Generic/chroma_db/5c997f27-f387-4015-a27c-27b8cf5ea877/data_level0.bin similarity index 100% rename from knowledge_base/files_Generic/chroma_db/5c997f27-f387-4015-a27c-27b8cf5ea877/data_level0.bin rename to knowledge_base/kbs/files_Generic/chroma_db/5c997f27-f387-4015-a27c-27b8cf5ea877/data_level0.bin diff --git a/knowledge_base/files_Generic/chroma_db/5c997f27-f387-4015-a27c-27b8cf5ea877/header.bin b/knowledge_base/kbs/files_Generic/chroma_db/5c997f27-f387-4015-a27c-27b8cf5ea877/header.bin similarity index 100% rename from knowledge_base/files_Generic/chroma_db/5c997f27-f387-4015-a27c-27b8cf5ea877/header.bin rename to knowledge_base/kbs/files_Generic/chroma_db/5c997f27-f387-4015-a27c-27b8cf5ea877/header.bin diff --git a/knowledge_base/files_Generic/chroma_db/5c997f27-f387-4015-a27c-27b8cf5ea877/index_metadata.pickle b/knowledge_base/kbs/files_Generic/chroma_db/5c997f27-f387-4015-a27c-27b8cf5ea877/index_metadata.pickle similarity index 100% rename from knowledge_base/files_Generic/chroma_db/5c997f27-f387-4015-a27c-27b8cf5ea877/index_metadata.pickle rename to knowledge_base/kbs/files_Generic/chroma_db/5c997f27-f387-4015-a27c-27b8cf5ea877/index_metadata.pickle diff --git a/knowledge_base/files_Generic/chroma_db/5c997f27-f387-4015-a27c-27b8cf5ea877/length.bin b/knowledge_base/kbs/files_Generic/chroma_db/5c997f27-f387-4015-a27c-27b8cf5ea877/length.bin similarity index 100% rename from knowledge_base/files_Generic/chroma_db/5c997f27-f387-4015-a27c-27b8cf5ea877/length.bin rename to knowledge_base/kbs/files_Generic/chroma_db/5c997f27-f387-4015-a27c-27b8cf5ea877/length.bin diff --git a/knowledge_base/files_Generic/chroma_db/5c997f27-f387-4015-a27c-27b8cf5ea877/link_lists.bin b/knowledge_base/kbs/files_Generic/chroma_db/5c997f27-f387-4015-a27c-27b8cf5ea877/link_lists.bin similarity index 100% rename from knowledge_base/files_Generic/chroma_db/5c997f27-f387-4015-a27c-27b8cf5ea877/link_lists.bin rename to knowledge_base/kbs/files_Generic/chroma_db/5c997f27-f387-4015-a27c-27b8cf5ea877/link_lists.bin diff --git a/knowledge_base/files_Generic/chroma_db/72e31084-777b-4f13-b5ec-59a858438f8a/data_level0.bin b/knowledge_base/kbs/files_Generic/chroma_db/72e31084-777b-4f13-b5ec-59a858438f8a/data_level0.bin similarity index 100% rename from knowledge_base/files_Generic/chroma_db/72e31084-777b-4f13-b5ec-59a858438f8a/data_level0.bin rename to knowledge_base/kbs/files_Generic/chroma_db/72e31084-777b-4f13-b5ec-59a858438f8a/data_level0.bin diff --git a/knowledge_base/files_Generic/chroma_db/72e31084-777b-4f13-b5ec-59a858438f8a/header.bin b/knowledge_base/kbs/files_Generic/chroma_db/72e31084-777b-4f13-b5ec-59a858438f8a/header.bin similarity index 100% rename from knowledge_base/files_Generic/chroma_db/72e31084-777b-4f13-b5ec-59a858438f8a/header.bin rename to knowledge_base/kbs/files_Generic/chroma_db/72e31084-777b-4f13-b5ec-59a858438f8a/header.bin diff --git a/knowledge_base/files_Generic/chroma_db/72e31084-777b-4f13-b5ec-59a858438f8a/length.bin b/knowledge_base/kbs/files_Generic/chroma_db/72e31084-777b-4f13-b5ec-59a858438f8a/length.bin similarity index 100% rename from knowledge_base/files_Generic/chroma_db/72e31084-777b-4f13-b5ec-59a858438f8a/length.bin rename to knowledge_base/kbs/files_Generic/chroma_db/72e31084-777b-4f13-b5ec-59a858438f8a/length.bin diff --git 
a/knowledge_base/files_Generic/chroma_db/72e31084-777b-4f13-b5ec-59a858438f8a/link_lists.bin b/knowledge_base/kbs/files_Generic/chroma_db/72e31084-777b-4f13-b5ec-59a858438f8a/link_lists.bin similarity index 100% rename from knowledge_base/files_Generic/chroma_db/72e31084-777b-4f13-b5ec-59a858438f8a/link_lists.bin rename to knowledge_base/kbs/files_Generic/chroma_db/72e31084-777b-4f13-b5ec-59a858438f8a/link_lists.bin diff --git a/knowledge_base/files_Generic/chroma_db/a034a1de-d422-487f-8a96-2fb9ff1f64a4/data_level0.bin b/knowledge_base/kbs/files_Generic/chroma_db/a034a1de-d422-487f-8a96-2fb9ff1f64a4/data_level0.bin similarity index 100% rename from knowledge_base/files_Generic/chroma_db/a034a1de-d422-487f-8a96-2fb9ff1f64a4/data_level0.bin rename to knowledge_base/kbs/files_Generic/chroma_db/a034a1de-d422-487f-8a96-2fb9ff1f64a4/data_level0.bin diff --git a/knowledge_base/files_Generic/chroma_db/a034a1de-d422-487f-8a96-2fb9ff1f64a4/header.bin b/knowledge_base/kbs/files_Generic/chroma_db/a034a1de-d422-487f-8a96-2fb9ff1f64a4/header.bin similarity index 100% rename from knowledge_base/files_Generic/chroma_db/a034a1de-d422-487f-8a96-2fb9ff1f64a4/header.bin rename to knowledge_base/kbs/files_Generic/chroma_db/a034a1de-d422-487f-8a96-2fb9ff1f64a4/header.bin diff --git a/knowledge_base/files_Generic/chroma_db/a034a1de-d422-487f-8a96-2fb9ff1f64a4/length.bin b/knowledge_base/kbs/files_Generic/chroma_db/a034a1de-d422-487f-8a96-2fb9ff1f64a4/length.bin similarity index 100% rename from knowledge_base/files_Generic/chroma_db/a034a1de-d422-487f-8a96-2fb9ff1f64a4/length.bin rename to knowledge_base/kbs/files_Generic/chroma_db/a034a1de-d422-487f-8a96-2fb9ff1f64a4/length.bin diff --git a/knowledge_base/files_Generic/chroma_db/a034a1de-d422-487f-8a96-2fb9ff1f64a4/link_lists.bin b/knowledge_base/kbs/files_Generic/chroma_db/a034a1de-d422-487f-8a96-2fb9ff1f64a4/link_lists.bin similarity index 100% rename from knowledge_base/files_Generic/chroma_db/a034a1de-d422-487f-8a96-2fb9ff1f64a4/link_lists.bin rename to knowledge_base/kbs/files_Generic/chroma_db/a034a1de-d422-487f-8a96-2fb9ff1f64a4/link_lists.bin diff --git a/knowledge_base/files_Generic/chroma_db/af18c57f-035c-4ea1-bb05-e707a5567325/data_level0.bin b/knowledge_base/kbs/files_Generic/chroma_db/af18c57f-035c-4ea1-bb05-e707a5567325/data_level0.bin similarity index 100% rename from knowledge_base/files_Generic/chroma_db/af18c57f-035c-4ea1-bb05-e707a5567325/data_level0.bin rename to knowledge_base/kbs/files_Generic/chroma_db/af18c57f-035c-4ea1-bb05-e707a5567325/data_level0.bin diff --git a/knowledge_base/files_Generic/chroma_db/af18c57f-035c-4ea1-bb05-e707a5567325/header.bin b/knowledge_base/kbs/files_Generic/chroma_db/af18c57f-035c-4ea1-bb05-e707a5567325/header.bin similarity index 100% rename from knowledge_base/files_Generic/chroma_db/af18c57f-035c-4ea1-bb05-e707a5567325/header.bin rename to knowledge_base/kbs/files_Generic/chroma_db/af18c57f-035c-4ea1-bb05-e707a5567325/header.bin diff --git a/knowledge_base/files_Generic/chroma_db/af18c57f-035c-4ea1-bb05-e707a5567325/length.bin b/knowledge_base/kbs/files_Generic/chroma_db/af18c57f-035c-4ea1-bb05-e707a5567325/length.bin similarity index 100% rename from knowledge_base/files_Generic/chroma_db/af18c57f-035c-4ea1-bb05-e707a5567325/length.bin rename to knowledge_base/kbs/files_Generic/chroma_db/af18c57f-035c-4ea1-bb05-e707a5567325/length.bin diff --git a/knowledge_base/files_Generic/chroma_db/af18c57f-035c-4ea1-bb05-e707a5567325/link_lists.bin 
b/knowledge_base/kbs/files_Generic/chroma_db/af18c57f-035c-4ea1-bb05-e707a5567325/link_lists.bin similarity index 100% rename from knowledge_base/files_Generic/chroma_db/af18c57f-035c-4ea1-bb05-e707a5567325/link_lists.bin rename to knowledge_base/kbs/files_Generic/chroma_db/af18c57f-035c-4ea1-bb05-e707a5567325/link_lists.bin diff --git a/knowledge_base/files_Generic/chroma_db/chroma.sqlite3 b/knowledge_base/kbs/files_Generic/chroma_db/chroma.sqlite3 similarity index 100% rename from knowledge_base/files_Generic/chroma_db/chroma.sqlite3 rename to knowledge_base/kbs/files_Generic/chroma_db/chroma.sqlite3 diff --git a/knowledge_base/files_Generic/graph_documents.joblib b/knowledge_base/kbs/files_Generic/graph_documents.joblib similarity index 100% rename from knowledge_base/files_Generic/graph_documents.joblib rename to knowledge_base/kbs/files_Generic/graph_documents.joblib diff --git a/knowledge_base/files_Generic/preprocessed_chunks.joblib b/knowledge_base/kbs/files_Generic/preprocessed_chunks.joblib similarity index 100% rename from knowledge_base/files_Generic/preprocessed_chunks.joblib rename to knowledge_base/kbs/files_Generic/preprocessed_chunks.joblib diff --git a/knowledge_base/files_Generic/raw_docs.joblib b/knowledge_base/kbs/files_Generic/raw_docs.joblib similarity index 100% rename from knowledge_base/files_Generic/raw_docs.joblib rename to knowledge_base/kbs/files_Generic/raw_docs.joblib diff --git a/knowledge_base/files_Generic/rdf_graph.ttl b/knowledge_base/kbs/files_Generic/rdf_graph.ttl similarity index 100% rename from knowledge_base/files_Generic/rdf_graph.ttl rename to knowledge_base/kbs/files_Generic/rdf_graph.ttl diff --git a/knowledge_base/files_Italy/chroma_db/125b5628-3aad-4821-9cf0-4d625fde6b6d/data_level0.bin b/knowledge_base/kbs/files_Italy/chroma_db/125b5628-3aad-4821-9cf0-4d625fde6b6d/data_level0.bin similarity index 100% rename from knowledge_base/files_Italy/chroma_db/125b5628-3aad-4821-9cf0-4d625fde6b6d/data_level0.bin rename to knowledge_base/kbs/files_Italy/chroma_db/125b5628-3aad-4821-9cf0-4d625fde6b6d/data_level0.bin diff --git a/knowledge_base/files_Italy/chroma_db/125b5628-3aad-4821-9cf0-4d625fde6b6d/header.bin b/knowledge_base/kbs/files_Italy/chroma_db/125b5628-3aad-4821-9cf0-4d625fde6b6d/header.bin similarity index 100% rename from knowledge_base/files_Italy/chroma_db/125b5628-3aad-4821-9cf0-4d625fde6b6d/header.bin rename to knowledge_base/kbs/files_Italy/chroma_db/125b5628-3aad-4821-9cf0-4d625fde6b6d/header.bin diff --git a/knowledge_base/files_Italy/chroma_db/125b5628-3aad-4821-9cf0-4d625fde6b6d/index_metadata.pickle b/knowledge_base/kbs/files_Italy/chroma_db/125b5628-3aad-4821-9cf0-4d625fde6b6d/index_metadata.pickle similarity index 100% rename from knowledge_base/files_Italy/chroma_db/125b5628-3aad-4821-9cf0-4d625fde6b6d/index_metadata.pickle rename to knowledge_base/kbs/files_Italy/chroma_db/125b5628-3aad-4821-9cf0-4d625fde6b6d/index_metadata.pickle diff --git a/knowledge_base/files_Italy/chroma_db/125b5628-3aad-4821-9cf0-4d625fde6b6d/length.bin b/knowledge_base/kbs/files_Italy/chroma_db/125b5628-3aad-4821-9cf0-4d625fde6b6d/length.bin similarity index 100% rename from knowledge_base/files_Italy/chroma_db/125b5628-3aad-4821-9cf0-4d625fde6b6d/length.bin rename to knowledge_base/kbs/files_Italy/chroma_db/125b5628-3aad-4821-9cf0-4d625fde6b6d/length.bin diff --git a/knowledge_base/files_Italy/chroma_db/125b5628-3aad-4821-9cf0-4d625fde6b6d/link_lists.bin b/knowledge_base/kbs/files_Italy/chroma_db/125b5628-3aad-4821-9cf0-4d625fde6b6d/link_lists.bin 
similarity index 100% rename from knowledge_base/files_Italy/chroma_db/125b5628-3aad-4821-9cf0-4d625fde6b6d/link_lists.bin rename to knowledge_base/kbs/files_Italy/chroma_db/125b5628-3aad-4821-9cf0-4d625fde6b6d/link_lists.bin diff --git a/knowledge_base/files_Italy/chroma_db/313a7e81-1680-4717-b906-a80ff3f06b3d/data_level0.bin b/knowledge_base/kbs/files_Italy/chroma_db/313a7e81-1680-4717-b906-a80ff3f06b3d/data_level0.bin similarity index 100% rename from knowledge_base/files_Italy/chroma_db/313a7e81-1680-4717-b906-a80ff3f06b3d/data_level0.bin rename to knowledge_base/kbs/files_Italy/chroma_db/313a7e81-1680-4717-b906-a80ff3f06b3d/data_level0.bin diff --git a/knowledge_base/files_Italy/chroma_db/313a7e81-1680-4717-b906-a80ff3f06b3d/header.bin b/knowledge_base/kbs/files_Italy/chroma_db/313a7e81-1680-4717-b906-a80ff3f06b3d/header.bin similarity index 100% rename from knowledge_base/files_Italy/chroma_db/313a7e81-1680-4717-b906-a80ff3f06b3d/header.bin rename to knowledge_base/kbs/files_Italy/chroma_db/313a7e81-1680-4717-b906-a80ff3f06b3d/header.bin diff --git a/knowledge_base/files_Italy/chroma_db/313a7e81-1680-4717-b906-a80ff3f06b3d/index_metadata.pickle b/knowledge_base/kbs/files_Italy/chroma_db/313a7e81-1680-4717-b906-a80ff3f06b3d/index_metadata.pickle similarity index 100% rename from knowledge_base/files_Italy/chroma_db/313a7e81-1680-4717-b906-a80ff3f06b3d/index_metadata.pickle rename to knowledge_base/kbs/files_Italy/chroma_db/313a7e81-1680-4717-b906-a80ff3f06b3d/index_metadata.pickle diff --git a/knowledge_base/files_Italy/chroma_db/313a7e81-1680-4717-b906-a80ff3f06b3d/length.bin b/knowledge_base/kbs/files_Italy/chroma_db/313a7e81-1680-4717-b906-a80ff3f06b3d/length.bin similarity index 100% rename from knowledge_base/files_Italy/chroma_db/313a7e81-1680-4717-b906-a80ff3f06b3d/length.bin rename to knowledge_base/kbs/files_Italy/chroma_db/313a7e81-1680-4717-b906-a80ff3f06b3d/length.bin diff --git a/knowledge_base/files_Italy/chroma_db/313a7e81-1680-4717-b906-a80ff3f06b3d/link_lists.bin b/knowledge_base/kbs/files_Italy/chroma_db/313a7e81-1680-4717-b906-a80ff3f06b3d/link_lists.bin similarity index 100% rename from knowledge_base/files_Italy/chroma_db/313a7e81-1680-4717-b906-a80ff3f06b3d/link_lists.bin rename to knowledge_base/kbs/files_Italy/chroma_db/313a7e81-1680-4717-b906-a80ff3f06b3d/link_lists.bin diff --git a/knowledge_base/files_Italy/chroma_db/38827c57-f211-4721-b00d-bca74efe69ac/data_level0.bin b/knowledge_base/kbs/files_Italy/chroma_db/38827c57-f211-4721-b00d-bca74efe69ac/data_level0.bin similarity index 100% rename from knowledge_base/files_Italy/chroma_db/38827c57-f211-4721-b00d-bca74efe69ac/data_level0.bin rename to knowledge_base/kbs/files_Italy/chroma_db/38827c57-f211-4721-b00d-bca74efe69ac/data_level0.bin diff --git a/knowledge_base/files_Italy/chroma_db/38827c57-f211-4721-b00d-bca74efe69ac/header.bin b/knowledge_base/kbs/files_Italy/chroma_db/38827c57-f211-4721-b00d-bca74efe69ac/header.bin similarity index 100% rename from knowledge_base/files_Italy/chroma_db/38827c57-f211-4721-b00d-bca74efe69ac/header.bin rename to knowledge_base/kbs/files_Italy/chroma_db/38827c57-f211-4721-b00d-bca74efe69ac/header.bin diff --git a/knowledge_base/files_Italy/chroma_db/38827c57-f211-4721-b00d-bca74efe69ac/index_metadata.pickle b/knowledge_base/kbs/files_Italy/chroma_db/38827c57-f211-4721-b00d-bca74efe69ac/index_metadata.pickle similarity index 100% rename from knowledge_base/files_Italy/chroma_db/38827c57-f211-4721-b00d-bca74efe69ac/index_metadata.pickle rename to 
knowledge_base/kbs/files_Italy/chroma_db/38827c57-f211-4721-b00d-bca74efe69ac/index_metadata.pickle diff --git a/knowledge_base/files_Italy/chroma_db/38827c57-f211-4721-b00d-bca74efe69ac/length.bin b/knowledge_base/kbs/files_Italy/chroma_db/38827c57-f211-4721-b00d-bca74efe69ac/length.bin similarity index 100% rename from knowledge_base/files_Italy/chroma_db/38827c57-f211-4721-b00d-bca74efe69ac/length.bin rename to knowledge_base/kbs/files_Italy/chroma_db/38827c57-f211-4721-b00d-bca74efe69ac/length.bin diff --git a/knowledge_base/files_Italy/chroma_db/38827c57-f211-4721-b00d-bca74efe69ac/link_lists.bin b/knowledge_base/kbs/files_Italy/chroma_db/38827c57-f211-4721-b00d-bca74efe69ac/link_lists.bin similarity index 100% rename from knowledge_base/files_Italy/chroma_db/38827c57-f211-4721-b00d-bca74efe69ac/link_lists.bin rename to knowledge_base/kbs/files_Italy/chroma_db/38827c57-f211-4721-b00d-bca74efe69ac/link_lists.bin diff --git a/knowledge_base/files_Italy/chroma_db/chroma.sqlite3 b/knowledge_base/kbs/files_Italy/chroma_db/chroma.sqlite3 similarity index 100% rename from knowledge_base/files_Italy/chroma_db/chroma.sqlite3 rename to knowledge_base/kbs/files_Italy/chroma_db/chroma.sqlite3 diff --git a/knowledge_base/files_Italy/chroma_db/eb1d395e-7e32-42ec-907d-732e365f958d/data_level0.bin b/knowledge_base/kbs/files_Italy/chroma_db/eb1d395e-7e32-42ec-907d-732e365f958d/data_level0.bin similarity index 100% rename from knowledge_base/files_Italy/chroma_db/eb1d395e-7e32-42ec-907d-732e365f958d/data_level0.bin rename to knowledge_base/kbs/files_Italy/chroma_db/eb1d395e-7e32-42ec-907d-732e365f958d/data_level0.bin diff --git a/knowledge_base/files_Italy/chroma_db/eb1d395e-7e32-42ec-907d-732e365f958d/header.bin b/knowledge_base/kbs/files_Italy/chroma_db/eb1d395e-7e32-42ec-907d-732e365f958d/header.bin similarity index 100% rename from knowledge_base/files_Italy/chroma_db/eb1d395e-7e32-42ec-907d-732e365f958d/header.bin rename to knowledge_base/kbs/files_Italy/chroma_db/eb1d395e-7e32-42ec-907d-732e365f958d/header.bin diff --git a/knowledge_base/files_Italy/chroma_db/eb1d395e-7e32-42ec-907d-732e365f958d/length.bin b/knowledge_base/kbs/files_Italy/chroma_db/eb1d395e-7e32-42ec-907d-732e365f958d/length.bin similarity index 100% rename from knowledge_base/files_Italy/chroma_db/eb1d395e-7e32-42ec-907d-732e365f958d/length.bin rename to knowledge_base/kbs/files_Italy/chroma_db/eb1d395e-7e32-42ec-907d-732e365f958d/length.bin diff --git a/knowledge_base/files_Italy/chroma_db/eb1d395e-7e32-42ec-907d-732e365f958d/link_lists.bin b/knowledge_base/kbs/files_Italy/chroma_db/eb1d395e-7e32-42ec-907d-732e365f958d/link_lists.bin similarity index 100% rename from knowledge_base/files_Italy/chroma_db/eb1d395e-7e32-42ec-907d-732e365f958d/link_lists.bin rename to knowledge_base/kbs/files_Italy/chroma_db/eb1d395e-7e32-42ec-907d-732e365f958d/link_lists.bin diff --git a/knowledge_base/files_Italy/graph_documents.joblib b/knowledge_base/kbs/files_Italy/graph_documents.joblib similarity index 100% rename from knowledge_base/files_Italy/graph_documents.joblib rename to knowledge_base/kbs/files_Italy/graph_documents.joblib diff --git a/knowledge_base/files_Italy/preprocessed_chunks.joblib b/knowledge_base/kbs/files_Italy/preprocessed_chunks.joblib similarity index 100% rename from knowledge_base/files_Italy/preprocessed_chunks.joblib rename to knowledge_base/kbs/files_Italy/preprocessed_chunks.joblib diff --git a/knowledge_base/files_Italy/raw_docs.joblib b/knowledge_base/kbs/files_Italy/raw_docs.joblib similarity index 100% rename 
from knowledge_base/files_Italy/raw_docs.joblib rename to knowledge_base/kbs/files_Italy/raw_docs.joblib diff --git a/knowledge_base/files_Italy/rdf_graph.ttl b/knowledge_base/kbs/files_Italy/rdf_graph.ttl similarity index 100% rename from knowledge_base/files_Italy/rdf_graph.ttl rename to knowledge_base/kbs/files_Italy/rdf_graph.ttl diff --git a/knowledge_base/files_Switzerland/chroma_db/0f03843a-fd43-4d27-8b2d-5a2bd81df494/data_level0.bin b/knowledge_base/kbs/files_Switzerland/chroma_db/0f03843a-fd43-4d27-8b2d-5a2bd81df494/data_level0.bin similarity index 100% rename from knowledge_base/files_Switzerland/chroma_db/0f03843a-fd43-4d27-8b2d-5a2bd81df494/data_level0.bin rename to knowledge_base/kbs/files_Switzerland/chroma_db/0f03843a-fd43-4d27-8b2d-5a2bd81df494/data_level0.bin diff --git a/knowledge_base/files_Switzerland/chroma_db/0f03843a-fd43-4d27-8b2d-5a2bd81df494/header.bin b/knowledge_base/kbs/files_Switzerland/chroma_db/0f03843a-fd43-4d27-8b2d-5a2bd81df494/header.bin similarity index 100% rename from knowledge_base/files_Switzerland/chroma_db/0f03843a-fd43-4d27-8b2d-5a2bd81df494/header.bin rename to knowledge_base/kbs/files_Switzerland/chroma_db/0f03843a-fd43-4d27-8b2d-5a2bd81df494/header.bin diff --git a/knowledge_base/files_Switzerland/chroma_db/0f03843a-fd43-4d27-8b2d-5a2bd81df494/index_metadata.pickle b/knowledge_base/kbs/files_Switzerland/chroma_db/0f03843a-fd43-4d27-8b2d-5a2bd81df494/index_metadata.pickle similarity index 100% rename from knowledge_base/files_Switzerland/chroma_db/0f03843a-fd43-4d27-8b2d-5a2bd81df494/index_metadata.pickle rename to knowledge_base/kbs/files_Switzerland/chroma_db/0f03843a-fd43-4d27-8b2d-5a2bd81df494/index_metadata.pickle diff --git a/knowledge_base/files_Switzerland/chroma_db/0f03843a-fd43-4d27-8b2d-5a2bd81df494/length.bin b/knowledge_base/kbs/files_Switzerland/chroma_db/0f03843a-fd43-4d27-8b2d-5a2bd81df494/length.bin similarity index 100% rename from knowledge_base/files_Switzerland/chroma_db/0f03843a-fd43-4d27-8b2d-5a2bd81df494/length.bin rename to knowledge_base/kbs/files_Switzerland/chroma_db/0f03843a-fd43-4d27-8b2d-5a2bd81df494/length.bin diff --git a/knowledge_base/files_Switzerland/chroma_db/0f03843a-fd43-4d27-8b2d-5a2bd81df494/link_lists.bin b/knowledge_base/kbs/files_Switzerland/chroma_db/0f03843a-fd43-4d27-8b2d-5a2bd81df494/link_lists.bin similarity index 100% rename from knowledge_base/files_Switzerland/chroma_db/0f03843a-fd43-4d27-8b2d-5a2bd81df494/link_lists.bin rename to knowledge_base/kbs/files_Switzerland/chroma_db/0f03843a-fd43-4d27-8b2d-5a2bd81df494/link_lists.bin diff --git a/knowledge_base/files_Switzerland/chroma_db/984bbfa9-2b07-4d94-a385-ace07e7e2ebd/data_level0.bin b/knowledge_base/kbs/files_Switzerland/chroma_db/984bbfa9-2b07-4d94-a385-ace07e7e2ebd/data_level0.bin similarity index 100% rename from knowledge_base/files_Switzerland/chroma_db/984bbfa9-2b07-4d94-a385-ace07e7e2ebd/data_level0.bin rename to knowledge_base/kbs/files_Switzerland/chroma_db/984bbfa9-2b07-4d94-a385-ace07e7e2ebd/data_level0.bin diff --git a/knowledge_base/files_Switzerland/chroma_db/984bbfa9-2b07-4d94-a385-ace07e7e2ebd/header.bin b/knowledge_base/kbs/files_Switzerland/chroma_db/984bbfa9-2b07-4d94-a385-ace07e7e2ebd/header.bin similarity index 100% rename from knowledge_base/files_Switzerland/chroma_db/984bbfa9-2b07-4d94-a385-ace07e7e2ebd/header.bin rename to knowledge_base/kbs/files_Switzerland/chroma_db/984bbfa9-2b07-4d94-a385-ace07e7e2ebd/header.bin diff --git 
a/knowledge_base/files_Switzerland/chroma_db/984bbfa9-2b07-4d94-a385-ace07e7e2ebd/length.bin b/knowledge_base/kbs/files_Switzerland/chroma_db/984bbfa9-2b07-4d94-a385-ace07e7e2ebd/length.bin similarity index 100% rename from knowledge_base/files_Switzerland/chroma_db/984bbfa9-2b07-4d94-a385-ace07e7e2ebd/length.bin rename to knowledge_base/kbs/files_Switzerland/chroma_db/984bbfa9-2b07-4d94-a385-ace07e7e2ebd/length.bin diff --git a/knowledge_base/files_Switzerland/chroma_db/984bbfa9-2b07-4d94-a385-ace07e7e2ebd/link_lists.bin b/knowledge_base/kbs/files_Switzerland/chroma_db/984bbfa9-2b07-4d94-a385-ace07e7e2ebd/link_lists.bin similarity index 100% rename from knowledge_base/files_Switzerland/chroma_db/984bbfa9-2b07-4d94-a385-ace07e7e2ebd/link_lists.bin rename to knowledge_base/kbs/files_Switzerland/chroma_db/984bbfa9-2b07-4d94-a385-ace07e7e2ebd/link_lists.bin diff --git a/knowledge_base/files_Switzerland/chroma_db/chroma.sqlite3 b/knowledge_base/kbs/files_Switzerland/chroma_db/chroma.sqlite3 similarity index 100% rename from knowledge_base/files_Switzerland/chroma_db/chroma.sqlite3 rename to knowledge_base/kbs/files_Switzerland/chroma_db/chroma.sqlite3 diff --git a/knowledge_base/files_Switzerland/chroma_db/e2420058-0eed-403c-970c-9ca2d361f757/data_level0.bin b/knowledge_base/kbs/files_Switzerland/chroma_db/e2420058-0eed-403c-970c-9ca2d361f757/data_level0.bin similarity index 100% rename from knowledge_base/files_Switzerland/chroma_db/e2420058-0eed-403c-970c-9ca2d361f757/data_level0.bin rename to knowledge_base/kbs/files_Switzerland/chroma_db/e2420058-0eed-403c-970c-9ca2d361f757/data_level0.bin diff --git a/knowledge_base/files_Switzerland/chroma_db/e2420058-0eed-403c-970c-9ca2d361f757/header.bin b/knowledge_base/kbs/files_Switzerland/chroma_db/e2420058-0eed-403c-970c-9ca2d361f757/header.bin similarity index 100% rename from knowledge_base/files_Switzerland/chroma_db/e2420058-0eed-403c-970c-9ca2d361f757/header.bin rename to knowledge_base/kbs/files_Switzerland/chroma_db/e2420058-0eed-403c-970c-9ca2d361f757/header.bin diff --git a/knowledge_base/files_Switzerland/chroma_db/e2420058-0eed-403c-970c-9ca2d361f757/length.bin b/knowledge_base/kbs/files_Switzerland/chroma_db/e2420058-0eed-403c-970c-9ca2d361f757/length.bin similarity index 100% rename from knowledge_base/files_Switzerland/chroma_db/e2420058-0eed-403c-970c-9ca2d361f757/length.bin rename to knowledge_base/kbs/files_Switzerland/chroma_db/e2420058-0eed-403c-970c-9ca2d361f757/length.bin diff --git a/knowledge_base/files_Switzerland/chroma_db/e2420058-0eed-403c-970c-9ca2d361f757/link_lists.bin b/knowledge_base/kbs/files_Switzerland/chroma_db/e2420058-0eed-403c-970c-9ca2d361f757/link_lists.bin similarity index 100% rename from knowledge_base/files_Switzerland/chroma_db/e2420058-0eed-403c-970c-9ca2d361f757/link_lists.bin rename to knowledge_base/kbs/files_Switzerland/chroma_db/e2420058-0eed-403c-970c-9ca2d361f757/link_lists.bin diff --git a/knowledge_base/files_Switzerland/chroma_db/e9ff614d-0eb3-4162-8ebf-5d9f8e021cab/data_level0.bin b/knowledge_base/kbs/files_Switzerland/chroma_db/e9ff614d-0eb3-4162-8ebf-5d9f8e021cab/data_level0.bin similarity index 100% rename from knowledge_base/files_Switzerland/chroma_db/e9ff614d-0eb3-4162-8ebf-5d9f8e021cab/data_level0.bin rename to knowledge_base/kbs/files_Switzerland/chroma_db/e9ff614d-0eb3-4162-8ebf-5d9f8e021cab/data_level0.bin diff --git a/knowledge_base/files_Switzerland/chroma_db/e9ff614d-0eb3-4162-8ebf-5d9f8e021cab/header.bin 
b/knowledge_base/kbs/files_Switzerland/chroma_db/e9ff614d-0eb3-4162-8ebf-5d9f8e021cab/header.bin similarity index 100% rename from knowledge_base/files_Switzerland/chroma_db/e9ff614d-0eb3-4162-8ebf-5d9f8e021cab/header.bin rename to knowledge_base/kbs/files_Switzerland/chroma_db/e9ff614d-0eb3-4162-8ebf-5d9f8e021cab/header.bin diff --git a/knowledge_base/files_Switzerland/chroma_db/e9ff614d-0eb3-4162-8ebf-5d9f8e021cab/length.bin b/knowledge_base/kbs/files_Switzerland/chroma_db/e9ff614d-0eb3-4162-8ebf-5d9f8e021cab/length.bin similarity index 100% rename from knowledge_base/files_Switzerland/chroma_db/e9ff614d-0eb3-4162-8ebf-5d9f8e021cab/length.bin rename to knowledge_base/kbs/files_Switzerland/chroma_db/e9ff614d-0eb3-4162-8ebf-5d9f8e021cab/length.bin diff --git a/knowledge_base/files_Switzerland/chroma_db/e9ff614d-0eb3-4162-8ebf-5d9f8e021cab/link_lists.bin b/knowledge_base/kbs/files_Switzerland/chroma_db/e9ff614d-0eb3-4162-8ebf-5d9f8e021cab/link_lists.bin similarity index 100% rename from knowledge_base/files_Switzerland/chroma_db/e9ff614d-0eb3-4162-8ebf-5d9f8e021cab/link_lists.bin rename to knowledge_base/kbs/files_Switzerland/chroma_db/e9ff614d-0eb3-4162-8ebf-5d9f8e021cab/link_lists.bin diff --git a/knowledge_base/files_Switzerland/graph_documents.joblib b/knowledge_base/kbs/files_Switzerland/graph_documents.joblib similarity index 100% rename from knowledge_base/files_Switzerland/graph_documents.joblib rename to knowledge_base/kbs/files_Switzerland/graph_documents.joblib diff --git a/knowledge_base/files_Switzerland/preprocessed_chunks.joblib b/knowledge_base/kbs/files_Switzerland/preprocessed_chunks.joblib similarity index 100% rename from knowledge_base/files_Switzerland/preprocessed_chunks.joblib rename to knowledge_base/kbs/files_Switzerland/preprocessed_chunks.joblib diff --git a/knowledge_base/files_Switzerland/raw_docs.joblib b/knowledge_base/kbs/files_Switzerland/raw_docs.joblib similarity index 100% rename from knowledge_base/files_Switzerland/raw_docs.joblib rename to knowledge_base/kbs/files_Switzerland/raw_docs.joblib diff --git a/knowledge_base/files_Switzerland/rdf_graph.ttl b/knowledge_base/kbs/files_Switzerland/rdf_graph.ttl similarity index 100% rename from knowledge_base/files_Switzerland/rdf_graph.ttl rename to knowledge_base/kbs/files_Switzerland/rdf_graph.ttl diff --git a/knowledge_base/knowledge_extractor.py b/knowledge_base/knowledge_extractor.py index e488ada..8a5fcbf 100644 --- a/knowledge_base/knowledge_extractor.py +++ b/knowledge_base/knowledge_extractor.py @@ -3,17 +3,18 @@ import hashlib import os import re +from concurrent.futures import ThreadPoolExecutor, as_completed import joblib import numpy as np from langchain.embeddings import init_embeddings -from langchain_community.document_loaders import AsyncHtmlLoader, PyPDFLoader -from langchain_community.document_transformers import Html2TextTransformer +from langchain_community.document_loaders import AsyncHtmlLoader +from langchain_community.graphs.graph_document import GraphDocument from langchain_core.documents import Document from langchain_experimental.graph_transformers import LLMGraphTransformer from langchain_text_splitters import RecursiveCharacterTextSplitter from langchain_experimental.text_splitter import SemanticChunker -from rdflib import FOAF, OWL, RDF, RDFS, XSD, BNode, Graph, Literal, Namespace, URIRef +from rdflib import RDF, XSD, BNode, Literal from sklearn.metrics.pairwise import cosine_similarity from tqdm import tqdm import chromadb @@ -26,32 +27,35 @@ from llm import LLMHandler -from 
.utils.graph_prompt import entities_comparator, extract_descriptions_for_entities, extract_descriptions_for_triples, representative_entity_selector, translate_chunk, summarize_chunk -from .utils.graph_helpers import process_name_or_relationship, normalize_l2, sparql_query +from .utils.graph_prompt import entities_comparator, extract_descriptions_for_entities, extract_descriptions_for_triples, translate_chunk, summarize_chunk +from .utils.graph_helpers import process_name_or_relationship from .utils.energenius_graph import EnergeniusGraph -from itertools import permutations - from bs4 import BeautifulSoup class KnowledgeExtractor: """_Class to create a knowledge base from a text files._""" - def __init__(self, provider: str, model: str, embedding: str): + def __init__(self, provider: str, model: str, embedding: str, max_workers: int = 4): """_Initialize the KnowledgeExtractor._ Args: provider (str): _Description of the model provider._ model (str): _Description of the model name._ embedding (str): _Description of the embedding model name._ + max_workers (int): _Maximum number of parallel workers for LLM calls (default: 4)._ """ + # Set temperature based on model - gpt-5 models require temperature=1.0 + temperature = 1.0 if model.startswith("gpt-5") else 0.0 + # Initialize the LLMHandler and embedding model. self.llm_handler = LLMHandler( - provider=provider, model=model, temperature=0.0, language=None, keep_history=False + provider=provider, model=model, temperature=temperature, language=None, keep_history=False ) self.embeddings = init_embeddings(model=embedding, provider=provider) + self.max_workers = max_workers self.llm_graph_transformer = LLMGraphTransformer( llm=self.llm_handler.get_model(), @@ -83,6 +87,125 @@ def _get_first_sentence(self, text): sentences = re.split(r'(?<=[.!?])\s+', text.strip()) return sentences[0] if sentences else '' + def _process_chunk_parallel(self, i, chunk, prev_content, next_content): + """Process a single chunk for translation and summarization (parallel worker).""" + try: + if "language" not in chunk.metadata.keys(): + chunk.metadata["language"] = "na" + + # Translation + if "en" not in chunk.metadata["language"].lower(): + chunk.page_content = self._strip_quotes( + self.llm_handler.generate_response(translate_chunk(), f"{chunk.page_content}", False) + ) + + # Summarization + curr = chunk.page_content + context = "\n".join(filter(None, [ + self._get_last_sentence(prev_content) if prev_content else None, + curr, + self._get_first_sentence(next_content) if next_content else None + ])) + chunk.page_content = self._strip_quotes( + self.llm_handler.generate_response(summarize_chunk(), context, False) + ).replace("\n\n", "\n") + + return i, chunk + except Exception as e: + print(f"\n[Warning] Failed to process chunk {i}: {str(e)}") + return i, chunk # Return original chunk on error + + def _convert_to_graph_parallel(self, doc, max_retries=2): + """Convert a single document to graph format (parallel worker) with retry logic and timeout.""" + for attempt in range(max_retries + 1): + try: + result = self.llm_graph_transformer.convert_to_graph_documents([doc])[0] + return result + + except Exception as e: + error_msg = str(e) + + # Check if it's a token limit error + if "length limit was reached" in error_msg or "completion_tokens" in error_msg: + if attempt < max_retries: + # Try splitting the chunk further + current_size = len(doc.page_content) + new_size = int(current_size * 0.5) # Reduce by 50% + + print(f"\n[Warning] Chunk too large ({current_size} chars), 
splitting into smaller pieces (target: {new_size} chars)...") + + splitter = RecursiveCharacterTextSplitter( + chunk_size=new_size, + chunk_overlap=0, + length_function=len, + separators=["\n\n", "\n", ". ", " ", ""] + ) + + # Split and take only the first sub-chunk to avoid duplication + sub_chunks = splitter.split_text(doc.page_content) + if sub_chunks: + doc = doc.model_copy(update={"page_content": sub_chunks[0]}) + continue + + # If not a token limit error or final attempt, return empty + if attempt == max_retries: + print(f"\n[Error] Failed to convert chunk after {max_retries + 1} attempts: {error_msg}") + return GraphDocument(nodes=[], relationships=[], source=doc) + + # Fallback (should not reach here) + return GraphDocument(nodes=[], relationships=[], source=doc) + + def _generate_triple_description_parallel(self, row): + """Generate description for a single triple (parallel worker).""" + try: + chunk = f"{row['prev_chunk_content']}\n\n{row['chunk_content']}\n\n{row['next_chunk_content']}" + description = self._strip_quotes( + self.llm_handler.generate_response( + extract_descriptions_for_triples(f"{chunk}"), + f"{row['source_entity_name']} {row['relationship_name']} {row['target_entity_name']}", + False + ) + ).replace("\n\n", "\n") + return row['triple'], description + except Exception as e: + print(f"\n[Warning] Failed to generate triple description: {str(e)}") + return row['triple'], "" # Return empty description on error + + def _generate_entity_description_parallel(self, row, graph): + """Generate description for a single entity (parallel worker).""" + try: + types = graph.get_types(row["entity"]) + entity_description_from_triples = '\n'.join(f'{row["name"]} is {type["name"]}.' for _, type in types.iterrows()) + entity_description_from_triples += "\n".join(graph.get_entity_triples(row["entity"])["description"]) + + description = self._strip_quotes( + self.llm_handler.generate_response( + extract_descriptions_for_entities(f"{entity_description_from_triples}"), + f"{row['name']}", + False + ) + ).replace("\n\n", "\n") + return row["entity"], description + except Exception as e: + print(f"\n[Warning] Failed to generate entity description: {str(e)}") + return row["entity"], "" # Return empty description on error + + def _embed_item_parallel(self, item_data, item_type): + """Embed a single item (parallel worker).""" + try: + if item_type == "entity": + return item_data["entity"], self.embeddings.embed_query(item_data["name"]) + elif item_type == "type": + return item_data["type"], self.embeddings.embed_query(item_data["name"]) + elif item_type == "relationship": + return item_data["relationship"], self.embeddings.embed_query(item_data["name"]) + elif item_type == "triple": + return item_data["triple"], self.embeddings.embed_query(item_data["description"]) + except Exception as e: + print(f"\n[Warning] Failed to embed {item_type}: {str(e)}") + # Return zero vector on error + return list(item_data.values())[0], [0.0] * 1536 # Typical embedding dimension + def __extract_main_content(self, html): # Alternatively #return Html2TextTransformer().transform_documents(html_docs) @@ -179,6 +302,7 @@ def run( load_cached_triple_descriptions: bool = False, load_cached_entity_descriptions: bool = False, load_cached_embeddings: bool = False, + enable_parallel: bool = True, ) -> None: """_Main function to create the knowledge base._ Args: @@ -191,11 +315,12 @@ def run( load_cached_triple_descriptions (bool, optional): Whether to load cached triple descriptions. Defaults to False. 
load_cached_entity_descriptions (bool, optional): Whether to load cached entity descriptions. Defaults to False. load_cached_embeddings (bool, optional): Whether to load cached embeddings for the knowledge base. Defaults to False. + enable_parallel (bool, optional): Whether to use parallel processing for LLM calls and embeddings. Defaults to True. """ # Initialize the variables dir_path = os.path.dirname(os.path.realpath(__file__)) - path = os.path.join(dir_path, folder) + path = os.path.join(dir_path, "kbs", folder) # Checking if files folder is present if not os.path.exists(path): @@ -231,13 +356,20 @@ def run( # Load PDF documents and convert to HTML for pdf_url in pdf_urls: try: - # Download PDF from URL - response = requests.get(pdf_url, timeout=30) - response.raise_for_status() + # Check if it's a local file path or URL + if os.path.exists(pdf_url): + # Local file path + with open(pdf_url, 'rb') as f: + pdf_content = f.read() + else: + # Download PDF from URL + response = requests.get(pdf_url, timeout=30) + response.raise_for_status() + pdf_content = response.content # Extract text with layout preservation as HTML output_string = io.StringIO() - pdf_file = io.BytesIO(response.content) + pdf_file = io.BytesIO(pdf_content) extract_text_to_fp( pdf_file, @@ -305,9 +437,9 @@ def run( temp_chunks.append(chunk.model_copy(update={"page_content": text})) chunks = temp_chunks - # Size limiter 2 + # Size limiter 2 - Reduced from 1500 to 1200 to prevent token overflow in graph conversion size_limiter = RecursiveCharacterTextSplitter( - chunk_size=1500, + chunk_size=1200, chunk_overlap=0, length_function=len, separators=[".", "!", "?"], @@ -342,30 +474,57 @@ def run( else: preprocessed_chunks = chunks - for i in tqdm(range(len(preprocessed_chunks)), desc="Translation & summarization of the chunks: "): - - if "language" not in preprocessed_chunks[i].metadata.keys(): - preprocessed_chunks[i].metadata["language"] = "na" + if enable_parallel and self.max_workers > 1: + print(f"Using parallel processing with {self.max_workers} workers for chunk preprocessing") + + # Process chunks in parallel - pass prev/next content to avoid race conditions + with ThreadPoolExecutor(max_workers=self.max_workers) as executor: + futures = { + executor.submit( + self._process_chunk_parallel, + i, + chunk, + preprocessed_chunks[i - 1].page_content if i > 0 else "", + preprocessed_chunks[i + 1].page_content if i < len(preprocessed_chunks) - 1 else "" + ): i + for i, chunk in enumerate(preprocessed_chunks) + } + + # Collect results with progress bar with timeout + for future in tqdm(as_completed(futures, timeout=300), total=len(futures), desc="Translation & summarization of chunks: "): + try: + idx, processed_chunk = future.result(timeout=60) + preprocessed_chunks[idx] = processed_chunk + except Exception as e: + idx = futures[future] + print(f"\n[Error] Chunk {idx} failed: {str(e)}") + # Keep original chunk on failure + else: + # Sequential processing (original behavior) + for i in tqdm(range(len(preprocessed_chunks)), desc="Translation & summarization of the chunks: "): + + if "language" not in preprocessed_chunks[i].metadata.keys(): + preprocessed_chunks[i].metadata["language"] = "na" + + # Translation + if "en" not in preprocessed_chunks[i].metadata["language"].lower(): + preprocessed_chunks[i].page_content = self._strip_quotes( + self.llm_handler.generate_response(translate_chunk(), f"{preprocessed_chunks[i].page_content}", False) + ) - # Translation - if "en" not in 
preprocessed_chunks[i].metadata["language"].lower(): + # Summarization + prev = preprocessed_chunks[i - 1].page_content if i > 0 else "" + curr = preprocessed_chunks[i].page_content + next_ = preprocessed_chunks[i + 1].page_content if i < len(preprocessed_chunks) - 1 else "" + context = "\n".join(filter(None, [ + self._get_last_sentence(prev) if prev else None, + curr, + self._get_first_sentence(next_) if next_ else None + ])) + print(f"\n\n{context}") preprocessed_chunks[i].page_content = self._strip_quotes( - self.llm_handler.generate_response(translate_chunk(), f"{preprocessed_chunks[i].page_content}", False) - ) - - # Summarization - prev = preprocessed_chunks[i - 1].page_content if i > 0 else "" - curr = preprocessed_chunks[i].page_content - next_ = preprocessed_chunks[i + 1].page_content if i < len(preprocessed_chunks) - 1 else "" - context = "\n".join(filter(None, [ - self._get_last_sentence(prev) if prev else None, - curr, - self._get_first_sentence(next_) if next_ else None - ])) - print(f"\n\n{context}") - preprocessed_chunks[i].page_content = self._strip_quotes( - self.llm_handler.generate_response(summarize_chunk(), context, False) - ).replace("\n\n", "\n") + self.llm_handler.generate_response(summarize_chunk(), context, False) + ).replace("\n\n", "\n") joblib.dump(preprocessed_chunks, os.path.join(path, "preprocessed_chunks.joblib")) # Save @@ -380,18 +539,40 @@ def run( print("No existing knowledge base found.") return else: - - graph_documents = [] - for doc in tqdm(preprocessed_chunks, desc="Conversion to graph documents: "): # Nodes and relationships extraction - graph_from_chunk = self.llm_graph_transformer.convert_to_graph_documents([doc])[0] - print("\n".join([f"{rel.source.id} ({rel.source.type}), {rel.type}, {rel.target.id} ({rel.target.type})" for rel in graph_from_chunk.relationships])) - graph_documents.append(graph_from_chunk) + if enable_parallel and self.max_workers > 1: + print(f"Using parallel processing with {self.max_workers} workers for graph conversion") + + graph_documents = [] + with ThreadPoolExecutor(max_workers=self.max_workers) as executor: + futures = {executor.submit(self._convert_to_graph_parallel, doc): i for i, doc in enumerate(preprocessed_chunks)} + + # Collect results with progress bar, maintaining order + results = {} + for future in tqdm(as_completed(futures, timeout=600), total=len(futures), desc="Conversion to graph documents: "): + try: + idx = futures[future] + graph_from_chunk = future.result(timeout=120) + results[idx] = graph_from_chunk + except Exception as e: + idx = futures[future] + print(f"\n[Error] Graph conversion {idx} failed: {str(e)}") + results[idx] = GraphDocument(nodes=[], relationships=[], source=preprocessed_chunks[idx]) + + # Sort by index to maintain order + graph_documents = [results[i] for i in range(len(results))] + else: + # Sequential processing (original behavior) + graph_documents = [] + for doc in tqdm(preprocessed_chunks, desc="Conversion to graph documents: "): # Nodes and relationships extraction + graph_from_chunk = self._convert_to_graph_parallel(doc) # Use same retry logic + print("\n".join([f"{rel.source.id} ({rel.source.type}), {rel.type}, {rel.target.id} ({rel.target.type})" for rel in graph_from_chunk.relationships])) + graph_documents.append(graph_from_chunk) joblib.dump(graph_documents, os.path.join(path, "graph_documents.joblib")) # Save # --- Syntactic disambiguation --- - + print("Starting syntactic disambiguation...") def is_valid_text(text): # Check for non-empty alphanumeric content if not 
re.match(r'^(?=.*[a-zA-Z0-9]).+$', text): @@ -436,33 +617,86 @@ def is_valid_text(text): if re.match(r'^(?=.*[a-zA-Z0-9]).+$', rel.target.type): all_entities[rel.target.type] = rel.target.type + # Helper function for parallel entity comparison + def compare_entity_pair(i, j, ids, similarity_matrix): + """Compare a pair of entities to determine if they should be merged.""" + try: + if similarity_matrix[i][j] > 0.9 and not re.search(r'\d', ids[i]) and not re.search(r'\d', ids[j]): + same = self.llm_handler.generate_response(entities_comparator(), f"{ids[i]}\n{ids[j]}", False) == "Same" + if same: + def to_keep(s1, s2): + s1c = s1.count(' ') + s2c = s2.count(' ') + if s1c > s2c and s1c < 5: return s1 + elif s1c < s2c and s1c < 5: return s2 + else: return s1 if len(s1) <= len(s2) else s2 + ent = to_keep(ids[i], ids[j]) + print(f"{ids[i]} - {ids[j]} -> {ent}") + return (ids[i], ids[j], ent) + return None + except Exception as e: + print(f"\n[Warning] Failed to compare entities {ids[i]} and {ids[j]}: {str(e)}") + return None + # Compute cosine similarity matrix - merged_map = {} - for iterations in range(5): + for iterations in tqdm(range(5), desc="Syntactic disambiguation iterations: "): if not all_entities: break ids = list(all_entities.keys()) - embeddings = np.array([self.embeddings.embed_query(key) for key in ids]) + embeddings = np.array([self.embeddings.embed_query(key) for key in tqdm(ids, desc=f" Embedding entities (iteration {iterations+1}): ", leave=False)]) similarity_matrix = cosine_similarity(embeddings) # Group similar nodes new_merged_map = {} - for i in tqdm(range(len(ids)), desc="Syntactic disambiguation: "): - for j in range(i + 1, len(ids)): - if similarity_matrix[i][j] > 0.9 and not re.search(r'\d', ids[i]) and not re.search(r'\d', ids[j]): # No numbers - same = self.llm_handler.generate_response(entities_comparator(), f"{ids[i]}\n{ids[j]}", False) == "Same" # If they are not the same thing - if same: - def to_keep(s1, s2): - s1c = s1.count(' ') - s2c = s2.count(' ') - if s1c > s2c and s1c < 5: return s1 - elif s1c < s2c and s1c < 5: return s2 - else: return s1 if len(s1) <= len(s2) else s2 - ent = to_keep(ids[i],ids[j]) # Chose which of the two to keep - print(f"{ids[i]} - {ids[j]} -> {ent}") - # Merge j into i - new_merged_map[ids[i]] = ent - new_merged_map[ids[j]] = ent + + if enable_parallel and self.max_workers > 1: + print(f" Using parallel processing with {self.max_workers} workers for entity comparison") + + # Collect all candidate pairs + candidate_pairs = [] + for i in range(len(ids)): + for j in range(i + 1, len(ids)): + if similarity_matrix[i][j] > 0.9 and not re.search(r'\d', ids[i]) and not re.search(r'\d', ids[j]): + candidate_pairs.append((i, j)) + + print(f" Found {len(candidate_pairs)} candidate pairs to compare (iteration {iterations+1})") + + # Process pairs in parallel + with ThreadPoolExecutor(max_workers=min(6, self.max_workers)) as executor: + futures = { + executor.submit(compare_entity_pair, i, j, ids, similarity_matrix): (i, j) + for i, j in candidate_pairs + } + + for future in tqdm(as_completed(futures, timeout=300), total=len(futures), desc=f" Comparing entity pairs (iteration {iterations+1}): ", leave=False): + try: + result = future.result(timeout=60) + if result: + id_i, id_j, ent = result + new_merged_map[id_i] = ent + new_merged_map[id_j] = ent + except Exception as e: + i, j = futures[future] + print(f"\n[Error] Entity comparison ({ids[i]}, {ids[j]}) failed: {str(e)}") + else: + # Sequential processing (original behavior) + for i in 
tqdm(range(len(ids)), desc=f" Comparing entity pairs (iteration {iterations+1}): ", leave=False): + for j in range(i + 1, len(ids)): + if similarity_matrix[i][j] > 0.9 and not re.search(r'\d', ids[i]) and not re.search(r'\d', ids[j]): # No numbers + same = self.llm_handler.generate_response(entities_comparator(), f"{ids[i]}\n{ids[j]}", False) == "Same" # Ask the LLM whether the two entities are the same + if same: + def to_keep(s1, s2): + s1c = s1.count(' ') + s2c = s2.count(' ') + if s1c > s2c and s1c < 5: return s1 + elif s1c < s2c and s1c < 5: return s2 + else: return s1 if len(s1) <= len(s2) else s2 + ent = to_keep(ids[i],ids[j]) # Choose which of the two to keep + print(f"{ids[i]} - {ids[j]} -> {ent}") + # Merge j into i + new_merged_map[ids[i]] = ent + new_merged_map[ids[j]] = ent + all_entities = {v: v for v in new_merged_map.values()} # Update graph_documents @@ -610,13 +844,29 @@ def to_keep(s1, s2): triples["chunk_content"] = triples["chunk_content"].str.replace("\n", " ", regex=False) triples["next_chunk_content"] = triples["next_chunk_content"].str.replace("\n", " ", regex=False) - for index, row in tqdm(list(triples.iterrows()), desc="Summarizing triples: "): - chunk = f"{row["prev_chunk_content"]}\n\n{row["chunk_content"]}\n\n{row["next_chunk_content"]}" - description = self._strip_quotes( - self.llm_handler.generate_response( - extract_descriptions_for_triples(f"{chunk}"), f"{row['source_entity_name']} {row["relationship_name"]} {row['target_entity_name']}", False) - ).replace("\n\n", "\n") - graph.rdf_graph.add((row["triple"], graph.ONTO.hasDescription, Literal(description, datatype=XSD.string))) + if enable_parallel and self.max_workers > 1: + print(f"Using parallel processing with {self.max_workers} workers for triple descriptions") + + # Max workers to avoid rate limits + with ThreadPoolExecutor(max_workers=min(6, self.max_workers)) as executor: + futures = {executor.submit(self._generate_triple_description_parallel, row): index for index, row in triples.iterrows()} + + for future in tqdm(as_completed(futures, timeout=600), total=len(futures), desc="Summarizing triples: "): + try: + triple_uri, description = future.result(timeout=120) + graph.rdf_graph.add((triple_uri, graph.ONTO.hasDescription, Literal(description, datatype=XSD.string))) + except Exception as e: + index = futures[future] + print(f"\n[Error] Triple {index} description failed: {str(e)}") + else: + # Sequential processing (original behavior) + for index, row in tqdm(list(triples.iterrows()), desc="Summarizing triples: "): + chunk = f"{row['prev_chunk_content']}\n\n{row['chunk_content']}\n\n{row['next_chunk_content']}" + description = self._strip_quotes( + self.llm_handler.generate_response( + extract_descriptions_for_triples(f"{chunk}"), f"{row['source_entity_name']} {row['relationship_name']} {row['target_entity_name']}", False) + ).replace("\n\n", "\n") + graph.rdf_graph.add((row["triple"], graph.ONTO.hasDescription, Literal(description, datatype=XSD.string))) graph.save_to_file(os.path.join(path, f"{file_name}.ttl")) # Save @@ -634,18 +884,35 @@ def to_keep(s1, s2): # Entities entities = graph.get_entities() - for index, row in tqdm(list(entities.iterrows()), desc="Summarizing entities: "): - # Types - types = graph.get_types(row["entity"]) - entity_description_from_triples = '\n'.join(f'{row["name"]} is {type["name"]}.' 
for _, type in types.iterrows()) - # Descriptions - entity_description_from_triples += "\n".join(graph.get_entity_triples(row["entity"])["description"]) - - description = self._strip_quotes( - self.llm_handler.generate_response( - extract_descriptions_for_entities(f"{entity_description_from_triples}"), f"{row["name"]}", False) - ).replace("\n\n", "\n") - graph.rdf_graph.add((row["entity"], graph.ONTO.hasDescription, Literal(description, datatype=XSD.string))) + + if enable_parallel and self.max_workers > 1: + print(f"Using parallel processing with {self.max_workers} workers for entity descriptions") + + # Max workers to avoid rate limits + with ThreadPoolExecutor(max_workers=min(6, self.max_workers)) as executor: + futures = {executor.submit(self._generate_entity_description_parallel, row, graph): index for index, row in entities.iterrows()} + + for future in tqdm(as_completed(futures, timeout=600), total=len(futures), desc="Summarizing entities: "): + try: + entity_uri, description = future.result(timeout=120) + graph.rdf_graph.add((entity_uri, graph.ONTO.hasDescription, Literal(description, datatype=XSD.string))) + except Exception as e: + index = futures[future] + print(f"\n[Error] Entity {index} description failed: {str(e)}") + else: + # Sequential processing (original behavior) + for index, row in tqdm(list(entities.iterrows()), desc="Summarizing entities: "): + # Types + types = graph.get_types(row["entity"]) + entity_description_from_triples = '\n'.join(f'{row["name"]} is {type["name"]}.' for _, type in types.iterrows()) + # Descriptions + entity_description_from_triples += "\n".join(graph.get_entity_triples(row["entity"])["description"]) + + description = self._strip_quotes( + self.llm_handler.generate_response( + extract_descriptions_for_entities(f"{entity_description_from_triples}"), f"{row["name"]}", False) + ).replace("\n\n", "\n") + graph.rdf_graph.add((row["entity"], graph.ONTO.hasDescription, Literal(description, datatype=XSD.string))) graph.save_to_file(os.path.join(path, f"{file_name}.ttl")) # Save @@ -664,33 +931,85 @@ def to_keep(s1, s2): if not load_cached_embeddings: - # Entities - #collection_entities.delete(ids=collection_entities.get()["ids"]) - entities = graph.get_entities() - for index, row in tqdm(list(entities.iterrows()), desc="Embedding entities: "): - emb = self.embeddings.embed_query(row["name"]) - collection_entities.add(ids=[row["entity"]], embeddings=[emb]) - - # Types - #collection_types.delete(ids=collection_types.get()["ids"]) - types = graph.get_types() - for index, row in tqdm(list(types.iterrows()), desc="Embedding types: "): - emb = self.embeddings.embed_query(row["name"]) - collection_types.add(ids=[row["type"]], embeddings=[emb]) - - # Relationships - #collection_relationships.delete(ids=collection_relationships.get()["ids"]) - relationships = graph.get_relationships() - for index, row in tqdm(list(relationships.iterrows()), desc="Embedding relationships: "): - emb = self.embeddings.embed_query(row["name"]) - collection_relationships.add(ids=[row["relationship"]], embeddings=[emb]) - - # Triples - #collection_triples.delete(ids=collection_triples.get()["ids"]) - triples = graph.get_triples() - for index, row in tqdm(list(triples.iterrows()), desc="Embedding triples: "): - emb = self.embeddings.embed_query(row["description"]) - collection_triples.add(ids=[row["triple"]], embeddings=[emb]) + if enable_parallel and self.max_workers > 1: + print(f"Using parallel processing with {self.max_workers} workers for embeddings") + + # Entities + entities = 
graph.get_entities() + entity_rows = [{"entity": row["entity"], "name": row["name"]} for _, row in entities.iterrows()] + with ThreadPoolExecutor(max_workers=self.max_workers) as executor: + futures = {executor.submit(self._embed_item_parallel, item, "entity"): item for item in entity_rows} + for future in tqdm(as_completed(futures, timeout=300), total=len(futures), desc="Embedding entities: "): + try: + entity_id, emb = future.result(timeout=30) + collection_entities.add(ids=[entity_id], embeddings=[emb]) + except Exception as e: + item = futures[future] + print(f"\n[Error] Embedding entity {item.get('entity', 'unknown')} failed: {str(e)}") + + # Types + types = graph.get_types() + type_rows = [{"type": row["type"], "name": row["name"]} for _, row in types.iterrows()] + with ThreadPoolExecutor(max_workers=self.max_workers) as executor: + futures = {executor.submit(self._embed_item_parallel, item, "type"): item for item in type_rows} + for future in tqdm(as_completed(futures, timeout=300), total=len(futures), desc="Embedding types: "): + try: + type_id, emb = future.result(timeout=30) + collection_types.add(ids=[type_id], embeddings=[emb]) + except Exception as e: + item = futures[future] + print(f"\n[Error] Embedding type {item.get('type', 'unknown')} failed: {str(e)}") + + # Relationships + relationships = graph.get_relationships() + rel_rows = [{"relationship": row["relationship"], "name": row["name"]} for _, row in relationships.iterrows()] + with ThreadPoolExecutor(max_workers=self.max_workers) as executor: + futures = {executor.submit(self._embed_item_parallel, item, "relationship"): item for item in rel_rows} + for future in tqdm(as_completed(futures, timeout=300), total=len(futures), desc="Embedding relationships: "): + try: + rel_id, emb = future.result(timeout=30) + collection_relationships.add(ids=[rel_id], embeddings=[emb]) + except Exception as e: + item = futures[future] + print(f"\n[Error] Embedding relationship {item.get('relationship', 'unknown')} failed: {str(e)}") + + # Triples + triples = graph.get_triples() + triple_rows = [{"triple": row["triple"], "description": row["description"]} for _, row in triples.iterrows()] + with ThreadPoolExecutor(max_workers=self.max_workers) as executor: + futures = {executor.submit(self._embed_item_parallel, item, "triple"): item for item in triple_rows} + for future in tqdm(as_completed(futures, timeout=300), total=len(futures), desc="Embedding triples: "): + try: + triple_id, emb = future.result(timeout=30) + collection_triples.add(ids=[triple_id], embeddings=[emb]) + except Exception as e: + item = futures[future] + print(f"\n[Error] Embedding triple {item.get('triple', 'unknown')} failed: {str(e)}") + else: + # Sequential processing (original behavior) + # Entities + entities = graph.get_entities() + for index, row in tqdm(list(entities.iterrows()), desc="Embedding entities: "): + emb = self.embeddings.embed_query(row["name"]) + collection_entities.add(ids=[row["entity"]], embeddings=[emb]) + + # Types + types = graph.get_types() + for index, row in tqdm(list(types.iterrows()), desc="Embedding types: "): + emb = self.embeddings.embed_query(row["name"]) + collection_types.add(ids=[row["type"]], embeddings=[emb]) + + # Relationships + relationships = graph.get_relationships() + for index, row in tqdm(list(relationships.iterrows()), desc="Embedding relationships: "): + emb = self.embeddings.embed_query(row["name"]) + collection_relationships.add(ids=[row["relationship"]], embeddings=[emb]) + + # Triples + triples = graph.get_triples() + 
for index, row in tqdm(list(triples.iterrows()), desc="Embedding triples: "): + emb = self.embeddings.embed_query(row["description"]) + collection_triples.add(ids=[row["triple"]], embeddings=[emb]) # Chunks #collection_chunks.delete(ids=collection_chunks.get()["ids"]) diff --git a/knowledge_base/knowledge_manager.py b/knowledge_base/knowledge_manager.py index 02d77da..6dca3c2 100644 --- a/knowledge_base/knowledge_manager.py +++ b/knowledge_base/knowledge_manager.py @@ -51,10 +51,19 @@ def __init__(self, provider: str, model: str, embedding: str, language: str, kno language=None, keep_history=False ) - self.embeddings = init_embeddings( - model=embedding, - provider=provider - ) + + # Initialize embeddings with keep_alive for Ollama + if provider == "ollama": + self.embeddings = init_embeddings( + model=embedding, + provider=provider, + keep_alive=-1 # Keep model loaded indefinitely to prevent crashes + ) + else: + self.embeddings = init_embeddings( + model=embedding, + provider=provider + ) self.language = language self.knowledge_base_path = knowledge_base_path @@ -93,7 +102,7 @@ def user_message(self, message: str, user_type: str, house_type: str, region: st # --- Init --- # Initialize the variables - self.path = os.path.join(os.path.dirname(os.path.realpath(__file__)), self.knowledge_base_path) + self.path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "kbs", self.knowledge_base_path) # Load the RDF graph self.graph = EnergeniusGraph() @@ -124,7 +133,6 @@ def user_message(self, message: str, user_type: str, house_type: str, region: st #print(f"\n\n-----User message-----\n{message}") - # If the question is not well-formed if len(message) < 3: return wrong_answer_prompt(self.language) @@ -459,7 +467,7 @@ def user_message(self, message: str, user_type: str, house_type: str, region: st n_results=30, ) triples = [{"id": id, "distance": distance} for (id, distance) in zip(triples["ids"][0], triples["distances"][0]) if distance < 0.5] if triples else [] - print(f"\n\n-----Triples for {message}-----\n{"\n".join([f"{triple}" for triple in triples])}") + #print(f"\n\n-----Triples for {message}-----\n{"\n".join([f"{triple}" for triple in triples])}") # No triple found: return wrong answer prompt if not triples: diff --git a/knowledge_base/utils/graph_helpers.py b/knowledge_base/utils/graph_helpers.py index a71232b..9fdc21a 100644 --- a/knowledge_base/utils/graph_helpers.py +++ b/knowledge_base/utils/graph_helpers.py @@ -4,7 +4,6 @@ from rdflib import Graph from rdflib.plugins.sparql import prepareQuery -from nltk.corpus import wordnet as wn import re import unicodedata diff --git a/llm/__init__.py b/llm/__init__.py index f9e5bcb..4b5f7ea 100644 --- a/llm/__init__.py +++ b/llm/__init__.py @@ -1,2 +1,2 @@ """LLM module for handling different large language models.""" -from .langchain import LLMHandler +from .langchain import LLMHandler, get_ollama_models, get_ollama_embeddings diff --git a/llm/langchain.py b/llm/langchain.py index 678cd19..dd4a356 100644 --- a/llm/langchain.py +++ b/llm/langchain.py @@ -1,12 +1,78 @@ """LangChain LLM wrapper.""" import os +import requests +from dotenv import load_dotenv from langchain.chat_models import init_chat_model from langchain_core.language_models import BaseChatModel from langchain_core.messages import BaseMessage, AIMessage, HumanMessage, SystemMessage -from private_settings import PRIVATE_SETINGS +# Load environment variables from .env file +load_dotenv() + + +def get_ollama_models(base_url: str = None, timeout: int = 5) -> list[str]: + 
"""Fetch available Ollama chat models from the API (excludes embedding models). + + Args: + base_url (str, optional): The base URL of the Ollama server. + If None, uses PRIVATE_SETINGS["LLM_BASE_URL"]. + timeout (int, optional): Request timeout in seconds. Defaults to 5. + + Returns: + list[str]: List of available chat model names, or error messages if request fails. + """ + try: + if base_url is None: + base_url = os.getenv("LLM_BASE_URL", "http://localhost:11434") + # Remove trailing slash if present + base_url = base_url.rstrip('/') + response = requests.get(f"{base_url}/api/tags", timeout=timeout) + if response.status_code == 200: + models_data = response.json() + # Filter out embedding models based on model family metadata + embedding_families = ["bert", "nomic-bert"] + chat_models = [ + model["name"] for model in models_data.get("models", []) + if model.get("details", {}).get("family", "").lower() not in embedding_families + ] + return chat_models if chat_models else ["No chat models found"] + else: + return ["Error: Could not connect to Ollama"] + except Exception as e: + return [f"Error: {str(e)}"] + + +def get_ollama_embeddings(base_url: str = None, timeout: int = 5) -> list[str]: + """Fetch available Ollama embedding models from the API. + + Args: + base_url (str, optional): The base URL of the Ollama server. + If None, uses PRIVATE_SETINGS["LLM_BASE_URL"]. + timeout (int, optional): Request timeout in seconds. Defaults to 5. + + Returns: + list[str]: List of available embedding model names, or fallback defaults if request fails. + """ + try: + if base_url is None: + base_url = os.getenv("LLM_BASE_URL", "http://localhost:11434") + base_url = base_url.rstrip('/') + response = requests.get(f"{base_url}/api/tags", timeout=timeout) + if response.status_code == 200: + models_data = response.json() + # Filter for embedding models based on model family metadata + embedding_families = ["bert", "nomic-bert"] + embeddings = [ + model["name"] for model in models_data.get("models", []) + if model.get("details", {}).get("family", "").lower() in embedding_families + ] + return embeddings if embeddings else ["mxbai-embed-large"] + else: + return ["mxbai-embed-large", "nomic-embed-text"] # Fallback defaults + except Exception: + return ["mxbai-embed-large", "nomic-embed-text"] # Fallback defaults class LLMHandler: @@ -43,19 +109,31 @@ def __init__( self.__env_creation(provider) # creation of the model - if not PRIVATE_SETINGS["LLM_LOCAL"]: + llm_local = os.getenv("LLM_LOCAL", "false").lower() == "true" + if not llm_local: self.model = init_chat_model( model=model, model_provider=provider, temperature=temperature, ) else: - self.model = init_chat_model( - model=model, - model_provider=provider, - temperature=temperature, - base_url=PRIVATE_SETINGS["LLM_BASE_URL"], - ) + # For local models (like Ollama), add keep_alive parameter + llm_base_url = os.getenv("LLM_BASE_URL", "http://localhost:11434") + if provider == "ollama": + self.model = init_chat_model( + model=model, + model_provider=provider, + temperature=temperature, + base_url=llm_base_url, + keep_alive=-1, # Keep model loaded indefinitely to prevent crashes + ) + else: + self.model = init_chat_model( + model=model, + model_provider=provider, + temperature=temperature, + base_url=llm_base_url, + ) def __env_creation(self, provider: str) -> None: """ @@ -67,7 +145,12 @@ def __env_creation(self, provider: str) -> None: """ if provider == "openai": - os.environ["OPENAI_API_KEY"] = PRIVATE_SETINGS["LLM_KEY"]["openai"] + # OPENAI_API_KEY should 
already be set in environment or .env file + # Only set it if not already present + if "OPENAI_API_KEY" not in os.environ: + openai_key = os.getenv("OPENAI_API_KEY") + if openai_key: + os.environ["OPENAI_API_KEY"] = openai_key def get_model(self) -> BaseChatModel: """Get the current BaseChatModel model diff --git a/requirements.txt b/requirements.txt index d59b1ae..fbd8a88 100644 --- a/requirements.txt +++ b/requirements.txt @@ -47,6 +47,7 @@ joblib==1.5.0 jsonpatch==1.33 jsonpickle==4.0.5 jsonpointer==3.0.0 +json_repair==0.52.3 jsonschema==4.23.0 jsonschema-specifications==2025.4.1 kiwisolver==1.4.8 @@ -133,3 +134,5 @@ wcwidth==0.2.13 wheel==0.45.1 yarl==1.20.0 zstandard==0.23.0 +pymongo==4.3.3 +pdfminer.six==20221105 \ No newline at end of file diff --git a/run_document_manager.sh b/run_document_manager.sh new file mode 100755 index 0000000..cee5980 --- /dev/null +++ b/run_document_manager.sh @@ -0,0 +1,3 @@ +#!/bin/bash + +cd "$(dirname "$0")" && streamlit run document_manager_ui.py --server.port 8501 diff --git a/src/__init__.py b/src/__init__.py new file mode 100644 index 0000000..2c3547f --- /dev/null +++ b/src/__init__.py @@ -0,0 +1,3 @@ +"""Document Manager - Core modules for MongoDB document management.""" + +__version__ = "1.0.0" diff --git a/src/chat_manager.py b/src/chat_manager.py new file mode 100644 index 0000000..cecc6f4 --- /dev/null +++ b/src/chat_manager.py @@ -0,0 +1,70 @@ +"""Chat management functions - upload, download, and session handling.""" + +import json +from io import StringIO +import streamlit as st + + +def prepare_chat_download_data(messages): + """Prepare chat messages for download as JSON.""" + return json.dumps(messages, separators=(",", ": ")) + + +def process_chat_upload(uploaded_file): + """ + Process uploaded chat file and return messages. + + Returns: + tuple: (messages_list, error_message) + """ + if uploaded_file is None: + return None, None + + try: + # Convert to string based IO + stringio = StringIO(uploaded_file.getvalue().decode("utf-8")) + string_data = stringio.read() + + # Convert into JSON + chat_data = json.loads(string_data) + + # Validate format + if not isinstance(chat_data, list): + return None, "Invalid chat file format: expected a list of messages." + + # Validate each message + for message in chat_data: + if (not isinstance(message, dict) + or "role" not in message + or "content" not in message): + return None, "Invalid chat file format: each message must have 'role' and 'content'." 
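+            # A valid upload is a JSON array of objects such as
+            # [{"role": "user", "content": "Hi"}, {"role": "assistant", "content": "Hello!"}]
+            # (illustrative example; only the "role" and "content" keys are checked here).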
+ + return chat_data, None + + except json.JSONDecodeError as e: + return None, f"Error decoding JSON: {e}" + except Exception as e: + return None, f"Error processing file: {e}" + + +def initialize_chat_messages(): + """Initialize chat messages in session state if not present.""" + if "messages" not in st.session_state: + st.session_state["messages"] = [] + + +def clear_chat_messages(): + """Clear all chat messages from session state.""" + st.session_state.pop("messages", None) + + +def add_message(role, content): + """Add a message to the chat history.""" + if "messages" not in st.session_state: + st.session_state["messages"] = [] + st.session_state["messages"].append({"role": role, "content": content}) + + +def get_chat_messages(): + """Get all chat messages from session state.""" + return st.session_state.get("messages", []) diff --git a/src/database.py b/src/database.py new file mode 100644 index 0000000..4459bd3 --- /dev/null +++ b/src/database.py @@ -0,0 +1,32 @@ +"""MongoDB database connection and configuration.""" + +import os +import streamlit as st +from pymongo import MongoClient +from dotenv import load_dotenv + +# Load environment variables +load_dotenv() + +# MongoDB configuration +MONGODB_URI = os.getenv("MONGODB_URI", "mongodb://localhost:27017") +MONGODB_DATABASE = os.getenv("MONGODB_DATABASE", "energenius") +MONGODB_COLLECTION = os.getenv("MONGODB_COLLECTION", "documents") + + +@st.cache_resource +def get_mongodb_client(): + """Get MongoDB client connection.""" + return MongoClient(MONGODB_URI) + + +def get_db(): + """Get MongoDB database.""" + client = get_mongodb_client() + return client[MONGODB_DATABASE] + + +def get_collection(): + """Get MongoDB collection.""" + db = get_db() + return db[MONGODB_COLLECTION] diff --git a/src/document_manager.py b/src/document_manager.py new file mode 100644 index 0000000..1d2428e --- /dev/null +++ b/src/document_manager.py @@ -0,0 +1,143 @@ +"""Document management functions for MongoDB - PDFs and URLs.""" + +from datetime import datetime +from bson.objectid import ObjectId +from .database import get_collection +from .utils import calculate_file_hash + + +def upload_pdf_to_mongodb(domain_name, pdf_file, description="", tags=None): + """Upload a PDF file to MongoDB.""" + collection = get_collection() + + # Read PDF content + pdf_content = pdf_file.read() + + # Calculate content hash for duplicate detection + content_hash = calculate_file_hash(pdf_content) + + document_doc = { + "type": "document", + "domain": domain_name, + "doc_type": "pdf", + "filename": pdf_file.name, + "description": description, + "tags": tags or [], + "content": pdf_content, + "content_hash": content_hash, + "size": len(pdf_content), + "uploaded_at": datetime.now(), + "updated_at": datetime.now() + } + + result = collection.insert_one(document_doc) + return result.inserted_id + + +def add_url_to_mongodb(domain_name, url, title="", description="", tags=None): + """Add a URL to MongoDB.""" + collection = get_collection() + + document_doc = { + "type": "document", + "domain": domain_name, + "doc_type": "url", + "url": url, + "title": title or url, + "description": description, + "tags": tags or [], + "uploaded_at": datetime.now(), + "updated_at": datetime.now() + } + + result = collection.insert_one(document_doc) + return result.inserted_id + + +def get_documents_by_domain(domain_name): + """Get all documents for a specific domain.""" + collection = get_collection() + documents = list(collection.find({"type": "document", "domain": domain_name}).sort("uploaded_at", 
-1)) + return documents + + +def delete_document(document_id): + """Delete a document from MongoDB.""" + collection = get_collection() + collection.delete_one({"_id": ObjectId(document_id)}) + + +def download_pdf_from_mongodb(document_id): + """Retrieve a PDF document from MongoDB.""" + collection = get_collection() + + document = collection.find_one({"_id": ObjectId(document_id), "doc_type": "pdf"}) + + if document and "content" in document: + return { + "content": document["content"], + "filename": document.get("filename", "document.pdf") + } + return None + + +def check_duplicate_pdf(domain_name, filename, file_content): + """Check if a PDF already exists in the domain by filename or content hash.""" + collection = get_collection() + content_hash = calculate_file_hash(file_content) + + # Check by filename + filename_match = collection.find_one({ + "type": "document", + "domain": domain_name, + "doc_type": "pdf", + "filename": filename + }) + + # Check by content hash + hash_match = collection.find_one({ + "type": "document", + "domain": domain_name, + "doc_type": "pdf", + "content_hash": content_hash + }) + + return { + "is_duplicate": bool(filename_match or hash_match), + "duplicate_type": "filename" if filename_match else ("content" if hash_match else None), + "existing_doc": filename_match or hash_match, + "content_hash": content_hash + } + + +def check_duplicate_url(domain_name, url): + """Check if a URL already exists in the domain.""" + collection = get_collection() + + existing = collection.find_one({ + "type": "document", + "domain": domain_name, + "doc_type": "url", + "url": url + }) + + return { + "is_duplicate": bool(existing), + "existing_doc": existing + } + + +def get_document_stats(): + """Get statistics about documents.""" + collection = get_collection() + total_domains = collection.count_documents({"type": "domain"}) + total_docs = collection.count_documents({"type": "document"}) + total_pdfs = collection.count_documents({"type": "document", "doc_type": "pdf"}) + total_urls = collection.count_documents({"type": "document", "doc_type": "url"}) + + return { + "total_domains": total_domains, + "total_docs": total_docs, + "total_pdfs": total_pdfs, + "total_urls": total_urls + } diff --git a/src/domain_manager.py b/src/domain_manager.py new file mode 100644 index 0000000..603de5f --- /dev/null +++ b/src/domain_manager.py @@ -0,0 +1,34 @@ +"""Domain management functions for MongoDB.""" + +from datetime import datetime +from .database import get_collection + + +def create_domain(domain_name, description=""): + """Create a new domain in MongoDB.""" + collection = get_collection() + domain_doc = { + "type": "domain", + "name": domain_name, + "description": description, + "created_at": datetime.now(), + "updated_at": datetime.now() + } + result = collection.insert_one(domain_doc) + return result.inserted_id + + +def get_all_domains(): + """Get all domains from MongoDB.""" + collection = get_collection() + domains = list(collection.find({"type": "domain"}).sort("name", 1)) + return domains + + +def delete_domain(domain_name): + """Delete a domain and all its documents.""" + collection = get_collection() + # Delete all documents in the domain + collection.delete_many({"domain": domain_name}) + # Delete the domain itself + collection.delete_one({"type": "domain", "name": domain_name}) diff --git a/src/kb_builder.py b/src/kb_builder.py new file mode 100644 index 0000000..8b57acd --- /dev/null +++ b/src/kb_builder.py @@ -0,0 +1,428 @@ +"""Knowledge Base Builder from MongoDB 
documents.""" + +import os +import gc +import threading +from datetime import datetime +from .database import get_collection +from .document_manager import get_documents_by_domain + +def _close_guru_sessions_for_domain(folder_name, log_progress): + """ + Close any GURU sessions that are using the specified knowledge base folder. + This releases ChromaDB file locks to allow rebuilding. + + Args: + folder_name (str): The knowledge base folder name (e.g., "files_Italy") + log_progress (callable): Logging callback + """ + import streamlit as st + + # Check if there's an active orchestrator in the session state + if "orchestrator" in st.session_state: + orchestrator = st.session_state["orchestrator"] + + # Check if the orchestrator's guru is using this domain's KB + if hasattr(orchestrator, 'guru') and hasattr(orchestrator.guru, 'know_base'): + kb = orchestrator.guru.know_base + if hasattr(kb, 'knowledge_base_path') and kb.knowledge_base_path == folder_name: + log_progress(f"Found active GURU session using {folder_name}") + + # Close ChromaDB client if it exists + if hasattr(kb, 'chromadbClient') and kb.chromadbClient is not None: + try: + log_progress("Closing ChromaDB connection...") + # ChromaDB doesn't have an explicit close method, but we can clear the reference + kb.chromadbClient = None + log_progress("ChromaDB connection cleared") + except Exception as e: + log_progress(f"Note: {str(e)}") + + # Clear the orchestrator to force reinitialization + log_progress("Clearing orchestrator from session...") + del st.session_state["orchestrator"] + log_progress("Orchestrator cleared successfully") + + # Force garbage collection to release file handles + gc.collect() + log_progress("Garbage collection completed") + + # Give the system a moment to fully release file handles + import time + time.sleep(1) + log_progress("Ready to proceed with KB creation") + + +def _build_kb_worker(domain_name, provider, model, embedding, max_workers, enable_parallel): + """ + Worker function that runs in a separate thread to build KB. + This allows the process to continue even if user switches tabs in Streamlit. 
+ """ + from knowledge_base import KnowledgeExtractor + + collection = get_collection() + temp_pdf_files = [] + + try: + print(f"[Thread] Starting KB creation for domain: {domain_name}") + + # Get all documents for the domain + documents = get_documents_by_domain(domain_name) + + # Separate URLs and PDFs + urls = [] + pdf_count = 0 + + for doc in documents: + if doc.get("doc_type") == "url": + urls.append(doc.get("url")) + elif doc.get("doc_type") == "pdf": + pdf_count += 1 + try: + pdf_content = doc.get("content") + if pdf_content: + kb_temp_dir = os.path.join( + os.path.dirname(__file__), + "..", + "knowledge_base", + "kbs", + "temp_pdfs" + ) + os.makedirs(kb_temp_dir, exist_ok=True) + + import hashlib + safe_filename = doc.get('filename', 'document.pdf').replace(' ', '_') + file_hash = hashlib.md5(pdf_content[:1024]).hexdigest()[:8] + temp_filename = f"{file_hash}_{safe_filename}" + temp_path = os.path.join(kb_temp_dir, temp_filename) + + with open(temp_path, 'wb') as f: + f.write(pdf_content) + + urls.append(temp_path) + temp_pdf_files.append(temp_path) + print(f"[Thread] Saved PDF '{doc.get('filename')}'") + except Exception as e: + print(f"[Thread] Warning: Could not process PDF '{doc.get('filename')}': {str(e)}") + + print(f"[Thread] Processing {len(urls)} documents") + + # Initialize Knowledge Extractor with parallel processing options + ke = KnowledgeExtractor(provider, model, embedding, max_workers=max_workers) + folder_name = f"files_{domain_name}" + + # Run the knowledge extraction + ke.run( + folder=folder_name, + file_name="rdf_graph", + html_links=urls, + load_cached_docs=False, + load_cached_preprocessed_chunks=False, + enable_parallel=enable_parallel + ) + + print(f"[Thread] KB creation completed for domain '{domain_name}'") + + # Update status to completed + collection.update_one( + {"type": "knowledge_base", "domain": domain_name}, + {"$set": {"status": "completed", "completed_at": datetime.now()}} + ) + print(f"[Thread] Status updated to completed") + + except Exception as e: + print(f"[Thread] Error creating KB: {str(e)}") + collection.update_one( + {"type": "knowledge_base", "domain": domain_name}, + {"$set": {"status": "error", "error_message": str(e), "error_at": datetime.now()}} + ) + finally: + # Clean up temporary PDF files + for temp_file in temp_pdf_files: + try: + if os.path.exists(temp_file): + os.remove(temp_file) + print(f"[Thread] Removed temp file: {os.path.basename(temp_file)}") + except Exception as e: + print(f"[Thread] Warning: Could not remove temp file: {str(e)}") + + +def build_knowledge_base_for_domain(domain_name, provider="ollama", model=None, embedding=None, progress_callback=None, use_background_thread=False, max_workers=4, enable_parallel=True): + """ + Build a knowledge base from all documents in a MongoDB domain. 
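+    URLs are handed to the extractor as-is; PDFs stored in MongoDB are first
+    written to knowledge_base/kbs/temp_pdfs and removed again when the build
+    finishes or fails.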
+ + Args: + domain_name (str): Name of the domain to build KB from + provider (str): LLM provider ("openai" or "ollama") + model (str): Model name (optional, will use defaults) + embedding (str): Embedding model name (optional, will use defaults) + progress_callback (callable): Optional callback function to report progress + use_background_thread (bool): Whether to run in background thread + max_workers (int): Maximum number of parallel workers (default: 4) + enable_parallel (bool): Whether to enable parallel processing (default: True) + + Returns: + dict: Result dictionary with status and message + """ + from knowledge_base import KnowledgeExtractor + + def log_progress(message): + """Helper to log progress.""" + if progress_callback: + progress_callback(message) + print(message) + + try: + log_progress(f"Starting KB creation for domain: {domain_name}") + + # Get all documents for the domain + documents = get_documents_by_domain(domain_name) + + if not documents: + return { + "status": "error", + "message": f"No documents found in domain '{domain_name}'" + } + + # Separate URLs and PDFs, save MongoDB PDFs to temp files + urls = [] + temp_pdf_files = [] + pdf_count = 0 + + for doc in documents: + if doc.get("doc_type") == "url": + urls.append(doc.get("url")) + elif doc.get("doc_type") == "pdf": + pdf_count += 1 + # Save PDF from MongoDB to temporary file with http-accessible path + try: + pdf_content = doc.get("content") + if pdf_content: + # Create temp directory in the KB folder for PDFs + kb_temp_dir = os.path.join( + os.path.dirname(__file__), + "..", + "knowledge_base", + "kbs", + "temp_pdfs" + ) + os.makedirs(kb_temp_dir, exist_ok=True) + + # Create a unique filename + import hashlib + safe_filename = doc.get('filename', 'document.pdf').replace(' ', '_') + file_hash = hashlib.md5(pdf_content[:1024]).hexdigest()[:8] + temp_filename = f"{file_hash}_{safe_filename}" + temp_path = os.path.join(kb_temp_dir, temp_filename) + + # Write PDF to file + with open(temp_path, 'wb') as f: + f.write(pdf_content) + + # Add the absolute file path as a URL (KnowledgeExtractor will handle it) + urls.append(temp_path) + temp_pdf_files.append(temp_path) + log_progress(f"Saved PDF '{doc.get('filename')}' to {temp_filename}") + except Exception as e: + log_progress(f"Warning: Could not process PDF '{doc.get('filename')}': {str(e)}") + + log_progress(f"Found {len(urls) - len(temp_pdf_files)} URLs and {pdf_count} PDFs ({len(temp_pdf_files)} successfully prepared)") + + # Set default models based on provider + if model is None: + if provider == "ollama": + model = "gemma3:12b-it-qat" + else: + model = "gpt-4" + + if embedding is None: + if provider == "ollama": + embedding = "mxbai-embed-large" + else: + embedding = "text-embedding-3-small" + + log_progress(f"Using provider: {provider}, model: {model}, embedding: {embedding}") + + # Close any existing GURU sessions using this domain's KB to avoid ChromaDB locking + log_progress("Checking for active GURU sessions with this domain...") + folder_name = f"files_{domain_name}" + _close_guru_sessions_for_domain(folder_name, log_progress) + + # Store KB metadata in MongoDB BEFORE starting the long process + # This way, even if the process is interrupted, the metadata exists + collection = get_collection() + kb_doc = { + "type": "knowledge_base", + "domain": domain_name, + "folder": folder_name, + "file_name": "rdf_graph", + "provider": provider, + "model": model, + "embedding": embedding, + "created_at": datetime.now(), + "document_count": len(documents), + 
"url_count": len(urls), + "pdf_count": pdf_count, + "status": "creating" # Mark as in-progress + } + + # Save metadata immediately + collection.update_one( + {"type": "knowledge_base", "domain": domain_name}, + {"$set": kb_doc}, + upsert=True + ) + log_progress("Metadata saved to database") + + if not urls: + # Update status to error + collection.update_one( + {"type": "knowledge_base", "domain": domain_name}, + {"$set": {"status": "error", "error_message": "No documents found to process"}} + ) + return { + "status": "error", + "message": "No documents found to process in this domain." + } + + # If background thread requested, start worker and return immediately + if use_background_thread: + log_progress("Starting background thread for KB creation...") + thread = threading.Thread( + target=_build_kb_worker, + args=(domain_name, provider, model, embedding, max_workers, enable_parallel), + daemon=True + ) + thread.start() + log_progress("Background thread started. You can safely switch tabs.") + + return { + "status": "success", + "message": f"Knowledge base creation started in background for domain '{domain_name}'. Check back later for status.", + "folder": folder_name, + "url_count": len(urls), + "pdf_count": pdf_count, + "background": True + } + + # Otherwise, run synchronously (original behavior) + # Initialize Knowledge Extractor with parallel processing options + ke = KnowledgeExtractor(provider, model, embedding, max_workers=max_workers) + + # Create folder for this domain's KB + file_name = "rdf_graph" + + # Run the knowledge extraction + try: + ke.run( + folder=folder_name, + file_name=file_name, + html_links=urls, + load_cached_docs=False, + load_cached_preprocessed_chunks=False, + enable_parallel=enable_parallel + ) + + + # Update status to completed + collection.update_one( + {"type": "knowledge_base", "domain": domain_name}, + {"$set": {"status": "completed", "completed_at": datetime.now()}} + ) + log_progress("Status updated to completed") + finally: + # Clean up temporary PDF files + if temp_pdf_files: + for temp_file in temp_pdf_files: + try: + if os.path.exists(temp_file): + os.remove(temp_file) + except Exception as e: + log_progress(f"Warning: Could not remove temp file {temp_file}: {str(e)}") + + return { + "status": "success", + "message": f"Knowledge base created successfully for domain '{domain_name}'", + "folder": folder_name, + "url_count": len(urls), + "pdf_count": pdf_count + } + + except Exception as e: + error_msg = f"Error creating knowledge base: {str(e)}" + log_progress(error_msg) + + # Update status to error in MongoDB + try: + collection = get_collection() + collection.update_one( + {"type": "knowledge_base", "domain": domain_name}, + {"$set": {"status": "error", "error_message": str(e), "error_at": datetime.now()}} + ) + except Exception: + pass # Don't fail if we can't update the error status + + # Clean up temporary PDF files even on error + if 'temp_pdf_files' in locals() and temp_pdf_files: + log_progress("Cleaning up temporary PDF files after error...") + for temp_file in temp_pdf_files: + try: + if os.path.exists(temp_file): + os.remove(temp_file) + except Exception: + pass # Ignore cleanup errors + + return { + "status": "error", + "message": error_msg + } + + +def get_knowledge_base_info(domain_name): + """ + Get information about a knowledge base for a domain. 
+ + Args: + domain_name (str): Name of the domain + + Returns: + dict: KB information or None if not found + """ + collection = get_collection() + kb_info = collection.find_one({"type": "knowledge_base", "domain": domain_name}) + return kb_info + + +def delete_knowledge_base(domain_name): + """ + Delete a knowledge base for a domain. + + Args: + domain_name (str): Name of the domain + + Returns: + bool: True if deleted successfully + """ + import shutil + + collection = get_collection() + kb_info = get_knowledge_base_info(domain_name) + + if kb_info: + # Delete folder + folder_path = os.path.join( + os.path.dirname(__file__), + "..", + "knowledge_base", + "kbs", + kb_info.get("folder") + ) + + if os.path.exists(folder_path): + shutil.rmtree(folder_path) + + # Delete metadata + collection.delete_one({"type": "knowledge_base", "domain": domain_name}) + return True + + return False diff --git a/src/orchestrator_manager.py b/src/orchestrator_manager.py new file mode 100644 index 0000000..35181e4 --- /dev/null +++ b/src/orchestrator_manager.py @@ -0,0 +1,34 @@ +"""Orchestrator management functions for Energenius GURU.""" + +import streamlit as st + +from orchestrator import LiveOrchestrator +from llm import get_ollama_models, get_ollama_embeddings + + +@st.cache_data(ttl=60) +def cached_get_ollama_models(): + """Cached wrapper for getting Ollama models.""" + return get_ollama_models() + + +@st.cache_data(ttl=60) +def cached_get_ollama_embeddings(): + """Cached wrapper for getting Ollama embeddings.""" + return get_ollama_embeddings() + + +def initialize_orchestrator(provider, model, embedding, language, temperature, + user_type, house_type, region, use_knowledge_base): + """Initialize the LiveOrchestrator with given parameters.""" + return LiveOrchestrator( + provider=provider, + model=model, + embedding=embedding, + language=language, + temperature=temperature, + user_type=user_type, + house_type=house_type, + region=region, + use_knowledge=use_knowledge_base, + ) diff --git a/src/utils.py b/src/utils.py new file mode 100644 index 0000000..2b0812a --- /dev/null +++ b/src/utils.py @@ -0,0 +1,8 @@ +"""Utility functions for document management.""" + +import hashlib + + +def calculate_file_hash(file_content): + """Calculate SHA256 hash of file content.""" + return hashlib.sha256(file_content).hexdigest() diff --git a/streamlit_ui.py b/streamlit_ui.py index ef6c6b8..2777a14 100644 --- a/streamlit_ui.py +++ b/streamlit_ui.py @@ -6,6 +6,7 @@ import streamlit as st from orchestrator import LiveOrchestrator +from llm import get_ollama_models, get_ollama_embeddings # give title to the page st.title("Energenius GURU") @@ -19,17 +20,48 @@ if "messages" not in st.session_state: st.session_state["messages"] = [] +# Cached wrappers for Ollama model fetching +@st.cache_data(ttl=60) +def cached_get_ollama_models(): + """Cached wrapper for getting Ollama models.""" + return get_ollama_models() + +@st.cache_data(ttl=60) +def cached_get_ollama_embeddings(): + """Cached wrapper for getting Ollama embeddings.""" + return get_ollama_embeddings() + # create sidebar to adjust parameters st.sidebar.title("Model Parameters") provider = st.sidebar.selectbox("Provider", ["openai", "ollama"], index=1) if provider == "ollama": - model = st.sidebar.selectbox("Model", ["gpt-oss:120b", "gpt-oss:20b", "llama3.2", "mistral"], index=0) - embedding = st.sidebar.selectbox( - "Embedding", ["mxbai-embed-large", "nomic-embed-text"], index=0 - ) + # Add refresh button for model list + col1, col2 = st.sidebar.columns([4, 1]) + with 
col1: + st.write("") # Spacing + with col2: + if st.button("πŸ”„", help="Refresh model list"): + st.cache_data.clear() + st.rerun() + + # Get available models dynamically + available_models = cached_get_ollama_models() + available_embeddings = cached_get_ollama_embeddings() + + # Try to find a default model, otherwise use the first one + default_model_index = 0 + + model = st.sidebar.selectbox("Model", available_models, index=default_model_index) + + # Try to find a default embedding, otherwise use the first one + default_embedding_index = 0 + if "mxbai-embed-large" in available_embeddings: + default_embedding_index = available_embeddings.index("mxbai-embed-large") + + embedding = st.sidebar.selectbox("Embedding", available_embeddings, index=default_embedding_index) elif provider == "openai": - model = st.sidebar.selectbox("Model", ["gpt-3.5-turbo", "gpt-4", "ollama"], index=1) + model = st.sidebar.selectbox("Model", ["gpt-3.5-turbo", "gpt-4"], index=1) embedding = st.sidebar.selectbox( "Embedding", ["text-embedding-3-small", "text-embedding-3-large"], index=0 )
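
As a minimal sketch (assuming a running local Ollama server and the LLM_BASE_URL
variable introduced above), the new discovery helpers can also be exercised
outside Streamlit; the model names in the comments are purely illustrative:

    import os
    from llm import get_ollama_models, get_ollama_embeddings

    # Falls back to http://localhost:11434 when LLM_BASE_URL is unset
    os.environ.setdefault("LLM_BASE_URL", "http://localhost:11434")

    chat_models = get_ollama_models()            # e.g. ["gpt-oss:20b", "llama3.2"]
    embedding_models = get_ollama_embeddings()   # e.g. ["mxbai-embed-large"]

    print("Chat models:", chat_models)
    print("Embedding models:", embedding_models)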