Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
13 changes: 13 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
db*/
.venv/
venv/
.idea/
**/_temp/
patch_store/
**/.os
**/*.bin
log*.txt
logs/
__pycache__/
poc/
.env
Binary file added Supervisor.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
115 changes: 115 additions & 0 deletions agent.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
import dataclasses
from pathlib import Path

from langchain_core.runnables.graph import NodeStyles, CurveStyle, MermaidDrawMethod
from langchain_openai import AzureChatOpenAI
from langgraph.graph.state import CompiledStateGraph
import abc
from langchain_core.runnables.graph_mermaid import draw_mermaid_png

from common import LLM

# Mermaid frontmatter configuration used when rendering agent graphs to PNG:
# dark theme with a blue "neon" palette, ELK layout engine, and smooth
# ("basis") edge curves.
NEON_THEME = {
    "config": {
        "theme": "dark",
        "themeVariables": {
            "background": "#5E5E5E",
            "fontFamily": "'Fira Code', monospace",
            "primaryColor": "#1f6feb",
            "primaryBorderColor": "#3b8eea",
            "primaryTextColor": "#f0f6fc",
            "lineColor": "#58a6ff",
            "nodeBorderRadius": 8,
            "edgeLabelBackground": "#00000000",  # fully transparent label background
        },
        "flowchart": {
            "curve": "basis",
            "layout": "elk"
        },
    }
}

# Styling applied per node kind when drawing the graph: a default blue style,
# with distinct accent colors for the first and last nodes.
NODE_STYLES = NodeStyles(
    default="fill:#1f6feb33,stroke:#3b8eea,stroke-width:2px,color:#f0f6fc",
    first="fill:#06d6a033,stroke:#06d6a0,stroke-width:2px,color:#f0f6fc",
    last="fill:#ff006e33,stroke:#ff006e,stroke-width:2px,color:#f0f6fc"
)


def dummy(state):
    """No-op graph node for debugging.

    Echoes the incoming state to stdout and returns an empty update dict,
    leaving the graph state untouched.
    """
    print(state)
    return dict()


class Agent(abc.ABC):
    """Base class for LangGraph-backed agents.

    Concrete subclasses must declare a nested frozen dataclass ``NODES``
    whose fields are all ``str`` (the node names used in the graph), and
    implement :meth:`_build` to compile ``self._graph``.
    """

    def __init_subclass__(cls, **kwargs):
        """Validate at class-creation time that the subclass declares a
        well-formed ``NODES`` dataclass; raises ``TypeError`` otherwise."""
        super().__init_subclass__(**kwargs)
        if cls is Agent:
            return

        nodes = getattr(cls, "NODES", None)
        if nodes is None:
            raise TypeError(f"{cls.__name__} must declare a nested `NODES` dataclass")

        if not (
            dataclasses.is_dataclass(nodes)
            and nodes.__dataclass_params__.frozen
        ):
            raise TypeError(
                f"{cls.__name__}.NODES must be decorated with @defaultdataclass(frozen=True)"
            )

        # A field's `.type` is the `str` class normally, but under
        # `from __future__ import annotations` (PEP 563) it is the string
        # 'str' — accept both so the check doesn't break on lazy annotations.
        wrong = [
            (name, f.type)
            for name, f in nodes.__dataclass_fields__.items()  # type: ignore[attr-defined]
            if f.type is not str and f.type != 'str'
        ]
        if wrong:
            bad = ", ".join(f"{n}: {t!r}" for n, t in wrong)
            raise TypeError(
                f"{cls.__name__}.NODES fields must be 'str' – offending: {bad}"
            )

    def __init__(self, llm: AzureChatOpenAI = LLM.mini, draw: bool = False):
        """Build the agent's graph immediately; optionally render it to PNG.

        :param llm: chat model used by the agent (defaults to ``LLM.mini``).
        :param draw: when True, render the compiled graph after building.
        """
        self._llm = llm
        # None until _build() assigns the compiled graph; string annotation
        # keeps the Optional union valid on older interpreters.
        self._graph: "CompiledStateGraph | None" = None
        self._build()
        if draw:
            self._draw_graph()

    def get_graph(self):
        """Return the compiled state graph (None if _build did not set it)."""
        return self._graph

    def get_llm(self):
        """Return the chat model this agent was constructed with."""
        return self._llm

    @abc.abstractmethod
    def _build(self):
        """Construct and compile the graph; must assign ``self._graph``."""
        ...

    def _draw_graph(self):
        """Render the compiled graph to ``<ClassName>.png`` via the Mermaid
        web API, using the module-level neon theme and node styles.

        Silently does nothing when no graph has been built.
        """
        if self._graph:
            mermaid_syntax = self._graph.get_graph(xray=True).draw_mermaid(
                curve_style=CurveStyle.BASIS,
                node_colors=NODE_STYLES,
                wrap_label_n_words=4,
                frontmatter_config=NEON_THEME,
            )

            # NOTE(review): MermaidDrawMethod.API performs a network call;
            # failures propagate to the caller.
            draw_mermaid_png(
                mermaid_syntax=mermaid_syntax,
                output_file_path=f'{self.__class__.__name__}.png',
                draw_method=MermaidDrawMethod.API,
                background_color="#5E5E5E",
                padding=10,
                max_retries=1,
                retry_delay=1.0,
            )

# Definitions of all nodes

# Definitions of all conditional edges
89 changes: 89 additions & 0 deletions agent_tools/LLM.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
import logging
import os

from langchain_openai import AzureChatOpenAI, AzureOpenAIEmbeddings
from azure.identity import ClientSecretCredential, get_bearer_token_provider, DefaultAzureCredential

# Silence noisy per-request logging from the Azure SDK and httpx.
logging.getLogger('azure').setLevel(logging.WARNING)
logging.getLogger('httpx').setLevel(logging.WARNING)

# Azure OpenAI endpoint; overridable via environment so other deployments can
# reuse this module without code changes (default preserves prior behavior).
endpoint = os.environ.get(
    "AZURE_OPENAI_ENDPOINT", "https://esg-research.openai.azure.com/"
)

# https://azure.microsoft.com/en-us/pricing/details/cognitive-services/openai-service/
tenant_id = os.environ.get("AZURE_TENANT_ID")
client_id = os.environ.get("AZURE_CLIENT_ID")
client_secret = os.environ.get("AZURE_CLIENT_SECRET")

# Prefer an explicit service principal when the full AZURE_* triple is set;
# otherwise fall back to the ambient credential chain (CLI login, managed
# identity, ...). NOTE(review): environment credentials are excluded from the
# fallback — presumably so a partially-set AZURE_* triple is not picked up by
# DefaultAzureCredential; confirm intent.
if tenant_id and client_id and client_secret:
    credential = ClientSecretCredential(
        tenant_id=tenant_id,
        client_id=client_id,
        client_secret=client_secret,
    )
else:
    credential = DefaultAzureCredential(
        exclude_environment_credential=True
    )

# AAD bearer-token provider handed to every Azure OpenAI client below.
token_provider = get_bearer_token_provider(
    credential,
    "https://cognitiveservices.azure.com/.default",
)


class LLM:
    """Pre-configured Azure OpenAI clients shared process-wide.

    Every client authenticates via the module-level AAD ``token_provider``
    against the same ``endpoint``. All chat clients are non-streaming and are
    instantiated eagerly at import time.
    """

    # o3 reasoning model.
    o3 = AzureChatOpenAI(
        model="o3",
        azure_deployment="o3",
        api_version="2024-12-01-preview",
        azure_endpoint=endpoint,
        azure_ad_token_provider=token_provider,
        streaming=False,
        # model_kwargs={'max_completion_tokens': 100000}
    )
    # o4-mini reasoning model.
    o4_mini = AzureChatOpenAI(
        model="o4-mini",
        azure_deployment="o4-mini",
        api_version="2024-12-01-preview",
        azure_endpoint=endpoint,
        azure_ad_token_provider=token_provider,
        streaming=False,
        # model_kwargs={'max_completion_tokens': 100000}
    )
    # o3-mini reasoning model.
    o3_mini = AzureChatOpenAI(
        model="o3-mini",
        azure_deployment="o3-mini",
        api_version="2024-12-01-preview",
        azure_endpoint=endpoint,
        azure_ad_token_provider=token_provider,
        streaming=False,
        # model_kwargs={'max_completion_tokens': 100000}
    )
    # gpt-4.1-nano: deterministic (temperature 0) and capped at 250 output
    # tokens — used for short, cheap completions.
    nano = AzureChatOpenAI(
        max_tokens=250,
        model="gpt-4.1-nano",
        azure_deployment="gpt-4.1-nano",
        api_version="2024-12-01-preview",
        azure_endpoint=endpoint,
        azure_ad_token_provider=token_provider,
        temperature=0.0,
        streaming=False,
    )
    # gpt-4.1-mini: default chat model; temperature 1.0, no output cap.
    mini = AzureChatOpenAI(
        model="gpt-4.1-mini",
        azure_deployment="gpt-4.1-mini",
        api_version="2024-12-01-preview",
        azure_endpoint=endpoint,
        azure_ad_token_provider=token_provider,
        temperature=1.0,
        streaming=False,
        # model_kwargs={'max_completion_tokens': 100000}
    )
    # Embedding model used by the vector stores.
    embedding = AzureOpenAIEmbeddings(
        model="text-embedding-3-small",
        azure_deployment="text-embedding-3-small",
        api_version="2024-02-01",
        azure_endpoint=endpoint,
        azure_ad_token_provider=token_provider
    )
Empty file added agent_tools/__init__.py
Empty file.
41 changes: 41 additions & 0 deletions agent_tools/tools.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
import uuid
from pathlib import Path
from langchain.prompts import ChatPromptTemplate
from langchain_core.documents import Document

from agent_tools.vector_store import VectorStore
from common import logger, LLM
from patch_analysis.files_collection import file_desc

# Prompt for a compact, embedding-friendly one-paragraph description of a
# Windows executable. The literal "(unknown)" is replaced with `{filename}`:
# the caller (generate_file_info_if_needed) passes a `filename` variable that
# the original template never consumed, so the model was asked to describe an
# "(unknown)" executable.
DESC_PROMPT = ChatPromptTemplate.from_template(
    "Write a maximum of 80 token unformatted paragraph about the Windows executable "
    "{filename} package: {package} description: {description}. Include only technical details about its "
    "purpose in the system. Keep it short, consistent, and strictly one paragraph. "
    "Do not repeat facts. Omit headings, bullets, and conjunctions; the output is "
    "for embedding context."
)


async def generate_file_info_if_needed(base_path: Path, name: str, package: str):
    """Generate and persist an embedded description of a file, if missing.

    Looks up the (name, package) pair in the ``file_info`` vector store and
    returns early when a matching document already exists; otherwise asks the
    LLM for a short technical description and stores it.

    :param base_path: on-disk path of the file (may not exist).
    :param name: file name used as the dedup/metadata key.
    :param package: package name used as the dedup/metadata key.
    """
    # Metadata is stored lowercased (see the Document below), so the lookup
    # keys must be lowercased too — otherwise mixed-case callers never match
    # existing entries and generate duplicates.
    key_name = name.lower()
    key_package = package.lower()

    res = VectorStore.file_info._collection.get(
        where={'$and': [{'name': key_name}, {'package': key_package}]},
    )
    if res.get('ids'):
        return

    # Only attempt to read a description when the file actually exists.
    desc = (file_desc(base_path) or '') if base_path.exists() else ''
    chain = DESC_PROMPT | LLM.nano
    result = await chain.ainvoke({"filename": name, 'package': package, "description": desc})
    logger.debug(result.content)

    doc = Document(
        page_content=result.content,
        metadata={'name': key_name, 'package': key_package, 'description': desc.lower()},
    )
    await VectorStore.file_info.aadd_documents(documents=[doc], ids=[str(uuid.uuid4())])

# sample_path = Path(
# r"E:\Git\snippets\patch_wednesday\patch_store\amd64_microsoft-onecore-s..dlers-speechprivacy_31bf3856ad364e35\settingshandlers_speechprivacy.dll\base\settingshandlers_speechprivacy.dll")
#
# print(file_desc(sample_path))
#
# ts, size = get_pe_ts_size_id(sample_path)
33 changes: 33 additions & 0 deletions agent_tools/vector_store.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
import os

# Opt out of Chroma usage telemetry. These must be set BEFORE the
# langchain_chroma/chromadb import below — presumably the flags are read at
# import time (hence the placement ahead of the imports); confirm against the
# chromadb version in use.
os.environ["ANONYMIZED_TELEMETRY"] = "FALSE"
os.environ["CHROMA_TELEMETRY_ENABLED"] = "FALSE"

from langchain_chroma import Chroma
from common import LLM


def _open_collection(name: str) -> Chroma:
    """Open (creating if needed) a cosine-distance Chroma collection in ./db,
    embedded with the shared Azure embedding model."""
    return Chroma(
        persist_directory='./db',
        collection_name=name,
        embedding_function=LLM.embedding,
        collection_metadata={"hnsw:space": "cosine"},
        create_collection_if_not_exists=True,
    )


class VectorStore:
    """Process-wide handles to the project's persistent Chroma collections.

    All three collections share the same persistence directory, embedding
    function, and distance metric; they differ only by name/content.
    """

    # Short descriptions of Windows executables (see agent_tools.tools).
    file_info = _open_collection("windows.exe.desc")

    # Per-function logic summaries.
    func_logic = _open_collection("windows.exe.functions.logic")

    # Root-cause-analysis reports.
    reports = _open_collection("windows.exe.rca.reports")
Loading