Skip to content

Commit 06d322f

Browse files
Merge pull request #70 from shcherbak-ai/dev
Dev
2 parents 243824f + 403bf43 commit 06d322f

File tree

74 files changed

+75502
-143486
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

74 files changed

+75502
-143486
lines changed

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,10 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
55

66
- **Refactor**: Code reorganization that doesn't change functionality but improves structure or maintainability
77

8+
## [0.18.0](https://github.com/shcherbak-ai/contextgem/releases/tag/v0.18.0) - 2025-09-01
9+
### Added
10+
- Chat: Added optional `chat_session` parameter (accepts a `ChatSession`) to preserve message history across turns in `DocumentLLM.chat()`. When this parameter is omitted, chat is single-turn, without message history.
11+
812
## [0.17.1](https://github.com/shcherbak-ai/contextgem/releases/tag/v0.17.1) - 2025-08-26
913
### Changed
1014
- `DocxConverter`: Conversion speed improved by ~2X, significantly reducing processing time for DOCX files.

contextgem/__init__.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,12 +20,13 @@
2020
ContextGem - Effortless LLM extraction from documents
2121
"""
2222

23-
__version__ = "0.17.1"
23+
__version__ = "0.18.0"
2424
__author__ = "Shcherbak AI AS"
2525

2626
from contextgem.public import (
2727
Aspect,
2828
BooleanConcept,
29+
ChatSession,
2930
DateConcept,
3031
Document,
3132
DocumentLLM,
@@ -78,6 +79,7 @@
7879
"StringExample",
7980
"JsonObjectExample",
8081
# LLMs
82+
"ChatSession",
8183
"DocumentLLM",
8284
"DocumentLLMGroup",
8385
# Data models

contextgem/internal/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
_LOCAL_MODEL_PROVIDERS,
2222
_Aspect,
2323
_BooleanConcept,
24+
_ChatSession,
2425
_DateConcept,
2526
_Document,
2627
_DocumentLLM,
@@ -134,6 +135,7 @@
134135
"_LOCAL_MODEL_PROVIDERS",
135136
"_Aspect",
136137
"_BooleanConcept",
138+
"_ChatSession",
137139
"_DateConcept",
138140
"_Document",
139141
"_DocumentLLM",

contextgem/internal/base/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
from contextgem.internal.base.llms import (
3535
_COST_QUANT,
3636
_LOCAL_MODEL_PROVIDERS,
37+
_ChatSession,
3738
_DocumentLLM,
3839
_DocumentLLMGroup,
3940
)
@@ -66,6 +67,7 @@
6667
# LLMs
6768
"_COST_QUANT",
6869
"_LOCAL_MODEL_PROVIDERS",
70+
"_ChatSession",
6971
"_DocumentLLM",
7072
"_DocumentLLMGroup",
7173
# Paragraphs and sentences

contextgem/internal/base/abstract.py

Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
"""
2+
Abstract base layer for instance and LLM processor types.
3+
"""
4+
5+
from __future__ import annotations
6+
7+
from abc import ABC, abstractmethod
8+
from typing import TYPE_CHECKING
9+
10+
from pydantic import ConfigDict, Field, PrivateAttr
11+
from ulid import ULID
12+
13+
from contextgem.internal.base.mixins import _PostInitCollectorMixin
14+
from contextgem.internal.base.serialization import _InstanceSerializer
15+
16+
17+
if TYPE_CHECKING:
18+
from contextgem.internal.data_models import (
19+
_LLMCostOutputContainer,
20+
_LLMUsageOutputContainer,
21+
)
22+
from contextgem.internal.typings.aliases import LLMRoleAny
23+
24+
25+
class _AbstractInstanceBase(_PostInitCollectorMixin, _InstanceSerializer, ABC):
26+
"""
27+
Abstract base for instance-like Pydantic models.
28+
"""
29+
30+
custom_data: dict = Field(
31+
default_factory=dict,
32+
description="A serializable dictionary for storing additional custom data "
33+
"related to the instance.",
34+
)
35+
36+
_unique_id: str = PrivateAttr(default_factory=lambda: str(ULID()))
37+
38+
model_config = ConfigDict(extra="forbid", validate_assignment=True)
39+
40+
@property
41+
def unique_id(self) -> str:
42+
"""
43+
Returns the ULID of the instance.
44+
"""
45+
return self._unique_id
46+
47+
48+
class _AbstractGenericLLMProcessor(_PostInitCollectorMixin, _InstanceSerializer, ABC):
49+
"""
50+
Abstract base for LLM-backed processors (single or grouped).
51+
"""
52+
53+
model_config = ConfigDict(extra="forbid", validate_assignment=True)
54+
55+
@property
56+
@abstractmethod
57+
def is_group(self) -> bool:
58+
"""
59+
Abstract property, to be implemented by subclasses.
60+
61+
Whether the LLM is a single instance or a group.
62+
"""
63+
pass
64+
65+
@property
66+
@abstractmethod
67+
def list_roles(self) -> list[LLMRoleAny]:
68+
"""
69+
Abstract property, to be implemented by subclasses.
70+
71+
Returns the list of all LLM roles in the LLM group or LLM.
72+
"""
73+
pass
74+
75+
@abstractmethod
76+
def _set_private_attrs(self) -> None:
77+
"""
78+
Abstract method, to be implemented by subclasses.
79+
80+
Sets private attributes for the LLM group or LLM, e.g. prompts, capabilities, etc.
81+
"""
82+
pass
83+
84+
@abstractmethod
85+
def get_usage(self, *args, **kwargs) -> list[_LLMUsageOutputContainer]:
86+
"""
87+
Abstract method, to be implemented by subclasses.
88+
89+
Returns the usage data for the LLM group or LLM.
90+
"""
91+
pass
92+
93+
@abstractmethod
94+
def get_cost(self, *args, **kwargs) -> list[_LLMCostOutputContainer]:
95+
"""
96+
Abstract method, to be implemented by subclasses.
97+
98+
Returns the cost data for the LLM group or LLM as a list of
99+
`_LLMCostOutputContainer` entries. Implementations may accept optional
100+
filter parameters (e.g., role) where applicable.
101+
"""
102+
pass
103+
104+
@abstractmethod
105+
def reset_usage_and_cost(self) -> None:
106+
"""
107+
Abstract method, to be implemented by subclasses.
108+
109+
Resets the usage and cost data for the LLM group or LLM. Implementations
110+
may support optional filters (e.g., by role) where applicable.
111+
"""
112+
pass

contextgem/internal/base/aspects.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,8 @@ def __setattr__(self, name: str, value: Any) -> None:
121121
:type name: str
122122
:param value: The value to assign to the attribute
123123
:type value: Any
124+
:return: None
125+
:rtype: None
124126
"""
125127

126128
if name == "aspects":

contextgem/internal/base/images.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,5 +41,7 @@ class _Image(_InstanceBase):
4141
..., description="The MIME type of the image."
4242
)
4343
base64_data: NonEmptyStr = Field(
44-
..., description="The base64-encoded data of the image."
44+
...,
45+
description="The base64-encoded data of the image.",
46+
repr=False, # do not show in repr due to the excessive base64 string length
4547
)

contextgem/internal/base/instances.py

Lines changed: 8 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -27,16 +27,13 @@
2727

2828
from __future__ import annotations
2929

30-
from abc import ABC
3130
from copy import deepcopy
3231
from typing import TYPE_CHECKING
3332

34-
from pydantic import ConfigDict, Field, PrivateAttr, field_validator
33+
from pydantic import field_validator
3534
from typing_extensions import Self
36-
from ulid import ULID
3735

38-
from contextgem.internal.base.mixins import _PostInitCollectorMixin
39-
from contextgem.internal.base.serialization import _InstanceSerializer
36+
from contextgem.internal.base.abstract import _AbstractInstanceBase
4037
from contextgem.internal.utils import _is_text_content_empty
4138

4239

@@ -45,7 +42,7 @@
4542
from contextgem.internal.base.concepts import _Concept
4643

4744

48-
class _InstanceBase(_PostInitCollectorMixin, _InstanceSerializer, ABC):
45+
class _InstanceBase(_AbstractInstanceBase):
4946
"""
5047
Base class that provides reusable methods for all instance-specific subclasses.
5148
@@ -54,16 +51,6 @@ class _InstanceBase(_PostInitCollectorMixin, _InstanceSerializer, ABC):
5451
for various instance types in the ContextGem framework.
5552
"""
5653

57-
custom_data: dict = Field(
58-
default_factory=dict,
59-
description="A serializable dictionary for storing additional custom data "
60-
"related to the instance.",
61-
)
62-
63-
_unique_id: str = PrivateAttr(default_factory=lambda: str(ULID()))
64-
65-
model_config = ConfigDict(extra="forbid", validate_assignment=True)
66-
6754
def clone(self) -> Self:
6855
"""
6956
Creates and returns a deep copy of the current instance.
@@ -85,13 +72,6 @@ def model_copy(self, *args, **kwargs):
8572
"""
8673
raise NotImplementedError("Use `clone()` instead")
8774

88-
@property
89-
def unique_id(self) -> str:
90-
"""
91-
Returns the ULID of the instance.
92-
"""
93-
return self._unique_id
94-
9575
@field_validator("raw_text", check_fields=False)
9676
@classmethod
9777
def _validate_raw_text(cls, raw_text: str | None) -> str | None:
@@ -130,15 +110,15 @@ def _validate_raw_text(cls, raw_text: str | None) -> str | None:
130110
)
131111
@classmethod
132112
def _validate_list_uniqueness(
133-
cls, instances: list[_InstanceBase]
134-
) -> list[_InstanceBase]:
113+
cls, instances: list[_AbstractInstanceBase]
114+
) -> list[_AbstractInstanceBase]:
135115
"""
136116
Validates that all elements in the provided list have unique IDs.
137117
138-
:param instances: List of `_InstanceBase` objects to validate.
139-
:type instances: list[_InstanceBase]
118+
:param instances: List of `_AbstractInstanceBase` objects to validate.
119+
:type instances: list[_AbstractInstanceBase]
140120
:return: The original list if all elements have unique IDs.
141-
:rtype: list[_InstanceBase]
121+
:rtype: list[_AbstractInstanceBase]
142122
:raises ValueError: If duplicate elements based on unique IDs are found in the list.
143123
"""
144124
ids: list[str] = [i.unique_id for i in instances]

0 commit comments

Comments
 (0)