diff --git a/.github/workflows/RavenClient.yml b/.github/workflows/RavenClient.yml
index 63407d8a..a3e4581a 100644
--- a/.github/workflows/RavenClient.yml
+++ b/.github/workflows/RavenClient.yml
@@ -25,12 +25,12 @@ jobs:
         RAVENDB_PYTHON_TEST_SERVER_CERTIFICATE_PATH: certs/server.pfx
         RAVENDB_PYTHON_TEST_CLIENT_CERTIFICATE_PATH: certs/python.pem
         RAVENDB_PYTHON_TEST_CA_PATH: /usr/local/share/ca-certificates/ca.crt
-        RAVENDB_PYTHON_TEST_HTTPS_SERVER_URL: https://localhost:7326
+        RAVENDB_PYTHON_TEST_HTTPS_SERVER_URL: https://localhost:8081
 
       strategy:
         matrix:
-          python-version: [ '3.9', '3.10' ,'3.11', '3.12']
-          serverVersion: [ '7.1' ]
+          python-version: [ '3.10' ,'3.11', '3.12', '3.13', '3.14']
+          serverVersion: [ '7.1', '7.2' ]
         fail-fast: false
 
       steps:
@@ -54,7 +54,7 @@ jobs:
 
       - run: mkdir certs
       - run: openssl genrsa -out certs/ca.key 2048
-      - run: openssl req -new -x509 -key certs/ca.key -out certs/ca.crt -subj "/C=US/ST=Arizona/L=Nevada/O=RavenDB Test CA/OU=RavenDB test CA/CN=localhost/emailAddress=ravendbca@example.com" -addext "basicConstraints = CA:TRUE" -addext "keyUsage = digitalSignature,keyCertSign"
+      - run: openssl req -new -x509 -key certs/ca.key -out certs/ca.crt -subj "/C=US/ST=Arizona/L=Nevada/O=RavenDB Test CA/OU=RavenDB test CA/CN=localhost/emailAddress=ravendbca@example.com" -addext "basicConstraints = critical, CA:TRUE" -addext "keyUsage = critical, digitalSignature, keyCertSign"
       - run: openssl genrsa -out certs/localhost.key 2048
       - run: openssl req -new  -key certs/localhost.key -out certs/localhost.csr -subj "/C=US/ST=Arizona/L=Nevada/O=RavenDB Test/OU=RavenDB test/CN=localhost/emailAddress=ravendb@example.com" -addext "subjectAltName = DNS:localhost"
       - run: openssl x509 -req -extensions ext -extfile cert/test_cert.conf -in certs/localhost.csr -CA certs/ca.crt -CAkey certs/ca.key -CAcreateserial -out certs/localhost.crt
@@ -85,11 +85,9 @@ jobs:
         run: mkdir RavenDB/Server/certs && cp certs/server.pfx RavenDB/Server/certs/
 
       - name: Install black linter
-        if: ${{ matrix.python-version != '3.7' }}
         run: pip install black
 
       - name: Check code format
-        if: ${{ matrix.python-version != '3.7' }}
         run: black --check .
 
       - name: Run tests
diff --git a/ravendb/documents/conventions.py b/ravendb/documents/conventions.py
index 95db4edd..5b19e86e 100644
--- a/ravendb/documents/conventions.py
+++ b/ravendb/documents/conventions.py
@@ -87,6 +87,7 @@ def __init__(self):
         self.wait_for_indexes_after_save_changes_timeout = timedelta(seconds=15)
         self.wait_for_replication_after_save_changes_timeout = timedelta(seconds=15)
         self.wait_for_non_stale_results_timeout = timedelta(seconds=15)
+        self.max_empty_lines_in_jsonl_stream = 100
 
         # Balancing
         self._load_balancer_context_seed: Optional[int] = None
diff --git a/ravendb/documents/indexes/time_series.py b/ravendb/documents/indexes/time_series.py
index f3ed65fb..bf4e961f 100644
--- a/ravendb/documents/indexes/time_series.py
+++ b/ravendb/documents/indexes/time_series.py
@@ -21,7 +21,6 @@
 from ravendb.documents.indexes.spatial.configuration import SpatialOptions, SpatialOptionsFactory
 from ravendb.primitives import constants
 
-
 _T_IndexDefinition = TypeVar("_T_IndexDefinition", bound=IndexDefinition)
 
 
diff --git a/ravendb/documents/operations/ai/abstract_ai_settings.py b/ravendb/documents/operations/ai/abstract_ai_settings.py
index b078f897..1cf58d79 100644
--- a/ravendb/documents/operations/ai/abstract_ai_settings.py
+++ b/ravendb/documents/operations/ai/abstract_ai_settings.py
@@ -3,8 +3,8 @@
 
 
 class AbstractAiSettings(ABC):
-    def __init__(self):
-        self.embeddings_max_concurrent_batches = None
+    def __init__(self, embeddings_max_concurrent_batches: int = None):
+        self.embeddings_max_concurrent_batches = embeddings_max_concurrent_batches
 
     @classmethod
     @abstractmethod
diff --git a/ravendb/documents/operations/ai/agents/add_or_update_ai_agent_operation.py b/ravendb/documents/operations/ai/agents/add_or_update_ai_agent_operation.py
index ed4d1491..34f0e4fa 100644
--- a/ravendb/documents/operations/ai/agents/add_or_update_ai_agent_operation.py
+++ b/ravendb/documents/operations/ai/agents/add_or_update_ai_agent_operation.py
@@ -9,7 +9,6 @@
 from ravendb.http.server_node import ServerNode
 import requests
 
-
 if TYPE_CHECKING:
     from ravendb.documents.operations.ai.agents.ai_agent_configuration import AiAgentConfiguration
 
diff --git a/ravendb/documents/operations/ai/agents/run_conversation_operation.py b/ravendb/documents/operations/ai/agents/run_conversation_operation.py
index 54dc293e..9f03afa5 100644
--- a/ravendb/documents/operations/ai/agents/run_conversation_operation.py
+++ b/ravendb/documents/operations/ai/agents/run_conversation_operation.py
@@ -11,7 +11,6 @@
 from ravendb.http.misc import ResponseDisposeHandling
 from ravendb.documents.ai.content_part import ContentPart
 
-
 TSchema = TypeVar("TSchema")
 
 
diff --git a/ravendb/documents/operations/ai/azure_open_ai_settings.py b/ravendb/documents/operations/ai/azure_open_ai_settings.py
index b636b0d8..d0bb8c91 100644
--- a/ravendb/documents/operations/ai/azure_open_ai_settings.py
+++ b/ravendb/documents/operations/ai/azure_open_ai_settings.py
@@ -12,8 +12,9 @@ def __init__(
         deployment_name: str = None,
         dimensions: int = None,
         temperature: float = None,
+        embeddings_max_concurrent_batches: int = None,
     ):
-        super().__init__(api_key, endpoint, model, dimensions, temperature)
+        super().__init__(api_key, endpoint, model, dimensions, temperature, embeddings_max_concurrent_batches)
         if deployment_name is None:
             raise ValueError("deployment_name cannot be None")
         self.deployment_name = deployment_name
@@ -27,6 +28,9 @@ def from_json(cls, json_dict: Dict[str, Any]) -> "AzureOpenAiSettings":
             dimensions=json_dict["Dimensions"] if "Dimensions" in json_dict else None,
             temperature=json_dict["Temperature"] if "Temperature" in json_dict else None,
             deployment_name=json_dict["DeploymentName"] if "DeploymentName" in json_dict else None,
+            embeddings_max_concurrent_batches=(
+                json_dict["EmbeddingsMaxConcurrentBatches"] if "EmbeddingsMaxConcurrentBatches" in json_dict else None
+            ),
         )
 
     def to_json(self) -> Dict[str, Any]:
@@ -37,4 +41,5 @@ def to_json(self) -> Dict[str, Any]:
             "Dimensions": self.dimensions,
             "Temperature": self.temperature,
             "DeploymentName": self.deployment_name,
+            "EmbeddingsMaxConcurrentBatches": self.embeddings_max_concurrent_batches,
         }
diff --git a/ravendb/documents/operations/ai/embedded_settings.py b/ravendb/documents/operations/ai/embedded_settings.py
index 416dc63f..7a385538 100644
--- a/ravendb/documents/operations/ai/embedded_settings.py
+++ b/ravendb/documents/operations/ai/embedded_settings.py
@@ -4,12 +4,18 @@
 
 
 class EmbeddedSettings(AbstractAiSettings):
-    def __init__(self):
-        super().__init__()
+    def __init__(self, embeddings_max_concurrent_batches: int = None):
+        super().__init__(embeddings_max_concurrent_batches)
 
     @classmethod
     def from_json(cls, json_dict: Dict[str, Any]) -> "EmbeddedSettings":
-        return cls()
+        return cls(
+            embeddings_max_concurrent_batches=(
+                json_dict.get("EmbeddingsMaxConcurrentBatches")
+                if json_dict.get("EmbeddingsMaxConcurrentBatches")
+                else None
+            ),
+        )
 
     def to_json(self) -> Dict[str, Any]:
         return {"EmbeddingsMaxConcurrentBatches": self.embeddings_max_concurrent_batches}
diff --git a/ravendb/documents/operations/ai/google_settings.py b/ravendb/documents/operations/ai/google_settings.py
index 6aa72135..9cdce76e 100644
--- a/ravendb/documents/operations/ai/google_settings.py
+++ b/ravendb/documents/operations/ai/google_settings.py
@@ -11,9 +11,14 @@ class GoogleAiVersion(Enum):
 
 class GoogleSettings(AbstractAiSettings):
     def __init__(
-        self, model: str = None, api_key: str = None, ai_version: GoogleAiVersion = None, dimensions: int = None
+        self,
+        model: str = None,
+        api_key: str = None,
+        ai_version: GoogleAiVersion = None,
+        dimensions: int = None,
+        embeddings_max_concurrent_batches: int = None,
     ):
-        super().__init__()
+        super().__init__(embeddings_max_concurrent_batches)
         self.model = model
         self.api_key = api_key
         self.ai_version = ai_version
@@ -22,10 +27,13 @@ def __init__(
     @classmethod
     def from_json(cls, json_dict: Dict[str, Any]) -> "GoogleSettings":
         return cls(
-            model=json_dict["Model"],
-            api_key=json_dict["ApiKey"],
-            ai_version=GoogleAiVersion(json_dict["AiVersion"]),
-            dimensions=json_dict["Dimensions"],
+            model=json_dict["Model"] if "Model" in json_dict else None,
+            api_key=json_dict["ApiKey"] if "ApiKey" in json_dict else None,
+            ai_version=GoogleAiVersion(json_dict["AiVersion"]) if "AiVersion" in json_dict else None,
+            dimensions=json_dict["Dimensions"] if "Dimensions" in json_dict else None,
+            embeddings_max_concurrent_batches=(
+                json_dict["EmbeddingsMaxConcurrentBatches"] if "EmbeddingsMaxConcurrentBatches" in json_dict else None
+            ),
         )
 
     def to_json(self) -> Dict[str, Any]:
diff --git a/ravendb/documents/operations/ai/hugging_face_settings.py b/ravendb/documents/operations/ai/hugging_face_settings.py
index e34b1d1b..a7b7a934 100644
--- a/ravendb/documents/operations/ai/hugging_face_settings.py
+++ b/ravendb/documents/operations/ai/hugging_face_settings.py
@@ -4,8 +4,14 @@
 
 
 class HuggingFaceSettings(AbstractAiSettings):
-    def __init__(self, api_key: str = None, model: str = None, endpoint: str = None):
-        super().__init__()
+    def __init__(
+        self,
+        api_key: str = None,
+        model: str = None,
+        endpoint: str = None,
+        embeddings_max_concurrent_batches: int = None,
+    ):
+        super().__init__(embeddings_max_concurrent_batches)
         self.api_key = api_key
         self.model = model
         self.endpoint = endpoint
@@ -13,9 +19,12 @@ def __init__(self, api_key: str = None, model: str = None, endpoint: str = None)
     @classmethod
     def from_json(cls, json_dict: Dict[str, Any]) -> "HuggingFaceSettings":
         return cls(
-            api_key=json_dict["ApiKey"],
-            model=json_dict["Model"],
-            endpoint=json_dict["Endpoint"],
+            api_key=json_dict["ApiKey"] if "ApiKey" in json_dict else None,
+            model=json_dict["Model"] if "Model" in json_dict else None,
+            endpoint=json_dict["Endpoint"] if "Endpoint" in json_dict else None,
+            embeddings_max_concurrent_batches=(
+                json_dict["EmbeddingsMaxConcurrentBatches"] if "EmbeddingsMaxConcurrentBatches" in json_dict else None
+            ),
         )
 
     def to_json(self) -> Dict[str, Any]:
diff --git a/ravendb/documents/operations/ai/mistral_ai_settings.py b/ravendb/documents/operations/ai/mistral_ai_settings.py
index 5719349e..60ba71ca 100644
--- a/ravendb/documents/operations/ai/mistral_ai_settings.py
+++ b/ravendb/documents/operations/ai/mistral_ai_settings.py
@@ -4,8 +4,14 @@
 
 
 class MistralAiSettings(AbstractAiSettings):
-    def __init__(self, api_key: str = None, model: str = None, endpoint: str = None):
-        super().__init__()
+    def __init__(
+        self,
+        api_key: str = None,
+        model: str = None,
+        endpoint: str = None,
+        embeddings_max_concurrent_batches: int = None,
+    ):
+        super().__init__(embeddings_max_concurrent_batches)
         self.api_key = api_key
         self.model = model
         self.endpoint = endpoint
@@ -13,9 +19,12 @@ def __init__(self, api_key: str = None, model: str = None, endpoint: str = None)
     @classmethod
     def from_json(cls, json_dict: Dict[str, Any]) -> "MistralAiSettings":
         return cls(
-            api_key=json_dict["ApiKey"],
-            model=json_dict["Model"],
-            endpoint=json_dict["Endpoint"],
+            api_key=json_dict["ApiKey"] if "ApiKey" in json_dict else None,
+            model=json_dict["Model"] if "Model" in json_dict else None,
+            endpoint=json_dict["Endpoint"] if "Endpoint" in json_dict else None,
+            embeddings_max_concurrent_batches=(
+                json_dict["EmbeddingsMaxConcurrentBatches"] if "EmbeddingsMaxConcurrentBatches" in json_dict else None
+            ),
         )
 
     def to_json(self) -> Dict[str, Any]:
diff --git a/ravendb/documents/operations/ai/ollama_settings.py b/ravendb/documents/operations/ai/ollama_settings.py
index cb5e93e0..4f72e15f 100644
--- a/ravendb/documents/operations/ai/ollama_settings.py
+++ b/ravendb/documents/operations/ai/ollama_settings.py
@@ -12,12 +12,11 @@ def __init__(
         temperature: float = None,
         embeddings_max_concurrent_batches: int = None,
     ):
-        super().__init__()
+        super().__init__(embeddings_max_concurrent_batches)
         self.uri = uri
         self.model = model
         self.think = think
         self.temperature = temperature
-        self.embeddings_max_concurrent_batches = embeddings_max_concurrent_batches
 
     @classmethod
     def from_json(cls, json_dict: Dict[str, Any]) -> "OllamaSettings":
diff --git a/ravendb/documents/operations/ai/open_ai_base_settings.py b/ravendb/documents/operations/ai/open_ai_base_settings.py
index abf70f53..c7eed1dc 100644
--- a/ravendb/documents/operations/ai/open_ai_base_settings.py
+++ b/ravendb/documents/operations/ai/open_ai_base_settings.py
@@ -11,8 +11,9 @@ def __init__(
         model: str = None,
         dimensions: int = None,
         temperature: float = None,
+        embeddings_max_concurrent_batches: int = None,
     ):
-        super().__init__()
+        super().__init__(embeddings_max_concurrent_batches)
         self.api_key = api_key
         self.endpoint = endpoint
         self.model = model
diff --git a/ravendb/documents/operations/ai/open_ai_settings.py b/ravendb/documents/operations/ai/open_ai_settings.py
index d5974c98..f099e2ad 100644
--- a/ravendb/documents/operations/ai/open_ai_settings.py
+++ b/ravendb/documents/operations/ai/open_ai_settings.py
@@ -13,8 +13,9 @@ def __init__(
         project_id: str = None,
         dimensions: int = None,
         temperature: float = None,
+        embeddings_max_concurrent_batches: int = None,
     ):
-        super().__init__(api_key, endpoint, model, dimensions, temperature)
+        super().__init__(api_key, endpoint, model, dimensions, temperature, embeddings_max_concurrent_batches)
         self.organization_id = organization_id
         self.project_id = project_id
 
@@ -28,6 +29,9 @@ def from_json(cls, json_dict: Dict[str, Any]) -> "OpenAiSettings":
             temperature=json_dict["Temperature"] if "Temperature" in json_dict else None,
             organization_id=json_dict["OrganizationId"] if "OrganizationId" in json_dict else None,
             project_id=json_dict["ProjectId"] if "ProjectId" in json_dict else None,
+            embeddings_max_concurrent_batches=(
+                json_dict["EmbeddingsMaxConcurrentBatches"] if "EmbeddingsMaxConcurrentBatches" in json_dict else None
+            ),
         )
 
     def to_json(self) -> Dict[str, Any]:
diff --git a/ravendb/documents/operations/ai/vertex_settings.py b/ravendb/documents/operations/ai/vertex_settings.py
index 24a2713c..c9307e71 100644
--- a/ravendb/documents/operations/ai/vertex_settings.py
+++ b/ravendb/documents/operations/ai/vertex_settings.py
@@ -16,8 +16,9 @@ def __init__(
         google_credentials_json: Optional[str] = None,
         location: Optional[str] = None,
         ai_version: Optional[VertexAIVersion] = None,
+        embeddings_max_concurrent_batches: Optional[int] = None,
     ):
-        super().__init__()
+        super().__init__(embeddings_max_concurrent_batches)
         self.model = model
         self.google_credentials_json = google_credentials_json
         self.location = location
@@ -30,6 +31,11 @@ def from_json(cls, json_dict: Dict[str, Any]) -> "VertexSettings":
             google_credentials_json=json_dict.get("GoogleCredentialsJson"),
             location=json_dict.get("Location"),
             ai_version=VertexAIVersion(json_dict["AiVersion"]) if json_dict.get("AiVersion") else None,
+            embeddings_max_concurrent_batches=(
+                json_dict.get("EmbeddingsMaxConcurrentBatches")
+                if json_dict.get("EmbeddingsMaxConcurrentBatches")
+                else None
+            ),
         )
 
     def to_json(self) -> Dict[str, Any]:
diff --git a/ravendb/documents/operations/etl/configuration.py b/ravendb/documents/operations/etl/configuration.py
index f503a62a..205a4757 100644
--- a/ravendb/documents/operations/etl/configuration.py
+++ b/ravendb/documents/operations/etl/configuration.py
@@ -7,7 +7,6 @@
 from ravendb.documents.operations.etl.transformation import Transformation
 import ravendb.serverwide.server_operation_executor
 
-
 _T = TypeVar("_T", bound=ConnectionString)
 
 
diff --git a/ravendb/documents/operations/ongoing_tasks.py b/ravendb/documents/operations/ongoing_tasks.py
index 98fbed52..58845833 100644
--- a/ravendb/documents/operations/ongoing_tasks.py
+++ b/ravendb/documents/operations/ongoing_tasks.py
@@ -365,7 +365,9 @@ def get_raft_unique_request_id(self) -> str:
             return RaftIdGenerator.new_id()
 
 
-class GetOngoingTaskInfoOperation(MaintenanceOperation[OngoingTask]):
+class GetOngoingTaskInfoOperation(
+    MaintenanceOperation[Union[OngoingTask, OngoingTaskGenAi, OngoingTaskEmbeddingsGeneration]]
+):
     """
     Operation to retrieve detailed information about a specific ongoing task.
     Ongoing tasks include various types of tasks such as replication, ETL, backup, and subscriptions.
@@ -398,14 +400,16 @@ def __init__(self, task_id_or_name: Union[int, str], task_type: OngoingTaskType)
 
         self._task_type = task_type
 
-    def get_command(self, conventions: "DocumentConventions") -> RavenCommand[OngoingTask]:
+    def get_command(
+        self, conventions: "DocumentConventions"
+    ) -> RavenCommand[OngoingTask | OngoingTaskGenAi | OngoingTaskEmbeddingsGeneration]:
         if self._task_name is not None:
             return GetOngoingTaskInfoOperation._GetOngoingTaskInfoCommand(
                 task_name=self._task_name, task_type=self._task_type
             )
         return GetOngoingTaskInfoOperation._GetOngoingTaskInfoCommand(task_id=self._task_id, task_type=self._task_type)
 
-    class _GetOngoingTaskInfoCommand(RavenCommand[OngoingTask]):
+    class _GetOngoingTaskInfoCommand(RavenCommand[OngoingTask | OngoingTaskGenAi | OngoingTaskEmbeddingsGeneration]):
         def __init__(
             self,
             task_type: OngoingTaskType,
@@ -432,7 +436,9 @@ def set_response(self, response: Optional[str], from_cache: bool) -> None:
                 json_dict = json.loads(response)
                 self.result = self._deserialize_task(json_dict)
 
-        def _deserialize_task(self, json_dict: dict) -> OngoingTask:
+        def _deserialize_task(
+            self, json_dict: dict
+        ) -> OngoingTask | OngoingTaskGenAi | OngoingTaskEmbeddingsGeneration:
             """Deserialize the task based on its type."""
             if self._task_type == OngoingTaskType.GEN_AI:
                 return OngoingTaskGenAi.from_json(json_dict)
diff --git a/ravendb/documents/operations/revisions.py b/ravendb/documents/operations/revisions.py
index 242a64fa..a84731c1 100644
--- a/ravendb/documents/operations/revisions.py
+++ b/ravendb/documents/operations/revisions.py
@@ -15,7 +15,6 @@
 from ravendb.documents.session.entity_to_json import EntityToJsonStatic
 from ravendb.documents.conventions import DocumentConventions
 
-
 if TYPE_CHECKING:
     from ravendb.http.http_cache import HttpCache
     from ravendb import DocumentStore, ServerNode
diff --git a/ravendb/documents/queries/more_like_this.py b/ravendb/documents/queries/more_like_this.py
index 085447e8..76553486 100644
--- a/ravendb/documents/queries/more_like_this.py
+++ b/ravendb/documents/queries/more_like_this.py
@@ -6,7 +6,6 @@
 from ravendb.primitives import constants
 from ravendb.documents.session.tokens.query_tokens.definitions import MoreLikeThisToken
 
-
 _T = TypeVar("_T")
 
 if TYPE_CHECKING:
diff --git a/ravendb/documents/queries/vector.py b/ravendb/documents/queries/vector.py
index 7862b246..d3ebc109 100644
--- a/ravendb/documents/queries/vector.py
+++ b/ravendb/documents/queries/vector.py
@@ -1,22 +1,25 @@
+from __future__ import annotations
 import struct
-from typing import List, Tuple
+from typing import List
 
 
 class VectorQuantizer:
     @staticmethod
-    def to_int8(raw_embedding: List[float]) -> bytes:
+    def to_int8(raw_embedding: List[float]) -> List[int]:
         """
-        Converts a list of floats to a packed byte array of signed 8-bit integers (int8).
-        The maximum absolute value is appended as a 4-byte float at the end.
+        Converts a list of floats to a list of signed 8-bit integers (int8).
+        The returned list contains the quantized values followed by the
+        max_component float encoded as 4 signed bytes.
 
         Args:
             raw_embedding (List[float]): List of floating-point numbers to be quantized.
 
         Returns:
-            bytes: Packed byte array containing the quantized int8 values and the max component.
+            List[int]: List of signed integers (-128 to 127) representing the quantized vector.
+                      Includes both the quantized values and the 4 bytes of the max_component float.
         """
         if not raw_embedding:
-            return b""
+            return []
 
         # Find the maximum absolute value in the input array
         max_component: float = max(abs(x) for x in raw_embedding)
@@ -35,21 +38,24 @@ def to_int8(raw_embedding: List[float]) -> bytes:
         # Append the max_component as a little-endian float
         packed += struct.pack("<f", max_component)
 
-        return packed
+        # Convert to list of signed integers
+        return VectorQuantizer._bytes_to_int8_list(packed)
 
     @staticmethod
-    def to_int1(raw_embedding: List[float]) -> bytes:
+    def to_int1(raw_embedding: List[float]) -> List[int]:
         """
-        Converts a list of floats to a packed byte array of binary values (int1).
-        Each byte represents 8 consecutive float values, where each bit corresponds to
-        whether the float is non-negative (1) or negative (0).
+        Converts a list of floats to a list of binary values (0 or 1).
+        Each value in the input is converted to 1 if non-negative, 0 if negative.
 
         Args:
             raw_embedding (List[float]): List of floating-point numbers to be quantized.
 
         Returns:
-            bytes: Packed byte array containing the binary-packed values.
+            List[int]: List of 0s and 1s representing the binary-quantized vector.
         """
+        if not raw_embedding:
+            return []
+
         # Calculate the number of bytes needed to store the binary-packed values
         output_length: int = (len(raw_embedding) + 7) // 8
 
@@ -63,4 +69,43 @@ def to_int1(raw_embedding: List[float]) -> bytes:
                 bit_pos: int = 7 - (i % 8)  # Determine the bit position within the byte
                 bytes_list[byte_index] |= 1 << bit_pos  # Set the bit to 1 if the value is non-negative
 
-        return bytes(bytes_list)
+        # Convert to list of 0s and 1s
+        return VectorQuantizer._bytes_to_int1_list(bytes(bytes_list), len(raw_embedding))
+
+    @staticmethod
+    def _bytes_to_int8_list(packed_bytes: bytes) -> List[int]:
+        """
+        Protected method to convert packed bytes to a list of signed int8 values.
+        Includes all bytes (quantized values + max_component float bytes).
+
+        Args:
+            packed_bytes (bytes): Packed byte array from to_int8().
+
+        Returns:
+            List[int]: List of signed integers (-128 to 127).
+        """
+        if not packed_bytes:
+            return []
+
+        # Unpack ALL bytes as signed int8 (including the 4-byte max_component)
+        return list(struct.unpack("b" * len(packed_bytes), packed_bytes))
+
+    @staticmethod
+    def _bytes_to_int1_list(packed_bytes: bytes, original_length: int) -> List[int]:
+        """
+        Protected method to convert packed binary bytes to a list of 0s and 1s.
+
+        Args:
+            packed_bytes (bytes): Packed byte array from to_int1().
+            original_length (int): Original number of float values (to trim padding).
+
+        Returns:
+            List[int]: List of 0s and 1s.
+        """
+        result = []
+        for byte_val in packed_bytes:
+            for bit_pos in range(7, -1, -1):
+                result.append(1 if (byte_val & (1 << bit_pos)) else 0)
+
+        # Trim to original length (remove padding bits)
+        return result[:original_length]
diff --git a/ravendb/documents/session/document_session_operations/in_memory_document_session_operations.py b/ravendb/documents/session/document_session_operations/in_memory_document_session_operations.py
index 470945c9..c1802819 100644
--- a/ravendb/documents/session/document_session_operations/in_memory_document_session_operations.py
+++ b/ravendb/documents/session/document_session_operations/in_memory_document_session_operations.py
@@ -177,14 +177,14 @@ def __contains__(self, item):
             try:
                 return item in self.__documents_by_entity_hashable
             except TypeError as e:
-                if str(e.args[0]).startswith("unhashable type"):
+                if "unhashable type" in str(e.args[0]):
                     return item in self.__documents_by_entity_unhashable
                 raise e
         self.__create_on_before_store_documents_by_entity_if_needed()
         try:
             return item in self.__on_before_store_documents_by_entity_hashable
         except TypeError as e:
-            if str(e.args[0]).startswith("unhashable type"):
+            if "unhashable type" in str(e.args[0]):
                 return item in self.__documents_by_entity_unhashable
             raise e
 
@@ -193,7 +193,7 @@ def __setitem__(self, key, value):
             try:
                 self.__documents_by_entity_hashable[key] = value
             except TypeError as e:
-                if str(e.args[0]).startswith("unhashable type"):
+                if "unhashable type" in str(e.args[0]):
                     self.__documents_by_entity_unhashable[key] = value
                     return
                 raise e
@@ -202,7 +202,7 @@ def __setitem__(self, key, value):
         try:
             self.__on_before_store_documents_by_entity_hashable[key] = value
         except TypeError as e:
-            if str(e.args[0]).startswith("unhashable type"):
+            if "unhashable type" in str(e.args[0]):
                 self.__on_before_store_documents_by_entity_unhashable[key] = value
                 return
             raise e
diff --git a/ravendb/documents/session/operations/stream.py b/ravendb/documents/session/operations/stream.py
index bd7fa042..1f784449 100644
--- a/ravendb/documents/session/operations/stream.py
+++ b/ravendb/documents/session/operations/stream.py
@@ -11,7 +11,6 @@
 from ravendb.tools.utils import Utils
 from ravendb.primitives import constants
 
-
 if typing.TYPE_CHECKING:
     from ravendb.documents.session.document_session import InMemoryDocumentSessionOperations
 
@@ -74,7 +73,9 @@ def set_result(self, response: StreamResultResponse) -> Iterator[Dict]:
         if response is None or response.stream_iterator is None:
             raise IndexDoesNotExistException("The index does not exists, failed to stream results")
 
-        parser = JSONLRavenStreamParser(response.stream_iterator)
+        parser = JSONLRavenStreamParser(
+            response.stream_iterator, self._session.conventions.max_empty_lines_in_jsonl_stream
+        )
 
         if self._is_query_stream:
             self._handle_stream_query_stats(parser, self._statistics)
diff --git a/ravendb/documents/session/tokens/query_tokens/definitions.py b/ravendb/documents/session/tokens/query_tokens/definitions.py
index 9a50f2ec..2ff98f68 100644
--- a/ravendb/documents/session/tokens/query_tokens/definitions.py
+++ b/ravendb/documents/session/tokens/query_tokens/definitions.py
@@ -1009,7 +1009,10 @@ def __init__(
         is_exact: bool = VectorSearch.DEFAULT_IS_EXACT,
         task_name: str = None,
     ):
-        super().__init__(wrapped_field_name, WhereOperator.VECTOR_SEARCH, parameter_name)
+        where_options = WhereToken.WhereOptions()
+        where_options.exact = is_exact
+
+        super().__init__(wrapped_field_name, WhereOperator.VECTOR_SEARCH, parameter_name, where_options)
         self._source_quantization_type = source_quantization_type
         self._parameter_name = parameter_name
 
diff --git a/ravendb/serverwide/operations/common.py b/ravendb/serverwide/operations/common.py
index c3a436f9..bf27cedf 100644
--- a/ravendb/serverwide/operations/common.py
+++ b/ravendb/serverwide/operations/common.py
@@ -16,7 +16,6 @@
 from ravendb.util.util import RaftIdGenerator
 from ravendb.http.topology import RaftCommand
 
-
 if TYPE_CHECKING:
     from ravendb.http.server_node import ServerNode
     from ravendb.http.request_executor import RequestExecutor
diff --git a/ravendb/serverwide/operations/configuration.py b/ravendb/serverwide/operations/configuration.py
index 7b28f4c7..b98641d0 100644
--- a/ravendb/serverwide/operations/configuration.py
+++ b/ravendb/serverwide/operations/configuration.py
@@ -16,7 +16,6 @@
 from ravendb.tools.utils import Utils
 from ravendb.util.util import RaftIdGenerator
 
-
 if TYPE_CHECKING:
     from ravendb.documents.conventions import DocumentConventions
 
diff --git a/ravendb/tests/documents_tests/query_tests/test_vector_search.py b/ravendb/tests/documents_tests/query_tests/test_vector_search.py
index 19a125f1..e643a8f7 100644
--- a/ravendb/tests/documents_tests/query_tests/test_vector_search.py
+++ b/ravendb/tests/documents_tests/query_tests/test_vector_search.py
@@ -1,7 +1,128 @@
+import unittest
+from ravendb.documents.queries.vector import VectorQuantizer
 from ravendb.tests.dotnet_migrated_tests.test_ravenDB_22076 import Dto
 from ravendb.tests.test_base import TestBase
 
 
+class TestVectorQuantizer(unittest.TestCase):
+    """Unit tests for VectorQuantizer - no server required."""
+
+    def test_to_int8_basic_quantization(self):
+        """Test basic Int8 quantization matches C# output."""
+        # Test case from C# example: VectorQuantizer.ToInt8(new float[] { 0.1f, 0.2f })
+        # Expected output: [64, 127, -51, -52, 76, 62]
+        # Note: C# output shows [64, 127, ...] but our analysis showed it should be [63, 127, ...]
+        result = VectorQuantizer.to_int8([0.1, 0.2])
+
+        # Should have 2 quantized values + 4 bytes for max_component float
+        self.assertEqual(6, len(result))
+
+        # All values should be signed integers in range [-128, 127]
+        for val in result:
+            self.assertIsInstance(val, int)
+            self.assertGreaterEqual(val, -128)
+            self.assertLessEqual(val, 127)
+
+        # First value: 0.1 scaled by (127 / 0.2) = 63.5 -> 63
+        self.assertEqual(63, result[0])
+
+        # Second value: 0.2 scaled by (127 / 0.2) = 127
+        self.assertEqual(127, result[1])
+
+        # Last 4 bytes represent max_component (0.2) as little-endian float
+        # 0.2f in IEEE 754 little-endian: 0x3E4CCCCD = [-51, -52, 76, 62] as signed bytes
+        self.assertEqual([-51, -52, 76, 62], result[2:6])
+
+    def test_to_int8_second_example(self):
+        """Test second example from C# code."""
+        # VectorQuantizer.ToInt8(new float[] { 0.3f, 0.4f })
+        result = VectorQuantizer.to_int8([0.3, 0.4])
+
+        self.assertEqual(6, len(result))
+
+        # First value: 0.3 scaled by (127 / 0.4) = 95.25 -> 95
+        self.assertEqual(95, result[0])
+
+        # Second value: 0.4 scaled by (127 / 0.4) = 127
+        self.assertEqual(127, result[1])
+
+        # Last 4 bytes represent max_component (0.4) as little-endian float
+        # 0.4f in IEEE 754 little-endian: 0x3ECCCCCD = [-51, -52, -52, 62] as signed bytes
+        self.assertEqual([-51, -52, -52, 62], result[2:6])
+
+    def test_to_int8_negative_values(self):
+        """Test Int8 quantization with negative values."""
+        result = VectorQuantizer.to_int8([-0.5, 0.5])
+
+        self.assertEqual(6, len(result))
+
+        # First value: -0.5 scaled by (127 / 0.5) = -127
+        self.assertEqual(-127, result[0])
+
+        # Second value: 0.5 scaled by (127 / 0.5) = 127
+        self.assertEqual(127, result[1])
+
+    def test_to_int8_all_zeros(self):
+        """Test Int8 quantization with all zeros."""
+        result = VectorQuantizer.to_int8([0.0, 0.0, 0.0])
+
+        self.assertEqual(7, len(result))  # 3 quantized + 4 float bytes
+
+        # All quantized values should be 0
+        self.assertEqual([0, 0, 0], result[:3])
+
+        # max_component is 0.0, which is all zeros in IEEE 754
+        self.assertEqual([0, 0, 0, 0], result[3:7])
+
+    def test_to_int8_empty_list(self):
+        """Test Int8 quantization with empty list."""
+        result = VectorQuantizer.to_int8([])
+        self.assertEqual([], result)
+
+    def test_to_int1_basic_quantization(self):
+        """Test basic Int1 (binary) quantization."""
+        # Positive values -> 1, negative values -> 0
+        result = VectorQuantizer.to_int1([0.5, -0.3, 0.8, -0.1, 0.2])
+
+        self.assertEqual(5, len(result))
+        self.assertEqual([1, 0, 1, 0, 1], result)
+
+    def test_to_int1_zero_is_positive(self):
+        """Test that zero is treated as non-negative (1)."""
+        result = VectorQuantizer.to_int1([0.0, -0.0, 1.0, -1.0])
+
+        self.assertEqual(4, len(result))
+        # 0.0 and -0.0 are both >= 0, so they should be 1
+        self.assertEqual([1, 1, 1, 0], result)
+
+    def test_to_int1_all_positive(self):
+        """Test Int1 quantization with all positive values."""
+        result = VectorQuantizer.to_int1([0.1, 0.2, 0.3, 0.4, 0.5])
+
+        self.assertEqual(5, len(result))
+        self.assertEqual([1, 1, 1, 1, 1], result)
+
+    def test_to_int1_all_negative(self):
+        """Test Int1 quantization with all negative values."""
+        result = VectorQuantizer.to_int1([-0.1, -0.2, -0.3, -0.4, -0.5])
+
+        self.assertEqual(5, len(result))
+        self.assertEqual([0, 0, 0, 0, 0], result)
+
+    def test_to_int1_empty_list(self):
+        """Test Int1 quantization with empty list."""
+        result = VectorQuantizer.to_int1([])
+        self.assertEqual([], result)
+
+    def test_to_int1_padding_trimmed(self):
+        """Test that Int1 quantization trims padding bits correctly."""
+        # 9 values should pack into 2 bytes (16 bits), but only return 9 values
+        result = VectorQuantizer.to_int1([1.0] * 9)
+
+        self.assertEqual(9, len(result))
+        self.assertEqual([1] * 9, result)
+
+
 class TestVectorSearch(TestBase):
     def test_should_generate_rql_with_text_field_using_named_ai_task(self):
         with self.store.open_session() as session:
diff --git a/ravendb/tests/jvm_migrated_tests/issues_tests/test_ravenDB_16328.py b/ravendb/tests/jvm_migrated_tests/issues_tests/test_ravenDB_16328.py
index 0922c8bf..892b0053 100644
--- a/ravendb/tests/jvm_migrated_tests/issues_tests/test_ravenDB_16328.py
+++ b/ravendb/tests/jvm_migrated_tests/issues_tests/test_ravenDB_16328.py
@@ -10,7 +10,6 @@
 from ravendb.serverwide.operations.sorters import PutServerWideSortersOperation, DeleteServerWideSorterOperation
 from ravendb.tests.test_base import TestBase
 
-
 sorter_code = (
     "using System;\n"
     "using System.Collections.Generic;\n"
diff --git a/ravendb/tools/parsers.py b/ravendb/tools/parsers.py
index fa8ff9d4..abcbbf24 100644
--- a/ravendb/tools/parsers.py
+++ b/ravendb/tools/parsers.py
@@ -1,4 +1,5 @@
 import json
+from json import JSONDecodeError
 from typing import Any, Iterator, Optional, Dict
 from decimal import InvalidOperation
 
@@ -92,7 +93,7 @@ def create_array(self, gen):
         raise ParseError("End array expected, but the generator ended before we got it")
 
     def get_value(self, gen):
-        (token, val) = next(gen)
+        token, val = next(gen)
 
         return self.get_value_from_token(gen, token, val)
 
@@ -118,9 +119,9 @@ def create_object(self, gen):
 
     def next_object(self) -> Optional[Dict[str, Any]]:
         try:
-            (_, text) = next(self.lexer)
+            _, text = next(self.lexer)
             if IS_WEBSOCKET and text == ",":
-                (_, text) = next(self.lexer)
+                _, text = next(self.lexer)
         except StopIteration:
             return None
 
@@ -128,7 +129,7 @@ def next_object(self) -> Optional[Dict[str, Any]]:
             raise ParseError("Expected start object, got: " + text)
 
         gen = IncrementalJsonParser.parse_object(self.lexer)
-        (token, val) = next(gen)
+        token, val = next(gen)
         assert token == "start_map"
 
         return self.create_object(gen)
@@ -249,16 +250,34 @@ def unescape(s):
 
 
 class JSONLRavenStreamParser:
-    def __init__(self, stream: Iterator):
+    def __init__(self, stream: Iterator, max_empty_lines: int = 100):
         self._stream = stream
         self._unused_buffer: Optional[Dict] = None
+        self._max_empty_lines = max_empty_lines
 
     def _get_next_json_dict(self) -> Dict:
-        return (
-            self._unused_buffer
-            if self._unused_buffer is not None
-            else json.loads(self._stream.__next__().decode("utf-8"))
-        )
+        if self._unused_buffer is not None:
+            return self._unused_buffer
+
+        empty_line_count = 0
+        while True:
+            json_data = self._stream.__next__().decode("utf-8").strip()
+
+            # Skip empty lines (standard in JSONL format)
+            if not json_data:
+                empty_line_count += 1
+                if empty_line_count >= self._max_empty_lines:
+                    raise RuntimeError(
+                        f"Received {self._max_empty_lines} consecutive empty lines in JSONL stream. "
+                        f"You can configure the maximum number of empty lines to ignore "
+                        f"using DocumentConventions.max_empty_lines_in_jsonl_stream."
+                    )
+                continue
+
+            try:
+                return json.loads(json_data)
+            except JSONDecodeError as e:
+                raise RuntimeError(f"Failed to parse JSON from stream: {json_data}") from e
 
     def purge_cache(self) -> None:
         self._unused_buffer = None