@@ -28,7 +28,7 @@ class Model:
 ```python
 from pyllamacpp.model import Model
 
-model = Model(ggml_model='./models/ggml-model-f16-q4_0.bin')
+model = Model(ggml_model='path/to/ggml/model')
 for token in model.generate("Tell me a joke ?"):
     print(token, end='', flush=True)
 ```
@@ -113,6 +113,7 @@ def reset(self):
     def generate(self,
                  prompt: str,
                  n_predict: Union[None, int] = None,
+                 antiprompt: str = None,
                  infinite_generation: bool = False,
                  n_threads: int = 4,
                  repeat_last_n: int = 64,
@@ -126,6 +127,8 @@ def generate(self,
         :param prompt: The prompt :)
         :param n_predict: if n_predict is not None, the inference will stop if it reaches `n_predict` tokens, otherwise
                           it will continue until `EOS`
+        :param antiprompt: aka the stop word; generation stops as soon as this word is predicted.
+                           Keep it `None` to handle stopping in your own way
         :param infinite_generation: set it to `True` to make the generation go infinitely
         :param n_threads: The number of CPU threads
         :param repeat_last_n: last n tokens to penalize
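
For context, here is how the new parameter might be exercised once this change lands; a minimal sketch, assuming a local GGML model file (the path and stop word below are placeholders, not values from this diff):

```python
from pyllamacpp.model import Model

# Placeholder path; point this at a real GGML model file.
model = Model(ggml_model='path/to/ggml/model')

prompt = "User: Tell me a joke\nBot:"

# With antiprompt set, generation should halt as soon as the model
# starts a new "User:" turn instead of rambling on indefinitely.
for token in model.generate(prompt, antiprompt="User:"):
    print(token, end='', flush=True)
```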
@@ -156,6 +159,9 @@ def generate(self,
             self._last_n_tokens.append(tok)
 
         n_remain = 0
+        if antiprompt is not None:
+            sequence_queue = []
+            stop_word = antiprompt.strip()
 
         while infinite_generation or predicted_token != pp.llama_token_eos():
             if len(predicted_tokens) > 0:
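
The two new locals set up a small buffering scheme: `sequence_queue` holds tokens that might be the start of the stop word, and `stop_word` is the stripped antiprompt they are compared against. A quick illustration of the prefix test the loop below relies on (the token values here are made up):

```python
stop_word = "User:"                 # stripped antiprompt
queue = ['\n', 'User']              # tokens withheld from the caller so far
partial = ''.join(queue).strip()    # -> 'User'
stop_word.startswith(partial)       # True: still a possible match, keep buffering
partial == stop_word                # False: not a complete match yet
```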
@@ -178,6 +184,23 @@ def generate(self,
 
             predicted_tokens.append(predicted_token)
             token_str = pp.llama_token_to_str(self._ctx, predicted_token)
+            if antiprompt is not None:
+                if token_str == '\n':
+                    sequence_queue.append(token_str)
+                    continue
+                if len(sequence_queue) != 0:
+                    if stop_word.startswith(''.join(sequence_queue).strip()):
+                        sequence_queue.append(token_str)
+                        if ''.join(sequence_queue).strip() == stop_word:
+                            break
+                        else:
+                            continue
+                    else:
+                        # false match: flush the queued tokens back to the caller
+                        while len(sequence_queue) != 0:
+                            yield sequence_queue.pop(0)
+                        sequence_queue = []
+
             self._last_n_tokens.pop(0)
             self._last_n_tokens.append(predicted_token)
             yield token_str
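
To sanity-check the control flow, here is a self-contained, plain-Python rehearsal of the same buffering logic over a hand-written token stream (no llama bindings involved; the token boundaries are invented for illustration):

```python
def stream_until(tokens, antiprompt):
    # Mirrors the buffering logic added above, minus the llama.cpp calls.
    stop_word = antiprompt.strip()
    queue = []
    for tok in tokens:
        if tok == '\n':                      # a newline may open the stop sequence
            queue.append(tok)
            continue
        if queue:
            if stop_word.startswith(''.join(queue).strip()):
                queue.append(tok)
                if ''.join(queue).strip() == stop_word:
                    return                   # stop word completed: end generation
                continue
            while queue:                     # false match: flush withheld tokens
                yield queue.pop(0)
        yield tok

print(list(stream_until(['Hi', '!', '\n', 'User', ':', ' there'], 'User:')))
# -> ['Hi', '!']   (the '\n' 'User' ':' run is swallowed as the stop word)
```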