
Commit 10ee786

Replace with IPEX-LLM in example comments (intel#10671)
* Replace with IPEX-LLM in example comments
* More replacement
* revert some changes
1 parent 08018a1 commit 10ee786

159 files changed: 183 additions, 183 deletions


python/llm/example/CPU/Applications/streaming-llm/streaming_llm/utils.py

Lines changed: 1 addition & 1 deletion
@@ -48,7 +48,7 @@
 import urllib.request
 import os
 import json
-# code change to import from bigdl-llm API instead of using transformers API
+# code change to import from IPEX-LLM API instead of using transformers API
 from ipex_llm.transformers import AutoModelForCausalLM
 from transformers import LlamaTokenizer
 import intel_extension_for_pytorch as ipex
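For context, the renamed comment refers to the drop-in import: ipex_llm.transformers.AutoModelForCausalLM replaces the transformers class of the same name, with load_in_4bit=True applying the INT4 optimization at load time (the baichuan2 diff below shows this argument). A minimal sketch of that pattern, using a hypothetical placeholder checkpoint that is not part of this commit:

# Sketch only: the model id below is a hypothetical placeholder.
from ipex_llm.transformers import AutoModelForCausalLM  # drop-in for transformers' AutoModelForCausalLM
from transformers import LlamaTokenizer

model_path = "meta-llama/Llama-2-7b-chat-hf"  # hypothetical checkpoint
# load_in_4bit=True quantizes the weights to INT4 while loading
model = AutoModelForCausalLM.from_pretrained(model_path,
                                             load_in_4bit=True,
                                             trust_remote_code=True)
tokenizer = LlamaTokenizer.from_pretrained(model_path, trust_remote_code=True)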

python/llm/example/CPU/Deepspeed-AutoTP/deepspeed_autotp.py

Lines changed: 2 additions & 2 deletions
@@ -87,7 +87,7 @@
 replace_method="auto"
 )

-# Apply BigDL-LLM INT4 optimizations on transformers
+# Apply IPEX-LLM INT4 optimizations on transformers
 model = optimize_model(model.module.to(f'cpu'), low_bit='sym_int4')

 model = model.to(f'cpu:{local_rank}')
@@ -111,7 +111,7 @@
 # if your selected model is capable of utilizing previous key/value attentions
 # to enhance decoding speed, but has `"use_cache": false` in its model config,
 # it is important to set `use_cache=True` explicitly in the `generate` function
-# to obtain optimal performance with BigDL-LLM INT4 optimizations
+# to obtain optimal performance with IPEX-LLM INT4 optimizations
 output = model.generate(input_ids,
 do_sample=False,
 max_new_tokens=args.n_predict)
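The first hunk applies IPEX-LLM INT4 optimizations to an already-constructed model via optimize_model(..., low_bit='sym_int4'). A minimal sketch of that call outside the DeepSpeed setup, assuming the optimize_model import comes from the renamed ipex_llm package and using a placeholder checkpoint:

# Sketch only: the checkpoint name is a placeholder; import path assumed from the ipex_llm package.
from transformers import AutoModelForCausalLM
from ipex_llm import optimize_model

model = AutoModelForCausalLM.from_pretrained("facebook/opt-1.3b")  # hypothetical checkpoint
# 'sym_int4' requests symmetric INT4 weight quantization, as in the example above
model = optimize_model(model, low_bit='sym_int4')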

python/llm/example/CPU/HF-Transformers-AutoModels/Advanced-Quantizations/AWQ/generate.py

Lines changed: 1 addition & 1 deletion
@@ -59,7 +59,7 @@
 # if your selected model is capable of utilizing previous key/value attentions
 # to enhance decoding speed, but has `"use_cache": false` in its model config,
 # it is important to set `use_cache=True` explicitly in the `generate` function
-# to obtain optimal performance with BigDL-LLM INT4 optimizations
+# to obtain optimal performance with IPEX-LLM INT4 optimizations
 output = model.generate(input_ids,
 max_new_tokens=args.n_predict)
 end = time.time()
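The same comment change recurs in the GPTQ, aquila, aquila2, baichuan, baichuan2, and bluelm examples below; the underlying advice is to pass use_cache=True to generate() when a model's config has "use_cache": false. A sketch of that call, assuming model and tokenizer were loaded as in the earlier snippets and with a placeholder prompt:

# Sketch only: the prompt and token budget are placeholders.
import torch

prompt = "What is AI?"
input_ids = tokenizer.encode(prompt, return_tensors="pt")
with torch.inference_mode():
    # use_cache=True reuses past key/value attentions so each new token
    # avoids recomputing attention over the full prefix
    output = model.generate(input_ids,
                            use_cache=True,
                            max_new_tokens=32)
print(tokenizer.decode(output[0], skip_special_tokens=True))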

python/llm/example/CPU/HF-Transformers-AutoModels/Advanced-Quantizations/GGUF/generate.py

Lines changed: 1 addition & 1 deletion
@@ -44,7 +44,7 @@

 model_path = args.model

-# Load gguf model and vocab, then convert them to bigdl-llm model and huggingface tokenizer
+# Load gguf model and vocab, then convert them to IPEX-LLM model and huggingface tokenizer
 model, tokenizer = AutoModelForCausalLM.from_gguf(model_path, low_bit = args.low_bit,)

 # Generate predicted tokens
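For reference, from_gguf loads a GGUF checkpoint and returns both an IPEX-LLM model and a matching Hugging Face tokenizer built from the GGUF vocab. A minimal sketch with a placeholder file path and low-bit setting (the example itself takes these from command-line arguments):

# Sketch only: the GGUF path and low_bit value are placeholders.
from ipex_llm.transformers import AutoModelForCausalLM

gguf_path = "/path/to/llama-2-7b-chat.Q4_0.gguf"  # hypothetical local GGUF file
# returns the converted model together with a Hugging Face tokenizer
model, tokenizer = AutoModelForCausalLM.from_gguf(gguf_path, low_bit="sym_int4")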

python/llm/example/CPU/HF-Transformers-AutoModels/Advanced-Quantizations/GPTQ/generate.py

Lines changed: 1 addition & 1 deletion
@@ -60,7 +60,7 @@
 # if your selected model is capable of utilizing previous key/value attentions
 # to enhance decoding speed, but has `"use_cache": false` in its model config,
 # it is important to set `use_cache=True` explicitly in the `generate` function
-# to obtain optimal performance with BigDL-LLM INT4 optimizations
+# to obtain optimal performance with IPEX-LLM INT4 optimizations
 output = model.generate(input_ids,
 max_new_tokens=args.n_predict)
 end = time.time()

python/llm/example/CPU/HF-Transformers-AutoModels/Model/aquila/generate.py

Lines changed: 1 addition & 1 deletion
@@ -56,7 +56,7 @@
 # if your selected model is capable of utilizing previous key/value attentions
 # to enhance decoding speed, but has `"use_cache": false` in its model config,
 # it is important to set `use_cache=True` explicitly in the `generate` function
-# to obtain optimal performance with BigDL-LLM INT4 optimizations
+# to obtain optimal performance with IPEX-LLM INT4 optimizations
 output = model.generate(input_ids,
 max_new_tokens=args.n_predict)
 end = time.time()

python/llm/example/CPU/HF-Transformers-AutoModels/Model/aquila2/generate.py

Lines changed: 1 addition & 1 deletion
@@ -56,7 +56,7 @@
 # if your selected model is capable of utilizing previous key/value attentions
 # to enhance decoding speed, but has `"use_cache": false` in its model config,
 # it is important to set `use_cache=True` explicitly in the `generate` function
-# to obtain optimal performance with BigDL-LLM INT4 optimizations
+# to obtain optimal performance with IPEX-LLM INT4 optimizations
 output = model.generate(input_ids,
 max_new_tokens=args.n_predict)
 end = time.time()

python/llm/example/CPU/HF-Transformers-AutoModels/Model/baichuan/generate.py

Lines changed: 1 addition & 1 deletion
@@ -56,7 +56,7 @@
 # if your selected model is capable of utilizing previous key/value attentions
 # to enhance decoding speed, but has `"use_cache": false` in its model config,
 # it is important to set `use_cache=True` explicitly in the `generate` function
-# to obtain optimal performance with BigDL-LLM INT4 optimizations
+# to obtain optimal performance with IPEX-LLM INT4 optimizations
 output = model.generate(input_ids,
 max_new_tokens=args.n_predict)
 end = time.time()

python/llm/example/CPU/HF-Transformers-AutoModels/Model/baichuan2/generate.py

Lines changed: 1 addition & 1 deletion
@@ -45,7 +45,7 @@
 # if your selected model is capable of utilizing previous key/value attentions
 # to enhance decoding speed, but has `"use_cache": false` in its model config,
 # it is important to set `use_cache=True` explicitly in the `generate` function
-# to obtain optimal performance with BigDL-LLM INT4 optimizations
+# to obtain optimal performance with IPEX-LLM INT4 optimizations
 model = AutoModelForCausalLM.from_pretrained(model_path,
 load_in_4bit=True,
 trust_remote_code=True,

python/llm/example/CPU/HF-Transformers-AutoModels/Model/bluelm/generate.py

Lines changed: 1 addition & 1 deletion
@@ -56,7 +56,7 @@
 # if your selected model is capable of utilizing previous key/value attentions
 # to enhance decoding speed, but has `"use_cache": false` in its model config,
 # it is important to set `use_cache=True` explicitly in the `generate` function
-# to obtain optimal performance with BigDL-LLM INT4 optimizations
+# to obtain optimal performance with IPEX-LLM INT4 optimizations
 output = model.generate(input_ids,
 max_new_tokens=args.n_predict)
 end = time.time()
