
Commit 10ee786

Replace with IPEX-LLM in example comments (intel#10671)
* Replace with IPEX-LLM in example comments
* More replacement
* revert some changes
1 parent 08018a1 commit 10ee786

159 files changed: 183 additions, 183 deletions


python/llm/example/CPU/Applications/streaming-llm/streaming_llm/utils.py

Lines changed: 1 addition & 1 deletion
@@ -48,7 +48,7 @@
 import urllib.request
 import os
 import json
-# code change to import from bigdl-llm API instead of using transformers API
+# code change to import from IPEX-LLM API instead of using transformers API
 from ipex_llm.transformers import AutoModelForCausalLM
 from transformers import LlamaTokenizer
 import intel_extension_for_pytorch as ipex
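For context, the renamed comment refers to the drop-in import: ipex_llm.transformers.AutoModelForCausalLM replaces the transformers class of the same name, with load_in_4bit=True applying the INT4 optimization at load time (the baichuan2 diff below shows this argument). A minimal sketch of that pattern, using a hypothetical placeholder checkpoint that is not part of this commit:

# Sketch only: the model id below is a hypothetical placeholder.
from ipex_llm.transformers import AutoModelForCausalLM  # drop-in for transformers' AutoModelForCausalLM
from transformers import LlamaTokenizer

model_path = "meta-llama/Llama-2-7b-chat-hf"  # hypothetical checkpoint
# load_in_4bit=True quantizes the weights to INT4 while loading
model = AutoModelForCausalLM.from_pretrained(model_path,
                                             load_in_4bit=True,
                                             trust_remote_code=True)
tokenizer = LlamaTokenizer.from_pretrained(model_path, trust_remote_code=True)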

python/llm/example/CPU/Deepspeed-AutoTP/deepspeed_autotp.py

Lines changed: 2 additions & 2 deletions
@@ -87,7 +87,7 @@
 replace_method="auto"
 )

-# Apply BigDL-LLM INT4 optimizations on transformers
+# Apply IPEX-LLM INT4 optimizations on transformers
 model = optimize_model(model.module.to(f'cpu'), low_bit='sym_int4')

 model = model.to(f'cpu:{local_rank}')
@@ -111,7 +111,7 @@
 # if your selected model is capable of utilizing previous key/value attentions
 # to enhance decoding speed, but has `"use_cache": false` in its model config,
 # it is important to set `use_cache=True` explicitly in the `generate` function
-# to obtain optimal performance with BigDL-LLM INT4 optimizations
+# to obtain optimal performance with IPEX-LLM INT4 optimizations
 output = model.generate(input_ids,
 do_sample=False,
 max_new_tokens=args.n_predict)
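The first hunk applies IPEX-LLM INT4 optimizations to an already-constructed model via optimize_model(..., low_bit='sym_int4'). A minimal sketch of that call outside the DeepSpeed setup, assuming the optimize_model import comes from the renamed ipex_llm package and using a placeholder checkpoint:

# Sketch only: the checkpoint name is a placeholder; import path assumed from the ipex_llm package.
from transformers import AutoModelForCausalLM
from ipex_llm import optimize_model

model = AutoModelForCausalLM.from_pretrained("facebook/opt-1.3b")  # hypothetical checkpoint
# 'sym_int4' requests symmetric INT4 weight quantization, as in the example above
model = optimize_model(model, low_bit='sym_int4')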

python/llm/example/CPU/HF-Transformers-AutoModels/Advanced-Quantizations/AWQ/generate.py

Lines changed: 1 addition & 1 deletion
@@ -59,7 +59,7 @@
 # if your selected model is capable of utilizing previous key/value attentions
 # to enhance decoding speed, but has `"use_cache": false` in its model config,
 # it is important to set `use_cache=True` explicitly in the `generate` function
-# to obtain optimal performance with BigDL-LLM INT4 optimizations
+# to obtain optimal performance with IPEX-LLM INT4 optimizations
 output = model.generate(input_ids,
 max_new_tokens=args.n_predict)
 end = time.time()
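The same comment change recurs in the GPTQ, aquila, aquila2, baichuan, baichuan2, and bluelm examples below; the underlying advice is to pass use_cache=True to generate() when a model's config has "use_cache": false. A sketch of that call, assuming model and tokenizer were loaded as in the earlier snippets and with a placeholder prompt:

# Sketch only: the prompt and token budget are placeholders.
import torch

prompt = "What is AI?"
input_ids = tokenizer.encode(prompt, return_tensors="pt")
with torch.inference_mode():
    # use_cache=True reuses past key/value attentions so each new token
    # avoids recomputing attention over the full prefix
    output = model.generate(input_ids,
                            use_cache=True,
                            max_new_tokens=32)
print(tokenizer.decode(output[0], skip_special_tokens=True))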

python/llm/example/CPU/HF-Transformers-AutoModels/Advanced-Quantizations/GGUF/generate.py

Lines changed: 1 addition & 1 deletion
@@ -44,7 +44,7 @@

 model_path = args.model

-# Load gguf model and vocab, then convert them to bigdl-llm model and huggingface tokenizer
+# Load gguf model and vocab, then convert them to IPEX-LLM model and huggingface tokenizer
 model, tokenizer = AutoModelForCausalLM.from_gguf(model_path, low_bit = args.low_bit,)

 # Generate predicted tokens
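For reference, from_gguf loads a GGUF checkpoint and returns both an IPEX-LLM model and a matching Hugging Face tokenizer built from the GGUF vocab. A minimal sketch with a placeholder file path and low-bit setting (the example itself takes these from command-line arguments):

# Sketch only: the GGUF path and low_bit value are placeholders.
from ipex_llm.transformers import AutoModelForCausalLM

gguf_path = "/path/to/llama-2-7b-chat.Q4_0.gguf"  # hypothetical local GGUF file
# returns the converted model together with a Hugging Face tokenizer
model, tokenizer = AutoModelForCausalLM.from_gguf(gguf_path, low_bit="sym_int4")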

python/llm/example/CPU/HF-Transformers-AutoModels/Advanced-Quantizations/GPTQ/generate.py

Lines changed: 1 addition & 1 deletion
@@ -60,7 +60,7 @@
 # if your selected model is capable of utilizing previous key/value attentions
 # to enhance decoding speed, but has `"use_cache": false` in its model config,
 # it is important to set `use_cache=True` explicitly in the `generate` function
-# to obtain optimal performance with BigDL-LLM INT4 optimizations
+# to obtain optimal performance with IPEX-LLM INT4 optimizations
 output = model.generate(input_ids,
 max_new_tokens=args.n_predict)
 end = time.time()

python/llm/example/CPU/HF-Transformers-AutoModels/Model/aquila/generate.py

Lines changed: 1 addition & 1 deletion
@@ -56,7 +56,7 @@
 # if your selected model is capable of utilizing previous key/value attentions
 # to enhance decoding speed, but has `"use_cache": false` in its model config,
 # it is important to set `use_cache=True` explicitly in the `generate` function
-# to obtain optimal performance with BigDL-LLM INT4 optimizations
+# to obtain optimal performance with IPEX-LLM INT4 optimizations
 output = model.generate(input_ids,
 max_new_tokens=args.n_predict)
 end = time.time()

python/llm/example/CPU/HF-Transformers-AutoModels/Model/aquila2/generate.py

Lines changed: 1 addition & 1 deletion
@@ -56,7 +56,7 @@
 # if your selected model is capable of utilizing previous key/value attentions
 # to enhance decoding speed, but has `"use_cache": false` in its model config,
 # it is important to set `use_cache=True` explicitly in the `generate` function
-# to obtain optimal performance with BigDL-LLM INT4 optimizations
+# to obtain optimal performance with IPEX-LLM INT4 optimizations
 output = model.generate(input_ids,
 max_new_tokens=args.n_predict)
 end = time.time()

python/llm/example/CPU/HF-Transformers-AutoModels/Model/baichuan/generate.py

Lines changed: 1 addition & 1 deletion
@@ -56,7 +56,7 @@
 # if your selected model is capable of utilizing previous key/value attentions
 # to enhance decoding speed, but has `"use_cache": false` in its model config,
 # it is important to set `use_cache=True` explicitly in the `generate` function
-# to obtain optimal performance with BigDL-LLM INT4 optimizations
+# to obtain optimal performance with IPEX-LLM INT4 optimizations
 output = model.generate(input_ids,
 max_new_tokens=args.n_predict)
 end = time.time()

python/llm/example/CPU/HF-Transformers-AutoModels/Model/baichuan2/generate.py

Lines changed: 1 addition & 1 deletion
@@ -45,7 +45,7 @@
 # if your selected model is capable of utilizing previous key/value attentions
 # to enhance decoding speed, but has `"use_cache": false` in its model config,
 # it is important to set `use_cache=True` explicitly in the `generate` function
-# to obtain optimal performance with BigDL-LLM INT4 optimizations
+# to obtain optimal performance with IPEX-LLM INT4 optimizations
 model = AutoModelForCausalLM.from_pretrained(model_path,
 load_in_4bit=True,
 trust_remote_code=True,

python/llm/example/CPU/HF-Transformers-AutoModels/Model/bluelm/generate.py

Lines changed: 1 addition & 1 deletion
@@ -56,7 +56,7 @@
 # if your selected model is capable of utilizing previous key/value attentions
 # to enhance decoding speed, but has `"use_cache": false` in its model config,
 # it is important to set `use_cache=True` explicitly in the `generate` function
-# to obtain optimal performance with BigDL-LLM INT4 optimizations
+# to obtain optimal performance with IPEX-LLM INT4 optimizations
 output = model.generate(input_ids,
 max_new_tokens=args.n_predict)
 end = time.time()
