11general :
2- name : " fmbench-openai- gemini-bedrock models "
2+ name : " fmbench-opus-sonnet-gpt- gemini-llama "
33 model_name : " openai, gemini & bedrock models"
44
55# AWS and SageMaker settings
@@ -40,15 +40,15 @@ s3_read_data:
4040 - narrativeqa.jsonl
4141 - triviaqa_e.jsonl
4242 - triviaqa.jsonl
43- tokenizer_prefix : llama3_tokenizer # # add the tokenizer.json and config.json from your specific tokenizer type
43+ tokenizer_prefix : tokenizer
4444 prompt_template_dir : prompt_template
4545 prompt_template_file : prompt_template_claude.txt # # add your desired prompt template type
4646
4747# # section that enables container to run notebooks and python scripts automatically
4848run_steps :
4949 0_setup.ipynb : yes
5050 1_generate_data.ipynb : yes
51- 2_deploy_model.ipynb : no
51+ 2_deploy_model.ipynb : yes
5252 3_run_inference.ipynb : yes
5353 4_get_evaluations.ipynb : yes
5454 5_model_metric_analysis.ipynb : yes
@@ -114,10 +114,60 @@ inference_parameters:
114114
115115# Model configurations for openAI models
116116experiments :
117- - name : gemini/gemini-1.5-pro
118- model_name : gemini/gemini-1.5-pro
119- ep_name : gemini/gemini-1.5-pro
120- instance_type : gemini/gemini-1.5-pro
117+ - name : anthropic.claude-3-opus-20240229-v1:0
118+ # model_id is interpreted in conjunction with the deployment_script, so if you
119+ # use a JumpStart model id then set the deployment_script to jumpstart.py.
120+ # if deploying directly from HuggingFace this would be a HuggingFace model id
121+ # see the DJL serving deployment script in the code repo for reference.
122+ model_id : anthropic.claude-3-opus-20240229-v1:0
123+ model_version :
124+ model_name : anthropic.claude-3-opus-20240229-v1:0
125+ ep_name : anthropic.claude-3-opus-20240229-v1:0
126+ instance_type : anthropic.claude-3-opus-20240229-v1:0
127+ image_uri :
128+ deploy : no
129+ # FMBench comes packaged with multiple deployment scripts, such as scripts for JumpStart and
130+ # scripts for deploying using DJL DeepSpeed, TensorRT, etc. You can also add your own.
131+ # See repo for details
132+ instance_count :
133+ deployment_script :
134+ # FMBench comes packaged with multiple inference scripts, such as scripts for SageMaker
135+ # and Bedrock. You can also add your own. See repo for details
136+ inference_script : bedrock_predictor.py
137+ inference_spec :
138+ split_input_and_parameters : no
139+ # this should match one of the sections in the inference_parameters section above
140+ parameter_set : bedrock
141+ # to stream responses, set stream to true. Enter the start and stop token for the
142+ # Time To First Token, Time To Last Token, and Time Per Output Token (TTFT, TTLT, TPOT)
143+ # metrics to be calculated. Responses from a Bedrock stream are received in chunks, so specify
144+ # the stop token only.
145+ stream : False
146+ start_token :
147+ stop_token : " <|eot_id|>"
148+ # runs are done for each combination of payload file and concurrency level
149+ payload_files :
150+ - payload_en_1-500.jsonl
151+ - payload_en_500-1000.jsonl
152+ - payload_en_1000-2000.jsonl
153+ - payload_en_2000-3000.jsonl
154+ - payload_en_3000-4000.jsonl
155+ - payload_en_4000-5000.jsonl
156+ - payload_en_5000-6000.jsonl
157+ # concurrency level refers to number of requests sent in parallel to an endpoint
158+ # the next set of requests is sent once responses for all concurrent requests have
159+ # been received.
160+
161+ # for streaming responses on bedrock, only a concurrency of 1 is supported on FMBench
162+ concurrency_levels :
163+ - 1
164+ # Environment variables to be passed to the container
165+ # this is not a fixed list, you can add more parameters as applicable.
166+ env :
167+ - name : gpt-4o-mini
168+ model_name : gpt-4o-mini
169+ ep_name : gpt-4o-mini
170+ instance_type : gpt-4o-mini
121171 instance_count :
122172 inference_script : external_predictor.py
123173 inference_spec :
@@ -134,10 +184,10 @@ experiments:
134184 concurrency_levels :
135185 - 1
136186 env :
137- - name : gemini/gemini-1.5-flash
138- model_name : gemini/gemini-1.5-flash
139- ep_name : gemini/gemini-1.5-flash
140- instance_type : gemini/gemini-1.5-flash
187+ - name : gpt-4o
188+ model_name : gpt-4o
189+ ep_name : gpt-4o
190+ instance_type : gpt-4o
141191 instance_count :
142192 inference_script : external_predictor.py
143193 inference_spec :
@@ -154,10 +204,10 @@ experiments:
154204 concurrency_levels :
155205 - 1
156206 env :
157- - name : gpt-4o-mini
158- model_name : gpt-4o-mini
159- ep_name : gpt-4o-mini
160- instance_type : gpt-4o-mini
207+ - name : gemini/gemini-1.5-pro
208+ model_name : gemini/gemini-1.5-pro
209+ ep_name : gemini/gemini-1.5-pro
210+ instance_type : gemini/gemini-1.5-pro
161211 instance_count :
162212 inference_script : external_predictor.py
163213 inference_spec :
@@ -174,10 +224,10 @@ experiments:
174224 concurrency_levels :
175225 - 1
176226 env :
177- - name : gpt-4o
178- model_name : gpt-4o
179- ep_name : gpt-4o
180- instance_type : gpt-4o
227+ - name : gemini/gemini-1.5-flash
228+ model_name : gemini/gemini-1.5-flash
229+ ep_name : gemini/gemini-1.5-flash
230+ instance_type : gemini/gemini-1.5-flash
181231 instance_count :
182232 inference_script : external_predictor.py
183233 inference_spec :
@@ -194,6 +244,56 @@ experiments:
194244 concurrency_levels :
195245 - 1
196246 env :
247+ - name : anthropic.claude-3-5-sonnet-20240620-v1:0
248+ # model_id is interpreted in conjunction with the deployment_script, so if you
249+ # use a JumpStart model id then set the deployment_script to jumpstart.py.
250+ # if deploying directly from HuggingFace this would be a HuggingFace model id
251+ # see the DJL serving deployment script in the code repo for reference.
252+ model_id : anthropic.claude-3-5-sonnet-20240620-v1:0
253+ model_version :
254+ model_name : anthropic.claude-3-5-sonnet-20240620-v1:0
255+ ep_name : anthropic.claude-3-5-sonnet-20240620-v1:0
256+ instance_type : anthropic.claude-3-5-sonnet-20240620-v1:0
257+ image_uri :
258+ deploy : no
259+ # FMBench comes packaged with multiple deployment scripts, such as scripts for JumpStart and
260+ # scripts for deploying using DJL DeepSpeed, TensorRT, etc. You can also add your own.
261+ # See repo for details
262+ instance_count :
263+ deployment_script :
264+ # FMBench comes packaged with multiple inference scripts, such as scripts for SageMaker
265+ # and Bedrock. You can also add your own. See repo for details
266+ inference_script : bedrock_predictor.py
267+ inference_spec :
268+ split_input_and_parameters : no
269+ # this should match one of the sections in the inference_parameters section above
270+ parameter_set : bedrock
271+ # to stream responses, set stream to true. Enter the start and stop token for the
272+ # Time To First Token, Time To Last Token, and Time Per Output Token (TTFT, TTLT, TPOT)
273+ # metrics to be calculated. Responses from a Bedrock stream are received in chunks, so specify
274+ # the stop token only.
275+ stream : False
276+ start_token :
277+ stop_token : " <|eot_id|>"
278+ # runs are done for each combination of payload file and concurrency level
279+ payload_files :
280+ - payload_en_1-500.jsonl
281+ - payload_en_500-1000.jsonl
282+ - payload_en_1000-2000.jsonl
283+ - payload_en_2000-3000.jsonl
284+ - payload_en_3000-4000.jsonl
285+ - payload_en_4000-5000.jsonl
286+ - payload_en_5000-6000.jsonl
287+ # concurrency level refers to number of requests sent in parallel to an endpoint
288+ # the next set of requests is sent once responses for all concurrent requests have
289+ # been received.
290+
291+ # for streaming responses on bedrock, only a concurrency of 1 is supported on FMBench
292+ concurrency_levels :
293+ - 1
294+ # Environment variables to be passed to the container
295+ # this is not a fixed list, you can add more parameters as applicable.
296+ env :
197297 # Experiment for claude 3 sonnet
198298 - name : anthropic.claude-3-sonnet-20240229-v1:0
199299 model_name : anthropic.claude-3-sonnet-20240229-v1:0
@@ -329,7 +429,6 @@ experiments:
329429 # concurrency level refers to number of requests sent in parallel to an endpoint
330430 # the next set of requests is sent once responses for all concurrent requests have
331431 # been received.
332-
333432 # for streaming responses on bedrock, only a concurrency of 1 is supported on FMBench
334433 concurrency_levels :
335434 - 1
0 commit comments