
Commit 5accc93 (parent 1ed2595)

fix bug with endpoint metrics

7 files changed: +103, -38 lines

src/fmbench/3_run_inference.ipynb (58 additions, 13 deletions)

@@ -70,6 +70,7 @@
 "from fmbench.globals import *\n",
 "from datetime import datetime\n",
 "from datetime import timezone\n",
+"from datetime import timedelta\n",
 "from transformers import AutoTokenizer\n",
 "from sagemaker.predictor import Predictor\n",
 "import importlib.resources as pkg_resources\n",
@@ -494,7 +495,15 @@
 "    sys.modules[module_name] = inference_module\n",
 "    spec.loader.exec_module(inference_module)\n",
 "    # create a predictor from each endpoint in experiments\n",
-"    return inference_module.create_predictor(ep_name, inference_spec)"
+"    metadata: Optional[Dict] = None\n",
+"    if ep_info[0].get('endpoint'):\n",
+"        production_variants = ep_info[0].get('endpoint').get(\"ProductionVariants\")\n",
+"        if production_variants is not None:\n",
+"            variant_name = production_variants[0].get(\"VariantName\")\n",
+"            metadata = dict(variant_name=variant_name)\n",
+"            logger.info(f\"ep_name={ep_name}, variant_name={variant_name}\")\n",
+"    logger.info(f\"ep_name={ep_name}, metadata={metadata}\")\n",
+"    return inference_module.create_predictor(ep_name, inference_spec, metadata)"
 ]
 },
 {
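
The second hunk above replaces the two-argument create_predictor call with a three-argument form that forwards endpoint metadata (the production variant name) to the predictor. A minimal standalone sketch of that extraction, assuming ep_info[0]['endpoint'] holds a boto3 DescribeEndpoint response; the helper name variant_metadata is illustrative, not part of the repo:

    from typing import Dict, Optional

    import boto3

    def variant_metadata(endpoint_name: str) -> Optional[Dict]:
        # hypothetical helper: describe the endpoint and pull out the name
        # of its first production variant, for use in CloudWatch queries
        sm = boto3.client("sagemaker")
        desc = sm.describe_endpoint(EndpointName=endpoint_name)
        variants = desc.get("ProductionVariants")
        if not variants:
            return None
        return dict(variant_name=variants[0].get("VariantName"))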
@@ -611,6 +620,9 @@
 "\n",
 "# dataframe list to hold metrics for each endpoint\n",
 "df_ep_metrics_list = []\n",
+"# list for holding predictors and run start and end timestamps,\n",
+"# because CloudWatch metrics are available after a 1-minute delay\n",
+"predictors_and_metrics_timestamp_list = []\n",
 "\n",
 "for e_idx, experiment in enumerate(config['experiments']):\n",
 "    # Start timer for the experiment\n",
@@ -626,7 +638,8 @@
 "    prompt_tokens_total: int = 0\n",
 "    completion_tokens_total: int = 0\n",
 "    for concurrency, payload_file, split_payload in combination_data:\n",
-"        experiment_at_concurrency_start_dttm = datetime.now()\n",
+"        # track time at minute boundaries\n",
+"        experiment_at_concurrency_start_dttm = datetime.utcnow().replace(second=0, microsecond=0)\n",
 "        for chunk_index, chunk in enumerate(split_payload):\n",
 "            logger.info(f\"experiment_index={e_idx+1}/{num_experiments}, \"\n",
 "                        f\"concurrency={concurrency}, payload_file={payload_file}, \"\n",
@@ -666,17 +679,18 @@
 "                                       METRICS_PER_INFERENCE_DIR,\n",
 "                                       response_file_name)\n",
 "        # save endpoint metrics\n",
-"        df_ep_metrics = predictor.get_metrics(experiment_at_concurrency_start_dttm,\n",
-"                                              datetime.now())\n",
-"        if df_ep_metrics is not None:\n",
-"            # we want concurrency after timestamp, endpoint name\n",
-"            df_ep_metrics.insert(loc=2,\n",
-"                                 column='instance_type',\n",
-"                                 value=experiment['instance_type'])\n",
-"            df_ep_metrics.insert(loc=3,\n",
-"                                 column='concurrency',\n",
-"                                 value=concurrency)\n",
-"            df_ep_metrics_list.append(df_ep_metrics)\n",
+"        experiment_at_concurrency_end_dttm = datetime.utcnow().replace(second=0, microsecond=0)\n",
+"        # if the end time and start time are in the same minute then move the end time\n",
+"        # to the next minute, otherwise CloudWatch would return an empty response\n",
+"        time_delta_in_seconds = (experiment_at_concurrency_end_dttm - experiment_at_concurrency_start_dttm).seconds\n",
+"        if time_delta_in_seconds < 60:\n",
+"            experiment_at_concurrency_end_dttm += timedelta(seconds=60)\n",
+"\n",
+"        predictors_and_metrics_timestamp_list.append((predictor,\n",
+"                                                      experiment_at_concurrency_start_dttm,\n",
+"                                                      experiment_at_concurrency_end_dttm,\n",
+"                                                      concurrency,\n",
+"                                                      experiment['instance_type']))\n",
 "\n",
 "    # Experiment done, stopping the timer for this given experiment\n",
 "    experiment_end_time = time.perf_counter()\n",
@@ -715,6 +729,37 @@
 "               f\"duration={experiment_duration:.6f} seconds, exp_cost={exp_cost:.6f}, done\")"
 ]
 },
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {},
+"outputs": [],
+"source": [
+"# add a 1-minute sleep to be able to grab the CW metrics from the last run\n",
+"sleep_time: int = 60\n",
+"logger.info(f\"going to sleep for {sleep_time}s before querying metrics from the endpoint\")\n",
+"time.sleep(sleep_time)\n",
+"logger.info(f\"done sleeping for {sleep_time}s, querying metrics from the endpoint now\")\n",
+"\n",
+"for predictor, \\\n",
+"    experiment_at_concurrency_start_dttm, \\\n",
+"    experiment_at_concurrency_end_dttm, \\\n",
+"    concurrency, \\\n",
+"    instance_type in predictors_and_metrics_timestamp_list:\n",
+"    # save endpoint metrics\n",
+"    df_ep_metrics = predictor.get_metrics(experiment_at_concurrency_start_dttm,\n",
+"                                          experiment_at_concurrency_end_dttm)\n",
+"    if df_ep_metrics is not None:\n",
+"        # we want concurrency after timestamp, endpoint name\n",
+"        df_ep_metrics.insert(loc=2,\n",
+"                             column='instance_type',\n",
+"                             value=instance_type)\n",
+"        df_ep_metrics.insert(loc=3,\n",
+"                             column='concurrency',\n",
+"                             value=concurrency)\n",
+"        df_ep_metrics_list.append(df_ep_metrics)"
+]
+},
 {
 "cell_type": "code",
 "execution_count": null,

src/fmbench/configs/llama3/8b/config-llama3-8b-instruct-p5-djl-lmi-dist.yml (2 additions, 2 deletions)

@@ -45,11 +45,11 @@ s3_read_data:
 ## section that enables container to run notebooks and python scripts automatically
 run_steps:
     0_setup.ipynb: yes
-    1_generate_data.ipynb: no
+    1_generate_data.ipynb: yes
     2_deploy_model.ipynb: yes
     3_run_inference.ipynb: yes
     4_model_metric_analysis.ipynb: yes
-    5_cleanup.ipynb: no
+    5_cleanup.ipynb: yes


 datasets:

src/fmbench/scripts/bedrock_predictor.py (4 additions, 3 deletions)

@@ -29,7 +29,8 @@ class BedrockPredictor(FMBenchPredictor):
     # overriding abstract method
     def __init__(self,
                  endpoint_name: str,
-                 inference_spec: Optional[Dict]):
+                 inference_spec: Optional[Dict],
+                 metadata: Optional[Dict]):
         try:
             # initialize private member variables
             self._endpoint_name = endpoint_name
@@ -221,9 +222,9 @@ def inference_parameters(self) -> Dict:
                        top_p=self._top_p)


-def create_predictor(endpoint_name: str, inference_spec: Optional[Dict]):
+def create_predictor(endpoint_name: str, inference_spec: Optional[Dict], metadata: Optional[Dict]):
     if endpoint_name in EMBEDDING_MODELS:
         logger.error(f"embeddings models not supported for now")
         return None
     else:
-        return BedrockPredictor(endpoint_name, inference_spec)
+        return BedrockPredictor(endpoint_name, inference_spec, metadata)

src/fmbench/scripts/fmbench_predictor.py (2 additions, 1 deletion)

@@ -9,7 +9,8 @@ class FMBenchPredictor(ABC):
     @abstractmethod
     def __init__(self,
                  endpoint_name: str,
-                 inference_spec: Optional[Dict]):
+                 inference_spec: Optional[Dict],
+                 metadata: Optional[Dict]):
         pass

     @abstractmethod
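
The concrete predictors (bedrock_predictor.py above, rest_predictor.py and sagemaker_predictor.py below) all implement this widened three-argument contract. A self-contained sketch of the pattern; the EchoPredictor class and this trimmed-down base are hypothetical, for illustration only:

    from abc import ABC, abstractmethod
    from typing import Dict, Optional

    class Predictor(ABC):
        # mirrors the abstract constructor above, other methods omitted
        @abstractmethod
        def __init__(self,
                     endpoint_name: str,
                     inference_spec: Optional[Dict],
                     metadata: Optional[Dict]):
            pass

    class EchoPredictor(Predictor):
        def __init__(self, endpoint_name, inference_spec, metadata):
            self.endpoint_name = endpoint_name
            self.inference_spec = inference_spec or {}
            # metadata carries deployment details such as the variant name
            self.variant_name = (metadata or {}).get("variant_name")

    def create_predictor(endpoint_name: str,
                         inference_spec: Optional[Dict],
                         metadata: Optional[Dict]):
        return EchoPredictor(endpoint_name, inference_spec, metadata)

    predictor = create_predictor("my-endpoint", None, {"variant_name": "AllTraffic"})
    print(predictor.variant_name)  # -> AllTraffic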

src/fmbench/scripts/rest_predictor.py (4 additions, 3 deletions)

@@ -20,7 +20,8 @@ class RESTPredictor(FMBenchPredictor):
     # overriding abstract method
     def __init__(self,
                  endpoint_name: str,
-                 inference_spec: Optional[Dict]):
+                 inference_spec: Optional[Dict],
+                 metadata: Optional[Dict]):
         try:
             self._endpoint_name: str = endpoint_name
             self._inference_spec: Dict = inference_spec
@@ -123,5 +124,5 @@ def inference_parameters(self) -> Dict:
         """The inference parameters property."""
         return self._inference_spec.get("parameters")

-def create_predictor(endpoint_name: str, inference_spec: Optional[Dict]):
-    return RESTPredictor(endpoint_name, inference_spec)
+def create_predictor(endpoint_name: str, inference_spec: Optional[Dict], metadata: Optional[Dict]):
+    return RESTPredictor(endpoint_name, inference_spec, metadata)

src/fmbench/scripts/sagemaker_metrics.py (24 additions, 12 deletions)

@@ -14,6 +14,7 @@
 logger = logging.getLogger(__name__)

 def _get_endpoint_utilization_metrics(endpoint_name: str,
+                                      variant_name: str,
                                       start_time: datetime,
                                       end_time: datetime,
                                       period: int = 60) -> pd.DataFrame:
@@ -42,6 +43,8 @@ def _get_endpoint_utilization_metrics(endpoint_name: str,
     namespace = "/aws/sagemaker/Endpoints"

     for metric_name in metrics:
+        logger.debug(f"_get_endpoint_utilization_metrics, endpoint_name={endpoint_name}, variant_name={variant_name}, "
+                     f"metric_name={metric_name}, start_time={start_time}, end_time={end_time}")
         response = client.get_metric_statistics(
             Namespace=namespace,
             MetricName=metric_name,
@@ -52,15 +55,15 @@ def _get_endpoint_utilization_metrics(endpoint_name: str,
                 },
                 {
                     'Name': 'VariantName',
-                    'Value': 'AllTraffic'
+                    'Value': variant_name
                 }
             ],
             StartTime=start_time,
             EndTime=end_time,
             Period=period,
             Statistics=['Average']  # You can also use 'Sum', 'Minimum', 'Maximum', 'SampleCount'
         )
-
+        logger.debug(response)
         for datapoint in response['Datapoints']:
             data.append({
                 'EndpointName': endpoint_name,
@@ -82,6 +85,7 @@ def _get_endpoint_utilization_metrics(endpoint_name: str,


 def _get_endpoint_invocation_metrics(endpoint_name: str,
+                                     variant_name: str,
                                      start_time: datetime,
                                      end_time: datetime,
                                      period: int = 60):
@@ -114,6 +118,8 @@ def _get_endpoint_invocation_metrics(endpoint_name: str,
         stat = 'Average'
     else:
         stat = 'Sum'
+    logger.debug(f"_get_endpoint_invocation_metrics, endpoint_name={endpoint_name}, variant_name={variant_name}, "
+                 f"metric_name={metric_name}, start_time={start_time}, end_time={end_time}")
     # Get metric data for the specified metric
     response = client.get_metric_data(
         MetricDataQueries=[
@@ -130,7 +136,7 @@ def _get_endpoint_invocation_metrics(endpoint_name: str,
                     },
                     {
                         'Name': 'VariantName',
-                        'Value': 'AllTraffic'
+                        'Value': variant_name
                     }
                 ]
             },
@@ -143,7 +149,7 @@ def _get_endpoint_invocation_metrics(endpoint_name: str,
         StartTime=start_time,
         EndTime=end_time
     )
-
+    logger.debug(response)
     # Extract the data points from the response
     timestamps = response['MetricDataResults'][0]['Timestamps']
     values = response['MetricDataResults'][0]['Values']
@@ -169,6 +175,7 @@ def _get_endpoint_invocation_metrics(endpoint_name: str,


 def get_endpoint_metrics(endpoint_name: str,
+                         variant_name: str,
                          start_time: datetime,
                          end_time: datetime,
                          period: int = 60):
@@ -188,26 +195,31 @@ def get_endpoint_metrics(endpoint_name: str,
     endpoint_metrics_df: Optional[pd.DataFrame] = None
     try:
         logger.info(f"get_endpoint_metrics, going to retrieve endpoint utilization metrics for "
-                    f"endpoint={endpoint_name}")
+                    f"endpoint={endpoint_name}, variant_name={variant_name}, start_time={start_time}, "
+                    f"end_time={end_time}, period={period}")
         utilization_metrics_df = _get_endpoint_utilization_metrics(endpoint_name=endpoint_name,
+                                                                   variant_name=variant_name,
                                                                    start_time=start_time,
                                                                    end_time=end_time,
                                                                    period=period)
-        logger.info(f"get_endpoint_metrics, going to retrieve invocation metrics for "
-                    f"endpoint={endpoint_name}")
-        invocation_metrics_df = _get_endpoint_invocation_metrics(endpoint_name=endpoint_name,
-                                                                 start_time=start_time,
-                                                                 end_time=end_time,
-                                                                 period=period)
+        logger.info(f"get_endpoint_metrics, going to retrieve endpoint invocation metrics for "
+                    f"endpoint={endpoint_name}, variant_name={variant_name}, start_time={start_time}, "
+                    f"end_time={end_time}, period={period}")
+        invocation_metrics_df = _get_endpoint_invocation_metrics(endpoint_name=endpoint_name,
+                                                                 variant_name=variant_name,
+                                                                 start_time=start_time,
+                                                                 end_time=end_time,
+                                                                 period=period)

         endpoint_metrics_df = pd.merge(utilization_metrics_df,
                                        invocation_metrics_df,
                                        on=['Timestamp', 'EndpointName'],
                                        how='outer')
         logger.info(f"get_endpoint_metrics, shape of invocation and utilization metrics for "
                     f"endpoint={endpoint_name} is {endpoint_metrics_df.shape}")
+        logger.info(f"get_endpoint_metrics, endpoint_metrics_df={endpoint_metrics_df.head()}")
     except Exception as e:
         logger.error(f"get_endpoint_metrics, exception occurred while retrieving metrics for {endpoint_name}, "
                      f"exception={e}")

     return endpoint_metrics_df
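
The net effect of this file's changes: every CloudWatch query is now scoped to the endpoint's actual production variant instead of the hardcoded 'AllTraffic'. A minimal sketch of such a query; the endpoint and variant names below are placeholders:

    from datetime import datetime, timedelta, timezone

    import boto3

    cw = boto3.client("cloudwatch")
    # query a 10-minute window aligned to minute boundaries
    end_time = datetime.now(timezone.utc).replace(second=0, microsecond=0)
    start_time = end_time - timedelta(minutes=10)
    response = cw.get_metric_statistics(
        Namespace="/aws/sagemaker/Endpoints",
        MetricName="CPUUtilization",
        Dimensions=[
            {"Name": "EndpointName", "Value": "my-endpoint"},  # placeholder
            {"Name": "VariantName", "Value": "my-variant"},    # placeholder
        ],
        StartTime=start_time,
        EndTime=end_time,
        Period=60,
        Statistics=["Average"],
    )
    print(response["Datapoints"])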

src/fmbench/scripts/sagemaker_predictor.py (9 additions, 4 deletions)

@@ -21,10 +21,15 @@ class SageMakerPredictor(FMBenchPredictor):
     # overriding abstract method
     def __init__(self,
                  endpoint_name: str,
-                 inference_spec: Optional[Dict]):
+                 inference_spec: Optional[Dict],
+                 metadata: Optional[Dict]):
         self._predictor: Optional[sagemaker.base_predictor.Predictor] = None
         self._endpoint_name: str = endpoint_name
         self._inference_spec: Dict = inference_spec
+        self._variant_name: Optional[str] = None
+        if metadata is not None:
+            self._variant_name = metadata.get("variant_name")
+
         try:
             # Create a SageMaker Predictor object
             self._predictor = Predictor(
@@ -115,13 +120,13 @@ def get_metrics(self,
                     start_time: datetime,
                     end_time: datetime,
                     period: int = 60) -> pd.DataFrame:
-        return get_endpoint_metrics(self._endpoint_name, start_time, end_time)
+        return get_endpoint_metrics(self._endpoint_name, self._variant_name, start_time, end_time)

     @property
     def inference_parameters(self) -> Dict:
         """The inference parameters property."""
         return self._inference_spec.get("parameters")


-def create_predictor(endpoint_name: str, inference_spec: Optional[Dict]):
-    return SageMakerPredictor(endpoint_name, inference_spec)
+def create_predictor(endpoint_name: str, inference_spec: Optional[Dict], metadata: Optional[Dict]):
+    return SageMakerPredictor(endpoint_name, inference_spec, metadata)
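
Putting it together, a hedged usage sketch: constructing the predictor with variant metadata so that get_metrics scopes its CloudWatch query to that variant. The endpoint name and window are placeholders, and the import path assumes the repo layout shown above:

    from datetime import datetime, timedelta

    from fmbench.scripts.sagemaker_predictor import create_predictor

    metadata = {"variant_name": "AllTraffic"}  # as extracted in 3_run_inference.ipynb
    predictor = create_predictor("my-endpoint", {"parameters": {}}, metadata)

    # query a window at least one CloudWatch period (60s) wide, as the notebook does
    end_time = datetime.utcnow().replace(second=0, microsecond=0)
    df_ep_metrics = predictor.get_metrics(end_time - timedelta(minutes=5), end_time)
    if df_ep_metrics is not None:
        print(df_ep_metrics.head())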
