3 changes: 2 additions & 1 deletion src/madengine/mad.py
@@ -267,7 +267,8 @@ def main():
     parser_database_update_table.set_defaults(func=update_table)
     # Database subcommand uploading to MongoDB
     parser_database_upload_mongodb = subparsers_database.add_parser('upload-mongodb', description="Update table in DB.", help='Update table in DB')
-    parser_database_upload_mongodb.add_argument('--csv-file-path', type=str, default='perf_entry.csv', help='Path to the csv file')
+    parser_database_upload_mongodb.add_argument('--csv-file-path', type=str, default='perf_entry.csv', help='Path to the csv file (for legacy perf.csv)')
+    parser_database_upload_mongodb.add_argument('--json-file-path', type=str, default=None, help='Path to the json file (for perf_entry_super.json)')
     parser_database_upload_mongodb.add_argument("--database-name", type=str, required=True, help="Name of the MongoDB database")
     parser_database_upload_mongodb.add_argument("--collection-name", type=str, required=True, help="Name of the MongoDB collection")
     parser_database_upload_mongodb.set_defaults(func=upload_mongodb)
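For context, a hypothetical invocation of the extended subcommand. The `database` parent-command name is inferred from the `subparsers_database` variable, and the entry point and database/collection values are placeholders, not taken from this PR:

    # Upload the enhanced JSON results instead of the legacy CSV
    python src/madengine/mad.py database upload-mongodb \
        --json-file-path perf_entry_super.json \
        --database-name perf_db \
        --collection-name model_runs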
83 changes: 82 additions & 1 deletion src/madengine/tools/run_models.py
@@ -50,8 +50,10 @@
 from madengine.core.constants import MODEL_DIR, PUBLIC_GITHUB_ROCM_KEY
 from madengine.core.timeout import Timeout
 from madengine.tools.update_perf_csv import update_perf_csv
+from madengine.tools.update_perf_super import update_perf_super_json
 from madengine.tools.csv_to_html import convert_csv_to_html
 from madengine.tools.discover_models import DiscoverModels
+from madengine.utils.config_parser import ConfigParser
 
 
 class RunDetails:
@@ -83,6 +85,7 @@ class RunDetails:
         data_download_duration (str): The duration of data download.
         build_number (str): The CI build number.
         additional_docker_run_options (str): The additional options used for docker run.
+        configs (dict or list or None): The configuration data from config files.
     """
 
     # Avoiding @property for ease of code, add if needed.
@@ -112,6 +115,7 @@ def __init__(self):
         self.data_download_duration = ""
         self.build_number = ""
         self.additional_docker_run_options = ""
+        self.configs = None
 
     def print_perf(self):
         """Print the performance results of a model.
@@ -133,13 +137,37 @@ def generate_json(self, json_name: str, multiple_results: bool = False) -> None:
         Raises:
             Exception: An error occurred while generating JSON file for performance results of a model.
         """
+        # Exclude configs from CSV workflow as it can contain list/dict values
+        # that cause issues with pandas DataFrame creation
         keys_to_exclude = (
-            {"model", "performance", "metric", "status"} if multiple_results else {}
+            {"model", "performance", "metric", "status", "configs"} if multiple_results
+            else {"configs"}
         )
         attributes = vars(self)
         output_dict = {x: attributes[x] for x in attributes if x not in keys_to_exclude}
         with open(json_name, "w") as outfile:
             json.dump(output_dict, outfile)
 
+    def generate_super_json(self, json_name: str, multiple_results: bool = False) -> None:
+        """Generate enhanced JSON file with config data for performance results.
+
+        This method is similar to generate_json but includes the configs field
+        for perf_entry_super.json generation.
+
+        Args:
+            json_name (str): The name of the JSON file.
+            multiple_results (bool): The status of multiple results. Default is False.
+
+        Raises:
+            Exception: An error occurred while generating JSON file for performance results of a model.
+        """
+        keys_to_exclude = (
+            {"model", "performance", "metric", "status"} if multiple_results else {}
+        )
+        attributes = vars(self)
+        output_dict = {x: attributes[x] for x in attributes if x not in keys_to_exclude}
+        with open(json_name, "w") as outfile:
+            json.dump(output_dict, outfile, indent=2)
+
 
 class RunModels:
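Both writers share the same exclusion pattern; note that the `else {}` branch is an empty dict rather than an empty set, which is harmless for the `not in` membership test. A minimal sketch of the difference between the two methods, with hypothetical field values (the configs shape is an assumption):

    details = RunDetails()
    details.model = "resnet50"
    details.performance = "1520.3"
    details.metric = "images_per_second"
    details.configs = {"batch_size": 256, "precision": "fp16"}  # assumed shape

    details.generate_json("perf_entry.json")              # drops "configs"
    details.generate_super_json("perf_entry_super.json")  # keeps "configs", pretty-printed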
@@ -915,6 +943,17 @@ def run_model(self, model_info: typing.Dict) -> bool:
             # Taking gpu arch from context assumes the host image and container have the same gpu arch.
             # Environment variable updates for MAD Public CI
             run_details.gpu_architecture = self.context.ctx["docker_env_vars"]["MAD_SYSTEM_GPU_ARCHITECTURE"]
+
+            # Parse and load config file if present in args for perf_entry_super.json
+            try:
+                config_parser = ConfigParser(scripts_base_dir=os.path.dirname(model_info.get("scripts", "")))
+                run_details.configs = config_parser.parse_and_load(
+                    model_info["args"],
+                    model_info.get("scripts", "")
+                )
+            except Exception as e:
+                print(f"Warning: Could not parse config file: {e}")
+                run_details.configs = None
 
             # Check the setting of shared memory size
             if "SHM_SIZE" in self.context.ctx:
@@ -955,6 +994,14 @@ def run_model(self, model_info: typing.Dict) -> bool:
                 # generate exception for testing
                 run_details.generate_json("perf_entry.json")
                 update_perf_csv(exception_result="perf_entry.json", perf_csv=self.args.output)
+
+                # Generate perf_entry_super.json
+                run_details.generate_super_json("perf_entry_super.json")
+                update_perf_super_json(
+                    exception_result="perf_entry_super.json",
+                    perf_super_json="perf_entry_super.json",
+                    scripts_base_dir=os.path.dirname(model_info.get("scripts", "")),
+                )
             else:
                 print(
                     f"Running model {run_details.model} on {run_details.gpu_architecture} architecture."
@@ -1056,12 +1103,30 @@ def run_model(self, model_info: typing.Dict) -> bool:
                         model_name=run_details.model,
                         common_info="common_info.json",
                     )
+
+                    # Generate perf_entry_super.json
+                    run_details.generate_super_json("common_info_super.json", multiple_results=True)
+                    update_perf_super_json(
+                        multiple_results=model_info['multiple_results'],
+                        perf_super_json="perf_entry_super.json",
+                        model_name=run_details.model,
+                        common_info="common_info_super.json",
+                        scripts_base_dir=os.path.dirname(model_info.get("scripts", "")),
+                    )
                 else:
                     run_details.generate_json("perf_entry.json")
                     update_perf_csv(
                         single_result="perf_entry.json",
                         perf_csv=self.args.output,
                     )
+
+                    # Generate perf_entry_super.json
+                    run_details.generate_super_json("perf_entry_super.json")
+                    update_perf_super_json(
+                        single_result="perf_entry_super.json",
+                        perf_super_json="perf_entry_super.json",
+                        scripts_base_dir=os.path.dirname(model_info.get("scripts", "")),
+                    )
 
                 self.return_status &= (run_details.status == 'SUCCESS')
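update_perf_super_json is defined in the new update_perf_super module, which this diff also does not show, so its merge semantics are assumed here: in the multiple-results branch, the shared run details (now carrying configs) go to common_info_super.json, and each per-model result is presumably folded into one entry per model, much like handle_multiple_results does for the CSV path. A sketch of one merged entry under those assumptions:

    # Assumed shapes, for illustration only.
    common_info = {"gpu_architecture": "gfx942", "configs": {"batch_size": 256}}
    result = {"model": "bs256", "performance": 1500.0, "metric": "images_per_sec"}
    entry = {**common_info, **result, "model": "resnet50_" + result["model"]}
    # entry == {"gpu_architecture": "gfx942", "configs": {...},
    #           "model": "resnet50_bs256", "performance": 1500.0, ...}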

@@ -1078,6 +1143,14 @@ def run_model(self, model_info: typing.Dict) -> bool:
                     exception_result="perf_entry.json",
                     perf_csv=self.args.output,
                 )
+
+                # Generate perf_entry_super.json
+                run_details.generate_super_json("perf_entry_super.json")
+                update_perf_super_json(
+                    exception_result="perf_entry_super.json",
+                    perf_super_json="perf_entry_super.json",
+                    scripts_base_dir=os.path.dirname(model_info.get("scripts", "")),
+                )
 
             except Exception as e:
                 self.return_status = False
@@ -1092,6 +1165,14 @@ def run_model(self, model_info: typing.Dict) -> bool:
                     exception_result="perf_entry.json",
                     perf_csv=self.args.output,
                 )
+
+                # Generate perf_entry_super.json
+                run_details.generate_super_json("perf_entry_super.json")
+                update_perf_super_json(
+                    exception_result="perf_entry_super.json",
+                    perf_super_json="perf_entry_super.json",
+                    scripts_base_dir=os.path.dirname(model_info.get("scripts", "")),
+                )
 
         return self.return_status
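The generate-then-merge pair now repeats verbatim on four exit paths (skipped-GPU exception, single result, and both exception handlers), with the multiple-results branch differing only in its arguments; a hypothetical helper, not part of this PR, that would collapse the repetition:

    def record_super_entry(run_details, model_info, **result_kwargs):
        """Write the fresh super entry and fold it into perf_entry_super.json."""
        # result_kwargs mirrors the call sites above, e.g. exception_result=...
        # or single_result=...
        run_details.generate_super_json("perf_entry_super.json")
        update_perf_super_json(
            perf_super_json="perf_entry_super.json",
            scripts_base_dir=os.path.dirname(model_info.get("scripts", "")),
            **result_kwargs,
        )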

31 changes: 28 additions & 3 deletions src/madengine/tools/update_perf_csv.py
@@ -73,7 +73,18 @@ def perf_entry_dict_to_csv(perf_entry: typing.Dict) -> None:
         perf_entry: The performance entry dictionary.
     """
     flatten_tags(perf_entry)
-    js_df = pd.DataFrame(perf_entry, index=[0])
+
+    # Convert any non-scalar values (list/dict) to JSON strings
+    # to avoid DataFrame creation errors when values don't match index length
+    perf_entry_safe = {}
+    for key, value in perf_entry.items():
+        if isinstance(value, (list, dict)):
+            # Convert lists and dicts to JSON strings
+            perf_entry_safe[key] = json.dumps(value) if value is not None else None
+        else:
+            perf_entry_safe[key] = value
+
+    js_df = pd.DataFrame(perf_entry_safe, index=[0])
     perf_entry_df_to_csv(js_df)
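A minimal standalone demonstration of the failure mode this guards against (values invented for illustration):

    import json
    import pandas as pd

    entry = {"model": "resnet50", "performance": 1500.0, "tags": ["ci", "nightly"]}
    # pd.DataFrame(entry, index=[0]) raises ValueError: the 2-item list is read
    # as a column of values and does not match the 1-row index.
    safe = {k: json.dumps(v) if isinstance(v, (list, dict)) else v
            for k, v in entry.items()}
    df = pd.DataFrame(safe, index=[0])  # one row; "tags" becomes a JSON string cell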


@@ -116,15 +127,29 @@ def handle_multiple_results(
         row = common_info_json.copy()
         model = r.pop("model")
         row["model"] = model_name + "_" + str(model)
-        row.update(r)
+
+        # Only extract essential result columns for perf.csv
+        # The full details with all metrics are preserved in perf_entry_super.json
+        row["performance"] = r.get("performance")
+        row["metric"] = r.get("metric")
 
         if row["performance"] is not None and pd.notna(row["performance"]):
             row["status"] = "SUCCESS"
         else:
             row["status"] = "FAILURE"
 
+        # Convert any non-scalar values (list/dict) to JSON strings
+        # to avoid DataFrame creation errors when values don't match index length
+        row_safe = {}
+        for key, value in row.items():
+            if isinstance(value, (list, dict)):
+                # Convert lists and dicts to JSON strings
+                row_safe[key] = json.dumps(value) if value is not None else None
+            else:
+                row_safe[key] = value
+
         final_multiple_results_df = pd.concat(
-            [final_multiple_results_df, pd.DataFrame(row, index=[0])], ignore_index=True
+            [final_multiple_results_df, pd.DataFrame(row_safe, index=[0])], ignore_index=True
         )
         # Reorder columns according to existing perf csv
         columns = perf_csv_df.columns.tolist()
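Replacing row.update(r) with explicit column picks narrows what reaches perf.csv; a sketch with a hypothetical per-result dict:

    r = {"model": "bs256", "performance": 1500.0, "metric": "images_per_sec",
         "latency_percentiles": {"p50": 1.2, "p99": 3.4}}  # hypothetical extras
    # Previously row.update(r) copied every key, nested extras included, into
    # the CSV row; now only the two columns perf.csv tracks are taken, and the
    # extras survive in perf_entry_super.json instead.
    picked = {"performance": r.get("performance"), "metric": r.get("metric")}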