def module_info(info: str) -> dict:
    """
    Parse the ``whatis`` entries out of the contents of a Lua module file.

    Only entries written as ``whatis([==[Key: value]==])`` are recognised. An entry
    may span several lines: it starts on a line beginning with ``whatis([==[`` and
    ends on the first following line that ends with ``]==])``. The lines of such an
    entry are joined with a single space so that words on adjacent lines do not fuse.

    Entries without a ``:`` separator, and entries that are never terminated, are
    ignored instead of raising. A file without any ``whatis`` entry yields an empty
    dictionary.

    @param info: String with the contents of the lua file.
    @return: Dictionary mapping every whatis key (e.g. "Description", "Homepage",
             "Extensions") to its value, both stripped of surrounding whitespace.
    """
    opening = "whatis([==["
    closing = "]==])"

    whatis = {}
    # Fragments of the entry currently being read; None while outside of an entry.
    pieces = None
    for raw_line in info.split("\n"):
        # Trailing whitespace (e.g. "\r" from CRLF files) must not hide the terminator.
        line = raw_line.rstrip()

        if line.startswith(opening):
            # A new entry starts here. If the previous one was never terminated it is
            # dropped, so it cannot swallow the entries that follow it.
            pieces = [line[len(opening):]]
        elif pieces is not None:
            pieces.append(line)
        else:
            continue

        if not pieces[-1].endswith(closing):
            # The entry continues on the next line.
            continue

        pieces[-1] = pieces[-1][:-len(closing)]
        content = " ".join(piece.strip() for piece in pieces if piece.strip()).strip('"')
        pieces = None

        key, separator, value = content.partition(":")
        if not separator:
            # Not a "Key: value" entry, nothing to store.
            continue
        key = key.strip()
        if key == "Description" and key in whatis:
            # Only the first description is used; later duplicates are ignored.
            continue
        whatis[key] = value.strip()
    return whatis
def get_extra_info_ugent(json_data, paths) -> dict:
    """
    Add the description, homepage and extensions found in the Lua module files
    to the detailed JSON data.

    For every version of every software (except Java) the matching ``<module>.lua``
    file is read and its ``whatis`` entries are parsed with ``module_info``. The
    description and homepage are stored on the software entry, the extensions on
    the version entry. ``json_data`` is modified in place.

    @param json_data: Detailed JSON data; ``json_data['software'][name]['versions'][mod]['clusters']``
                      must be a non-empty list of cluster names.
    @param paths: Dictionary mapping a cluster name to its module path entries; the first
                  entry is used and its last character (the trailing ":") is dropped.
    @return: The same ``json_data`` dictionary, extended with 'description', 'homepage'
             and 'extensions' where available.
    """
    volta_path = "/apps/gent/RHEL8/cascadelake-volta-ib/modules/all/"
    ampere_path = "/apps/gent/RHEL8/cascadelake-ampere-ib/modules/all/"

    # Modules whose Lua file has to be read from a fixed location.
    path_by_module = {
        "imkl/2020.4.304-NVHPC-21.2": volta_path,
        'OpenFold/1.0.1-foss-2022a-CUDA-11.7.0': ampere_path,
        'OpenMM/7.7.0-foss-2022a-CUDA-11.7.0': ampere_path,
        'PyTorch-Lightning/1.7.7-foss-2022a-CUDA-11.7.0': ampere_path,
        'PyTorch/1.12.1-foss-2022a-CUDA-11.7.0': ampere_path,
        'Triton/1.1.1-foss-2022a-CUDA-11.7.0': ampere_path,
    }
    # Clusters for which a fixed location is used instead of the collected path.
    path_by_cluster = {
        "donphan": "/apps/gent/RHEL8/cascadelake-ib/modules/all/",
        "joltik": volta_path,
    }

    for software, software_data in json_data['software'].items():
        if software == "Java":
            # Java has a strange naming scheme which causes problems
            continue

        for mod, mod_data in software_data['versions'].items():
            # The module file is looked up via the first cluster that provides this version.
            cluster = mod_data['clusters'][0]
            if mod in path_by_module:
                base_path = path_by_module[mod]
            elif cluster in path_by_cluster:
                base_path = path_by_cluster[cluster]
            else:
                # Drop the trailing ":" of the collected path entry.
                base_path = paths[cluster][0][:-1] + "/"

            # Context manager so the handle is closed again for every module file.
            with open(base_path + mod + ".lua", "r") as file:
                info = file.read()
            if info == "":
                continue

            whatis = module_info(info)
            if "Description" in whatis:
                software_data['description'] = whatis['Description']
            if "Homepage" in whatis:
                software_data['homepage'] = whatis['Homepage']
            if "Extensions" in whatis:
                mod_data["extensions"] = whatis['Extensions']
    return json_data
", end="", flush=True) module_swap(cluster) cluster_name = cluster.split("/", maxsplit=1)[1] + mapping[cluster_name] = module_avail(filter_fn=filter_fn_gent_software_path) data[cluster_name] = module_avail(filter_fn=filter_fn_gent_modules) print(f"found {len(data[cluster_name])} modules!") print("All data collected!\n") - return data + return data, mapping # -------------------------------------------------------------------------------------------------------- @@ -343,7 +432,7 @@ def generate_software_table_data(software_data: dict, clusters: list) -> list: row = [module_name] for cluster in clusters: - row += ("x" if cluster in available else "-") + row += ("x" if cluster in available["clusters"] else "-") table_data += row return table_data @@ -370,6 +459,13 @@ def generate_software_detail_page( filename = f"{path}/{software_name}.md" md_file = MdUtils(file_name=filename, title=f"{software_name}") + if 'description' in software_data.keys(): + description = software_data['description'] + md_file.new_paragraph(f"{description}") + if 'homepage' in software_data.keys(): + homepage = software_data['homepage'] + md_file.new_paragraph(f"{homepage}") + md_file.new_header(level=1, title="Available modules") md_file.new_paragraph(f"The overview below shows which {software_name} installations are available per HPC-UGent " @@ -386,6 +482,13 @@ def generate_software_detail_page( text=generate_software_table_data(sorted_versions, clusters) ) + for version, details in list(sorted_versions.items())[::-1]: + if 'extensions' in details: + md_file.new_paragraph(f"### {version}") + md_file.new_paragraph("This is a list of extensions included in the module:") + packages = details['extensions'] + md_file.new_paragraph(f"{packages}") + md_file.create_md_file() # Remove the TOC @@ -570,20 +673,20 @@ def generate_json_detailed_data(modules: dict) -> dict: # If the version is not yet present, add it. 
if mod not in json_data["software"][software]["versions"]: - json_data["software"][software]["versions"][mod] = [] + json_data["software"][software]["versions"][mod] = {'clusters': []} # If the cluster is not yet present, add it. if cluster not in json_data["software"][software]["clusters"]: json_data["software"][software]["clusters"].append(cluster) # If the cluster is not yet present, add it. - if cluster not in json_data["software"][software]["versions"][mod]: - json_data["software"][software]["versions"][mod].append(cluster) + if cluster not in json_data["software"][software]["versions"][mod]["clusters"]: + json_data["software"][software]["versions"][mod]["clusters"].append(cluster) return json_data -def generate_json_detailed(modules: dict, path_data_dir: str) -> str: +def generate_json_detailed(json_data: dict, path_data_dir: str) -> str: """ Generate the detailed JSON. @@ -591,7 +694,6 @@ def generate_json_detailed(modules: dict, path_data_dir: str) -> str: @param path_data_dir: Path to the directory where the JSON will be placed. @return: Absolute path to the json file. 
""" - json_data = generate_json_detailed_data(modules) filepath = os.path.join(path_data_dir, "json_data_detail.json") with open(filepath, 'w') as outfile: json.dump(json_data, outfile) diff --git a/scripts/available_software/test.sh b/scripts/available_software/test.sh index 85fd00b1a50..33e8376a815 100755 --- a/scripts/available_software/test.sh +++ b/scripts/available_software/test.sh @@ -1,3 +1,3 @@ #!/bin/bash -PYTHONPATH=$PWD:$PYTHONPATH pytest -v -s \ No newline at end of file +PYTHONPATH=$PWD:$PYTHONPATH pytest -v -s diff --git a/scripts/available_software/tests/data/test_json_simple_sol_detail.json b/scripts/available_software/tests/data/test_json_simple_sol_detail.json index 64bc3389132..8ff9aa66455 100644 --- a/scripts/available_software/tests/data/test_json_simple_sol_detail.json +++ b/scripts/available_software/tests/data/test_json_simple_sol_detail.json @@ -1 +1 @@ -{"clusters": ["dialga", "pikachu"], "software": {"cfd": {"clusters": ["dialga", "pikachu"], "versions": {"cfd/1.0": ["dialga", "pikachu"], "cfd/2.0": ["dialga", "pikachu"], "cfd/24": ["dialga", "pikachu"], "cfd/5.0": ["dialga", "pikachu"], "cfd/2.0afqsdf": ["dialga", "pikachu"], "cfd/3.0": ["pikachu"]}}, "Markov": {"clusters": ["dialga"], "versions": {"Markov/hidden-1.0.5": ["dialga"], "Markov/hidden-1.0.10": ["dialga"]}}, "science": {"clusters": ["dialga", "pikachu"], "versions": {"science/5.3.0": ["dialga", "pikachu"], "science/7.2.0": ["dialga", "pikachu"]}}, "llm": {"clusters": ["pikachu"], "versions": {"llm/20230627": ["pikachu"]}}}, "time_generated": "Thu, 31 Aug 2023 at 14:00:22 CEST"} \ No newline at end of file +{"clusters": ["dialga", "pikachu"], "software": {"cfd": {"clusters": ["dialga", "pikachu"], "versions": {"cfd/1.0": {"clusters": ["dialga", "pikachu"]}, "cfd/2.0": {"clusters": ["dialga", "pikachu"]}, "cfd/24": {"clusters": ["dialga", "pikachu"]}, "cfd/5.0": {"clusters": ["dialga", "pikachu"]}, "cfd/2.0afqsdf": {"clusters": ["dialga", "pikachu"]}, "cfd/3.0": {"clusters": 
["pikachu"]}}}, "Markov": {"clusters": ["dialga"], "versions": {"Markov/hidden-1.0.5": {"clusters": ["dialga"]}, "Markov/hidden-1.0.10": {"clusters": ["dialga"]}}}, "science": {"clusters": ["dialga", "pikachu"], "versions": {"science/5.3.0": {"clusters": ["dialga", "pikachu"]}, "science/7.2.0": {"clusters": ["dialga", "pikachu"]}}}, "llm": {"clusters": ["pikachu"], "versions": {"llm/20230627": {"clusters": ["pikachu"]}}}}, "time_generated": "Thu, 31 Aug 2023 at 14:00:22 CEST"} diff --git a/scripts/available_software/tests/test_data.py b/scripts/available_software/tests/test_data.py index 57a1b3b5073..ff358c010fa 100644 --- a/scripts/available_software/tests/test_data.py +++ b/scripts/available_software/tests/test_data.py @@ -20,7 +20,7 @@ def setup_class(cls): # --------------------------- def test_data_ugent(self): - sol = modules_ugent() + sol = modules_ugent()[0] assert len(sol) == 2 assert len(sol["dialga"]) == 13 assert len(sol["pikachu"]) == 15 diff --git a/scripts/available_software/tests/test_json.py b/scripts/available_software/tests/test_json.py index dd84b92856c..80df5c2cc40 100644 --- a/scripts/available_software/tests/test_json.py +++ b/scripts/available_software/tests/test_json.py @@ -1,7 +1,8 @@ from available_software import (generate_json_overview_data, generate_json_overview, modules_ugent, - generate_json_detailed) + generate_json_detailed, + generate_json_detailed_data) import os import json @@ -32,7 +33,7 @@ def teardown_class(cls): # --------------------------- def test_json_generate_simple(self): - modules = modules_ugent() + modules = modules_ugent()[0] json_data = generate_json_overview_data(modules) assert len(json_data.keys()) == 3 assert list(json_data["clusters"]) == ["dialga", "pikachu"] @@ -44,7 +45,7 @@ def test_json_generate_simple(self): } def test_json_simple(self): - modules = modules_ugent() + modules = modules_ugent()[0] json_path = generate_json_overview(modules, ".") with open(json_path) as json_data: data_generated = 
json.load(json_data) @@ -57,8 +58,9 @@ def test_json_simple(self): assert data_generated["clusters"] == data_solution["clusters"] def test_json_detail_simple(self): - modules = modules_ugent() - json_path = generate_json_detailed(modules, ".") + modules = modules_ugent()[0] + json_data = generate_json_detailed_data(modules) + json_path = generate_json_detailed(json_data, ".") assert os.path.exists("json_data_detail.json") with open(json_path) as json_data: diff --git a/scripts/available_software/tests/test_md.py b/scripts/available_software/tests/test_md.py index d4b757a3fdd..7d45f68ddb6 100644 --- a/scripts/available_software/tests/test_md.py +++ b/scripts/available_software/tests/test_md.py @@ -28,7 +28,7 @@ def teardown_class(cls): # --------------------------- def test_table_generate_simple(self): - simple_data = get_unique_software_names(modules_ugent()) + simple_data = get_unique_software_names(modules_ugent()[0]) table_data, col, row = generate_table_data(simple_data) assert col == 3 assert row == 5 @@ -36,7 +36,7 @@ def test_table_generate_simple(self): def test_md_simple(self): md_file = MdUtils(file_name='test_simple', title='Overview Modules') - simple_data = get_unique_software_names(modules_ugent()) + simple_data = get_unique_software_names(modules_ugent()[0]) generate_module_table(simple_data, md_file) md_file.create_md_file() assert os.path.exists("test_simple.md")