diff --git a/.gitignore b/.gitignore index 631c2447..6e8d6b80 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,5 @@ # Ignore build directories build/ Testing/ +# Ignore python runtime caches +tools/__pycache__/ diff --git a/docs/CMakeLists.txt b/docs/CMakeLists.txt index 875f42c3..14aeb610 100644 --- a/docs/CMakeLists.txt +++ b/docs/CMakeLists.txt @@ -2,7 +2,6 @@ project(msft_proxy_docs) find_package(Python3 REQUIRED COMPONENTS Interpreter) -file(GLOB_RECURSE DOC_FILES "*.md") set(EXTRACTION_SCRIPT ${CMAKE_SOURCE_DIR}/tools/extract_example_code_from_docs.py) set(EXAMPLES_DIR ${CMAKE_BINARY_DIR}/examples_from_docs) file(MAKE_DIRECTORY "${EXAMPLES_DIR}") @@ -12,17 +11,7 @@ execute_process( COMMAND_ERROR_IS_FATAL ANY ) -file(GLOB EXAMPLE_SOURCES "${EXAMPLES_DIR}/*.cpp") -set_source_files_properties(${EXAMPLE_SOURCES} PROPERTIES GENERATED TRUE) -foreach(SOURCE ${EXAMPLE_SOURCES}) - get_filename_component(EXECUTABLE_NAME ${SOURCE} NAME_WE) - add_executable(${EXECUTABLE_NAME} ${SOURCE}) - target_link_libraries(${EXECUTABLE_NAME} PRIVATE msft_proxy) - if (MSVC) - target_compile_options(${EXECUTABLE_NAME} PRIVATE /W4) - elseif(CMAKE_CXX_COMPILER_ID STREQUAL "Clang") - target_compile_options(${EXECUTABLE_NAME} PRIVATE -Wall -Wextra -Wpedantic -Wno-c++2b-extensions) - else() - target_compile_options(${EXECUTABLE_NAME} PRIVATE -Wall -Wextra -Wpedantic) - endif() -endforeach() +add_subdirectory( + ${EXAMPLES_DIR} + ${EXAMPLES_DIR}/build +) \ No newline at end of file diff --git a/docs/cpp20_modules_support.cmake.in b/docs/cpp20_modules_support.cmake.in new file mode 100644 index 00000000..7a1e6bf7 --- /dev/null +++ b/docs/cpp20_modules_support.cmake.in @@ -0,0 +1,14 @@ +if(PROXY_BUILD_MODULES) + add_executable($NAME$ code_2.cpp) + + target_link_libraries($NAME$ msft_proxy::module) + target_compile_features($NAME$ PRIVATE cxx_std_20) + + target_sources($NAME$ PRIVATE + FILE_SET CXX_MODULES + FILES code_1.cpp + ) + + $COMMON$ + +endif() diff --git 
import filecmp
import os
import shutil
import sys
from typing import Generator, List

import markdown_parser


def extract_cpp_code_in_example(text: str) -> List[str]:
    """
    Extract all C++ code blocks that occur within the "Example" section of a Markdown document.

    The section starts at a level-2 heading whose text is exactly ``Example``
    and ends at the next heading of level 1 or 2. Deeper headings (level 3 and
    below) do not end the section.

    Args:
        text (str): The input Markdown text.

    Returns:
        List[str]: The contents of every fenced code block tagged ``cpp``
        (case-insensitive) found inside the section, in document order.
    """
    cpp_code_blocks: List[str] = []
    in_example = False

    for element_type, content in markdown_parser.parse_markdown_elements(text):
        if element_type == 'heading':
            level = content['level']
            if level == 2 and content['text'].strip() == 'Example':
                in_example = True
            elif level <= 2:
                # A sibling (##) or a parent (#) heading closes the section;
                # checking only level 2 would leak code blocks that follow a
                # later top-level heading into the example.
                in_example = False
        elif element_type == 'code' and in_example:
            if content['language'].lower() == 'cpp':
                cpp_code_blocks.append(content['content'])

    return cpp_code_blocks


def extract_cpp_code_example_from_md_file(md_path) -> List[str]:
    """
    Extract all C++ code blocks under the '## Example' section of a Markdown file.

    Args:
        md_path (str): Path to the Markdown file (read as UTF-8).

    Returns:
        List[str]: A list of strings, each containing a C++ code block.
    """
    with open(md_path, 'r', encoding='utf-8') as f:
        return extract_cpp_code_in_example(f.read())
def write_code_file(cpp_path, code, md_path):
    """
    Write the extracted C++ code to a file with a header.

    The file starts with a two-line "auto-generated" comment naming the source
    Markdown file, followed by a blank line and the code itself. The output is
    always terminated by a newline so that the generated translation unit is a
    well-formed text file.

    Args:
        cpp_path (str): Path to the output C++ file.
        code (str): The C++ code to write.
        md_path (str): Path to the original Markdown file for the header.
    """
    header = f"// This file was auto-generated from: {md_path}\n// Do not edit this file manually.\n\n"
    with open(cpp_path, 'w', encoding='utf-8') as out:
        out.write(header)
        out.write(code)
        # Code blocks arrive without their final line break; add it back
        # unless the caller already supplied one.
        if code and not code.endswith('\n'):
            out.write('\n')
def generate_subdir_cmake(subdir_path, target_name, cpp_filenames, md_path):
    """
    Generate a CMakeLists.txt in the subdirectory using a customizable template.

    If a file with the same base name as the Markdown file but the extension
    ``.cmake.in`` exists next to it, that file is used as the template;
    otherwise a default template building a single executable is used.

    The following placeholders are replaced:
      - $COMMON$           -> Currently expands to $DIAGNOSTIC_FLAGS$
      - $DIAGNOSTIC_FLAGS$ -> Per-compiler warning options for the target
      - $NAME$             -> Sub-directory (target) name
      - $FILES$            -> Space-separated list of all cpp files

    Args:
        subdir_path (str): Output directory path.
        target_name (str): Name of the target.
        cpp_filenames (list): List of generated .cpp files.
        md_path (str): Path to the original .md file (to find template).
    """
    # Swap only the extension. str.replace('.md', ...) would also rewrite any
    # '.md' that happens to occur in a directory name along the path.
    template_path: str = os.path.splitext(md_path)[0] + '.cmake.in'

    try:
        with open(template_path, 'r', encoding='utf-8') as f:
            template_content = f.read()
    except FileNotFoundError:
        # No custom template next to the Markdown file: use the default one.
        template_content = """
add_executable($NAME$ $FILES$)
target_link_libraries($NAME$ PRIVATE msft_proxy)

$COMMON$

"""

    # The literals start at column 0 on purpose: their content is emitted
    # verbatim into the generated CMake file.
    common_snippet = """
$DIAGNOSTIC_FLAGS$
"""

    diagnostic_flags = """
if (MSVC)
    target_compile_options($NAME$ PRIVATE /W4)
elseif(CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
    target_compile_options($NAME$ PRIVATE -Wall -Wextra -Wpedantic -Wno-c++2b-extensions)
else()
    target_compile_options($NAME$ PRIVATE -Wall -Wextra -Wpedantic)
endif()
"""

    files_str = ' '.join(cpp_filenames)

    # Order matters: replacement is not recursive, so placeholders that expand
    # to other placeholders ($COMMON$ -> $DIAGNOSTIC_FLAGS$ -> $NAME$) must be
    # substituted before the ones they contain.
    cmake_content = (template_content
                     .replace('$COMMON$', common_snippet)
                     .replace('$DIAGNOSTIC_FLAGS$', diagnostic_flags)
                     .replace('$NAME$', target_name)
                     .replace('$FILES$', files_str))

    cmake_path = os.path.join(subdir_path, "CMakeLists.txt")
    with open(cmake_path, 'w', encoding='utf-8') as f:
        f.write(f"# This file was auto-generated from: {md_path}\n# Do not edit this file manually.\n\n")
        f.write(cmake_content)
def move_tmp_contents_to_persistent(tmp_dir: str, persistent_dir: str):
    """
    Move the contents of the temporary directory to the persistent directory,
    overwriting only the files that have changed.

    Files whose content is identical to the existing persistent copy are left
    alone, so the persistent copy is not replaced. The marker file
    ``VOLATILE_DIRECTORY_DO_NOT_USE.txt`` in the root of ``tmp_dir`` is never
    copied over.

    Note: The tmp_dir is removed after the operation.

    Args:
        tmp_dir (str): Directory holding the freshly generated files.
        persistent_dir (str): Directory to update in place.
    """
    for root, _, files in os.walk(tmp_dir):
        # Location of the current directory relative to the temporary root;
        # the same relative location is used below persistent_dir.
        rel_path = os.path.relpath(root, tmp_dir)

        for file in files:
            if rel_path == '.' and file == 'VOLATILE_DIRECTORY_DO_NOT_USE.txt':
                continue

            tmp_file_path = os.path.join(root, file)
            persistent_file_path = os.path.join(persistent_dir, rel_path, file)
            os.makedirs(os.path.dirname(persistent_file_path), exist_ok=True)

            # shallow=False forces a byte-wise comparison. The default shallow
            # mode trusts (type, size, mtime) and would keep a stale file when
            # a regenerated one happens to share size and timestamp.
            unchanged = (os.path.exists(persistent_file_path)
                         and filecmp.cmp(tmp_file_path, persistent_file_path, shallow=False))
            if not unchanged:
                shutil.move(tmp_file_path, persistent_file_path)

    # Whatever is left (unchanged duplicates and the marker file) is discarded.
    shutil.rmtree(tmp_dir)
def main():
    """
    Process Markdown files and generate C++ code and CMake configurations.

    Usage: ``extract_example_code_from_docs.py <input_dir> <output_dir>``

    Every ``.md`` file below ``input_dir`` that contains C++ code in its
    "Example" section produces a sub-directory ``example_<relative path with
    separators replaced by '_'>`` holding ``code_<n>.cpp`` files and a
    ``CMakeLists.txt``. A top-level ``CMakeLists.txt`` adds all of these
    sub-directories. Everything is generated into ``<output_dir>/.tmp`` first
    and then merged into ``output_dir``.
    """
    if len(sys.argv) != 3:
        print("Usage: python extract_example_code_from_docs.py <input_dir> <output_dir>")
        sys.exit(1)

    input_dir: str = sys.argv[1]
    output_dir: str = sys.argv[2]

    # Always start from an empty staging directory.
    temp_output_dir = os.path.join(output_dir, '.tmp')
    if os.path.exists(temp_output_dir):
        shutil.rmtree(temp_output_dir)
    os.makedirs(temp_output_dir)

    warning_file_path = os.path.join(temp_output_dir, 'VOLATILE_DIRECTORY_DO_NOT_USE.txt')
    with open(warning_file_path, 'w', encoding='utf-8') as f:
        f.write("This is a volatile directory that is subject to be removed. Do not store persistent files here.")

    toplevel_cmake_lines = []  # For top-level CMakeLists.txt

    for root, dirs, files in os.walk(input_dir):
        # os.walk yields entries in arbitrary order. Sorting keeps the
        # generated top-level CMakeLists.txt identical between runs, so it is
        # not rewritten (and CMake not re-triggered) without a real change.
        dirs.sort()
        for file in sorted(files):
            if not file.endswith('.md'):
                continue

            md_path: str = os.path.join(root, file)
            rel_path: str = os.path.relpath(md_path, input_dir)
            rel_base: str = os.path.splitext(rel_path)[0].replace(os.sep, '_')

            code_blocks: list[str] = extract_cpp_code_example_from_md_file(md_path)
            if not code_blocks:
                continue  # Skip if no code

            # One sub-directory (and one CMake target) per Markdown file.
            subdir_name: str = f"example_{rel_base}"
            subdir_path: str = os.path.join(temp_output_dir, subdir_name)
            os.makedirs(subdir_path, exist_ok=True)

            cpp_filenames: list[str] = []
            for i, code in enumerate(code_blocks, 1):
                cpp_filename: str = f"code_{i}.cpp"
                write_code_file(os.path.join(subdir_path, cpp_filename), code, md_path)
                cpp_filenames.append(cpp_filename)

            generate_subdir_cmake(subdir_path, subdir_name, cpp_filenames, md_path)

            toplevel_cmake_lines.append(f"add_subdirectory({subdir_name})")

    total_cmake_path = os.path.join(temp_output_dir, "CMakeLists.txt")
    with open(total_cmake_path, 'w', encoding='utf-8') as f:
        f.write(f"# This file was auto-generated from: {input_dir}\n# Do not edit this file manually.\n\n")
        f.writelines(line + '\n' for line in toplevel_cmake_lines)

    # Move the contents in temporary directory into parent directory.
    # This updates only the changed files, which makes incremental builds faster
    # as unchanged files don't need to be re-compiled.
    move_tmp_contents_to_persistent(temp_output_dir, output_dir)


if __name__ == '__main__':
    main()
import dataclasses
from typing import Dict, Generator, List, Tuple, Union


@dataclasses.dataclass
class MarkdownParserState:
    """Mutable bookkeeping used by parse_markdown_elements while scanning lines."""

    # True while the scanner is between an opening and a closing code fence.
    inside_code_block: bool = False
    # Lines collected for the code block currently being read.
    code_lines: list[str] = dataclasses.field(default_factory=list)
    # Number of backticks in the fence that opened the current code block.
    required_ticks: int = 0
    # Info string (language tag) of the current code block.
    current_language: str = ''
    # Lines collected for the paragraph currently being read.
    current_paragraph: list[str] = dataclasses.field(default_factory=list)

    def reset(self):
        """Return every field to its initial value."""
        self.inside_code_block = False
        self.code_lines = []
        self.required_ticks = 0
        self.current_language = ''
        self.current_paragraph = []

    def begin_code_block(self, ticks: int, language: str):
        """Enter a code block opened by a fence of `ticks` backticks."""
        self.inside_code_block = True
        self.required_ticks = ticks
        self.current_language = language
        self.code_lines = []

    def end_code_block(self):
        """Leave the current code block (collected lines are kept until the next begin)."""
        self.inside_code_block = False
        self.required_ticks = 0
        self.current_language = ''

    def add_line_to_code(self, line: str):
        """Append a raw line to the current code block."""
        self.code_lines.append(line)

    def get_paragraph(self) -> List[str]:
        """Return the buffered paragraph lines and clear the buffer."""
        paragraph = self.current_paragraph
        self.current_paragraph = []
        return paragraph

    def add_line_to_paragraph(self, line: str):
        """Append a raw line to the current paragraph."""
        self.current_paragraph.append(line)

    def has_paragraph(self) -> bool:
        """Return True if the buffer holds a paragraph that is not a lone blank line."""
        return len(self.current_paragraph) > 0 and not self.has_empty_paragraph()

    def has_empty_paragraph(self) -> bool:
        """Return True if the buffer holds exactly one whitespace-only line."""
        return len(self.current_paragraph) == 1 and len(self.current_paragraph[0].strip()) == 0


def count_leading_chars(line: str, char: str) -> int:
    """
    Count how many consecutive occurrences of `char` start `line`.

    `char` is passed to str.lstrip, which treats it as a set of characters,
    so callers should pass a single character.
    """
    return len(line) - len(line.lstrip(char))


def parse_markdown_elements(text: str) -> Generator[Tuple[str, Union[Dict, str]], None, None]:
    """
    Parse a Markdown-formatted string into a stream of structured elements,
    including code blocks, headings, and paragraphs.

    Recognized constructs:
      - Backtick-fenced code blocks that start at column 0. A block is closed
        by a line of at least as many backticks as the opening fence, followed
        only by whitespace. A block still open at the end of the input is
        yielded as well.
      - ATX headings: one to six '#' characters followed by whitespace or the
        end of the line (surrounding whitespace on the line is ignored).
        Lines such as '#include' are therefore ordinary paragraph text.
      - Paragraphs: runs of consecutive other non-blank lines.

    Args:
        text (str): The input Markdown text as a single string.

    Yields:
        Tuples where:
          - First element is the type of element ('code', 'heading', or 'paragraph').
          - Second element is either a:
            - dictionary: for 'code': {'language': str, 'content': str}
            - dictionary: for 'heading': {'level': int, 'text': str}
            - string: for 'paragraph' containing the paragraph content.
    """
    state = MarkdownParserState()

    def flush_paragraph():
        # Emit the buffered paragraph, if there is one.
        if state.has_paragraph():
            yield 'paragraph', '\n'.join(state.get_paragraph())

    for line in text.splitlines():
        if state.inside_code_block:
            line_ticks: int = count_leading_chars(line, '`')
            trailing: str = line[line_ticks:]

            # A closing fence may be longer than the opening one, but never
            # shorter; this lets a longer fence wrap shorter nested fences.
            if line_ticks >= state.required_ticks and not trailing.strip():
                yield 'code', {'language': state.current_language,
                               'content': '\n'.join(state.code_lines)}
                state.end_code_block()
            else:
                state.add_line_to_code(line)
            continue

        stripped_line: str = line.strip()
        hash_count: int = count_leading_chars(stripped_line, '#')
        after_hashes: str = stripped_line[hash_count:]
        is_heading: bool = (1 <= hash_count <= 6
                            and (not after_hashes or after_hashes[0].isspace()))

        if is_heading:
            yield from flush_paragraph()
            yield 'heading', {'level': hash_count, 'text': after_hashes.lstrip()}
        elif line.startswith('```'):
            yield from flush_paragraph()
            fence_ticks: int = count_leading_chars(line, '`')
            state.begin_code_block(fence_ticks, line[fence_ticks:].strip())
        elif stripped_line == '':
            # A blank line terminates the current paragraph.
            yield from flush_paragraph()
        else:
            state.add_line_to_paragraph(line)

    yield from flush_paragraph()

    # An unterminated code block is still reported rather than dropped.
    if state.inside_code_block:
        yield 'code', {'language': state.current_language,
                       'content': '\n'.join(state.code_lines)}
        state.end_code_block()