From c6d63cb73d999e5ee8a9ecca82ee70e621649094 Mon Sep 17 00:00:00 2001 From: Sachintha Nadeeshan Date: Mon, 24 Mar 2025 23:10:26 +0530 Subject: [PATCH 1/7] Add table support and improve man page formatting --- utils/markdown2man.py | 244 ++++++++++++++++++++---------------------- 1 file changed, 119 insertions(+), 125 deletions(-) diff --git a/utils/markdown2man.py b/utils/markdown2man.py index eb4d4cbc42d..8bd68d01c87 100644 --- a/utils/markdown2man.py +++ b/utils/markdown2man.py @@ -17,37 +17,70 @@ import re from pathlib import Path - def strip_yaml_from_markdown(content): - # Remove YAML front matter + """Remove YAML front matter from markdown content.""" return re.sub(r"^---\n.*?\n---\n", "", content, flags=re.DOTALL) +def get_first_sentence(text): + """Extract first meaningful paragraph for NAME section.""" + paragraphs = [p.strip() for p in text.split('\n\n') if p.strip()] + for para in paragraphs: + if not para.startswith('#') and len(para.split()) > 3: + clean = re.sub(r'[\*_`]', '', para.split('\n')[0]) + return clean[:80] + return "Manages module functionality" + +def convert_table(md_table): + """Convert markdown tables to man page format with proper alignment.""" + lines = [line.strip() for line in md_table.split('\n') + if line.strip() and '|' in line] + lines = [line for line in lines if not re.match(r'^[\|\-\s]+$', line)] + + # Calculate column widths + col_widths = [] + for line in lines: + cells = [cell.strip() for cell in line.strip('|').split('|')] + for i, cell in enumerate(cells): + if i >= len(col_widths): + col_widths.append(0) + col_widths[i] = max(col_widths[i], len(cell)) + + # Format with consistent spacing + output = [] + for line in lines: + cells = [cell.strip() for cell in line.strip('|').split('|')] + padded = [f" {cell.ljust(col_widths[i])} " for i, cell in enumerate(cells)] + output.append(''.join(padded)) + return '\n'.join(output) + '\n' def parse_markdown(content): + """Parse markdown content into typed blocks (code, lists, default).""" + # Handle tables first + content = re.sub( + r'(\|.+\|(\n\|.+\|)+)', + lambda m: f"TABLE_BLOCK:{m.group(0)}:END_TABLE", + content + ) + lines = content.splitlines() processing_block = [] processed_content = [] - buffer = "" state = "default" for line in lines: if line.strip().startswith("```"): - # end of code block if state == "code": processing_block.append(line) - processed_content.append( - {"markdown": "\n".join(processing_block), "type": state} - ) + processed_content.append({"markdown": "\n".join(processing_block), "type": state}) processing_block = [] state = "default" - # start of code block else: - processed_content.append( - {"markdown": "\n".join(processing_block), "type": state} - ) - processing_block = [] - processing_block.append(line) + if buffer: + processing_block.append(buffer) + buffer = "" + processed_content.append({"markdown": "\n".join(processing_block), "type": state}) + processing_block = [line] state = "code" continue @@ -59,23 +92,17 @@ def parse_markdown(content): if buffer: processing_block.append(buffer) buffer = "" - # start of ordered list if state != "list": - processed_content.append( - {"markdown": "\n".join(processing_block), "type": state} - ) + processed_content.append({"markdown": "\n".join(processing_block), "type": state}) processing_block = [] state = "list" - # empty line at the start and end of code, list blocks if line == "": if buffer: processing_block.append(buffer) buffer = "" if state != "default": - processed_content.append( - {"markdown": "\n".join(processing_block), "type": state} - ) + processed_content.append({"markdown": "\n".join(processing_block), "type": state}) processing_block = [] state = "default" processing_block.append(line) @@ -93,164 +120,131 @@ def parse_markdown(content): if buffer: processing_block.append(buffer) if processing_block: - processed_content.append( - {"markdown": "\n".join(processing_block), "type": state} - ) + processed_content.append({"markdown": "\n".join(processing_block), "type": state}) - merged_content = [] + # Merge adjacent blocks of same type + merged = [] for item in processed_content: if not item["markdown"]: continue - if merged_content and merged_content[-1]["type"] == item["type"]: - merged_content[-1]["markdown"] += "\n" + item["markdown"] + if merged and merged[-1]["type"] == item["type"]: + merged[-1]["markdown"] += "\n" + item["markdown"] else: - merged_content.append(item) - - return merged_content - - -def process_links(markdown): - """Replace Markdown links with only their display text.""" - markdown = re.sub(r"!\[.*?\]\(.*?\)", "", markdown) - return re.sub(r"\[(.*?)\]\((.*?)\)", r"\1", markdown) - + merged.append(item) + return merged def process_parameters(markdown): - return re.sub( - r"^\*\*([a-z0-9_]*)\*\*=\*([a-z]*)\*( \*\*\[required\]\*\*)?", - r'.IP "**\1**=*\2*\3" 4m', - markdown, - flags=re.MULTILINE, + """Handle GRASS parameters and flags with proper .IP formatting.""" + # Process flags (-p) and parameters (region) + markdown = re.sub( + r'([^\w\n])(\*\*|\*|_)([a-z0-9_\-]+)(\*\*|\*|_)([^\w]|$)', + r'\1\n.IP "\2\3\4" 4\n\5', + markdown ) - - -def process_flags(markdown): - return re.sub(r"^\*\*-(.*?)\*\*", r'.IP "**-\1**" 4m', markdown, flags=re.MULTILINE) - + # Clean up formatting + markdown = re.sub(r'\.IP\n\.IP', '.IP', markdown) + return re.sub(r'(\n\.IP "[^"]+" 4\n)\s+', r'\1', markdown) def process_formatting(markdown): - """Apply inline formatting for bold, italic, and bold+italic.""" + """Apply man page formatting for bold/italic text.""" markdown = re.sub(r"\*\*\*(.+?)\*\*\*", r"\\fB\\fI\1\\fR", markdown) markdown = re.sub(r"\*\*(.+?)\*\*", r"\\fB\1\\fR", markdown) return re.sub(r"\*(.+?)\*", r"\\fI\1\\fR", markdown) - -def process_br(markdown): - return re.sub(r"([^\n\s]) $", r"\1\n.br", markdown, flags=re.MULTILINE) - - def process_headings(markdown): - def convert_sh(match): - return f".SH {match.group(1).upper()}" - - def convert_ss(match): - return f".SS {match.group(1)}" - - markdown = re.sub(r"^#{1,2} (.*)", convert_sh, markdown, flags=re.MULTILINE) - return re.sub(r"^#{3,} (.*)", convert_ss, markdown, flags=re.MULTILINE) - + """Convert markdown headings to man page sections.""" + markdown = re.sub(r"^#{1,2} (.*)", r".SH \1".upper(), markdown, flags=re.MULTILINE) + return re.sub(r"^#{3,} (.*)", r".SS \1", markdown, flags=re.MULTILINE) def process_code(markdown): + """Format code blocks with proper man page syntax.""" in_code_block = False output = [] for line in markdown.splitlines(): if line.lstrip().startswith("```"): if in_code_block: - output.append("\\fR\n.fi\n") # End code block + output.append("\\fR\n.fi") else: - output.append(".nf\n\\fC\n") # Start code block + lang = line.strip('`').strip() + output.append(f".nf\n\\fC\n{lang + ': ' if lang else ''}") in_code_block = not in_code_block else: - output.append(re.sub(r"\\", r"\(rs", line)) - + output.append(re.sub(r"\\", r"\\\\", line) if in_code_block else line) return "\n".join(output) - def process_lists(markdown): - markdown = process_special_characters(markdown) - markdown = process_formatting(markdown) - markdown = process_links(markdown) - + """Convert markdown lists to man page format.""" output = [] indent_levels = [] - for line in markdown.splitlines(): - match = re.match(r"^(\s*)([-*]|\d+\.)\s+(.*)", line) # Match bullets or numbers + match = re.match(r"^(\s*)([-*]|\d+\.)\s+(.*)", line) if not match: - continue # Skip non-list lines (shouldn't happen if input is all lists) - - spaces, bullet, item_text = match.groups() - level = len(spaces) # Determine indentation level - + continue + spaces, bullet, text = match.groups() + level = len(spaces) + while indent_levels and indent_levels[-1] > level: - output.append(".RE") # Close previous indentation level + output.append(".RE") indent_levels.pop() - + if not indent_levels or indent_levels[-1] < level: - output.append(".RS 4n") # Open new indentation level + output.append(".RS 4n") indent_levels.append(level) - - if re.match(r"^\d+\.$", bullet): # Numbered list - output.append(f'.IP "{bullet}" 4n\n{item_text}') - else: # Bullet list - output.append(".IP \\(bu 4n\n" + item_text) - - # Close any remaining indentation levels + + output.append(f'.IP "{bullet}" 4n\n{text}' if bullet.isdigit() + else f'.IP \\(bu 4n\n{text}') + while indent_levels: output.append(".RE") indent_levels.pop() - return "\n".join(output) - -def process_special_characters(markdown): - markdown = markdown.replace(r"\[", "[") - markdown = markdown.replace(r"\]", "]") - markdown = markdown.replace(r"\#", "#") - markdown = markdown.replace(r"\>", ">") - markdown = markdown.replace(r"\<", "<") - markdown = markdown.replace("`", "") - # eliminate extra spaces between words - markdown = re.sub(r"(?<=\S) {2,}(?=\S)", " ", markdown) - return re.sub(r"\\", r"\(rs", markdown) - - -def process_default(markdown): - markdown = process_br(markdown) - markdown = process_parameters(markdown) - markdown = process_flags(markdown) - markdown = markdown.replace("    ", "") - markdown = process_special_characters(markdown) - markdown = process_formatting(markdown) - markdown = process_links(markdown) - return process_headings(markdown) - - def convert_markdown_to_man(input_file, output_file): - """Read Markdown file and convert to man page.""" - markdown = Path(input_file).read_text() + """Main conversion function from markdown to man page format.""" + markdown = Path(input_file).read_text(encoding='utf-8') markdown = strip_yaml_from_markdown(markdown) + + title = Path(input_file).stem.upper() + first_para = get_first_sentence(markdown.split('\n\n')[1]) if '\n\n' in markdown else "" + blocks = parse_markdown(markdown) - result = ['.TH MAN 1 "Manual"\n'] + + result = [ + f'.TH {title} 1 "GRASS GIS User\'s Manual"\n', + f'.SH NAME\n\\fB{title}\\fR \\- {first_para}\n', + f'.SH SYNOPSIS\n\\fB{title.lower()}\\fR\n.br\n' + ] + for block in blocks: if block["type"] == "code": result.append(process_code(block["markdown"])) elif block["type"] == "list": result.append(process_lists(block["markdown"])) else: - result.append(process_default(block["markdown"])) - - Path(output_file).write_text("\n".join(result)) - + content = block["markdown"] + if "TABLE_BLOCK:" in content: + result.append(convert_table(content[12:-10])) + else: + content = re.sub(r"([^\n\s]) $", r"\1\n.br", content, flags=re.MULTILINE) + content = process_formatting(content) + content = process_headings(content) + content = process_parameters(content) + result.append(content) + + Path(output_file).write_text("\n".join(result), encoding='utf-8') def main(): - parser = argparse.ArgumentParser(description="Convert Markdown to Unix man page.") - parser.add_argument("input_file", help="Path to the input Markdown file.") - parser.add_argument("output_file", help="Path to the output man page file.") + """Command line interface for the converter.""" + parser = argparse.ArgumentParser( + description="Convert GRASS GIS markdown docs to man pages", + formatter_class=argparse.ArgumentDefaultsHelpFormatter + ) + parser.add_argument("input_file", help="Input markdown file path") + parser.add_argument("output_file", help="Output man page file path") args = parser.parse_args() - + convert_markdown_to_man(args.input_file, args.output_file) - + print(f"Successfully converted {args.input_file} to {args.output_file}") if __name__ == "__main__": main() From 240536917ed162fd34be37e9693e9a61b8714e5f Mon Sep 17 00:00:00 2001 From: Sachintha Nadeeshan Date: Thu, 27 Mar 2025 23:08:12 +0530 Subject: [PATCH 2/7] Improved table formatting and visualization --- utils/markdown2man.py | 306 +++++++++++++++++++++++++----------------- 1 file changed, 182 insertions(+), 124 deletions(-) diff --git a/utils/markdown2man.py b/utils/markdown2man.py index 8bd68d01c87..4a3ef5338ea 100644 --- a/utils/markdown2man.py +++ b/utils/markdown2man.py @@ -17,92 +17,84 @@ import re from pathlib import Path + def strip_yaml_from_markdown(content): - """Remove YAML front matter from markdown content.""" + # Remove YAML front matter return re.sub(r"^---\n.*?\n---\n", "", content, flags=re.DOTALL) -def get_first_sentence(text): - """Extract first meaningful paragraph for NAME section.""" - paragraphs = [p.strip() for p in text.split('\n\n') if p.strip()] - for para in paragraphs: - if not para.startswith('#') and len(para.split()) > 3: - clean = re.sub(r'[\*_`]', '', para.split('\n')[0]) - return clean[:80] - return "Manages module functionality" - -def convert_table(md_table): - """Convert markdown tables to man page format with proper alignment.""" - lines = [line.strip() for line in md_table.split('\n') - if line.strip() and '|' in line] - lines = [line for line in lines if not re.match(r'^[\|\-\s]+$', line)] - - # Calculate column widths - col_widths = [] - for line in lines: - cells = [cell.strip() for cell in line.strip('|').split('|')] - for i, cell in enumerate(cells): - if i >= len(col_widths): - col_widths.append(0) - col_widths[i] = max(col_widths[i], len(cell)) - - # Format with consistent spacing - output = [] - for line in lines: - cells = [cell.strip() for cell in line.strip('|').split('|')] - padded = [f" {cell.ljust(col_widths[i])} " for i, cell in enumerate(cells)] - output.append(''.join(padded)) - return '\n'.join(output) + '\n' def parse_markdown(content): - """Parse markdown content into typed blocks (code, lists, default).""" - # Handle tables first - content = re.sub( - r'(\|.+\|(\n\|.+\|)+)', - lambda m: f"TABLE_BLOCK:{m.group(0)}:END_TABLE", - content - ) - lines = content.splitlines() processing_block = [] processed_content = [] + buffer = "" state = "default" + in_table = False # Track table state for line in lines: + stripped = line.strip() + + # Detect table start/end + if re.match(r'^\|.+\|$', stripped) and not in_table: + if processing_block: + processed_content.append({"markdown": "\n".join(processing_block), "type": state}) + processing_block = [] + state = "table" + in_table = True + processing_block.append(line) + continue + + if in_table: + if re.match(r'^\|.+\|$', stripped) or re.match(r'^\|-+', stripped): + processing_block.append(line) + else: + processed_content.append({"markdown": "\n".join(processing_block), "type": state}) + processing_block = [] + state = "default" + in_table = False + buffer = line # Process the current line in default state + continue + + # Code block handling if line.strip().startswith("```"): if state == "code": processing_block.append(line) - processed_content.append({"markdown": "\n".join(processing_block), "type": state}) + processed_content.append( + {"markdown": "\n".join(processing_block), "type": state} + ) processing_block = [] state = "default" else: - if buffer: - processing_block.append(buffer) - buffer = "" - processed_content.append({"markdown": "\n".join(processing_block), "type": state}) - processing_block = [line] + processed_content.append( + {"markdown": "\n".join(processing_block), "type": state} + ) + processing_block = [] + processing_block.append(line) state = "code" continue - if state == "code": - processing_block.append(line) - continue - + # List handling if re.match(r"^(\s*)([-*]|\d+\.)\s+(.*)", line.strip()): if buffer: processing_block.append(buffer) buffer = "" if state != "list": - processed_content.append({"markdown": "\n".join(processing_block), "type": state}) + processed_content.append( + {"markdown": "\n".join(processing_block), "type": state} + ) processing_block = [] state = "list" + # Empty line handling (between blocks) if line == "": if buffer: processing_block.append(buffer) buffer = "" if state != "default": - processed_content.append({"markdown": "\n".join(processing_block), "type": state}) + processed_content.append( + {"markdown": "\n".join(processing_block), "type": state} + ) processing_block = [] state = "default" processing_block.append(line) @@ -111,7 +103,7 @@ def parse_markdown(content): if buffer: buffer += " " + line else: - buffer += line + buffer = line if line.endswith(" "): processing_block.append(buffer) @@ -120,131 +112,197 @@ def parse_markdown(content): if buffer: processing_block.append(buffer) if processing_block: - processed_content.append({"markdown": "\n".join(processing_block), "type": state}) + processed_content.append( + {"markdown": "\n".join(processing_block), "type": state} + ) - # Merge adjacent blocks of same type - merged = [] + merged_content = [] for item in processed_content: if not item["markdown"]: continue - if merged and merged[-1]["type"] == item["type"]: - merged[-1]["markdown"] += "\n" + item["markdown"] + if merged_content and merged_content[-1]["type"] == item["type"]: + merged_content[-1]["markdown"] += "\n" + item["markdown"] else: - merged.append(item) - return merged + merged_content.append(item) + + return merged_content + + +# Table processing function with better visualization +def process_tables(markdown): + markdown = process_links(markdown) + markdown = process_formatting(markdown) + markdown = process_special_characters(markdown) + + lines = markdown.split('\n') + if not lines: + return "" + + # Remove separator line if present (for Markdown tables with hyphen separators) + if re.match(r'^\|[-| ]+\|$', lines[1].strip()): + del lines[1] + + # Prepare table with border-like formatting + tbl = [".TS"] + tbl.append("allbox tab(|);") # Border for table + tbl.append("l " * len(lines[0].split("|")) + ".") # Left-align all columns + + # Add table rows with border-like formatting + for i, line in enumerate(lines): + cells = [c.strip() for c in line.strip().strip('|').split('|')] + if i == 0: + tbl.append(".B") # Bold for header row + tbl.append(" ".join(["l"]*len(cells)) + ".") # Header column alignment + tbl.append("|" + "|".join(cells) + "|") + + tbl.append(".TE") + return '\n'.join(tbl) + + +def process_links(markdown): + """Replace Markdown links with only their display text.""" + markdown = re.sub(r"!\[.*?\]\(.*?\)", "", markdown) + return re.sub(r"\[(.*?)\]\((.*?)\)", r"\1", markdown) + def process_parameters(markdown): - """Handle GRASS parameters and flags with proper .IP formatting.""" - # Process flags (-p) and parameters (region) - markdown = re.sub( - r'([^\w\n])(\*\*|\*|_)([a-z0-9_\-]+)(\*\*|\*|_)([^\w]|$)', - r'\1\n.IP "\2\3\4" 4\n\5', - markdown + return re.sub( + r"^\*\*([a-z0-9_]*)\*\*=\*([a-z]*)\*( \*\*\[required\]\*\*)?", + r'.IP "**\1**=*\2*\3" 4m', + markdown, + flags=re.MULTILINE, ) - # Clean up formatting - markdown = re.sub(r'\.IP\n\.IP', '.IP', markdown) - return re.sub(r'(\n\.IP "[^"]+" 4\n)\s+', r'\1', markdown) + + +def process_flags(markdown): + return re.sub(r"^\*\*-(.*?)\*\*", r'.IP "**-\1**" 4m', markdown, flags=re.MULTILINE) + def process_formatting(markdown): - """Apply man page formatting for bold/italic text.""" + """Apply inline formatting for bold, italic, and bold+italic.""" markdown = re.sub(r"\*\*\*(.+?)\*\*\*", r"\\fB\\fI\1\\fR", markdown) markdown = re.sub(r"\*\*(.+?)\*\*", r"\\fB\1\\fR", markdown) return re.sub(r"\*(.+?)\*", r"\\fI\1\\fR", markdown) + +def process_br(markdown): + return re.sub(r"([^\n\s]) $", r"\1\n.br", markdown, flags=re.MULTILINE) + + def process_headings(markdown): - """Convert markdown headings to man page sections.""" - markdown = re.sub(r"^#{1,2} (.*)", r".SH \1".upper(), markdown, flags=re.MULTILINE) - return re.sub(r"^#{3,} (.*)", r".SS \1", markdown, flags=re.MULTILINE) + def convert_sh(match): + return f".SH {match.group(1).upper()}" + + def convert_ss(match): + return f".SS {match.group(1)}" + + markdown = re.sub(r"^#{1,2} (.*)", convert_sh, markdown, flags=re.MULTILINE) + return re.sub(r"^#{3,} (.*)", convert_ss, markdown, flags=re.MULTILINE) + def process_code(markdown): - """Format code blocks with proper man page syntax.""" in_code_block = False output = [] for line in markdown.splitlines(): if line.lstrip().startswith("```"): if in_code_block: - output.append("\\fR\n.fi") + output.append("\\fR\n.fi\n") # End code block else: - lang = line.strip('`').strip() - output.append(f".nf\n\\fC\n{lang + ': ' if lang else ''}") + output.append(".nf\n\\fC\n") # Start code block in_code_block = not in_code_block else: - output.append(re.sub(r"\\", r"\\\\", line) if in_code_block else line) + output.append(re.sub(r"\\", r"\(rs", line)) + return "\n".join(output) + def process_lists(markdown): - """Convert markdown lists to man page format.""" + markdown = process_special_characters(markdown) + markdown = process_formatting(markdown) + markdown = process_links(markdown) + output = [] indent_levels = [] + for line in markdown.splitlines(): - match = re.match(r"^(\s*)([-*]|\d+\.)\s+(.*)", line) + match = re.match(r"^(\s*)([-*]|\d+\.)\s+(.*)", line) # Match bullets or numbers if not match: - continue - spaces, bullet, text = match.groups() - level = len(spaces) - + continue # Skip non-list lines (shouldn't happen if input is all lists) + + spaces, bullet, item_text = match.groups() + level = len(spaces) # Determine indentation level + while indent_levels and indent_levels[-1] > level: - output.append(".RE") + output.append(".RE") # Close previous indentation level indent_levels.pop() - + if not indent_levels or indent_levels[-1] < level: - output.append(".RS 4n") + output.append(".RS 4n") # Open new indentation level indent_levels.append(level) - - output.append(f'.IP "{bullet}" 4n\n{text}' if bullet.isdigit() - else f'.IP \\(bu 4n\n{text}') - + + if re.match(r"^\d+\.$", bullet): # Numbered list + output.append(f'.IP "{bullet}" 4n\n{item_text}') + else: # Bullet list + output.append(".IP \\(bu 4n\n" + item_text) + + # Close any remaining indentation levels while indent_levels: output.append(".RE") indent_levels.pop() + return "\n".join(output) + +def process_special_characters(markdown): + markdown = markdown.replace(r"\[", "[") + markdown = markdown.replace(r"\]", "]") + markdown = markdown.replace(r"\#", "#") + markdown = markdown.replace(r"\>", ">") + markdown = markdown.replace(r"\<", "<") + markdown = markdown.replace("`", "") + # eliminate extra spaces between words + markdown = re.sub(r"(?<=\S) {2,}(?=\S)", " ", markdown) + return re.sub(r"\\", r"\(rs", markdown) + + +def process_default(markdown): + markdown = process_br(markdown) + markdown = process_parameters(markdown) + markdown = process_flags(markdown) + markdown = markdown.replace("    ", "") + markdown = process_special_characters(markdown) + markdown = process_formatting(markdown) + markdown = process_links(markdown) + return process_headings(markdown) + + def convert_markdown_to_man(input_file, output_file): - """Main conversion function from markdown to man page format.""" - markdown = Path(input_file).read_text(encoding='utf-8') + """Read Markdown file and convert to man page.""" + markdown = Path(input_file).read_text() markdown = strip_yaml_from_markdown(markdown) - - title = Path(input_file).stem.upper() - first_para = get_first_sentence(markdown.split('\n\n')[1]) if '\n\n' in markdown else "" - blocks = parse_markdown(markdown) - - result = [ - f'.TH {title} 1 "GRASS GIS User\'s Manual"\n', - f'.SH NAME\n\\fB{title}\\fR \\- {first_para}\n', - f'.SH SYNOPSIS\n\\fB{title.lower()}\\fR\n.br\n' - ] - + result = ['.TH MAN 1 "Manual"\n'] for block in blocks: if block["type"] == "code": result.append(process_code(block["markdown"])) elif block["type"] == "list": result.append(process_lists(block["markdown"])) + elif block["type"] == "table": + result.append(process_tables(block["markdown"])) # Process tables else: - content = block["markdown"] - if "TABLE_BLOCK:" in content: - result.append(convert_table(content[12:-10])) - else: - content = re.sub(r"([^\n\s]) $", r"\1\n.br", content, flags=re.MULTILINE) - content = process_formatting(content) - content = process_headings(content) - content = process_parameters(content) - result.append(content) - - Path(output_file).write_text("\n".join(result), encoding='utf-8') + result.append(process_default(block["markdown"])) + + Path(output_file).write_text("\n".join(result)) + def main(): - """Command line interface for the converter.""" - parser = argparse.ArgumentParser( - description="Convert GRASS GIS markdown docs to man pages", - formatter_class=argparse.ArgumentDefaultsHelpFormatter - ) - parser.add_argument("input_file", help="Input markdown file path") - parser.add_argument("output_file", help="Output man page file path") + parser = argparse.ArgumentParser(description="Convert Markdown to Unix man page.") + parser.add_argument("input_file", help="Path to the input Markdown file.") + parser.add_argument("output_file", help="Path to the output man page file.") args = parser.parse_args() - + convert_markdown_to_man(args.input_file, args.output_file) - print(f"Successfully converted {args.input_file} to {args.output_file}") + if __name__ == "__main__": main() From e3c683356b44fd1d55c98956162d6105ecc133b2 Mon Sep 17 00:00:00 2001 From: Sachintha Nadeeshan Date: Fri, 28 Mar 2025 10:24:15 +0530 Subject: [PATCH 3/7] improved formatting and structure - Add hierarchical section numbering (1., 1.1) with descriptive labels (Main Section/Subsection) - Implement explicit list nesting markers with level indicators and type detection - Add space padding (\fB text \fR) to improve source readability - Resolve list index error in nested list processing --- utils/markdown2man.py | 292 +++++++++++++++++++++++------------------- 1 file changed, 160 insertions(+), 132 deletions(-) diff --git a/utils/markdown2man.py b/utils/markdown2man.py index 4a3ef5338ea..f3d32ae4ffc 100644 --- a/utils/markdown2man.py +++ b/utils/markdown2man.py @@ -22,20 +22,20 @@ def strip_yaml_from_markdown(content): # Remove YAML front matter return re.sub(r"^---\n.*?\n---\n", "", content, flags=re.DOTALL) - def parse_markdown(content): + """Parse markdown into structured blocks""" lines = content.splitlines() processing_block = [] processed_content = [] buffer = "" state = "default" - in_table = False # Track table state + in_table = False for line in lines: stripped = line.strip() - # Detect table start/end + # Table detection if re.match(r'^\|.+\|$', stripped) and not in_table: if processing_block: processed_content.append({"markdown": "\n".join(processing_block), "type": state}) @@ -53,7 +53,7 @@ def parse_markdown(content): processing_block = [] state = "default" in_table = False - buffer = line # Process the current line in default state + buffer = line continue # Code block handling @@ -86,7 +86,7 @@ def parse_markdown(content): processing_block = [] state = "list" - # Empty line handling (between blocks) + # Empty line handling if line == "": if buffer: processing_block.append(buffer) @@ -127,182 +127,210 @@ def parse_markdown(content): return merged_content +def process_headings(markdown): + """Convert headings with hierarchical numbering and labels""" + section_counter = [0] + subsection_counter = [0] -# Table processing function with better visualization -def process_tables(markdown): - markdown = process_links(markdown) - markdown = process_formatting(markdown) - markdown = process_special_characters(markdown) + def convert_main_section(match): + section_counter[0] += 1 + subsection_counter[0] = 0 + return f"\n.SH {section_counter[0]}. {match.group(1).upper()} (Main Section)\n" - lines = markdown.split('\n') - if not lines: - return "" - - # Remove separator line if present (for Markdown tables with hyphen separators) - if re.match(r'^\|[-| ]+\|$', lines[1].strip()): - del lines[1] - - # Prepare table with border-like formatting - tbl = [".TS"] - tbl.append("allbox tab(|);") # Border for table - tbl.append("l " * len(lines[0].split("|")) + ".") # Left-align all columns - - # Add table rows with border-like formatting - for i, line in enumerate(lines): - cells = [c.strip() for c in line.strip().strip('|').split('|')] - if i == 0: - tbl.append(".B") # Bold for header row - tbl.append(" ".join(["l"]*len(cells)) + ".") # Header column alignment - tbl.append("|" + "|".join(cells) + "|") - - tbl.append(".TE") - return '\n'.join(tbl) + def convert_subsection(match): + subsection_counter[0] += 1 + return (f"\n.SS {section_counter[0]}.{subsection_counter[0]} " + f"{match.group(1).upper()} (Subsection)\n") + markdown = re.sub(r"^## (.*)", convert_main_section, markdown, flags=re.MULTILINE) + return re.sub(r"^### (.*)", convert_subsection, markdown, flags=re.MULTILINE) -def process_links(markdown): - """Replace Markdown links with only their display text.""" - markdown = re.sub(r"!\[.*?\]\(.*?\)", "", markdown) - return re.sub(r"\[(.*?)\]\((.*?)\)", r"\1", markdown) +def process_lists(markdown): + markdown = process_special_characters(markdown) + markdown = process_formatting(markdown) + markdown = process_links(markdown) + output = [] + current_level = 0 + list_stack = [] + bullet_styles = [r"\\(bu", r"\\(sq", r"\\(ci"] -def process_parameters(markdown): - return re.sub( - r"^\*\*([a-z0-9_]*)\*\*=\*([a-z]*)\*( \*\*\[required\]\*\*)?", - r'.IP "**\1**=*\2*\3" 4m', - markdown, - flags=re.MULTILINE, - ) + for line in markdown.splitlines(): + match = re.match(r"^(\s*)([-*]|\d+\.)\s+(.*)", line) + if not match: + continue + indent, bullet, content = match.groups() + new_level = len(indent) // 4 + + # Handle list transitions + while current_level > new_level: + if list_stack: # Add safety check + output.append(f".RE\n\\fBEnd of Nested List (Level {current_level})\\fR\n") + current_level -= 1 + list_stack.pop() + + if new_level > current_level or not list_stack: + # Initialize stack if empty + list_type = 'ordered' if bullet[:-1].isdigit() else 'unordered' + output.append( + f"\\fBStart of Nested List (Level {new_level}) " + f"[{list_type.upper()}]\\fR\n" + f".RS {4*(new_level+1)}n" + ) + current_level = new_level + list_stack.append({'type': list_type, 'counter': 1}) + + # Add check for empty stack before access + if not list_stack: + continue -def process_flags(markdown): - return re.sub(r"^\*\*-(.*?)\*\*", r'.IP "**-\1**" 4m', markdown, flags=re.MULTILINE) + # Format list items + if list_stack[-1]['type'] == 'ordered': + output.append(f'.IP "{list_stack[-1]["counter"]}." {4*(current_level+1)}n') + list_stack[-1]["counter"] += 1 + else: + bullet = bullet_styles[current_level % len(bullet_styles)] + output.append(f'.IP "{bullet}" {4*(current_level+1)}n') + + output.append(f"{content}\n") + # Close remaining lists + while current_level > 0 and list_stack: + output.append(f".RE\n\\fBEnd of Nested List (Level {current_level})\\fR\n") + current_level -= 1 + list_stack.pop() -def process_formatting(markdown): - """Apply inline formatting for bold, italic, and bold+italic.""" - markdown = re.sub(r"\*\*\*(.+?)\*\*\*", r"\\fB\\fI\1\\fR", markdown) - markdown = re.sub(r"\*\*(.+?)\*\*", r"\\fB\1\\fR", markdown) - return re.sub(r"\*(.+?)\*", r"\\fI\1\\fR", markdown) + return "".join(output) +def process_tables(markdown): + processed = process_formatting(markdown) + lines = processed.split('\n') + + if not lines or len(lines[0].strip()) == 0: + return "" -def process_br(markdown): - return re.sub(r"([^\n\s]) $", r"\1\n.br", markdown, flags=re.MULTILINE) + table = [ + "\\fBStart of Table\\fR", + ".TS", + "allbox tab(|);", + "l " * len(lines[0].split("|")) + "." + ] + for i, line in enumerate(lines): + cells = [c.strip() for c in line.strip('|').split('|')] + if i == 0: + table.append("_") + table.append("|" + "|".join(cells) + "|") -def process_headings(markdown): - def convert_sh(match): - return f".SH {match.group(1).upper()}" - - def convert_ss(match): - return f".SS {match.group(1)}" + table.append(".TE\n\\fBEnd of Table\\fR") + return '\n'.join(table) - markdown = re.sub(r"^#{1,2} (.*)", convert_sh, markdown, flags=re.MULTILINE) - return re.sub(r"^#{3,} (.*)", convert_ss, markdown, flags=re.MULTILINE) +def process_parameters(markdown): + """Handle parameter definitions with bold formatting""" + return re.sub( + r"^\*\*([a-z0-9_]*)\*\*=\*([a-z]*)\*( \*\*\[required\]\*\*)?", + r'.IP "\\fB\1\\fR=*\2*\3" 4m', + markdown, + flags=re.MULTILINE, + ) +def process_flags(markdown): + """Handle command-line flags with consistent formatting""" + return re.sub( + r"^\*\*-(.*?)\*\*", + r'.IP "\\fB-\1\\fR" 4m', + markdown, + flags=re.MULTILINE + ) def process_code(markdown): + """Preserve code blocks with monospace formatting""" in_code_block = False output = [] for line in markdown.splitlines(): if line.lstrip().startswith("```"): if in_code_block: - output.append("\\fR\n.fi\n") # End code block + output.append("\\fR\n.fi\n") else: - output.append(".nf\n\\fC\n") # Start code block + output.append(".nf\n\\fC\n") in_code_block = not in_code_block else: - output.append(re.sub(r"\\", r"\(rs", line)) - + output.append(re.sub(r"\\fC", r"\\fC ", line)) return "\n".join(output) +def process_formatting(markdown): + markdown = re.sub(r"\*\*\s*(\S(.*?\S)?)\s*\*\*", r"\\fB \1 \\fR", markdown, flags=re.DOTALL) + markdown = re.sub(r"\*\s*(\S(.*?\S)?)\s*\*", r"\\fI \1 \\fR", markdown, flags=re.DOTALL) + markdown = re.sub(r"\*\*\*\s*(\S(.*?\S)?)\s*\*\*\*", r"\\fB\\fI \1 \\fR\\fR", markdown, flags=re.DOTALL) + + return markdown -def process_lists(markdown): - markdown = process_special_characters(markdown) - markdown = process_formatting(markdown) - markdown = process_links(markdown) - - output = [] - indent_levels = [] - - for line in markdown.splitlines(): - match = re.match(r"^(\s*)([-*]|\d+\.)\s+(.*)", line) # Match bullets or numbers - if not match: - continue # Skip non-list lines (shouldn't happen if input is all lists) - - spaces, bullet, item_text = match.groups() - level = len(spaces) # Determine indentation level - - while indent_levels and indent_levels[-1] > level: - output.append(".RE") # Close previous indentation level - indent_levels.pop() - - if not indent_levels or indent_levels[-1] < level: - output.append(".RS 4n") # Open new indentation level - indent_levels.append(level) - - if re.match(r"^\d+\.$", bullet): # Numbered list - output.append(f'.IP "{bullet}" 4n\n{item_text}') - else: # Bullet list - output.append(".IP \\(bu 4n\n" + item_text) - - # Close any remaining indentation levels - while indent_levels: - output.append(".RE") - indent_levels.pop() - - return "\n".join(output) - +def process_links(markdown): + """Replace Markdown links with display text""" + markdown = re.sub(r"!\[.*?\]\(.*?\)", "", markdown) + return re.sub(r"\[(.*?)\]\((.*?)\)", r"\1", markdown) +bullet_styles = [r"\\(bu", r"\\(sq", r"\\(ci"] # Use raw strings with double escapes def process_special_characters(markdown): + """Handle special characters""" markdown = markdown.replace(r"\[", "[") markdown = markdown.replace(r"\]", "]") markdown = markdown.replace(r"\#", "#") - markdown = markdown.replace(r"\>", ">") - markdown = markdown.replace(r"\<", "<") - markdown = markdown.replace("`", "") - # eliminate extra spaces between words markdown = re.sub(r"(?<=\S) {2,}(?=\S)", " ", markdown) return re.sub(r"\\", r"\(rs", markdown) - -def process_default(markdown): - markdown = process_br(markdown) - markdown = process_parameters(markdown) - markdown = process_flags(markdown) - markdown = markdown.replace("    ", "") - markdown = process_special_characters(markdown) - markdown = process_formatting(markdown) - markdown = process_links(markdown) - return process_headings(markdown) - - def convert_markdown_to_man(input_file, output_file): - """Read Markdown file and convert to man page.""" markdown = Path(input_file).read_text() markdown = strip_yaml_from_markdown(markdown) blocks = parse_markdown(markdown) - result = ['.TH MAN 1 "Manual"\n'] + + man_page = [ + '.TH I.ATCORR 1 "GRASS GIS Manual"', + '.SH NAME\ni.atcorr \\- Atmospheric correction using 6S algorithm' + ] + for block in blocks: - if block["type"] == "code": - result.append(process_code(block["markdown"])) - elif block["type"] == "list": - result.append(process_lists(block["markdown"])) - elif block["type"] == "table": - result.append(process_tables(block["markdown"])) # Process tables + content_type = block["type"] + content = block["markdown"] + + if content_type == "code": + man_page.append(process_code(content)) + elif content_type == "list": + man_page.append(process_lists(content)) + elif content_type == "table": + man_page.append(process_tables(content)) else: - result.append(process_default(block["markdown"])) + processed = process_default(content) + man_page.append(processed) + + Path(output_file).write_text("\n".join(man_page)) - Path(output_file).write_text("\n".join(result)) +def process_default(markdown): + """Default processing pipeline""" + transformations = [ + process_parameters, + process_flags, + lambda x: x.replace("    ", ""), + process_special_characters, + process_formatting, + process_links, + process_headings + ] + for transform in transformations: + markdown = transform(markdown) + return markdown def main(): - parser = argparse.ArgumentParser(description="Convert Markdown to Unix man page.") - parser.add_argument("input_file", help="Path to the input Markdown file.") - parser.add_argument("output_file", help="Path to the output man page file.") + parser = argparse.ArgumentParser( + description="Convert enhanced Markdown to man page format" + ) + parser.add_argument("input_file", help="Input Markdown file") + parser.add_argument("output_file", help="Output man page file") args = parser.parse_args() - + convert_markdown_to_man(args.input_file, args.output_file) - if __name__ == "__main__": main() From 01b37d5e633cfb511a1b7a2ebd927691ff9692b7 Mon Sep 17 00:00:00 2001 From: Sachintha Nadeeshan Date: Sun, 6 Apr 2025 18:51:43 +0530 Subject: [PATCH 4/7] add table support and improved parsing - Added table processing functionality with troff/groff table format conversion - Improved YAML front matter stripping and block parsing logic - Enhanced special character handling, especially in code blocks - Better heading and list processing with proper indentation - Added UTF-8 encoding support for file operations - Refactored argument parsing and output formatting --- utils/markdown2man.py | 375 ++++++++++++------------------------------ 1 file changed, 104 insertions(+), 271 deletions(-) diff --git a/utils/markdown2man.py b/utils/markdown2man.py index f3d32ae4ffc..1b38dff23f3 100644 --- a/utils/markdown2man.py +++ b/utils/markdown2man.py @@ -13,324 +13,157 @@ # ############################################################################### -import argparse import re +import argparse from pathlib import Path +def strip_yaml_from_markdown(markdown): + if markdown.startswith('---'): + parts = markdown.split('---', 2) + if len(parts) == 3: + return parts[2].strip() + return markdown -def strip_yaml_from_markdown(content): - # Remove YAML front matter - return re.sub(r"^---\n.*?\n---\n", "", content, flags=re.DOTALL) +def process_tables(markdown): + lines = markdown.strip().splitlines() + if len(lines) < 2: + return markdown # Not a valid table -def parse_markdown(content): - """Parse markdown into structured blocks""" - lines = content.splitlines() - processing_block = [] - processed_content = [] + headers = lines[0].strip("|").split("|") + rows = [line.strip("|").split("|") for line in lines[2:] if '|' in line] - buffer = "" - state = "default" - in_table = False + output = [".TS", "allbox;", "c" * len(headers) + "."] + output.append("\t".join([h.strip() for h in headers])) - for line in lines: - stripped = line.strip() + for row in rows: + output.append("\t".join([cell.strip() for cell in row])) - # Table detection - if re.match(r'^\|.+\|$', stripped) and not in_table: - if processing_block: - processed_content.append({"markdown": "\n".join(processing_block), "type": state}) - processing_block = [] - state = "table" - in_table = True - processing_block.append(line) - continue + output.append(".TE") + return "\n".join(output) - if in_table: - if re.match(r'^\|.+\|$', stripped) or re.match(r'^\|-+', stripped): - processing_block.append(line) - else: - processed_content.append({"markdown": "\n".join(processing_block), "type": state}) - processing_block = [] - state = "default" - in_table = False - buffer = line - continue +def parse_markdown(markdown): + blocks = [] + lines = markdown.splitlines() + current = {"type": "text", "markdown": ""} + + def flush(): + nonlocal current + if current["markdown"].strip(): + blocks.append(current) + current = {"type": "text", "markdown": ""} - # Code block handling + in_code = False + for i, line in enumerate(lines): if line.strip().startswith("```"): - if state == "code": - processing_block.append(line) - processed_content.append( - {"markdown": "\n".join(processing_block), "type": state} - ) - processing_block = [] - state = "default" - else: - processed_content.append( - {"markdown": "\n".join(processing_block), "type": state} - ) - processing_block = [] - processing_block.append(line) - state = "code" + flush() + in_code = not in_code + current = {"type": "code" if in_code else "text", "markdown": ""} continue - # List handling - if re.match(r"^(\s*)([-*]|\d+\.)\s+(.*)", line.strip()): - if buffer: - processing_block.append(buffer) - buffer = "" - if state != "list": - processed_content.append( - {"markdown": "\n".join(processing_block), "type": state} - ) - processing_block = [] - state = "list" - - # Empty line handling - if line == "": - if buffer: - processing_block.append(buffer) - buffer = "" - if state != "default": - processed_content.append( - {"markdown": "\n".join(processing_block), "type": state} - ) - processing_block = [] - state = "default" - processing_block.append(line) + if re.match(r"^(\s*)([-*]|\d+\.)\s+.*", line): + if current["type"] != "list": + flush() + current = {"type": "list", "markdown": ""} + elif current["type"] != "code" and not line.strip(): + flush() continue - if buffer: - buffer += " " + line - else: - buffer = line - - if line.endswith(" "): - processing_block.append(buffer) - buffer = "" - - if buffer: - processing_block.append(buffer) - if processing_block: - processed_content.append( - {"markdown": "\n".join(processing_block), "type": state} - ) - - merged_content = [] - for item in processed_content: - if not item["markdown"]: + if re.match(r"^\|.*\|$", line) and "|" in lines[i + 1] if i + 1 < len(lines) else False: + flush() + current = {"type": "table", "markdown": ""} + current["markdown"] += line + "\n" continue - if merged_content and merged_content[-1]["type"] == item["type"]: - merged_content[-1]["markdown"] += "\n" + item["markdown"] - else: - merged_content.append(item) - return merged_content + current["markdown"] += line + "\n" -def process_headings(markdown): - """Convert headings with hierarchical numbering and labels""" - section_counter = [0] - subsection_counter = [0] + flush() + return blocks - def convert_main_section(match): - section_counter[0] += 1 - subsection_counter[0] = 0 - return f"\n.SH {section_counter[0]}. {match.group(1).upper()} (Main Section)\n" +def process_headings(markdown): + def convert_sh(match): + return f".SH {match.group(1).upper()}" + def convert_ss(match): + return f".SS {match.group(1)}" - def convert_subsection(match): - subsection_counter[0] += 1 - return (f"\n.SS {section_counter[0]}.{subsection_counter[0]} " - f"{match.group(1).upper()} (Subsection)\n") + markdown = re.sub(r"^# (.*)", convert_sh, markdown, flags=re.MULTILINE) + markdown = re.sub(r"^## (.*)", convert_ss, markdown, flags=re.MULTILINE) + return markdown - markdown = re.sub(r"^## (.*)", convert_main_section, markdown, flags=re.MULTILINE) - return re.sub(r"^### (.*)", convert_subsection, markdown, flags=re.MULTILINE) +def process_code(markdown): + output = [] + output.append(".nf\n\\fC") + for line in markdown.splitlines(): + output.append(line.replace("\\", r"\(rs")) + output.append("\\fR\n.fi") + return "\n".join(output) def process_lists(markdown): - markdown = process_special_characters(markdown) - markdown = process_formatting(markdown) - markdown = process_links(markdown) - output = [] - current_level = 0 - list_stack = [] - bullet_styles = [r"\\(bu", r"\\(sq", r"\\(ci"] + indent_levels = [] for line in markdown.splitlines(): match = re.match(r"^(\s*)([-*]|\d+\.)\s+(.*)", line) if not match: continue - indent, bullet, content = match.groups() - new_level = len(indent) // 4 - - # Handle list transitions - while current_level > new_level: - if list_stack: # Add safety check - output.append(f".RE\n\\fBEnd of Nested List (Level {current_level})\\fR\n") - current_level -= 1 - list_stack.pop() - - if new_level > current_level or not list_stack: - # Initialize stack if empty - list_type = 'ordered' if bullet[:-1].isdigit() else 'unordered' - output.append( - f"\\fBStart of Nested List (Level {new_level}) " - f"[{list_type.upper()}]\\fR\n" - f".RS {4*(new_level+1)}n" - ) - current_level = new_level - list_stack.append({'type': list_type, 'counter': 1}) - - # Add check for empty stack before access - if not list_stack: - continue + spaces, bullet, item_text = match.groups() + level = len(spaces) - # Format list items - if list_stack[-1]['type'] == 'ordered': - output.append(f'.IP "{list_stack[-1]["counter"]}." {4*(current_level+1)}n') - list_stack[-1]["counter"] += 1 - else: - bullet = bullet_styles[current_level % len(bullet_styles)] - output.append(f'.IP "{bullet}" {4*(current_level+1)}n') - - output.append(f"{content}\n") + while indent_levels and indent_levels[-1] > level: + output.append(".RE") + indent_levels.pop() - # Close remaining lists - while current_level > 0 and list_stack: - output.append(f".RE\n\\fBEnd of Nested List (Level {current_level})\\fR\n") - current_level -= 1 - list_stack.pop() + if not indent_levels or indent_levels[-1] < level: + output.append(".RS 4n") + indent_levels.append(level) - return "".join(output) + if re.match(r"^\d+\.$", bullet): + output.append(f'.IP "{bullet}" 4n\n{item_text}') + else: + output.append(f".IP \\(bu 4n\n{item_text}") -def process_tables(markdown): - processed = process_formatting(markdown) - lines = processed.split('\n') - - if not lines or len(lines[0].strip()) == 0: - return "" - - table = [ - "\\fBStart of Table\\fR", - ".TS", - "allbox tab(|);", - "l " * len(lines[0].split("|")) + "." - ] - - for i, line in enumerate(lines): - cells = [c.strip() for c in line.strip('|').split('|')] - if i == 0: - table.append("_") - table.append("|" + "|".join(cells) + "|") - - table.append(".TE\n\\fBEnd of Table\\fR") - return '\n'.join(table) - -def process_parameters(markdown): - """Handle parameter definitions with bold formatting""" - return re.sub( - r"^\*\*([a-z0-9_]*)\*\*=\*([a-z]*)\*( \*\*\[required\]\*\*)?", - r'.IP "\\fB\1\\fR=*\2*\3" 4m', - markdown, - flags=re.MULTILINE, - ) - -def process_flags(markdown): - """Handle command-line flags with consistent formatting""" - return re.sub( - r"^\*\*-(.*?)\*\*", - r'.IP "\\fB-\1\\fR" 4m', - markdown, - flags=re.MULTILINE - ) + while indent_levels: + output.append(".RE") + indent_levels.pop() -def process_code(markdown): - """Preserve code blocks with monospace formatting""" - in_code_block = False - output = [] - for line in markdown.splitlines(): - if line.lstrip().startswith("```"): - if in_code_block: - output.append("\\fR\n.fi\n") - else: - output.append(".nf\n\\fC\n") - in_code_block = not in_code_block - else: - output.append(re.sub(r"\\fC", r"\\fC ", line)) return "\n".join(output) -def process_formatting(markdown): - markdown = re.sub(r"\*\*\s*(\S(.*?\S)?)\s*\*\*", r"\\fB \1 \\fR", markdown, flags=re.DOTALL) - markdown = re.sub(r"\*\s*(\S(.*?\S)?)\s*\*", r"\\fI \1 \\fR", markdown, flags=re.DOTALL) - markdown = re.sub(r"\*\*\*\s*(\S(.*?\S)?)\s*\*\*\*", r"\\fB\\fI \1 \\fR\\fR", markdown, flags=re.DOTALL) - - return markdown - -def process_links(markdown): - """Replace Markdown links with display text""" - markdown = re.sub(r"!\[.*?\]\(.*?\)", "", markdown) - return re.sub(r"\[(.*?)\]\((.*?)\)", r"\1", markdown) -bullet_styles = [r"\\(bu", r"\\(sq", r"\\(ci"] # Use raw strings with double escapes +def process_special_characters(text): + text = text.replace(r"\[", "[").replace(r"\]", "]") + text = text.replace(r"\#", "#").replace(r"\>", ">").replace(r"\<", "<") + text = text.replace("`", "") + text = re.sub(r"(?<=\S) {2,}(?=\S)", " ", text) + return text.replace("\\", r"\(rs") -def process_special_characters(markdown): - """Handle special characters""" - markdown = markdown.replace(r"\[", "[") - markdown = markdown.replace(r"\]", "]") - markdown = markdown.replace(r"\#", "#") - markdown = re.sub(r"(?<=\S) {2,}(?=\S)", " ", markdown) - return re.sub(r"\\", r"\(rs", markdown) +def process_default(markdown): + markdown = process_special_characters(markdown) + markdown = process_headings(markdown) + return markdown def convert_markdown_to_man(input_file, output_file): - markdown = Path(input_file).read_text() + markdown = Path(input_file).read_text(encoding='utf-8') markdown = strip_yaml_from_markdown(markdown) blocks = parse_markdown(markdown) - - man_page = [ - '.TH I.ATCORR 1 "GRASS GIS Manual"', - '.SH NAME\ni.atcorr \\- Atmospheric correction using 6S algorithm' - ] + result = ['.TH "MANPAGE" "1" "" "" ""'] for block in blocks: - content_type = block["type"] - content = block["markdown"] - - if content_type == "code": - man_page.append(process_code(content)) - elif content_type == "list": - man_page.append(process_lists(content)) - elif content_type == "table": - man_page.append(process_tables(content)) + if block["type"] == "code": + result.append(process_code(block["markdown"])) + elif block["type"] == "list": + result.append(process_lists(block["markdown"])) + elif block["type"] == "table": + result.append(process_tables(block["markdown"])) else: - processed = process_default(content) - man_page.append(processed) - - Path(output_file).write_text("\n".join(man_page)) - -def process_default(markdown): - """Default processing pipeline""" - transformations = [ - process_parameters, - process_flags, - lambda x: x.replace("    ", ""), - process_special_characters, - process_formatting, - process_links, - process_headings - ] - for transform in transformations: - markdown = transform(markdown) - return markdown + result.append(process_default(block["markdown"])) + Path(output_file).write_text("\n".join(result), encoding='utf-8') + print(f"Successfully created: {output_file}") -def main(): - parser = argparse.ArgumentParser( - description="Convert enhanced Markdown to man page format" - ) - parser.add_argument("input_file", help="Input Markdown file") - parser.add_argument("output_file", help="Output man page file") +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Convert Markdown file to man page") + parser.add_argument('input_file', type=str, help="Path to the input Markdown file") + parser.add_argument('output_file', type=str, help="Path to the output man page file") + args = parser.parse_args() convert_markdown_to_man(args.input_file, args.output_file) - -if __name__ == "__main__": - main() From 78056b03907d6044189798012bd9e6be4f24baed Mon Sep 17 00:00:00 2001 From: Sachintha Nadeeshan Date: Tue, 8 Apr 2025 17:34:57 +0530 Subject: [PATCH 5/7] Enhance Markdown conversion with table support, improved formatting handling, and groff escapes - Added comprehensive table support using groff's TS/TE macros - Implemented more robust Markdown formatting handling (bold/italic/combined) - Introduced proper groff special character escaping (~, ^, `, etc) - Improved list processing with better indentation handling - Added paragraph (.PP) and section formatting - Simplified YAML front matter removal - Implemented block-based parsing architecture - Added UTF-8 encoding support for file operations - Enhanced code block formatting with proper font switching - Unified heading conversion logic - Improved text wrapping and whitespace handling - Added support for alternative Markdown syntax (bold/italic) - Removed fragile regex substitutions in favor of structured parsing - Added proper documentation section formatting --- utils/markdown2man.py | 234 ++++++++++++++++++++++++++---------------- 1 file changed, 144 insertions(+), 90 deletions(-) diff --git a/utils/markdown2man.py b/utils/markdown2man.py index 1b38dff23f3..f5697fba22a 100644 --- a/utils/markdown2man.py +++ b/utils/markdown2man.py @@ -20,150 +20,204 @@ def strip_yaml_from_markdown(markdown): if markdown.startswith('---'): parts = markdown.split('---', 2) - if len(parts) == 3: - return parts[2].strip() + return parts[2].strip() if len(parts) == 3 else markdown return markdown +def replace_markdown_formatting(text): + text = re.sub(r'\*\*(.*?)\*\*', r'\\fB\1\\fR', text) + text = re.sub(r'__(.*?)__', r'\\fB\1\\fR', text) + text = re.sub(r'\*(?!\*)(.*?)\*', r'\\fI\1\\fR', text) + text = re.sub(r'_(?!_)(.*?)_', r'\\fI\1\\fR', text) + return text + def process_tables(markdown): - lines = markdown.strip().splitlines() - if len(lines) < 2: - return markdown # Not a valid table + lines = [line.strip() for line in markdown.splitlines() if line.strip()] + if len(lines) < 2 or not all('|' in line for line in lines[:2]): + return markdown - headers = lines[0].strip("|").split("|") - rows = [line.strip("|").split("|") for line in lines[2:] if '|' in line] + # Clean up table headers and separators + headers = lines[0].strip('|').split('|') + separator = lines[1].strip('|').split('|') + rows = [line.strip('|').split('|') for line in lines[2:] if '|' in line] + # Remove box-drawing characters + clean = lambda s: re.sub(r'[┌┐├┤┬┴─]', '', s).strip() + output = [".TS", "allbox;", "c" * len(headers) + "."] - output.append("\t".join([h.strip() for h in headers])) + processed_headers = [replace_markdown_formatting(clean(h)) for h in headers] + output.append("\t".join(processed_headers)) for row in rows: - output.append("\t".join([cell.strip() for cell in row])) + processed_cells = [replace_markdown_formatting(clean(cell)) for cell in row] + output.append("\t".join(processed_cells)) output.append(".TE") return "\n".join(output) def parse_markdown(markdown): blocks = [] - lines = markdown.splitlines() - current = {"type": "text", "markdown": ""} - - def flush(): - nonlocal current - if current["markdown"].strip(): - blocks.append(current) - current = {"type": "text", "markdown": ""} - + current_block = {"type": "text", "content": []} in_code = False - for i, line in enumerate(lines): - if line.strip().startswith("```"): - flush() + in_list = False + in_table = False + + for line in markdown.splitlines(): + line = line.rstrip() + + # Detect code blocks + if line.strip().startswith('```'): + if current_block["content"]: + blocks.append(current_block) in_code = not in_code - current = {"type": "code" if in_code else "text", "markdown": ""} + current_block = {"type": "code", "content": [line]} + continue + + if in_code: + current_block["content"].append(line) continue - if re.match(r"^(\s*)([-*]|\d+\.)\s+.*", line): - if current["type"] != "list": - flush() - current = {"type": "list", "markdown": ""} - elif current["type"] != "code" and not line.strip(): - flush() + # Detect tables + if '|' in line and (not in_table or line.strip().startswith('|')): + if not in_table and current_block["content"]: + blocks.append(current_block) + current_block = {"type": "table", "content": []} + in_table = True + current_block["content"].append(line) + continue + elif in_table: + blocks.append(current_block) + current_block = {"type": "text", "content": []} + in_table = False + + # Detect lists + list_match = re.match(r'^(\s*)([-*•]|\d+\.)\s+', line) + if list_match: + if not in_list and current_block["content"]: + blocks.append(current_block) + current_block = {"type": "list", "content": []} + in_list = True + current_block["content"].append(line) + continue + elif in_list: + if line.strip() == '': + blocks.append(current_block) + current_block = {"type": "text", "content": []} + in_list = False + else: + current_block["content"].append(line) continue - if re.match(r"^\|.*\|$", line) and "|" in lines[i + 1] if i + 1 < len(lines) else False: - flush() - current = {"type": "table", "markdown": ""} - current["markdown"] += line + "\n" + # Detect headings + if re.match(r'^#{1,3} ', line): + if current_block["content"]: + blocks.append(current_block) + current_block = {"type": "heading", "content": [line]} + blocks.append(current_block) + current_block = {"type": "text", "content": []} continue - current["markdown"] += line + "\n" + current_block["content"].append(line) - flush() + if current_block["content"]: + blocks.append(current_block) + return blocks def process_headings(markdown): - def convert_sh(match): - return f".SH {match.group(1).upper()}" - def convert_ss(match): - return f".SS {match.group(1)}" - - markdown = re.sub(r"^# (.*)", convert_sh, markdown, flags=re.MULTILINE) - markdown = re.sub(r"^## (.*)", convert_ss, markdown, flags=re.MULTILINE) - return markdown + def heading_replacer(match): + level = len(match.group(1)) + text = replace_markdown_formatting(match.group(2).strip()) + return f'.{"SH" if level == 1 else "SS"} "{text}"' + + return re.sub( + r'^(#{1,3}) (.*)$', + heading_replacer, + markdown, + flags=re.MULTILINE + ) def process_code(markdown): - output = [] - output.append(".nf\n\\fC") - for line in markdown.splitlines(): - output.append(line.replace("\\", r"\(rs")) - output.append("\\fR\n.fi") - return "\n".join(output) + code_lines = [line for line in markdown.splitlines() if not line.strip().startswith('```')] + return ".nf\n\\fC\n" + "\n".join(code_lines) + "\n\\fR\n.fi" def process_lists(markdown): output = [] - indent_levels = [] - + indent_stack = [0] + for line in markdown.splitlines(): - match = re.match(r"^(\s*)([-*]|\d+\.)\s+(.*)", line) + match = re.match(r'^(\s*)([-*•]|\d+\.)\s+(.*)', line) if not match: continue + + indent = len(match.group(1)) + bullet = match.group(2) + text = replace_markdown_formatting(match.group(3)) - spaces, bullet, item_text = match.groups() - level = len(spaces) - - while indent_levels and indent_levels[-1] > level: + while indent_stack[-1] > indent: output.append(".RE") - indent_levels.pop() + indent_stack.pop() - if not indent_levels or indent_levels[-1] < level: - output.append(".RS 4n") - indent_levels.append(level) + if indent > indent_stack[-1]: + output.append(".RS 4") + indent_stack.append(indent) - if re.match(r"^\d+\.$", bullet): - output.append(f'.IP "{bullet}" 4n\n{item_text}') + if bullet.isdigit(): + output.append(f'.IP "{bullet}." 4\n{text}') else: - output.append(f".IP \\(bu 4n\n{item_text}") + output.append(f'.IP "\\(bu" 4\n{text}') - while indent_levels: + while len(indent_stack) > 1: output.append(".RE") - indent_levels.pop() + indent_stack.pop() return "\n".join(output) +def process_paragraphs(text): + text = re.sub(r'\s+', ' ', text).strip() + text = process_special_characters(text) + text = replace_markdown_formatting(text) + return text + def process_special_characters(text): - text = text.replace(r"\[", "[").replace(r"\]", "]") - text = text.replace(r"\#", "#").replace(r"\>", ">").replace(r"\<", "<") - text = text.replace("`", "") - text = re.sub(r"(?<=\S) {2,}(?=\S)", " ", text) - return text.replace("\\", r"\(rs") - -def process_default(markdown): - markdown = process_special_characters(markdown) - markdown = process_headings(markdown) - return markdown + replacements = { + '[': r'\[', + ']': r'\]', + '\\': r'\(rs', + '~': r'\(ti', + '^': r'\(ha', + '`': r'\(ga' + } + for char, escape in replacements.items(): + text = text.replace(char, escape) + return text def convert_markdown_to_man(input_file, output_file): - markdown = Path(input_file).read_text(encoding='utf-8') - markdown = strip_yaml_from_markdown(markdown) - blocks = parse_markdown(markdown) + content = Path(input_file).read_text(encoding='utf-8') + content = strip_yaml_from_markdown(content) + blocks = parse_markdown(content) - result = ['.TH "MANPAGE" "1" "" "" ""'] + man_page = ['.TH "MANPAGE" "1" "" "" ""'] + for block in blocks: if block["type"] == "code": - result.append(process_code(block["markdown"])) + man_page.append(process_code('\n'.join(block["content"]))) elif block["type"] == "list": - result.append(process_lists(block["markdown"])) + man_page.append(process_lists('\n'.join(block["content"]))) elif block["type"] == "table": - result.append(process_tables(block["markdown"])) + man_page.append(process_tables('\n'.join(block["content"]))) + elif block["type"] == "heading": + man_page.append(process_headings('\n'.join(block["content"]))) else: - result.append(process_default(block["markdown"])) + processed_text = process_paragraphs('\n'.join(block["content"])) + if processed_text: + man_page.append(f'.PP\n{processed_text}') - Path(output_file).write_text("\n".join(result), encoding='utf-8') - print(f"Successfully created: {output_file}") + Path(output_file).write_text('\n'.join(man_page), encoding='utf-8') + print(f"Man page generated: {output_file}") if __name__ == "__main__": - parser = argparse.ArgumentParser(description="Convert Markdown file to man page") - parser.add_argument('input_file', type=str, help="Path to the input Markdown file") - parser.add_argument('output_file', type=str, help="Path to the output man page file") - + parser = argparse.ArgumentParser(description="Convert Markdown to man page") + parser.add_argument('input', help="Input Markdown file") + parser.add_argument('output', help="Output man page file") args = parser.parse_args() - - convert_markdown_to_man(args.input_file, args.output_file) + convert_markdown_to_man(args.input, args.output) From 01e61e156c9c755adcc053028d416108dd6b687e Mon Sep 17 00:00:00 2001 From: Sachintha Nadeeshan Date: Tue, 15 Apr 2025 19:00:45 +0530 Subject: [PATCH 6/7] enhanced the table strucher and fix the header issue --- utils/markdown2man.py | 261 ++++++++++++++++++++++++------------------ 1 file changed, 151 insertions(+), 110 deletions(-) diff --git a/utils/markdown2man.py b/utils/markdown2man.py index f5697fba22a..7ee008c80c7 100644 --- a/utils/markdown2man.py +++ b/utils/markdown2man.py @@ -17,67 +17,166 @@ import argparse from pathlib import Path -def strip_yaml_from_markdown(markdown): - if markdown.startswith('---'): - parts = markdown.split('---', 2) - return parts[2].strip() if len(parts) == 3 else markdown - return markdown +# Remove YAML front matter from Markdown content +def strip_yaml_from_markdown(content): + if content.startswith('---'): + parts = content.split('---', 2) + return parts[2].strip() if len(parts) == 3 else content + return content +# Replace Markdown bold/italic with man page formatting def replace_markdown_formatting(text): text = re.sub(r'\*\*(.*?)\*\*', r'\\fB\1\\fR', text) text = re.sub(r'__(.*?)__', r'\\fB\1\\fR', text) text = re.sub(r'\*(?!\*)(.*?)\*', r'\\fI\1\\fR', text) text = re.sub(r'_(?!_)(.*?)_', r'\\fI\1\\fR', text) return text + +# Remove Markdown-style links while preserving link text +def remove_links(text): + text = re.sub(r'!\[(.*?)\]\(.*?\)', r'\1', text) + return re.sub(r'\[(.*?)\]\(.*?\)', r'\1', text) +# Convert Markdown tables to man page table format def process_tables(markdown): lines = [line.strip() for line in markdown.splitlines() if line.strip()] if len(lines) < 2 or not all('|' in line for line in lines[:2]): return markdown - # Clean up table headers and separators - headers = lines[0].strip('|').split('|') - separator = lines[1].strip('|').split('|') - rows = [line.strip('|').split('|') for line in lines[2:] if '|' in line] + headers = [cell.strip() for cell in lines[0].strip('|').split('|')] + rows = [] + for line in lines[2:]: + if '|' not in line: + continue + cells = [cell.strip() for cell in line.strip('|').split('|')] + if len(cells) == len(headers): + rows.append(cells) - # Remove box-drawing characters - clean = lambda s: re.sub(r'[┌┐├┤┬┴─]', '', s).strip() - - output = [".TS", "allbox;", "c" * len(headers) + "."] - processed_headers = [replace_markdown_formatting(clean(h)) for h in headers] - output.append("\t".join(processed_headers)) + clean = lambda s: re.sub(r'[\u250C-\u257F]', '', s).strip() + output = ['.TS', 'allbox;', 'c' * len(headers) + '.'] + output.append('\t'.join([replace_markdown_formatting(clean(h)) for h in headers])) for row in rows: - processed_cells = [replace_markdown_formatting(clean(cell)) for cell in row] - output.append("\t".join(processed_cells)) + output.append('\t'.join([replace_markdown_formatting(clean(cell)) for cell in row])) + output.append('.sp 1') + + output.append('.TE') + return '\n'.join(output) + +# Process code blocks in Markdown, formatting for man pages +def process_code(markdown): + code_lines = [] + in_code = False + for line in markdown.split('\n'): + if line.strip().startswith('```'): + in_code = not in_code + if in_code: + code_lines.append('.nf\n\\fC') + else: + code_lines.append('\\fR\n.fi') + else: + code_lines.append(line.replace('\\', '\\\\')) + return '\n'.join(code_lines) + +# Convert Markdown lists to man page list format +def process_lists(markdown): + output = [] + indent_stack = [0] + + for line in markdown.splitlines(): + match = re.match(r'^(\s*)([-*\u2022]|\d+\.)\s+(.*)', line) + if not match: + continue + + indent = len(match.group(1)) + bullet = match.group(2) + text = replace_markdown_formatting(remove_links(match.group(3))) + + while indent_stack[-1] > indent: + output.append(".RE") + indent_stack.pop() - output.append(".TE") - return "\n".join(output) + if indent > indent_stack[-1]: + output.append(".RS 4") + indent_stack.append(indent) + + output.append(f'.IP "{bullet}" 4\n{text}') + + while len(indent_stack) > 1: + output.append(".RE") + indent_stack.pop() + + return '\n'.join(output) + +# Convert Markdown headings to man page SH/SS format +def process_headings(markdown): + def heading_replacer(match): + level = len(match.group(1)) + text = replace_markdown_formatting(remove_links(match.group(2).strip())) + return f'.{"SH" if level == 1 else "SS"} "{text}"' + + return re.sub(r'^(#{1,3}) (.*)$', heading_replacer, markdown, flags=re.MULTILINE) + +# Process regular text paragraphs +def process_paragraphs(text): + text = remove_links(text) + text = re.sub(r'\s+', ' ', text).strip() + text = replace_markdown_formatting(text) + return text -def parse_markdown(markdown): +# Special formatting for AUTHORS section +def format_authors_block(lines): + result = ['.SH AUTHORS'] + for i in range(0, len(lines), 2): + if i + 1 < len(lines): + title = lines[i].strip('* ').strip(':') + author = lines[i+1].strip() + result.append('.PP') + result.append(f'\\fI{title}:\\fR') + result.append('.br') + result.append(remove_links(author)) + return '\n'.join(result) + +# Parse Markdown content into blocks of different types +def parse_markdown(content): blocks = [] current_block = {"type": "text", "content": []} in_code = False in_list = False in_table = False + in_authors = False - for line in markdown.splitlines(): - line = line.rstrip() - - # Detect code blocks - if line.strip().startswith('```'): + for line in content.split('\n'): + stripped = line.strip() + + if stripped.startswith('```'): if current_block["content"]: blocks.append(current_block) in_code = not in_code current_block = {"type": "code", "content": [line]} continue - + if in_code: current_block["content"].append(line) continue - # Detect tables - if '|' in line and (not in_table or line.strip().startswith('|')): + if '## AUTHORS' in line: + in_authors = True + if current_block["content"]: + blocks.append(current_block) + current_block = {"type": "authors", "content": []} + continue + + if in_authors: + if stripped.startswith('##') and '## AUTHORS' not in stripped: + in_authors = False + blocks.append(current_block) + current_block = {"type": "text", "content": [line]} + else: + current_block["content"].append(line) + continue + + if '|' in line and (not in_table or stripped.startswith('|')): if not in_table and current_block["content"]: blocks.append(current_block) current_block = {"type": "table", "content": []} @@ -89,8 +188,7 @@ def parse_markdown(markdown): current_block = {"type": "text", "content": []} in_table = False - # Detect lists - list_match = re.match(r'^(\s*)([-*•]|\d+\.)\s+', line) + list_match = re.match(r'^(\s*)([-*\u2022]|\d+\.)\s+', line) if list_match: if not in_list and current_block["content"]: blocks.append(current_block) @@ -99,7 +197,7 @@ def parse_markdown(markdown): current_block["content"].append(line) continue elif in_list: - if line.strip() == '': + if stripped == '': blocks.append(current_block) current_block = {"type": "text", "content": []} in_list = False @@ -107,8 +205,8 @@ def parse_markdown(markdown): current_block["content"].append(line) continue - # Detect headings - if re.match(r'^#{1,3} ', line): + heading_match = re.match(r'^(#{1,3}) (.*)', line) + if heading_match: if current_block["content"]: blocks.append(current_block) current_block = {"type": "heading", "content": [line]} @@ -120,100 +218,43 @@ def parse_markdown(markdown): if current_block["content"]: blocks.append(current_block) - return blocks -def process_headings(markdown): - def heading_replacer(match): - level = len(match.group(1)) - text = replace_markdown_formatting(match.group(2).strip()) - return f'.{"SH" if level == 1 else "SS"} "{text}"' - - return re.sub( - r'^(#{1,3}) (.*)$', - heading_replacer, - markdown, - flags=re.MULTILINE - ) - -def process_code(markdown): - code_lines = [line for line in markdown.splitlines() if not line.strip().startswith('```')] - return ".nf\n\\fC\n" + "\n".join(code_lines) + "\n\\fR\n.fi" - -def process_lists(markdown): - output = [] - indent_stack = [0] - - for line in markdown.splitlines(): - match = re.match(r'^(\s*)([-*•]|\d+\.)\s+(.*)', line) - if not match: - continue - - indent = len(match.group(1)) - bullet = match.group(2) - text = replace_markdown_formatting(match.group(3)) - - while indent_stack[-1] > indent: - output.append(".RE") - indent_stack.pop() - - if indent > indent_stack[-1]: - output.append(".RS 4") - indent_stack.append(indent) - - if bullet.isdigit(): - output.append(f'.IP "{bullet}." 4\n{text}') - else: - output.append(f'.IP "\\(bu" 4\n{text}') - - while len(indent_stack) > 1: - output.append(".RE") - indent_stack.pop() - - return "\n".join(output) - -def process_paragraphs(text): - text = re.sub(r'\s+', ' ', text).strip() - text = process_special_characters(text) - text = replace_markdown_formatting(text) - return text - -def process_special_characters(text): - replacements = { - '[': r'\[', - ']': r'\]', - '\\': r'\(rs', - '~': r'\(ti', - '^': r'\(ha', - '`': r'\(ga' - } - for char, escape in replacements.items(): - text = text.replace(char, escape) - return text - +# Main function to convert Markdown to man page format def convert_markdown_to_man(input_file, output_file): content = Path(input_file).read_text(encoding='utf-8') content = strip_yaml_from_markdown(content) blocks = parse_markdown(content) - man_page = ['.TH "MANPAGE" "1" "" "" ""'] - + man_page = [ + '.TH "i.atcorr" "1" "" "GRASS 7.9.dev" "GRASS GIS User\'s Manual"', + '.ad l', + '.SH NAME', + '\\fI\\fBi.atcorr\\fR\\fR - Performs atmospheric correction using the 6S algorithm.', + '.br', + '6S - Second Simulation of Satellite Signal in the Solar Spectrum.', + '.SH KEYWORDS', + 'imagery, atmospheric correction, radiometric conversion, radiance, reflectance, satellite' + ] + for block in blocks: + content_text = '\n'.join(block["content"]) if block["type"] == "code": - man_page.append(process_code('\n'.join(block["content"]))) + man_page.append(process_code(content_text)) elif block["type"] == "list": - man_page.append(process_lists('\n'.join(block["content"]))) + man_page.append(process_lists(content_text)) elif block["type"] == "table": - man_page.append(process_tables('\n'.join(block["content"]))) + man_page.append(process_tables(content_text)) elif block["type"] == "heading": - man_page.append(process_headings('\n'.join(block["content"]))) + man_page.append(process_headings(content_text)) + elif block["type"] == "authors": + man_page.append(format_authors_block(block["content"])) else: - processed_text = process_paragraphs('\n'.join(block["content"])) + processed_text = process_paragraphs(content_text) if processed_text: man_page.append(f'.PP\n{processed_text}') Path(output_file).write_text('\n'.join(man_page), encoding='utf-8') - print(f"Man page generated: {output_file}") if __name__ == "__main__": parser = argparse.ArgumentParser(description="Convert Markdown to man page") From 872504dab4e306e555d348fab1ae3bad602830d7 Mon Sep 17 00:00:00 2001 From: Sachintha Nadeeshan Date: Wed, 16 Apr 2025 18:43:49 +0530 Subject: [PATCH 7/7] improved formatting and structure --- utils/markdown2man.py | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/utils/markdown2man.py b/utils/markdown2man.py index 7ee008c80c7..e1fa9ce4404 100644 --- a/utils/markdown2man.py +++ b/utils/markdown2man.py @@ -17,27 +17,23 @@ import argparse from pathlib import Path -# Remove YAML front matter from Markdown content def strip_yaml_from_markdown(content): if content.startswith('---'): parts = content.split('---', 2) return parts[2].strip() if len(parts) == 3 else content return content -# Replace Markdown bold/italic with man page formatting def replace_markdown_formatting(text): text = re.sub(r'\*\*(.*?)\*\*', r'\\fB\1\\fR', text) text = re.sub(r'__(.*?)__', r'\\fB\1\\fR', text) text = re.sub(r'\*(?!\*)(.*?)\*', r'\\fI\1\\fR', text) text = re.sub(r'_(?!_)(.*?)_', r'\\fI\1\\fR', text) return text - -# Remove Markdown-style links while preserving link text + def remove_links(text): text = re.sub(r'!\[(.*?)\]\(.*?\)', r'\1', text) return re.sub(r'\[(.*?)\]\(.*?\)', r'\1', text) -# Convert Markdown tables to man page table format def process_tables(markdown): lines = [line.strip() for line in markdown.splitlines() if line.strip()] if len(lines) < 2 or not all('|' in line for line in lines[:2]): @@ -63,7 +59,6 @@ def process_tables(markdown): output.append('.TE') return '\n'.join(output) -# Process code blocks in Markdown, formatting for man pages def process_code(markdown): code_lines = [] in_code = False @@ -78,7 +73,6 @@ def process_code(markdown): code_lines.append(line.replace('\\', '\\\\')) return '\n'.join(code_lines) -# Convert Markdown lists to man page list format def process_lists(markdown): output = [] indent_stack = [0] @@ -108,7 +102,6 @@ def process_lists(markdown): return '\n'.join(output) -# Convert Markdown headings to man page SH/SS format def process_headings(markdown): def heading_replacer(match): level = len(match.group(1)) @@ -117,14 +110,12 @@ def heading_replacer(match): return re.sub(r'^(#{1,3}) (.*)$', heading_replacer, markdown, flags=re.MULTILINE) -# Process regular text paragraphs def process_paragraphs(text): text = remove_links(text) text = re.sub(r'\s+', ' ', text).strip() text = replace_markdown_formatting(text) return text -# Special formatting for AUTHORS section def format_authors_block(lines): result = ['.SH AUTHORS'] for i in range(0, len(lines), 2): @@ -137,7 +128,6 @@ def format_authors_block(lines): result.append(remove_links(author)) return '\n'.join(result) -# Parse Markdown content into blocks of different types def parse_markdown(content): blocks = [] current_block = {"type": "text", "content": []} @@ -220,7 +210,6 @@ def parse_markdown(content): blocks.append(current_block) return blocks -# Main function to convert Markdown to man page format def convert_markdown_to_man(input_file, output_file): content = Path(input_file).read_text(encoding='utf-8') content = strip_yaml_from_markdown(content)