From c6d63cb73d999e5ee8a9ecca82ee70e621649094 Mon Sep 17 00:00:00 2001
From: Sachintha Nadeeshan <snkodikara52@gmail.com>
Date: Mon, 24 Mar 2025 23:10:26 +0530
Subject: [PATCH 1/7] Add table support and improve man page formatting

---
 utils/markdown2man.py | 244 ++++++++++++++++++++----------------------
 1 file changed, 119 insertions(+), 125 deletions(-)

diff --git a/utils/markdown2man.py b/utils/markdown2man.py
index eb4d4cbc42d..8bd68d01c87 100644
--- a/utils/markdown2man.py
+++ b/utils/markdown2man.py
@@ -17,37 +17,70 @@
 import re
 from pathlib import Path
 
-
 def strip_yaml_from_markdown(content):
-    # Remove YAML front matter
+    """Remove YAML front matter from markdown content."""
     return re.sub(r"^---\n.*?\n---\n", "", content, flags=re.DOTALL)
 
+def get_first_sentence(text):
+    """Extract first meaningful paragraph for NAME section."""
+    paragraphs = [p.strip() for p in text.split('\n\n') if p.strip()]
+    for para in paragraphs:
+        if not para.startswith('#') and len(para.split()) > 3:
+            clean = re.sub(r'[\*_`]', '', para.split('\n')[0])
+            return clean[:80]
+    return "Manages module functionality"
+
+def convert_table(md_table):
+    """Convert markdown tables to man page format with proper alignment."""
+    lines = [line.strip() for line in md_table.split('\n') 
+             if line.strip() and '|' in line]
+    lines = [line for line in lines if not re.match(r'^[\|\-\s]+$', line)]
+    
+    # Calculate column widths
+    col_widths = []
+    for line in lines:
+        cells = [cell.strip() for cell in line.strip('|').split('|')]
+        for i, cell in enumerate(cells):
+            if i >= len(col_widths):
+                col_widths.append(0)
+            col_widths[i] = max(col_widths[i], len(cell))
+    
+    # Format with consistent spacing
+    output = []
+    for line in lines:
+        cells = [cell.strip() for cell in line.strip('|').split('|')]
+        padded = [f" {cell.ljust(col_widths[i])} " for i, cell in enumerate(cells)]
+        output.append(''.join(padded))
+    return '\n'.join(output) + '\n'
 
 def parse_markdown(content):
+    """Parse markdown content into typed blocks (code, lists, default)."""
+    # Handle tables first
+    content = re.sub(
+        r'(\|.+\|(\n\|.+\|)+)',
+        lambda m: f"TABLE_BLOCK:{m.group(0)}:END_TABLE",
+        content
+    )
+
     lines = content.splitlines()
     processing_block = []
     processed_content = []
-
     buffer = ""
     state = "default"
 
     for line in lines:
         if line.strip().startswith("```"):
-            # end of code block
             if state == "code":
                 processing_block.append(line)
-                processed_content.append(
-                    {"markdown": "\n".join(processing_block), "type": state}
-                )
+                processed_content.append({"markdown": "\n".join(processing_block), "type": state})
                 processing_block = []
                 state = "default"
-            # start of code block
             else:
-                processed_content.append(
-                    {"markdown": "\n".join(processing_block), "type": state}
-                )
-                processing_block = []
-                processing_block.append(line)
+                if buffer:
+                    processing_block.append(buffer)
+                    buffer = ""
+                processed_content.append({"markdown": "\n".join(processing_block), "type": state})
+                processing_block = [line]
                 state = "code"
             continue
 
@@ -59,23 +92,17 @@ def parse_markdown(content):
             if buffer:
                 processing_block.append(buffer)
                 buffer = ""
-            # start of ordered list
             if state != "list":
-                processed_content.append(
-                    {"markdown": "\n".join(processing_block), "type": state}
-                )
+                processed_content.append({"markdown": "\n".join(processing_block), "type": state})
                 processing_block = []
                 state = "list"
 
-        # empty line at the start and end of code, list blocks
         if line == "":
             if buffer:
                 processing_block.append(buffer)
                 buffer = ""
             if state != "default":
-                processed_content.append(
-                    {"markdown": "\n".join(processing_block), "type": state}
-                )
+                processed_content.append({"markdown": "\n".join(processing_block), "type": state})
                 processing_block = []
                 state = "default"
             processing_block.append(line)
@@ -93,164 +120,131 @@ def parse_markdown(content):
     if buffer:
         processing_block.append(buffer)
     if processing_block:
-        processed_content.append(
-            {"markdown": "\n".join(processing_block), "type": state}
-        )
+        processed_content.append({"markdown": "\n".join(processing_block), "type": state})
 
-    merged_content = []
+    # Merge adjacent blocks of same type
+    merged = []
     for item in processed_content:
         if not item["markdown"]:
             continue
-        if merged_content and merged_content[-1]["type"] == item["type"]:
-            merged_content[-1]["markdown"] += "\n" + item["markdown"]
+        if merged and merged[-1]["type"] == item["type"]:
+            merged[-1]["markdown"] += "\n" + item["markdown"]
         else:
-            merged_content.append(item)
-
-    return merged_content
-
-
-def process_links(markdown):
-    """Replace Markdown links with only their display text."""
-    markdown = re.sub(r"!\[.*?\]\(.*?\)", "", markdown)
-    return re.sub(r"\[(.*?)\]\((.*?)\)", r"\1", markdown)
-
+            merged.append(item)
+    return merged
 
 def process_parameters(markdown):
-    return re.sub(
-        r"^\*\*([a-z0-9_]*)\*\*=\*([a-z]*)\*( \*\*\[required\]\*\*)?",
-        r'.IP "**\1**=*\2*\3" 4m',
-        markdown,
-        flags=re.MULTILINE,
+    """Handle GRASS parameters and flags with proper .IP formatting."""
+    # Process flags (-p) and parameters (region)
+    markdown = re.sub(
+        r'([^\w\n])(\*\*|\*|_)([a-z0-9_\-]+)(\*\*|\*|_)([^\w]|$)',
+        r'\1\n.IP "\2\3\4" 4\n\5',
+        markdown
     )
-
-
-def process_flags(markdown):
-    return re.sub(r"^\*\*-(.*?)\*\*", r'.IP "**-\1**" 4m', markdown, flags=re.MULTILINE)
-
+    # Clean up formatting
+    markdown = re.sub(r'\.IP\n\.IP', '.IP', markdown)
+    return re.sub(r'(\n\.IP "[^"]+" 4\n)\s+', r'\1', markdown)
 
 def process_formatting(markdown):
-    """Apply inline formatting for bold, italic, and bold+italic."""
+    """Apply man page formatting for bold/italic text."""
     markdown = re.sub(r"\*\*\*(.+?)\*\*\*", r"\\fB\\fI\1\\fR", markdown)
     markdown = re.sub(r"\*\*(.+?)\*\*", r"\\fB\1\\fR", markdown)
     return re.sub(r"\*(.+?)\*", r"\\fI\1\\fR", markdown)
 
-
-def process_br(markdown):
-    return re.sub(r"([^\n\s])  $", r"\1\n.br", markdown, flags=re.MULTILINE)
-
-
 def process_headings(markdown):
-    def convert_sh(match):
-        return f".SH {match.group(1).upper()}"
-
-    def convert_ss(match):
-        return f".SS {match.group(1)}"
-
-    markdown = re.sub(r"^#{1,2} (.*)", convert_sh, markdown, flags=re.MULTILINE)
-    return re.sub(r"^#{3,} (.*)", convert_ss, markdown, flags=re.MULTILINE)
-
+    """Convert markdown headings to man page sections."""
+    markdown = re.sub(r"^#{1,2} (.*)", r".SH \1".upper(), markdown, flags=re.MULTILINE)
+    return re.sub(r"^#{3,} (.*)", r".SS \1", markdown, flags=re.MULTILINE)
 
 def process_code(markdown):
+    """Format code blocks with proper man page syntax."""
     in_code_block = False
     output = []
     for line in markdown.splitlines():
         if line.lstrip().startswith("```"):
             if in_code_block:
-                output.append("\\fR\n.fi\n")  # End code block
+                output.append("\\fR\n.fi")
             else:
-                output.append(".nf\n\\fC\n")  # Start code block
+                lang = line.strip('`').strip()
+                output.append(f".nf\n\\fC\n{lang + ': ' if lang else ''}")
             in_code_block = not in_code_block
         else:
-            output.append(re.sub(r"\\", r"\(rs", line))
-
+            output.append(re.sub(r"\\", r"\\\\", line) if in_code_block else line)
     return "\n".join(output)
 
-
 def process_lists(markdown):
-    markdown = process_special_characters(markdown)
-    markdown = process_formatting(markdown)
-    markdown = process_links(markdown)
-
+    """Convert markdown lists to man page format."""
     output = []
     indent_levels = []
-
     for line in markdown.splitlines():
-        match = re.match(r"^(\s*)([-*]|\d+\.)\s+(.*)", line)  # Match bullets or numbers
+        match = re.match(r"^(\s*)([-*]|\d+\.)\s+(.*)", line)
         if not match:
-            continue  # Skip non-list lines (shouldn't happen if input is all lists)
-
-        spaces, bullet, item_text = match.groups()
-        level = len(spaces)  # Determine indentation level
-
+            continue
+        spaces, bullet, text = match.groups()
+        level = len(spaces)
+        
         while indent_levels and indent_levels[-1] > level:
-            output.append(".RE")  # Close previous indentation level
+            output.append(".RE")
             indent_levels.pop()
-
+            
         if not indent_levels or indent_levels[-1] < level:
-            output.append(".RS 4n")  # Open new indentation level
+            output.append(".RS 4n")
             indent_levels.append(level)
-
-        if re.match(r"^\d+\.$", bullet):  # Numbered list
-            output.append(f'.IP "{bullet}" 4n\n{item_text}')
-        else:  # Bullet list
-            output.append(".IP \\(bu 4n\n" + item_text)
-
-    # Close any remaining indentation levels
+            
+        output.append(f'.IP "{bullet}" 4n\n{text}' if bullet.isdigit() 
+                     else f'.IP \\(bu 4n\n{text}')
+    
     while indent_levels:
         output.append(".RE")
         indent_levels.pop()
-
     return "\n".join(output)
 
-
-def process_special_characters(markdown):
-    markdown = markdown.replace(r"\[", "[")
-    markdown = markdown.replace(r"\]", "]")
-    markdown = markdown.replace(r"\#", "#")
-    markdown = markdown.replace(r"\>", ">")
-    markdown = markdown.replace(r"\<", "<")
-    markdown = markdown.replace("`", "")
-    # eliminate extra spaces between words
-    markdown = re.sub(r"(?<=\S) {2,}(?=\S)", " ", markdown)
-    return re.sub(r"\\", r"\(rs", markdown)
-
-
-def process_default(markdown):
-    markdown = process_br(markdown)
-    markdown = process_parameters(markdown)
-    markdown = process_flags(markdown)
-    markdown = markdown.replace("&nbsp;&nbsp;&nbsp;&nbsp;", "")
-    markdown = process_special_characters(markdown)
-    markdown = process_formatting(markdown)
-    markdown = process_links(markdown)
-    return process_headings(markdown)
-
-
 def convert_markdown_to_man(input_file, output_file):
-    """Read Markdown file and convert to man page."""
-    markdown = Path(input_file).read_text()
+    """Main conversion function from markdown to man page format."""
+    markdown = Path(input_file).read_text(encoding='utf-8')
     markdown = strip_yaml_from_markdown(markdown)
+    
+    title = Path(input_file).stem.upper()
+    first_para = get_first_sentence(markdown.split('\n\n')[1]) if '\n\n' in markdown else ""
+    
     blocks = parse_markdown(markdown)
-    result = ['.TH MAN 1 "Manual"\n']
+    
+    result = [
+        f'.TH {title} 1 "GRASS GIS User\'s Manual"\n',
+        f'.SH NAME\n\\fB{title}\\fR \\- {first_para}\n',
+        f'.SH SYNOPSIS\n\\fB{title.lower()}\\fR\n.br\n'
+    ]
+    
     for block in blocks:
         if block["type"] == "code":
             result.append(process_code(block["markdown"]))
         elif block["type"] == "list":
             result.append(process_lists(block["markdown"]))
         else:
-            result.append(process_default(block["markdown"]))
-
-    Path(output_file).write_text("\n".join(result))
-
+            content = block["markdown"]
+            if "TABLE_BLOCK:" in content:
+                result.append(convert_table(content[12:-10]))
+            else:
+                content = re.sub(r"([^\n\s])  $", r"\1\n.br", content, flags=re.MULTILINE)
+                content = process_formatting(content)
+                content = process_headings(content)
+                content = process_parameters(content)
+                result.append(content)
+    
+    Path(output_file).write_text("\n".join(result), encoding='utf-8')
 
 def main():
-    parser = argparse.ArgumentParser(description="Convert Markdown to Unix man page.")
-    parser.add_argument("input_file", help="Path to the input Markdown file.")
-    parser.add_argument("output_file", help="Path to the output man page file.")
+    """Command line interface for the converter."""
+    parser = argparse.ArgumentParser(
+        description="Convert GRASS GIS markdown docs to man pages",
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter
+    )
+    parser.add_argument("input_file", help="Input markdown file path")
+    parser.add_argument("output_file", help="Output man page file path")
     args = parser.parse_args()
-
+    
     convert_markdown_to_man(args.input_file, args.output_file)
-
+    print(f"Successfully converted {args.input_file} to {args.output_file}")
 
 if __name__ == "__main__":
     main()

From 240536917ed162fd34be37e9693e9a61b8714e5f Mon Sep 17 00:00:00 2001
From: Sachintha Nadeeshan <snkodikara52@gmail.com>
Date: Thu, 27 Mar 2025 23:08:12 +0530
Subject: [PATCH 2/7] Improve table formatting and visualization

---
 utils/markdown2man.py | 306 +++++++++++++++++++++++++-----------------
 1 file changed, 182 insertions(+), 124 deletions(-)

diff --git a/utils/markdown2man.py b/utils/markdown2man.py
index 8bd68d01c87..4a3ef5338ea 100644
--- a/utils/markdown2man.py
+++ b/utils/markdown2man.py
@@ -17,92 +17,84 @@
 import re
 from pathlib import Path
 
+
 def strip_yaml_from_markdown(content):
-    """Remove YAML front matter from markdown content."""
+    # Remove YAML front matter
     return re.sub(r"^---\n.*?\n---\n", "", content, flags=re.DOTALL)
 
-def get_first_sentence(text):
-    """Extract first meaningful paragraph for NAME section."""
-    paragraphs = [p.strip() for p in text.split('\n\n') if p.strip()]
-    for para in paragraphs:
-        if not para.startswith('#') and len(para.split()) > 3:
-            clean = re.sub(r'[\*_`]', '', para.split('\n')[0])
-            return clean[:80]
-    return "Manages module functionality"
-
-def convert_table(md_table):
-    """Convert markdown tables to man page format with proper alignment."""
-    lines = [line.strip() for line in md_table.split('\n') 
-             if line.strip() and '|' in line]
-    lines = [line for line in lines if not re.match(r'^[\|\-\s]+$', line)]
-    
-    # Calculate column widths
-    col_widths = []
-    for line in lines:
-        cells = [cell.strip() for cell in line.strip('|').split('|')]
-        for i, cell in enumerate(cells):
-            if i >= len(col_widths):
-                col_widths.append(0)
-            col_widths[i] = max(col_widths[i], len(cell))
-    
-    # Format with consistent spacing
-    output = []
-    for line in lines:
-        cells = [cell.strip() for cell in line.strip('|').split('|')]
-        padded = [f" {cell.ljust(col_widths[i])} " for i, cell in enumerate(cells)]
-        output.append(''.join(padded))
-    return '\n'.join(output) + '\n'
 
 def parse_markdown(content):
-    """Parse markdown content into typed blocks (code, lists, default)."""
-    # Handle tables first
-    content = re.sub(
-        r'(\|.+\|(\n\|.+\|)+)',
-        lambda m: f"TABLE_BLOCK:{m.group(0)}:END_TABLE",
-        content
-    )
-
     lines = content.splitlines()
     processing_block = []
     processed_content = []
+
     buffer = ""
     state = "default"
+    in_table = False  # Track table state
 
     for line in lines:
+        stripped = line.strip()
+
+        # Detect table start/end
+        if re.match(r'^\|.+\|$', stripped) and not in_table:
+            if processing_block:
+                processed_content.append({"markdown": "\n".join(processing_block), "type": state})
+                processing_block = []
+            state = "table"
+            in_table = True
+            processing_block.append(line)
+            continue
+
+        if in_table:
+            if re.match(r'^\|.+\|$', stripped) or re.match(r'^\|-+', stripped):
+                processing_block.append(line)
+            else:
+                processed_content.append({"markdown": "\n".join(processing_block), "type": state})
+                processing_block = []
+                state = "default"
+                in_table = False
+                buffer = line  # Process the current line in default state
+            continue
+
+        # Code block handling
         if line.strip().startswith("```"):
             if state == "code":
                 processing_block.append(line)
-                processed_content.append({"markdown": "\n".join(processing_block), "type": state})
+                processed_content.append(
+                    {"markdown": "\n".join(processing_block), "type": state}
+                )
                 processing_block = []
                 state = "default"
             else:
-                if buffer:
-                    processing_block.append(buffer)
-                    buffer = ""
-                processed_content.append({"markdown": "\n".join(processing_block), "type": state})
-                processing_block = [line]
+                processed_content.append(
+                    {"markdown": "\n".join(processing_block), "type": state}
+                )
+                processing_block = []
+                processing_block.append(line)
                 state = "code"
             continue
 
-        if state == "code":
-            processing_block.append(line)
-            continue
-
+        # List handling
         if re.match(r"^(\s*)([-*]|\d+\.)\s+(.*)", line.strip()):
             if buffer:
                 processing_block.append(buffer)
                 buffer = ""
             if state != "list":
-                processed_content.append({"markdown": "\n".join(processing_block), "type": state})
+                processed_content.append(
+                    {"markdown": "\n".join(processing_block), "type": state}
+                )
                 processing_block = []
                 state = "list"
 
+        # Empty line handling (between blocks)
         if line == "":
             if buffer:
                 processing_block.append(buffer)
                 buffer = ""
             if state != "default":
-                processed_content.append({"markdown": "\n".join(processing_block), "type": state})
+                processed_content.append(
+                    {"markdown": "\n".join(processing_block), "type": state}
+                )
                 processing_block = []
                 state = "default"
             processing_block.append(line)
@@ -111,7 +103,7 @@ def parse_markdown(content):
         if buffer:
             buffer += " " + line
         else:
-            buffer += line
+            buffer = line
 
         if line.endswith("  "):
             processing_block.append(buffer)
@@ -120,131 +112,197 @@ def parse_markdown(content):
     if buffer:
         processing_block.append(buffer)
     if processing_block:
-        processed_content.append({"markdown": "\n".join(processing_block), "type": state})
+        processed_content.append(
+            {"markdown": "\n".join(processing_block), "type": state}
+        )
 
-    # Merge adjacent blocks of same type
-    merged = []
+    merged_content = []
     for item in processed_content:
         if not item["markdown"]:
             continue
-        if merged and merged[-1]["type"] == item["type"]:
-            merged[-1]["markdown"] += "\n" + item["markdown"]
+        if merged_content and merged_content[-1]["type"] == item["type"]:
+            merged_content[-1]["markdown"] += "\n" + item["markdown"]
         else:
-            merged.append(item)
-    return merged
+            merged_content.append(item)
+
+    return merged_content
+
+
+# Table processing function with better visualization
+def process_tables(markdown):
+    markdown = process_links(markdown)
+    markdown = process_formatting(markdown)
+    markdown = process_special_characters(markdown)
+
+    lines = markdown.split('\n')
+    if not lines:
+        return ""
+    
+    # Remove separator line if present (for Markdown tables with hyphen separators)
+    if re.match(r'^\|[-| ]+\|$', lines[1].strip()):
+        del lines[1]
+
+    # Prepare table with border-like formatting
+    tbl = [".TS"]
+    tbl.append("allbox tab(|);")  # Border for table
+    tbl.append("l " * len(lines[0].split("|")) + ".")  # Left-align all columns
+    
+    # Add table rows with border-like formatting
+    for i, line in enumerate(lines):
+        cells = [c.strip() for c in line.strip().strip('|').split('|')]
+        if i == 0:
+            tbl.append(".B")  # Bold for header row
+            tbl.append(" ".join(["l"]*len(cells)) + ".")  # Header column alignment
+        tbl.append("|" + "|".join(cells) + "|")
+    
+    tbl.append(".TE")
+    return '\n'.join(tbl)
+
+
+def process_links(markdown):
+    """Replace Markdown links with only their display text."""
+    markdown = re.sub(r"!\[.*?\]\(.*?\)", "", markdown)
+    return re.sub(r"\[(.*?)\]\((.*?)\)", r"\1", markdown)
+
 
 def process_parameters(markdown):
-    """Handle GRASS parameters and flags with proper .IP formatting."""
-    # Process flags (-p) and parameters (region)
-    markdown = re.sub(
-        r'([^\w\n])(\*\*|\*|_)([a-z0-9_\-]+)(\*\*|\*|_)([^\w]|$)',
-        r'\1\n.IP "\2\3\4" 4\n\5',
-        markdown
+    return re.sub(
+        r"^\*\*([a-z0-9_]*)\*\*=\*([a-z]*)\*( \*\*\[required\]\*\*)?",
+        r'.IP "**\1**=*\2*\3" 4m',
+        markdown,
+        flags=re.MULTILINE,
     )
-    # Clean up formatting
-    markdown = re.sub(r'\.IP\n\.IP', '.IP', markdown)
-    return re.sub(r'(\n\.IP "[^"]+" 4\n)\s+', r'\1', markdown)
+
+
+def process_flags(markdown):
+    return re.sub(r"^\*\*-(.*?)\*\*", r'.IP "**-\1**" 4m', markdown, flags=re.MULTILINE)
+
 
 def process_formatting(markdown):
-    """Apply man page formatting for bold/italic text."""
+    """Apply inline formatting for bold, italic, and bold+italic."""
     markdown = re.sub(r"\*\*\*(.+?)\*\*\*", r"\\fB\\fI\1\\fR", markdown)
     markdown = re.sub(r"\*\*(.+?)\*\*", r"\\fB\1\\fR", markdown)
     return re.sub(r"\*(.+?)\*", r"\\fI\1\\fR", markdown)
 
+
+def process_br(markdown):
+    return re.sub(r"([^\n\s])  $", r"\1\n.br", markdown, flags=re.MULTILINE)
+
+
 def process_headings(markdown):
-    """Convert markdown headings to man page sections."""
-    markdown = re.sub(r"^#{1,2} (.*)", r".SH \1".upper(), markdown, flags=re.MULTILINE)
-    return re.sub(r"^#{3,} (.*)", r".SS \1", markdown, flags=re.MULTILINE)
+    def convert_sh(match):
+        return f".SH {match.group(1).upper()}"
+    
+    def convert_ss(match):
+        return f".SS {match.group(1)}"
+
+    markdown = re.sub(r"^#{1,2} (.*)", convert_sh, markdown, flags=re.MULTILINE)
+    return re.sub(r"^#{3,} (.*)", convert_ss, markdown, flags=re.MULTILINE)
+
 
 def process_code(markdown):
-    """Format code blocks with proper man page syntax."""
     in_code_block = False
     output = []
     for line in markdown.splitlines():
         if line.lstrip().startswith("```"):
             if in_code_block:
-                output.append("\\fR\n.fi")
+                output.append("\\fR\n.fi\n")  # End code block
             else:
-                lang = line.strip('`').strip()
-                output.append(f".nf\n\\fC\n{lang + ': ' if lang else ''}")
+                output.append(".nf\n\\fC\n")  # Start code block
             in_code_block = not in_code_block
         else:
-            output.append(re.sub(r"\\", r"\\\\", line) if in_code_block else line)
+            output.append(re.sub(r"\\", r"\(rs", line))
+
     return "\n".join(output)
 
+
 def process_lists(markdown):
-    """Convert markdown lists to man page format."""
+    markdown = process_special_characters(markdown)
+    markdown = process_formatting(markdown)
+    markdown = process_links(markdown)
+
     output = []
     indent_levels = []
+
     for line in markdown.splitlines():
-        match = re.match(r"^(\s*)([-*]|\d+\.)\s+(.*)", line)
+        match = re.match(r"^(\s*)([-*]|\d+\.)\s+(.*)", line)  # Match bullets or numbers
         if not match:
-            continue
-        spaces, bullet, text = match.groups()
-        level = len(spaces)
-        
+            continue  # Skip non-list lines (shouldn't happen if input is all lists)
+
+        spaces, bullet, item_text = match.groups()
+        level = len(spaces)  # Determine indentation level
+
         while indent_levels and indent_levels[-1] > level:
-            output.append(".RE")
+            output.append(".RE")  # Close previous indentation level
             indent_levels.pop()
-            
+
         if not indent_levels or indent_levels[-1] < level:
-            output.append(".RS 4n")
+            output.append(".RS 4n")  # Open new indentation level
             indent_levels.append(level)
-            
-        output.append(f'.IP "{bullet}" 4n\n{text}' if bullet.isdigit() 
-                     else f'.IP \\(bu 4n\n{text}')
-    
+
+        if re.match(r"^\d+\.$", bullet):  # Numbered list
+            output.append(f'.IP "{bullet}" 4n\n{item_text}')
+        else:  # Bullet list
+            output.append(".IP \\(bu 4n\n" + item_text)
+
+    # Close any remaining indentation levels
     while indent_levels:
         output.append(".RE")
         indent_levels.pop()
+
     return "\n".join(output)
 
+
+def process_special_characters(markdown):
+    markdown = markdown.replace(r"\[", "[")
+    markdown = markdown.replace(r"\]", "]")
+    markdown = markdown.replace(r"\#", "#")
+    markdown = markdown.replace(r"\>", ">")
+    markdown = markdown.replace(r"\<", "<")
+    markdown = markdown.replace("`", "")
+    # eliminate extra spaces between words
+    markdown = re.sub(r"(?<=\S) {2,}(?=\S)", " ", markdown)
+    return re.sub(r"\\", r"\(rs", markdown)
+
+
+def process_default(markdown):
+    markdown = process_br(markdown)
+    markdown = process_parameters(markdown)
+    markdown = process_flags(markdown)
+    markdown = markdown.replace("&nbsp;&nbsp;&nbsp;&nbsp;", "")
+    markdown = process_special_characters(markdown)
+    markdown = process_formatting(markdown)
+    markdown = process_links(markdown)
+    return process_headings(markdown)
+
+
 def convert_markdown_to_man(input_file, output_file):
-    """Main conversion function from markdown to man page format."""
-    markdown = Path(input_file).read_text(encoding='utf-8')
+    """Read Markdown file and convert to man page."""
+    markdown = Path(input_file).read_text()
     markdown = strip_yaml_from_markdown(markdown)
-    
-    title = Path(input_file).stem.upper()
-    first_para = get_first_sentence(markdown.split('\n\n')[1]) if '\n\n' in markdown else ""
-    
     blocks = parse_markdown(markdown)
-    
-    result = [
-        f'.TH {title} 1 "GRASS GIS User\'s Manual"\n',
-        f'.SH NAME\n\\fB{title}\\fR \\- {first_para}\n',
-        f'.SH SYNOPSIS\n\\fB{title.lower()}\\fR\n.br\n'
-    ]
-    
+    result = ['.TH MAN 1 "Manual"\n']
     for block in blocks:
         if block["type"] == "code":
             result.append(process_code(block["markdown"]))
         elif block["type"] == "list":
             result.append(process_lists(block["markdown"]))
+        elif block["type"] == "table":
+            result.append(process_tables(block["markdown"]))  # Process tables
         else:
-            content = block["markdown"]
-            if "TABLE_BLOCK:" in content:
-                result.append(convert_table(content[12:-10]))
-            else:
-                content = re.sub(r"([^\n\s])  $", r"\1\n.br", content, flags=re.MULTILINE)
-                content = process_formatting(content)
-                content = process_headings(content)
-                content = process_parameters(content)
-                result.append(content)
-    
-    Path(output_file).write_text("\n".join(result), encoding='utf-8')
+            result.append(process_default(block["markdown"]))
+
+    Path(output_file).write_text("\n".join(result))
+
 
 def main():
-    """Command line interface for the converter."""
-    parser = argparse.ArgumentParser(
-        description="Convert GRASS GIS markdown docs to man pages",
-        formatter_class=argparse.ArgumentDefaultsHelpFormatter
-    )
-    parser.add_argument("input_file", help="Input markdown file path")
-    parser.add_argument("output_file", help="Output man page file path")
+    parser = argparse.ArgumentParser(description="Convert Markdown to Unix man page.")
+    parser.add_argument("input_file", help="Path to the input Markdown file.")
+    parser.add_argument("output_file", help="Path to the output man page file.")
     args = parser.parse_args()
-    
+
     convert_markdown_to_man(args.input_file, args.output_file)
-    print(f"Successfully converted {args.input_file} to {args.output_file}")
+
 
 if __name__ == "__main__":
     main()

From e3c683356b44fd1d55c98956162d6105ecc133b2 Mon Sep 17 00:00:00 2001
From: Sachintha Nadeeshan <snkodikara52@gmail.com>
Date: Fri, 28 Mar 2025 10:24:15 +0530
Subject: [PATCH 3/7] Improve formatting and structure

- Add hierarchical section numbering (1., 1.1) with descriptive labels (Main Section/Subsection)
- Implement explicit list nesting markers with level indicators and type detection
- Add space padding inside inline font escapes (\fB text \fR) to improve source readability
- Resolve list index error in nested list processing
---
 utils/markdown2man.py | 292 +++++++++++++++++++++++-------------------
 1 file changed, 160 insertions(+), 132 deletions(-)

diff --git a/utils/markdown2man.py b/utils/markdown2man.py
index 4a3ef5338ea..f3d32ae4ffc 100644
--- a/utils/markdown2man.py
+++ b/utils/markdown2man.py
@@ -22,20 +22,20 @@ def strip_yaml_from_markdown(content):
     # Remove YAML front matter
     return re.sub(r"^---\n.*?\n---\n", "", content, flags=re.DOTALL)
 
-
 def parse_markdown(content):
+    """Parse markdown into structured blocks"""
     lines = content.splitlines()
     processing_block = []
     processed_content = []
 
     buffer = ""
     state = "default"
-    in_table = False  # Track table state
+    in_table = False
 
     for line in lines:
         stripped = line.strip()
 
-        # Detect table start/end
+        # Table detection
         if re.match(r'^\|.+\|$', stripped) and not in_table:
             if processing_block:
                 processed_content.append({"markdown": "\n".join(processing_block), "type": state})
@@ -53,7 +53,7 @@ def parse_markdown(content):
                 processing_block = []
                 state = "default"
                 in_table = False
-                buffer = line  # Process the current line in default state
+                buffer = line
             continue
 
         # Code block handling
@@ -86,7 +86,7 @@ def parse_markdown(content):
                 processing_block = []
                 state = "list"
 
-        # Empty line handling (between blocks)
+        # Empty line handling
         if line == "":
             if buffer:
                 processing_block.append(buffer)
@@ -127,182 +127,210 @@ def parse_markdown(content):
 
     return merged_content
 
+def process_headings(markdown):
+    """Convert headings with hierarchical numbering and labels"""
+    section_counter = [0]
+    subsection_counter = [0]
 
-# Table processing function with better visualization
-def process_tables(markdown):
-    markdown = process_links(markdown)
-    markdown = process_formatting(markdown)
-    markdown = process_special_characters(markdown)
+    def convert_main_section(match):
+        section_counter[0] += 1
+        subsection_counter[0] = 0
+        return f"\n.SH {section_counter[0]}. {match.group(1).upper()} (Main Section)\n"
 
-    lines = markdown.split('\n')
-    if not lines:
-        return ""
-    
-    # Remove separator line if present (for Markdown tables with hyphen separators)
-    if re.match(r'^\|[-| ]+\|$', lines[1].strip()):
-        del lines[1]
-
-    # Prepare table with border-like formatting
-    tbl = [".TS"]
-    tbl.append("allbox tab(|);")  # Border for table
-    tbl.append("l " * len(lines[0].split("|")) + ".")  # Left-align all columns
-    
-    # Add table rows with border-like formatting
-    for i, line in enumerate(lines):
-        cells = [c.strip() for c in line.strip().strip('|').split('|')]
-        if i == 0:
-            tbl.append(".B")  # Bold for header row
-            tbl.append(" ".join(["l"]*len(cells)) + ".")  # Header column alignment
-        tbl.append("|" + "|".join(cells) + "|")
-    
-    tbl.append(".TE")
-    return '\n'.join(tbl)
+    def convert_subsection(match):
+        subsection_counter[0] += 1
+        return (f"\n.SS {section_counter[0]}.{subsection_counter[0]} "
+                f"{match.group(1).upper()} (Subsection)\n")
 
+    markdown = re.sub(r"^## (.*)", convert_main_section, markdown, flags=re.MULTILINE)
+    return re.sub(r"^### (.*)", convert_subsection, markdown, flags=re.MULTILINE)
 
-def process_links(markdown):
-    """Replace Markdown links with only their display text."""
-    markdown = re.sub(r"!\[.*?\]\(.*?\)", "", markdown)
-    return re.sub(r"\[(.*?)\]\((.*?)\)", r"\1", markdown)
+def process_lists(markdown):
+    markdown = process_special_characters(markdown)
+    markdown = process_formatting(markdown)
+    markdown = process_links(markdown)
 
+    output = []
+    current_level = 0
+    list_stack = []
+    bullet_styles = [r"\\(bu", r"\\(sq", r"\\(ci"] 
 
-def process_parameters(markdown):
-    return re.sub(
-        r"^\*\*([a-z0-9_]*)\*\*=\*([a-z]*)\*( \*\*\[required\]\*\*)?",
-        r'.IP "**\1**=*\2*\3" 4m',
-        markdown,
-        flags=re.MULTILINE,
-    )
+    for line in markdown.splitlines():
+        match = re.match(r"^(\s*)([-*]|\d+\.)\s+(.*)", line)
+        if not match:
+            continue
 
+        indent, bullet, content = match.groups()
+        new_level = len(indent) // 4
+
+        # Handle list transitions
+        while current_level > new_level:
+            if list_stack:  # Add safety check
+                output.append(f".RE\n\\fBEnd of Nested List (Level {current_level})\\fR\n")
+                current_level -= 1
+                list_stack.pop()
+
+        if new_level > current_level or not list_stack:
+            # Initialize stack if empty
+            list_type = 'ordered' if bullet[:-1].isdigit() else 'unordered'
+            output.append(
+                f"\\fBStart of Nested List (Level {new_level}) "
+                f"[{list_type.upper()}]\\fR\n"
+                f".RS {4*(new_level+1)}n"
+            )
+            current_level = new_level
+            list_stack.append({'type': list_type, 'counter': 1})
+
+        # Add check for empty stack before access
+        if not list_stack:
+            continue
 
-def process_flags(markdown):
-    return re.sub(r"^\*\*-(.*?)\*\*", r'.IP "**-\1**" 4m', markdown, flags=re.MULTILINE)
+        # Format list items
+        if list_stack[-1]['type'] == 'ordered':
+            output.append(f'.IP "{list_stack[-1]["counter"]}." {4*(current_level+1)}n')
+            list_stack[-1]["counter"] += 1
+        else:
+            bullet = bullet_styles[current_level % len(bullet_styles)]
+            output.append(f'.IP "{bullet}" {4*(current_level+1)}n')
+        
+        output.append(f"{content}\n")
 
+    # Close remaining lists
+    while current_level > 0 and list_stack:
+        output.append(f".RE\n\\fBEnd of Nested List (Level {current_level})\\fR\n")
+        current_level -= 1
+        list_stack.pop()
 
-def process_formatting(markdown):
-    """Apply inline formatting for bold, italic, and bold+italic."""
-    markdown = re.sub(r"\*\*\*(.+?)\*\*\*", r"\\fB\\fI\1\\fR", markdown)
-    markdown = re.sub(r"\*\*(.+?)\*\*", r"\\fB\1\\fR", markdown)
-    return re.sub(r"\*(.+?)\*", r"\\fI\1\\fR", markdown)
+    return "".join(output)
 
+def process_tables(markdown):
+    processed = process_formatting(markdown)
+    lines = processed.split('\n')
+    
+    if not lines or len(lines[0].strip()) == 0:
+        return ""
 
-def process_br(markdown):
-    return re.sub(r"([^\n\s])  $", r"\1\n.br", markdown, flags=re.MULTILINE)
+    table = [
+        "\\fBStart of Table\\fR",
+        ".TS",
+        "allbox tab(|);",
+        "l " * len(lines[0].split("|")) + "."
+    ]
 
+    for i, line in enumerate(lines):
+        cells = [c.strip() for c in line.strip('|').split('|')]
+        if i == 0:
+            table.append("_")
+        table.append("|" + "|".join(cells) + "|")
 
-def process_headings(markdown):
-    def convert_sh(match):
-        return f".SH {match.group(1).upper()}"
-    
-    def convert_ss(match):
-        return f".SS {match.group(1)}"
+    table.append(".TE\n\\fBEnd of Table\\fR")
+    return '\n'.join(table)
 
-    markdown = re.sub(r"^#{1,2} (.*)", convert_sh, markdown, flags=re.MULTILINE)
-    return re.sub(r"^#{3,} (.*)", convert_ss, markdown, flags=re.MULTILINE)
+def process_parameters(markdown):
+    """Handle parameter definitions with bold formatting"""
+    return re.sub(
+        r"^\*\*([a-z0-9_]*)\*\*=\*([a-z]*)\*( \*\*\[required\]\*\*)?",
+        r'.IP "\\fB\1\\fR=*\2*\3" 4m',
+        markdown,
+        flags=re.MULTILINE,
+    )
 
+def process_flags(markdown):
+    """Handle command-line flags with consistent formatting"""
+    return re.sub(
+        r"^\*\*-(.*?)\*\*", 
+        r'.IP "\\fB-\1\\fR" 4m', 
+        markdown, 
+        flags=re.MULTILINE
+    )
 
 def process_code(markdown):
+    """Preserve code blocks with monospace formatting"""
     in_code_block = False
     output = []
     for line in markdown.splitlines():
         if line.lstrip().startswith("```"):
             if in_code_block:
-                output.append("\\fR\n.fi\n")  # End code block
+                output.append("\\fR\n.fi\n")
             else:
-                output.append(".nf\n\\fC\n")  # Start code block
+                output.append(".nf\n\\fC\n")
             in_code_block = not in_code_block
         else:
-            output.append(re.sub(r"\\", r"\(rs", line))
-
+            output.append(re.sub(r"\\fC", r"\\fC ", line))
     return "\n".join(output)
 
+def process_formatting(markdown):
+    markdown = re.sub(r"\*\*\s*(\S(.*?\S)?)\s*\*\*", r"\\fB \1 \\fR", markdown, flags=re.DOTALL)
+    markdown = re.sub(r"\*\s*(\S(.*?\S)?)\s*\*", r"\\fI \1 \\fR", markdown, flags=re.DOTALL)
+    markdown = re.sub(r"\*\*\*\s*(\S(.*?\S)?)\s*\*\*\*", r"\\fB\\fI \1 \\fR\\fR", markdown, flags=re.DOTALL)
+    
+    return markdown
 
-def process_lists(markdown):
-    markdown = process_special_characters(markdown)
-    markdown = process_formatting(markdown)
-    markdown = process_links(markdown)
-
-    output = []
-    indent_levels = []
-
-    for line in markdown.splitlines():
-        match = re.match(r"^(\s*)([-*]|\d+\.)\s+(.*)", line)  # Match bullets or numbers
-        if not match:
-            continue  # Skip non-list lines (shouldn't happen if input is all lists)
-
-        spaces, bullet, item_text = match.groups()
-        level = len(spaces)  # Determine indentation level
-
-        while indent_levels and indent_levels[-1] > level:
-            output.append(".RE")  # Close previous indentation level
-            indent_levels.pop()
-
-        if not indent_levels or indent_levels[-1] < level:
-            output.append(".RS 4n")  # Open new indentation level
-            indent_levels.append(level)
-
-        if re.match(r"^\d+\.$", bullet):  # Numbered list
-            output.append(f'.IP "{bullet}" 4n\n{item_text}')
-        else:  # Bullet list
-            output.append(".IP \\(bu 4n\n" + item_text)
-
-    # Close any remaining indentation levels
-    while indent_levels:
-        output.append(".RE")
-        indent_levels.pop()
-
-    return "\n".join(output)
-
+def process_links(markdown):
+    """Replace Markdown links with display text"""
+    markdown = re.sub(r"!\[.*?\]\(.*?\)", "", markdown)
+    return re.sub(r"\[(.*?)\]\((.*?)\)", r"\1", markdown)
+bullet_styles = [r"\\(bu", r"\\(sq", r"\\(ci"]  # Use raw strings with double escapes
 
 def process_special_characters(markdown):
+    """Handle special characters"""
     markdown = markdown.replace(r"\[", "[")
     markdown = markdown.replace(r"\]", "]")
     markdown = markdown.replace(r"\#", "#")
-    markdown = markdown.replace(r"\>", ">")
-    markdown = markdown.replace(r"\<", "<")
-    markdown = markdown.replace("`", "")
-    # eliminate extra spaces between words
     markdown = re.sub(r"(?<=\S) {2,}(?=\S)", " ", markdown)
     return re.sub(r"\\", r"\(rs", markdown)
 
-
-def process_default(markdown):
-    markdown = process_br(markdown)
-    markdown = process_parameters(markdown)
-    markdown = process_flags(markdown)
-    markdown = markdown.replace("&nbsp;&nbsp;&nbsp;&nbsp;", "")
-    markdown = process_special_characters(markdown)
-    markdown = process_formatting(markdown)
-    markdown = process_links(markdown)
-    return process_headings(markdown)
-
-
 def convert_markdown_to_man(input_file, output_file):
-    """Read Markdown file and convert to man page."""
     markdown = Path(input_file).read_text()
     markdown = strip_yaml_from_markdown(markdown)
     blocks = parse_markdown(markdown)
-    result = ['.TH MAN 1 "Manual"\n']
+    
+    man_page = [
+        '.TH I.ATCORR 1 "GRASS GIS Manual"',
+        '.SH NAME\ni.atcorr \\- Atmospheric correction using 6S algorithm'
+    ]
+
     for block in blocks:
-        if block["type"] == "code":
-            result.append(process_code(block["markdown"]))
-        elif block["type"] == "list":
-            result.append(process_lists(block["markdown"]))
-        elif block["type"] == "table":
-            result.append(process_tables(block["markdown"]))  # Process tables
+        content_type = block["type"]
+        content = block["markdown"]
+
+        if content_type == "code":
+            man_page.append(process_code(content))
+        elif content_type == "list":
+            man_page.append(process_lists(content))
+        elif content_type == "table":
+            man_page.append(process_tables(content))
         else:
-            result.append(process_default(block["markdown"]))
+            processed = process_default(content)
+            man_page.append(processed)
+
+    Path(output_file).write_text("\n".join(man_page))
 
-    Path(output_file).write_text("\n".join(result))
+def process_default(markdown):
+    """Default processing pipeline"""
+    transformations = [
+        process_parameters,
+        process_flags,
+        lambda x: x.replace("&nbsp;&nbsp;&nbsp;&nbsp;", ""),
+        process_special_characters,
+        process_formatting,
+        process_links,
+        process_headings
+    ]
+    for transform in transformations:
+        markdown = transform(markdown)
+    return markdown
 
 
 def main():
-    parser = argparse.ArgumentParser(description="Convert Markdown to Unix man page.")
-    parser.add_argument("input_file", help="Path to the input Markdown file.")
-    parser.add_argument("output_file", help="Path to the output man page file.")
+    parser = argparse.ArgumentParser(
+        description="Convert enhanced Markdown to man page format"
+    )
+    parser.add_argument("input_file", help="Input Markdown file")
+    parser.add_argument("output_file", help="Output man page file")
     args = parser.parse_args()
-
+    
     convert_markdown_to_man(args.input_file, args.output_file)
 
-
 if __name__ == "__main__":
     main()

From 01b37d5e633cfb511a1b7a2ebd927691ff9692b7 Mon Sep 17 00:00:00 2001
From: Sachintha Nadeeshan <snkodikara52@gmail.com>
Date: Sun, 6 Apr 2025 18:51:43 +0530
Subject: [PATCH 4/7] Add table support and improve parsing

- Added table processing functionality with troff/groff table format conversion
- Improved YAML front matter stripping and block parsing logic
- Enhanced special character handling, especially in code blocks
- Improved heading and list processing with proper indentation
- Added UTF-8 encoding support for file operations
- Refactored argument parsing and output formatting
---
 utils/markdown2man.py | 375 ++++++++++++------------------------------
 1 file changed, 104 insertions(+), 271 deletions(-)

diff --git a/utils/markdown2man.py b/utils/markdown2man.py
index f3d32ae4ffc..1b38dff23f3 100644
--- a/utils/markdown2man.py
+++ b/utils/markdown2man.py
@@ -13,324 +13,157 @@
 #
 ###############################################################################
 
-import argparse
 import re
+import argparse
 from pathlib import Path
 
+def strip_yaml_from_markdown(markdown):
+    if markdown.startswith('---'):
+        parts = markdown.split('---', 2)
+        if len(parts) == 3:
+            return parts[2].strip()
+    return markdown
 
-def strip_yaml_from_markdown(content):
-    # Remove YAML front matter
-    return re.sub(r"^---\n.*?\n---\n", "", content, flags=re.DOTALL)
+def process_tables(markdown):
+    lines = markdown.strip().splitlines()
+    if len(lines) < 2:
+        return markdown  # Not a valid table
 
-def parse_markdown(content):
-    """Parse markdown into structured blocks"""
-    lines = content.splitlines()
-    processing_block = []
-    processed_content = []
+    headers = lines[0].strip("|").split("|")
+    rows = [line.strip("|").split("|") for line in lines[2:] if '|' in line]
 
-    buffer = ""
-    state = "default"
-    in_table = False
+    output = [".TS", "allbox;", "c" * len(headers) + "."]
+    output.append("\t".join([h.strip() for h in headers]))
 
-    for line in lines:
-        stripped = line.strip()
+    for row in rows:
+        output.append("\t".join([cell.strip() for cell in row]))
 
-        # Table detection
-        if re.match(r'^\|.+\|$', stripped) and not in_table:
-            if processing_block:
-                processed_content.append({"markdown": "\n".join(processing_block), "type": state})
-                processing_block = []
-            state = "table"
-            in_table = True
-            processing_block.append(line)
-            continue
+    output.append(".TE")
+    return "\n".join(output)
 
-        if in_table:
-            if re.match(r'^\|.+\|$', stripped) or re.match(r'^\|-+', stripped):
-                processing_block.append(line)
-            else:
-                processed_content.append({"markdown": "\n".join(processing_block), "type": state})
-                processing_block = []
-                state = "default"
-                in_table = False
-                buffer = line
-            continue
+def parse_markdown(markdown):
+    blocks = []
+    lines = markdown.splitlines()
+    current = {"type": "text", "markdown": ""}
+
+    def flush():
+        nonlocal current
+        if current["markdown"].strip():
+            blocks.append(current)
+        current = {"type": "text", "markdown": ""}
 
-        # Code block handling
+    in_code = False
+    for i, line in enumerate(lines): 
         if line.strip().startswith("```"):
-            if state == "code":
-                processing_block.append(line)
-                processed_content.append(
-                    {"markdown": "\n".join(processing_block), "type": state}
-                )
-                processing_block = []
-                state = "default"
-            else:
-                processed_content.append(
-                    {"markdown": "\n".join(processing_block), "type": state}
-                )
-                processing_block = []
-                processing_block.append(line)
-                state = "code"
+            flush()
+            in_code = not in_code
+            current = {"type": "code" if in_code else "text", "markdown": ""}
             continue
 
-        # List handling
-        if re.match(r"^(\s*)([-*]|\d+\.)\s+(.*)", line.strip()):
-            if buffer:
-                processing_block.append(buffer)
-                buffer = ""
-            if state != "list":
-                processed_content.append(
-                    {"markdown": "\n".join(processing_block), "type": state}
-                )
-                processing_block = []
-                state = "list"
-
-        # Empty line handling
-        if line == "":
-            if buffer:
-                processing_block.append(buffer)
-                buffer = ""
-            if state != "default":
-                processed_content.append(
-                    {"markdown": "\n".join(processing_block), "type": state}
-                )
-                processing_block = []
-                state = "default"
-            processing_block.append(line)
+        if re.match(r"^(\s*)([-*]|\d+\.)\s+.*", line):
+            if current["type"] != "list":
+                flush()
+                current = {"type": "list", "markdown": ""}
+        elif current["type"] != "code" and not line.strip():
+            flush()
             continue
 
-        if buffer:
-            buffer += " " + line
-        else:
-            buffer = line
-
-        if line.endswith("  "):
-            processing_block.append(buffer)
-            buffer = ""
-
-    if buffer:
-        processing_block.append(buffer)
-    if processing_block:
-        processed_content.append(
-            {"markdown": "\n".join(processing_block), "type": state}
-        )
-
-    merged_content = []
-    for item in processed_content:
-        if not item["markdown"]:
+        if re.match(r"^\|.*\|$", line) and "|" in lines[i + 1] if i + 1 < len(lines) else False:
+            flush()
+            current = {"type": "table", "markdown": ""}
+            current["markdown"] += line + "\n"
             continue
-        if merged_content and merged_content[-1]["type"] == item["type"]:
-            merged_content[-1]["markdown"] += "\n" + item["markdown"]
-        else:
-            merged_content.append(item)
 
-    return merged_content
+        current["markdown"] += line + "\n"
 
-def process_headings(markdown):
-    """Convert headings with hierarchical numbering and labels"""
-    section_counter = [0]
-    subsection_counter = [0]
+    flush()
+    return blocks
 
-    def convert_main_section(match):
-        section_counter[0] += 1
-        subsection_counter[0] = 0
-        return f"\n.SH {section_counter[0]}. {match.group(1).upper()} (Main Section)\n"
+def process_headings(markdown):
+    def convert_sh(match):
+        return f".SH {match.group(1).upper()}"
+    def convert_ss(match):
+        return f".SS {match.group(1)}"
 
-    def convert_subsection(match):
-        subsection_counter[0] += 1
-        return (f"\n.SS {section_counter[0]}.{subsection_counter[0]} "
-                f"{match.group(1).upper()} (Subsection)\n")
+    markdown = re.sub(r"^# (.*)", convert_sh, markdown, flags=re.MULTILINE)
+    markdown = re.sub(r"^## (.*)", convert_ss, markdown, flags=re.MULTILINE)
+    return markdown
 
-    markdown = re.sub(r"^## (.*)", convert_main_section, markdown, flags=re.MULTILINE)
-    return re.sub(r"^### (.*)", convert_subsection, markdown, flags=re.MULTILINE)
+def process_code(markdown):
+    output = []
+    output.append(".nf\n\\fC")
+    for line in markdown.splitlines():
+        output.append(line.replace("\\", r"\(rs"))
+    output.append("\\fR\n.fi")
+    return "\n".join(output)
 
 def process_lists(markdown):
-    markdown = process_special_characters(markdown)
-    markdown = process_formatting(markdown)
-    markdown = process_links(markdown)
-
     output = []
-    current_level = 0
-    list_stack = []
-    bullet_styles = [r"\\(bu", r"\\(sq", r"\\(ci"] 
+    indent_levels = []
 
     for line in markdown.splitlines():
         match = re.match(r"^(\s*)([-*]|\d+\.)\s+(.*)", line)
         if not match:
             continue
 
-        indent, bullet, content = match.groups()
-        new_level = len(indent) // 4
-
-        # Handle list transitions
-        while current_level > new_level:
-            if list_stack:  # Add safety check
-                output.append(f".RE\n\\fBEnd of Nested List (Level {current_level})\\fR\n")
-                current_level -= 1
-                list_stack.pop()
-
-        if new_level > current_level or not list_stack:
-            # Initialize stack if empty
-            list_type = 'ordered' if bullet[:-1].isdigit() else 'unordered'
-            output.append(
-                f"\\fBStart of Nested List (Level {new_level}) "
-                f"[{list_type.upper()}]\\fR\n"
-                f".RS {4*(new_level+1)}n"
-            )
-            current_level = new_level
-            list_stack.append({'type': list_type, 'counter': 1})
-
-        # Add check for empty stack before access
-        if not list_stack:
-            continue
+        spaces, bullet, item_text = match.groups()
+        level = len(spaces)
 
-        # Format list items
-        if list_stack[-1]['type'] == 'ordered':
-            output.append(f'.IP "{list_stack[-1]["counter"]}." {4*(current_level+1)}n')
-            list_stack[-1]["counter"] += 1
-        else:
-            bullet = bullet_styles[current_level % len(bullet_styles)]
-            output.append(f'.IP "{bullet}" {4*(current_level+1)}n')
-        
-        output.append(f"{content}\n")
+        while indent_levels and indent_levels[-1] > level:
+            output.append(".RE")
+            indent_levels.pop()
 
-    # Close remaining lists
-    while current_level > 0 and list_stack:
-        output.append(f".RE\n\\fBEnd of Nested List (Level {current_level})\\fR\n")
-        current_level -= 1
-        list_stack.pop()
+        if not indent_levels or indent_levels[-1] < level:
+            output.append(".RS 4n")
+            indent_levels.append(level)
 
-    return "".join(output)
+        if re.match(r"^\d+\.$", bullet):
+            output.append(f'.IP "{bullet}" 4n\n{item_text}')
+        else:
+            output.append(f".IP \\(bu 4n\n{item_text}")
 
-def process_tables(markdown):
-    processed = process_formatting(markdown)
-    lines = processed.split('\n')
-    
-    if not lines or len(lines[0].strip()) == 0:
-        return ""
-
-    table = [
-        "\\fBStart of Table\\fR",
-        ".TS",
-        "allbox tab(|);",
-        "l " * len(lines[0].split("|")) + "."
-    ]
-
-    for i, line in enumerate(lines):
-        cells = [c.strip() for c in line.strip('|').split('|')]
-        if i == 0:
-            table.append("_")
-        table.append("|" + "|".join(cells) + "|")
-
-    table.append(".TE\n\\fBEnd of Table\\fR")
-    return '\n'.join(table)
-
-def process_parameters(markdown):
-    """Handle parameter definitions with bold formatting"""
-    return re.sub(
-        r"^\*\*([a-z0-9_]*)\*\*=\*([a-z]*)\*( \*\*\[required\]\*\*)?",
-        r'.IP "\\fB\1\\fR=*\2*\3" 4m',
-        markdown,
-        flags=re.MULTILINE,
-    )
-
-def process_flags(markdown):
-    """Handle command-line flags with consistent formatting"""
-    return re.sub(
-        r"^\*\*-(.*?)\*\*", 
-        r'.IP "\\fB-\1\\fR" 4m', 
-        markdown, 
-        flags=re.MULTILINE
-    )
+    while indent_levels:
+        output.append(".RE")
+        indent_levels.pop()
 
-def process_code(markdown):
-    """Preserve code blocks with monospace formatting"""
-    in_code_block = False
-    output = []
-    for line in markdown.splitlines():
-        if line.lstrip().startswith("```"):
-            if in_code_block:
-                output.append("\\fR\n.fi\n")
-            else:
-                output.append(".nf\n\\fC\n")
-            in_code_block = not in_code_block
-        else:
-            output.append(re.sub(r"\\fC", r"\\fC ", line))
     return "\n".join(output)
 
-def process_formatting(markdown):
-    markdown = re.sub(r"\*\*\s*(\S(.*?\S)?)\s*\*\*", r"\\fB \1 \\fR", markdown, flags=re.DOTALL)
-    markdown = re.sub(r"\*\s*(\S(.*?\S)?)\s*\*", r"\\fI \1 \\fR", markdown, flags=re.DOTALL)
-    markdown = re.sub(r"\*\*\*\s*(\S(.*?\S)?)\s*\*\*\*", r"\\fB\\fI \1 \\fR\\fR", markdown, flags=re.DOTALL)
-    
-    return markdown
-
-def process_links(markdown):
-    """Replace Markdown links with display text"""
-    markdown = re.sub(r"!\[.*?\]\(.*?\)", "", markdown)
-    return re.sub(r"\[(.*?)\]\((.*?)\)", r"\1", markdown)
-bullet_styles = [r"\\(bu", r"\\(sq", r"\\(ci"]  # Use raw strings with double escapes
+def process_special_characters(text):
+    text = text.replace(r"\[", "[").replace(r"\]", "]")
+    text = text.replace(r"\#", "#").replace(r"\>", ">").replace(r"\<", "<")
+    text = text.replace("`", "")
+    text = re.sub(r"(?<=\S) {2,}(?=\S)", " ", text)
+    return text.replace("\\", r"\(rs")
 
-def process_special_characters(markdown):
-    """Handle special characters"""
-    markdown = markdown.replace(r"\[", "[")
-    markdown = markdown.replace(r"\]", "]")
-    markdown = markdown.replace(r"\#", "#")
-    markdown = re.sub(r"(?<=\S) {2,}(?=\S)", " ", markdown)
-    return re.sub(r"\\", r"\(rs", markdown)
+def process_default(markdown):
+    markdown = process_special_characters(markdown)
+    markdown = process_headings(markdown)
+    return markdown
 
 def convert_markdown_to_man(input_file, output_file):
-    markdown = Path(input_file).read_text()
+    markdown = Path(input_file).read_text(encoding='utf-8')
     markdown = strip_yaml_from_markdown(markdown)
     blocks = parse_markdown(markdown)
-    
-    man_page = [
-        '.TH I.ATCORR 1 "GRASS GIS Manual"',
-        '.SH NAME\ni.atcorr \\- Atmospheric correction using 6S algorithm'
-    ]
 
+    result = ['.TH "MANPAGE" "1" "" "" ""']
     for block in blocks:
-        content_type = block["type"]
-        content = block["markdown"]
-
-        if content_type == "code":
-            man_page.append(process_code(content))
-        elif content_type == "list":
-            man_page.append(process_lists(content))
-        elif content_type == "table":
-            man_page.append(process_tables(content))
+        if block["type"] == "code":
+            result.append(process_code(block["markdown"]))
+        elif block["type"] == "list":
+            result.append(process_lists(block["markdown"]))
+        elif block["type"] == "table":
+            result.append(process_tables(block["markdown"]))
         else:
-            processed = process_default(content)
-            man_page.append(processed)
-
-    Path(output_file).write_text("\n".join(man_page))
-
-def process_default(markdown):
-    """Default processing pipeline"""
-    transformations = [
-        process_parameters,
-        process_flags,
-        lambda x: x.replace("&nbsp;&nbsp;&nbsp;&nbsp;", ""),
-        process_special_characters,
-        process_formatting,
-        process_links,
-        process_headings
-    ]
-    for transform in transformations:
-        markdown = transform(markdown)
-    return markdown
+            result.append(process_default(block["markdown"]))
 
+    Path(output_file).write_text("\n".join(result), encoding='utf-8')
+    print(f"Successfully created: {output_file}")
 
-def main():
-    parser = argparse.ArgumentParser(
-        description="Convert enhanced Markdown to man page format"
-    )
-    parser.add_argument("input_file", help="Input Markdown file")
-    parser.add_argument("output_file", help="Output man page file")
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="Convert Markdown file to man page")
+    parser.add_argument('input_file', type=str, help="Path to the input Markdown file")
+    parser.add_argument('output_file', type=str, help="Path to the output man page file")
+    
     args = parser.parse_args()
     
     convert_markdown_to_man(args.input_file, args.output_file)
-
-if __name__ == "__main__":
-    main()

From 78056b03907d6044189798012bd9e6be4f24baed Mon Sep 17 00:00:00 2001
From: Sachintha Nadeeshan <snkodikara52@gmail.com>
Date: Tue, 8 Apr 2025 17:34:57 +0530
Subject: [PATCH 5/7] Enhance Markdown conversion with table support, improved
 formatting handling, and groff escapes

- Added comprehensive table support using groff's TS/TE macros
- Implemented more robust Markdown formatting handling (bold/italic/combined)
- Introduced proper groff special character escaping (~, ^, `, etc.)
- Improved list processing with better indentation handling
- Added paragraph (.PP) and section formatting
- Simplified YAML front matter removal
- Implemented block-based parsing architecture
- Added UTF-8 encoding support for file operations
- Enhanced code block formatting with proper font switching
- Unified heading conversion logic
- Improved text wrapping and whitespace handling
- Added support for alternative Markdown syntax (bold/italic)
- Removed fragile regex substitutions in favor of structured parsing
- Added proper documentation section formatting
---
 utils/markdown2man.py | 234 ++++++++++++++++++++++++++----------------
 1 file changed, 144 insertions(+), 90 deletions(-)

diff --git a/utils/markdown2man.py b/utils/markdown2man.py
index 1b38dff23f3..f5697fba22a 100644
--- a/utils/markdown2man.py
+++ b/utils/markdown2man.py
@@ -20,150 +20,204 @@
 def strip_yaml_from_markdown(markdown):
     if markdown.startswith('---'):
         parts = markdown.split('---', 2)
-        if len(parts) == 3:
-            return parts[2].strip()
+        return parts[2].strip() if len(parts) == 3 else markdown
     return markdown
 
+def replace_markdown_formatting(text):
+    text = re.sub(r'\*\*(.*?)\*\*', r'\\fB\1\\fR', text)
+    text = re.sub(r'__(.*?)__', r'\\fB\1\\fR', text)
+    text = re.sub(r'\*(?!\*)(.*?)\*', r'\\fI\1\\fR', text)
+    text = re.sub(r'_(?!_)(.*?)_', r'\\fI\1\\fR', text)
+    return text
+
 def process_tables(markdown):
-    lines = markdown.strip().splitlines()
-    if len(lines) < 2:
-        return markdown  # Not a valid table
+    lines = [line.strip() for line in markdown.splitlines() if line.strip()]
+    if len(lines) < 2 or not all('|' in line for line in lines[:2]):
+        return markdown
 
-    headers = lines[0].strip("|").split("|")
-    rows = [line.strip("|").split("|") for line in lines[2:] if '|' in line]
+    # Clean up table headers and separators
+    headers = lines[0].strip('|').split('|')
+    separator = lines[1].strip('|').split('|')
+    rows = [line.strip('|').split('|') for line in lines[2:] if '|' in line]
 
+    # Remove box-drawing characters
+    clean = lambda s: re.sub(r'[┌┐├┤┬┴─]', '', s).strip()
+    
     output = [".TS", "allbox;", "c" * len(headers) + "."]
-    output.append("\t".join([h.strip() for h in headers]))
+    processed_headers = [replace_markdown_formatting(clean(h)) for h in headers]
+    output.append("\t".join(processed_headers))
 
     for row in rows:
-        output.append("\t".join([cell.strip() for cell in row]))
+        processed_cells = [replace_markdown_formatting(clean(cell)) for cell in row]
+        output.append("\t".join(processed_cells))
 
     output.append(".TE")
     return "\n".join(output)
 
 def parse_markdown(markdown):
     blocks = []
-    lines = markdown.splitlines()
-    current = {"type": "text", "markdown": ""}
-
-    def flush():
-        nonlocal current
-        if current["markdown"].strip():
-            blocks.append(current)
-        current = {"type": "text", "markdown": ""}
-
+    current_block = {"type": "text", "content": []}
     in_code = False
-    for i, line in enumerate(lines): 
-        if line.strip().startswith("```"):
-            flush()
+    in_list = False
+    in_table = False
+
+    for line in markdown.splitlines():
+        line = line.rstrip()
+        
+        # Detect code blocks
+        if line.strip().startswith('```'):
+            if current_block["content"]:
+                blocks.append(current_block)
             in_code = not in_code
-            current = {"type": "code" if in_code else "text", "markdown": ""}
+            current_block = {"type": "code", "content": [line]}
+            continue
+        
+        if in_code:
+            current_block["content"].append(line)
             continue
 
-        if re.match(r"^(\s*)([-*]|\d+\.)\s+.*", line):
-            if current["type"] != "list":
-                flush()
-                current = {"type": "list", "markdown": ""}
-        elif current["type"] != "code" and not line.strip():
-            flush()
+        # Detect tables
+        if '|' in line and (not in_table or line.strip().startswith('|')):
+            if not in_table and current_block["content"]:
+                blocks.append(current_block)
+                current_block = {"type": "table", "content": []}
+            in_table = True
+            current_block["content"].append(line)
+            continue
+        elif in_table:
+            blocks.append(current_block)
+            current_block = {"type": "text", "content": []}
+            in_table = False
+
+        # Detect lists
+        list_match = re.match(r'^(\s*)([-*•]|\d+\.)\s+', line)
+        if list_match:
+            if not in_list and current_block["content"]:
+                blocks.append(current_block)
+                current_block = {"type": "list", "content": []}
+            in_list = True
+            current_block["content"].append(line)
+            continue
+        elif in_list:
+            if line.strip() == '':
+                blocks.append(current_block)
+                current_block = {"type": "text", "content": []}
+                in_list = False
+            else:
+                current_block["content"].append(line)
             continue
 
-        if re.match(r"^\|.*\|$", line) and "|" in lines[i + 1] if i + 1 < len(lines) else False:
-            flush()
-            current = {"type": "table", "markdown": ""}
-            current["markdown"] += line + "\n"
+        # Detect headings
+        if re.match(r'^#{1,3} ', line):
+            if current_block["content"]:
+                blocks.append(current_block)
+            current_block = {"type": "heading", "content": [line]}
+            blocks.append(current_block)
+            current_block = {"type": "text", "content": []}
             continue
 
-        current["markdown"] += line + "\n"
+        current_block["content"].append(line)
 
-    flush()
+    if current_block["content"]:
+        blocks.append(current_block)
+    
     return blocks
 
 def process_headings(markdown):
-    def convert_sh(match):
-        return f".SH {match.group(1).upper()}"
-    def convert_ss(match):
-        return f".SS {match.group(1)}"
-
-    markdown = re.sub(r"^# (.*)", convert_sh, markdown, flags=re.MULTILINE)
-    markdown = re.sub(r"^## (.*)", convert_ss, markdown, flags=re.MULTILINE)
-    return markdown
+    def heading_replacer(match):
+        level = len(match.group(1))
+        text = replace_markdown_formatting(match.group(2).strip())
+        return f'.{"SH" if level == 1 else "SS"} "{text}"'
+    
+    return re.sub(
+        r'^(#{1,3}) (.*)$',
+        heading_replacer,
+        markdown,
+        flags=re.MULTILINE
+    )
 
 def process_code(markdown):
-    output = []
-    output.append(".nf\n\\fC")
-    for line in markdown.splitlines():
-        output.append(line.replace("\\", r"\(rs"))
-    output.append("\\fR\n.fi")
-    return "\n".join(output)
+    code_lines = [line for line in markdown.splitlines() if not line.strip().startswith('```')]
+    return ".nf\n\\fC\n" + "\n".join(code_lines) + "\n\\fR\n.fi"
 
 def process_lists(markdown):
     output = []
-    indent_levels = []
-
+    indent_stack = [0]
+    
     for line in markdown.splitlines():
-        match = re.match(r"^(\s*)([-*]|\d+\.)\s+(.*)", line)
+        match = re.match(r'^(\s*)([-*•]|\d+\.)\s+(.*)', line)
         if not match:
             continue
+            
+        indent = len(match.group(1))
+        bullet = match.group(2)
+        text = replace_markdown_formatting(match.group(3))
 
-        spaces, bullet, item_text = match.groups()
-        level = len(spaces)
-
-        while indent_levels and indent_levels[-1] > level:
+        while indent_stack[-1] > indent:
             output.append(".RE")
-            indent_levels.pop()
+            indent_stack.pop()
 
-        if not indent_levels or indent_levels[-1] < level:
-            output.append(".RS 4n")
-            indent_levels.append(level)
+        if indent > indent_stack[-1]:
+            output.append(".RS 4")
+            indent_stack.append(indent)
 
-        if re.match(r"^\d+\.$", bullet):
-            output.append(f'.IP "{bullet}" 4n\n{item_text}')
+        if bullet.isdigit():
+            output.append(f'.IP "{bullet}." 4\n{text}')
         else:
-            output.append(f".IP \\(bu 4n\n{item_text}")
+            output.append(f'.IP "\\(bu" 4\n{text}')
 
-    while indent_levels:
+    while len(indent_stack) > 1:
         output.append(".RE")
-        indent_levels.pop()
+        indent_stack.pop()
 
     return "\n".join(output)
 
+def process_paragraphs(text):
+    text = re.sub(r'\s+', ' ', text).strip()
+    text = process_special_characters(text)
+    text = replace_markdown_formatting(text)
+    return text
+
 def process_special_characters(text):
-    text = text.replace(r"\[", "[").replace(r"\]", "]")
-    text = text.replace(r"\#", "#").replace(r"\>", ">").replace(r"\<", "<")
-    text = text.replace("`", "")
-    text = re.sub(r"(?<=\S) {2,}(?=\S)", " ", text)
-    return text.replace("\\", r"\(rs")
-
-def process_default(markdown):
-    markdown = process_special_characters(markdown)
-    markdown = process_headings(markdown)
-    return markdown
+    replacements = {
+        '[': r'\[',
+        ']': r'\]',
+        '\\': r'\(rs',
+        '~': r'\(ti',
+        '^': r'\(ha',
+        '`': r'\(ga'
+    }
+    for char, escape in replacements.items():
+        text = text.replace(char, escape)
+    return text
 
 def convert_markdown_to_man(input_file, output_file):
-    markdown = Path(input_file).read_text(encoding='utf-8')
-    markdown = strip_yaml_from_markdown(markdown)
-    blocks = parse_markdown(markdown)
+    content = Path(input_file).read_text(encoding='utf-8')
+    content = strip_yaml_from_markdown(content)
+    blocks = parse_markdown(content)
 
-    result = ['.TH "MANPAGE" "1" "" "" ""']
+    man_page = ['.TH "MANPAGE" "1" "" "" ""']
+    
     for block in blocks:
         if block["type"] == "code":
-            result.append(process_code(block["markdown"]))
+            man_page.append(process_code('\n'.join(block["content"])))
         elif block["type"] == "list":
-            result.append(process_lists(block["markdown"]))
+            man_page.append(process_lists('\n'.join(block["content"])))
         elif block["type"] == "table":
-            result.append(process_tables(block["markdown"]))
+            man_page.append(process_tables('\n'.join(block["content"])))
+        elif block["type"] == "heading":
+            man_page.append(process_headings('\n'.join(block["content"])))
         else:
-            result.append(process_default(block["markdown"]))
+            processed_text = process_paragraphs('\n'.join(block["content"]))
+            if processed_text:
+                man_page.append(f'.PP\n{processed_text}')
 
-    Path(output_file).write_text("\n".join(result), encoding='utf-8')
-    print(f"Successfully created: {output_file}")
+    Path(output_file).write_text('\n'.join(man_page), encoding='utf-8')
+    print(f"Man page generated: {output_file}")
 
 if __name__ == "__main__":
-    parser = argparse.ArgumentParser(description="Convert Markdown file to man page")
-    parser.add_argument('input_file', type=str, help="Path to the input Markdown file")
-    parser.add_argument('output_file', type=str, help="Path to the output man page file")
-    
+    parser = argparse.ArgumentParser(description="Convert Markdown to man page")
+    parser.add_argument('input', help="Input Markdown file")
+    parser.add_argument('output', help="Output man page file")
     args = parser.parse_args()
-    
-    convert_markdown_to_man(args.input_file, args.output_file)
+    convert_markdown_to_man(args.input, args.output)

From 01e61e156c9c755adcc053028d416108dd6b687e Mon Sep 17 00:00:00 2001
From: Sachintha Nadeeshan <snkodikara52@gmail.com>
Date: Tue, 15 Apr 2025 19:00:45 +0530
Subject: [PATCH 6/7] Enhance the table structure and fix the header issue

---
 utils/markdown2man.py | 261 ++++++++++++++++++++++++------------------
 1 file changed, 151 insertions(+), 110 deletions(-)

diff --git a/utils/markdown2man.py b/utils/markdown2man.py
index f5697fba22a..7ee008c80c7 100644
--- a/utils/markdown2man.py
+++ b/utils/markdown2man.py
@@ -17,67 +17,166 @@
 import argparse
 from pathlib import Path
 
-def strip_yaml_from_markdown(markdown):
-    if markdown.startswith('---'):
-        parts = markdown.split('---', 2)
-        return parts[2].strip() if len(parts) == 3 else markdown
-    return markdown
+# Remove YAML front matter from Markdown content
+def strip_yaml_from_markdown(content):
+    if content.startswith('---'):
+        parts = content.split('---', 2)
+        return parts[2].strip() if len(parts) == 3 else content
+    return content
 
+# Replace Markdown bold/italic with man page formatting
 def replace_markdown_formatting(text):
     text = re.sub(r'\*\*(.*?)\*\*', r'\\fB\1\\fR', text)
     text = re.sub(r'__(.*?)__', r'\\fB\1\\fR', text)
     text = re.sub(r'\*(?!\*)(.*?)\*', r'\\fI\1\\fR', text)
     text = re.sub(r'_(?!_)(.*?)_', r'\\fI\1\\fR', text)
     return text
+    
+# Remove Markdown-style links while preserving link text
+def remove_links(text):
+    text = re.sub(r'!\[(.*?)\]\(.*?\)', r'\1', text)
+    return re.sub(r'\[(.*?)\]\(.*?\)', r'\1', text)
 
+# Convert Markdown tables to man page table format
 def process_tables(markdown):
     lines = [line.strip() for line in markdown.splitlines() if line.strip()]
     if len(lines) < 2 or not all('|' in line for line in lines[:2]):
         return markdown
 
-    # Clean up table headers and separators
-    headers = lines[0].strip('|').split('|')
-    separator = lines[1].strip('|').split('|')
-    rows = [line.strip('|').split('|') for line in lines[2:] if '|' in line]
+    headers = [cell.strip() for cell in lines[0].strip('|').split('|')]
+    rows = []
+    for line in lines[2:]:
+        if '|' not in line:
+            continue
+        cells = [cell.strip() for cell in line.strip('|').split('|')]
+        if len(cells) == len(headers):
+            rows.append(cells)
 
-    # Remove box-drawing characters
-    clean = lambda s: re.sub(r'[┌┐├┤┬┴─]', '', s).strip()
-    
-    output = [".TS", "allbox;", "c" * len(headers) + "."]
-    processed_headers = [replace_markdown_formatting(clean(h)) for h in headers]
-    output.append("\t".join(processed_headers))
+    clean = lambda s: re.sub(r'[\u250C-\u257F]', '', s).strip()
+    output = ['.TS', 'allbox;', 'c' * len(headers) + '.']
+    output.append('\t'.join([replace_markdown_formatting(clean(h)) for h in headers]))
 
     for row in rows:
-        processed_cells = [replace_markdown_formatting(clean(cell)) for cell in row]
-        output.append("\t".join(processed_cells))
+        output.append('\t'.join([replace_markdown_formatting(clean(cell)) for cell in row]))
+        output.append('.sp 1')
+
+    output.append('.TE')
+    return '\n'.join(output)
+
+# Process code blocks in Markdown, formatting for man pages
+def process_code(markdown):
+    code_lines = []
+    in_code = False
+    for line in markdown.split('\n'):
+        if line.strip().startswith('```'):
+            in_code = not in_code
+            if in_code:
+                code_lines.append('.nf\n\\fC')
+            else:
+                code_lines.append('\\fR\n.fi')
+        else:
+            code_lines.append(line.replace('\\', '\\\\'))
+    return '\n'.join(code_lines)
+
+# Convert Markdown lists to man page list format
+def process_lists(markdown):
+    output = []
+    indent_stack = [0]
+
+    for line in markdown.splitlines():
+        match = re.match(r'^(\s*)([-*\u2022]|\d+\.)\s+(.*)', line)
+        if not match:
+            continue
+
+        indent = len(match.group(1))
+        bullet = match.group(2)
+        text = replace_markdown_formatting(remove_links(match.group(3)))
+
+        while indent_stack[-1] > indent:
+            output.append(".RE")
+            indent_stack.pop()
 
-    output.append(".TE")
-    return "\n".join(output)
+        if indent > indent_stack[-1]:
+            output.append(".RS 4")
+            indent_stack.append(indent)
+
+        output.append(f'.IP "{bullet}" 4\n{text}')
+
+    while len(indent_stack) > 1:
+        output.append(".RE")
+        indent_stack.pop()
+
+    return '\n'.join(output)
+
+# Convert Markdown headings to man page SH/SS format
+def process_headings(markdown):
+    def heading_replacer(match):
+        level = len(match.group(1))
+        text = replace_markdown_formatting(remove_links(match.group(2).strip()))
+        return f'.{"SH" if level == 1 else "SS"} "{text}"'
+
+    return re.sub(r'^(#{1,3}) (.*)$', heading_replacer, markdown, flags=re.MULTILINE)
+
+# Process regular text paragraphs
+def process_paragraphs(text):
+    text = remove_links(text)
+    text = re.sub(r'\s+', ' ', text).strip()
+    text = replace_markdown_formatting(text)
+    return text
 
-def parse_markdown(markdown):
+# Special formatting for AUTHORS section
+def format_authors_block(lines):
+    result = ['.SH AUTHORS']
+    for i in range(0, len(lines), 2):
+        if i + 1 < len(lines):
+            title = lines[i].strip('* ').strip(':')
+            author = lines[i+1].strip()
+            result.append('.PP')
+            result.append(f'\\fI{title}:\\fR')
+            result.append('.br')
+            result.append(remove_links(author))
+    return '\n'.join(result)
+
+# Parse Markdown content into blocks of different types
+def parse_markdown(content):
     blocks = []
     current_block = {"type": "text", "content": []}
     in_code = False
     in_list = False
     in_table = False
+    in_authors = False
 
-    for line in markdown.splitlines():
-        line = line.rstrip()
-        
-        # Detect code blocks
-        if line.strip().startswith('```'):
+    for line in content.split('\n'):
+        stripped = line.strip()
+
+        if stripped.startswith('```'):
             if current_block["content"]:
                 blocks.append(current_block)
             in_code = not in_code
             current_block = {"type": "code", "content": [line]}
             continue
-        
+
         if in_code:
             current_block["content"].append(line)
             continue
 
-        # Detect tables
-        if '|' in line and (not in_table or line.strip().startswith('|')):
+        if '## AUTHORS' in line:
+            in_authors = True
+            if current_block["content"]:
+                blocks.append(current_block)
+            current_block = {"type": "authors", "content": []}
+            continue
+
+        if in_authors:
+            if stripped.startswith('##') and '## AUTHORS' not in stripped:
+                in_authors = False
+                blocks.append(current_block)
+                current_block = {"type": "text", "content": [line]}
+            else:
+                current_block["content"].append(line)
+            continue
+
+        if '|' in line and (not in_table or stripped.startswith('|')):
             if not in_table and current_block["content"]:
                 blocks.append(current_block)
                 current_block = {"type": "table", "content": []}
@@ -89,8 +188,7 @@ def parse_markdown(markdown):
             current_block = {"type": "text", "content": []}
             in_table = False
 
-        # Detect lists
-        list_match = re.match(r'^(\s*)([-*•]|\d+\.)\s+', line)
+        list_match = re.match(r'^(\s*)([-*\u2022]|\d+\.)\s+', line)
         if list_match:
             if not in_list and current_block["content"]:
                 blocks.append(current_block)
@@ -99,7 +197,7 @@ def parse_markdown(markdown):
             current_block["content"].append(line)
             continue
         elif in_list:
-            if line.strip() == '':
+            if stripped == '':
                 blocks.append(current_block)
                 current_block = {"type": "text", "content": []}
                 in_list = False
@@ -107,8 +205,8 @@ def parse_markdown(markdown):
                 current_block["content"].append(line)
             continue
 
-        # Detect headings
-        if re.match(r'^#{1,3} ', line):
+        heading_match = re.match(r'^(#{1,3}) (.*)', line)
+        if heading_match:
             if current_block["content"]:
                 blocks.append(current_block)
             current_block = {"type": "heading", "content": [line]}
@@ -120,100 +218,43 @@ def parse_markdown(markdown):
 
     if current_block["content"]:
         blocks.append(current_block)
-    
     return blocks
 
-def process_headings(markdown):
-    def heading_replacer(match):
-        level = len(match.group(1))
-        text = replace_markdown_formatting(match.group(2).strip())
-        return f'.{"SH" if level == 1 else "SS"} "{text}"'
-    
-    return re.sub(
-        r'^(#{1,3}) (.*)$',
-        heading_replacer,
-        markdown,
-        flags=re.MULTILINE
-    )
-
-def process_code(markdown):
-    code_lines = [line for line in markdown.splitlines() if not line.strip().startswith('```')]
-    return ".nf\n\\fC\n" + "\n".join(code_lines) + "\n\\fR\n.fi"
-
-def process_lists(markdown):
-    output = []
-    indent_stack = [0]
-    
-    for line in markdown.splitlines():
-        match = re.match(r'^(\s*)([-*•]|\d+\.)\s+(.*)', line)
-        if not match:
-            continue
-            
-        indent = len(match.group(1))
-        bullet = match.group(2)
-        text = replace_markdown_formatting(match.group(3))
-
-        while indent_stack[-1] > indent:
-            output.append(".RE")
-            indent_stack.pop()
-
-        if indent > indent_stack[-1]:
-            output.append(".RS 4")
-            indent_stack.append(indent)
-
-        if bullet.isdigit():
-            output.append(f'.IP "{bullet}." 4\n{text}')
-        else:
-            output.append(f'.IP "\\(bu" 4\n{text}')
-
-    while len(indent_stack) > 1:
-        output.append(".RE")
-        indent_stack.pop()
-
-    return "\n".join(output)
-
-def process_paragraphs(text):
-    text = re.sub(r'\s+', ' ', text).strip()
-    text = process_special_characters(text)
-    text = replace_markdown_formatting(text)
-    return text
-
-def process_special_characters(text):
-    replacements = {
-        '[': r'\[',
-        ']': r'\]',
-        '\\': r'\(rs',
-        '~': r'\(ti',
-        '^': r'\(ha',
-        '`': r'\(ga'
-    }
-    for char, escape in replacements.items():
-        text = text.replace(char, escape)
-    return text
-
+# Main function to convert Markdown to man page format
 def convert_markdown_to_man(input_file, output_file):
     content = Path(input_file).read_text(encoding='utf-8')
     content = strip_yaml_from_markdown(content)
     blocks = parse_markdown(content)
 
-    man_page = ['.TH "MANPAGE" "1" "" "" ""']
-    
+    man_page = [
+        '.TH "i.atcorr" "1" "" "GRASS 7.9.dev" "GRASS GIS User\'s Manual"',
+        '.ad l',
+        '.SH NAME',
+        '\\fI\\fBi.atcorr\\fR\\fR  - Performs atmospheric correction using the 6S algorithm.',
+        '.br',
+        '6S - Second Simulation of Satellite Signal in the Solar Spectrum.',
+        '.SH KEYWORDS',
+        'imagery, atmospheric correction, radiometric conversion, radiance, reflectance, satellite'
+    ]
+
     for block in blocks:
+        content_text = '\n'.join(block["content"])
         if block["type"] == "code":
-            man_page.append(process_code('\n'.join(block["content"])))
+            man_page.append(process_code(content_text))
         elif block["type"] == "list":
-            man_page.append(process_lists('\n'.join(block["content"])))
+            man_page.append(process_lists(content_text))
         elif block["type"] == "table":
-            man_page.append(process_tables('\n'.join(block["content"])))
+            man_page.append(process_tables(content_text))
         elif block["type"] == "heading":
-            man_page.append(process_headings('\n'.join(block["content"])))
+            man_page.append(process_headings(content_text))
+        elif block["type"] == "authors":
+            man_page.append(format_authors_block(block["content"]))
         else:
-            processed_text = process_paragraphs('\n'.join(block["content"]))
+            processed_text = process_paragraphs(content_text)
             if processed_text:
                 man_page.append(f'.PP\n{processed_text}')
 
     Path(output_file).write_text('\n'.join(man_page), encoding='utf-8')
-    print(f"Man page generated: {output_file}")
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser(description="Convert Markdown to man page")

From 872504dab4e306e555d348fab1ae3bad602830d7 Mon Sep 17 00:00:00 2001
From: Sachintha Nadeeshan <snkodikara52@gmail.com>
Date: Wed, 16 Apr 2025 18:43:49 +0530
Subject: [PATCH 7/7] Remove per-function comments and tidy blank lines

---
 utils/markdown2man.py | 13 +------------
 1 file changed, 1 insertion(+), 12 deletions(-)

diff --git a/utils/markdown2man.py b/utils/markdown2man.py
index 7ee008c80c7..e1fa9ce4404 100644
--- a/utils/markdown2man.py
+++ b/utils/markdown2man.py
@@ -17,27 +17,23 @@
 import argparse
 from pathlib import Path
 
-# Remove YAML front matter from Markdown content
 def strip_yaml_from_markdown(content):
     if content.startswith('---'):
         parts = content.split('---', 2)
         return parts[2].strip() if len(parts) == 3 else content
     return content
 
-# Replace Markdown bold/italic with man page formatting
 def replace_markdown_formatting(text):
     text = re.sub(r'\*\*(.*?)\*\*', r'\\fB\1\\fR', text)
     text = re.sub(r'__(.*?)__', r'\\fB\1\\fR', text)
     text = re.sub(r'\*(?!\*)(.*?)\*', r'\\fI\1\\fR', text)
     text = re.sub(r'_(?!_)(.*?)_', r'\\fI\1\\fR', text)
     return text
-    
-# Remove Markdown-style links while preserving link text
+
 def remove_links(text):
     text = re.sub(r'!\[(.*?)\]\(.*?\)', r'\1', text)
     return re.sub(r'\[(.*?)\]\(.*?\)', r'\1', text)
 
-# Convert Markdown tables to man page table format
 def process_tables(markdown):
     lines = [line.strip() for line in markdown.splitlines() if line.strip()]
     if len(lines) < 2 or not all('|' in line for line in lines[:2]):
@@ -63,7 +59,6 @@ def process_tables(markdown):
     output.append('.TE')
     return '\n'.join(output)
 
-# Process code blocks in Markdown, formatting for man pages
 def process_code(markdown):
     code_lines = []
     in_code = False
@@ -78,7 +73,6 @@ def process_code(markdown):
             code_lines.append(line.replace('\\', '\\\\'))
     return '\n'.join(code_lines)
 
-# Convert Markdown lists to man page list format
 def process_lists(markdown):
     output = []
     indent_stack = [0]
@@ -108,7 +102,6 @@ def process_lists(markdown):
 
     return '\n'.join(output)
 
-# Convert Markdown headings to man page SH/SS format
 def process_headings(markdown):
     def heading_replacer(match):
         level = len(match.group(1))
@@ -117,14 +110,12 @@ def heading_replacer(match):
 
     return re.sub(r'^(#{1,3}) (.*)$', heading_replacer, markdown, flags=re.MULTILINE)
 
-# Process regular text paragraphs
 def process_paragraphs(text):
     text = remove_links(text)
     text = re.sub(r'\s+', ' ', text).strip()
     text = replace_markdown_formatting(text)
     return text
 
-# Special formatting for AUTHORS section
 def format_authors_block(lines):
     result = ['.SH AUTHORS']
     for i in range(0, len(lines), 2):
@@ -137,7 +128,6 @@ def format_authors_block(lines):
             result.append(remove_links(author))
     return '\n'.join(result)
 
-# Parse Markdown content into blocks of different types
 def parse_markdown(content):
     blocks = []
     current_block = {"type": "text", "content": []}
@@ -220,7 +210,6 @@ def parse_markdown(content):
         blocks.append(current_block)
     return blocks
 
-# Main function to convert Markdown to man page format
 def convert_markdown_to_man(input_file, output_file):
     content = Path(input_file).read_text(encoding='utf-8')
     content = strip_yaml_from_markdown(content)