|
| 1 | +#!/usr/bin/env python3 |
| 2 | +"""Generate by-month.html listing all tools grouped by creation month.""" |
| 3 | + |
| 4 | +from __future__ import annotations |
| 5 | + |
| 6 | +import json |
| 7 | +from collections import defaultdict |
| 8 | +from datetime import datetime |
| 9 | +from pathlib import Path |
| 10 | + |
| 11 | + |
# JSON input read by _load_gathered_links(); resolved relative to the current
# working directory. build_by_month() reads its "pages" mapping.
GATHERED_LINKS_PATH = Path("gathered_links.json")
# HTML file written by build_by_month(); also relative to the working directory.
OUTPUT_PATH = Path("by-month.html")
| 14 | + |
| 15 | + |
| 16 | +def _parse_iso_datetime(value: str | None) -> datetime | None: |
| 17 | + if not value: |
| 18 | + return None |
| 19 | + try: |
| 20 | + cleaned = value.replace("Z", "+00:00") |
| 21 | + return datetime.fromisoformat(cleaned) |
| 22 | + except ValueError: |
| 23 | + return None |
| 24 | + |
| 25 | + |
| 26 | +def _get_first_n_words(text: str, n: int = 15) -> str: |
| 27 | + """Extract the first n words from text.""" |
| 28 | + words = text.split() |
| 29 | + if len(words) <= n: |
| 30 | + return text |
| 31 | + return " ".join(words[:n]) + "..." |
| 32 | + |
| 33 | + |
| 34 | +def _extract_summary(docs_path: Path, word_limit: int = 30) -> str: |
| 35 | + """Extract the first paragraph of the docs file, limited to word_limit words.""" |
| 36 | + if not docs_path.exists(): |
| 37 | + return "" |
| 38 | + |
| 39 | + try: |
| 40 | + content = docs_path.read_text("utf-8").strip() |
| 41 | + except OSError: |
| 42 | + return "" |
| 43 | + |
| 44 | + # Remove HTML comments |
| 45 | + if "<!--" in content: |
| 46 | + content = content.split("<!--", 1)[0] |
| 47 | + |
| 48 | + # Get first paragraph (skip headings) |
| 49 | + lines = [] |
| 50 | + for line in content.splitlines(): |
| 51 | + stripped = line.strip() |
| 52 | + if not stripped: |
| 53 | + if lines: |
| 54 | + break |
| 55 | + continue |
| 56 | + # Skip markdown headings |
| 57 | + if stripped.startswith("#"): |
| 58 | + continue |
| 59 | + lines.append(stripped) |
| 60 | + |
| 61 | + paragraph = " ".join(lines) |
| 62 | + return _get_first_n_words(paragraph, word_limit) |
| 63 | + |
| 64 | + |
def _load_gathered_links() -> dict:
    """Load and return the parsed JSON from ``GATHERED_LINKS_PATH``.

    Returns an empty dict when the file does not exist. Malformed JSON is not
    handled here and propagates to the caller.
    """
    if GATHERED_LINKS_PATH.exists():
        with GATHERED_LINKS_PATH.open("r", encoding="utf-8") as handle:
            raw = handle.read()
        return json.loads(raw)
    return {}
| 70 | + |
| 71 | + |
def _group_tools_by_month(pages: dict) -> dict[str, list[dict]]:
    """Group tool pages by the ``YYYY-MM`` month of their creation commit.

    Pages with no commits, or whose creation date cannot be parsed, are
    skipped. Tools within each month are sorted newest first.
    """
    tools_by_month: dict[str, list[dict]] = defaultdict(list)

    for page_name, page_data in pages.items():
        commits = page_data.get("commits", [])
        if not commits:
            continue

        # Commits are expected newest first, so the last entry is treated as
        # the creation commit.
        created_date = _parse_iso_datetime(commits[-1].get("date"))
        if created_date is None:
            continue

        slug = page_name.replace(".html", "")
        tools_by_month[created_date.strftime("%Y-%m")].append(
            {
                "filename": page_name,
                "slug": slug,
                "created": created_date,
                "summary": _extract_summary(Path(f"{slug}.docs.md")),
            }
        )

    for tools in tools_by_month.values():
        tools.sort(key=lambda tool: tool["created"], reverse=True)

    return tools_by_month


def _render_by_month_html(tools_by_month: dict[str, list[dict]], tool_count: int) -> str:
    """Render the complete by-month page as a single HTML string.

    Months are emitted in reverse chronological order. Slugs, filenames and
    summaries are HTML-escaped because they come from file names and docs
    text, which may contain ``<``, ``&`` or quotes.
    """
    # Function-scope import keeps this block self-contained; ``html`` is
    # standard library.
    import html

    parts = [
        """<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Tools by month - tools.simonwillison.net</title>
    <style>
        body {
            font-family: system-ui, -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Helvetica, Arial, sans-serif;
            line-height: 1.5;
            max-width: 800px;
            margin: 0 auto;
            padding: 1rem;
            color: #1a1a1a;
        }
        h1 {
            border-bottom: 2px solid #f0f0f0;
            padding-bottom: 0.5rem;
            margin-top: 2rem;
        }
        h2 {
            margin-top: 2rem;
            font-size: 1.4rem;
            border-bottom: 1px solid #f0f0f0;
            padding-bottom: 0.3rem;
        }
        a {
            color: #0066cc;
            text-decoration: none;
        }
        a:hover {
            text-decoration: underline;
        }
        a.hashref:link,
        a.hashref:visited,
        a.hashref:hover,
        a.hashref:focus,
        a.hashref:active {
            color: #666;
            margin-right: 0.3rem;
        }
        .tool-list {
            list-style: none;
            margin: 0;
            padding: 0;
        }
        .tool-item {
            margin-bottom: 1rem;
            padding-bottom: 0.75rem;
            border-bottom: 1px solid #f8f8f8;
        }
        .tool-item:last-child {
            border-bottom: none;
        }
        .tool-name {
            font-weight: 600;
        }
        .tool-links {
            font-size: 0.9rem;
            color: #666;
        }
        .tool-summary {
            margin-top: 0.25rem;
            color: #444;
            font-size: 0.95rem;
        }
        .back-link {
            margin-bottom: 1rem;
        }
    </style>
</head>
<body>
    <p class="back-link"><a href="/">← Back to tools.simonwillison.net</a></p>
    <h1>Tools by month</h1>
"""
    ]

    total_word = "tool" if tool_count == 1 else "tools"
    parts.append(
        f"    <p>{tool_count} {total_word}, grouped by the month they were created.</p>\n"
    )

    # "YYYY-MM" keys sort chronologically as plain strings.
    for month_key in sorted(tools_by_month, reverse=True):
        tools = tools_by_month[month_key]
        # Human-readable heading such as "November 2024".
        month_display = datetime.strptime(month_key, "%Y-%m").strftime("%B %Y")
        tool_word = "tool" if len(tools) == 1 else "tools"

        parts.append(
            f'\n    <h2 id="{month_key}"><a class="hashref" href="#{month_key}">#</a>'
            f"{month_display} ({len(tools)} {tool_word})</h2>\n"
        )
        parts.append('    <ul class="tool-list">\n')

        for tool in tools:
            slug = tool["slug"]
            tool_url = html.escape(f"https://tools.simonwillison.net/{slug}")
            colophon_url = html.escape(
                f"https://tools.simonwillison.net/colophon#{tool['filename']}"
            )

            parts.append('        <li class="tool-item">\n')
            parts.append(
                f'            <span class="tool-name"><a href="{tool_url}">'
                f"{html.escape(slug)}</a></span>\n"
            )
            parts.append(
                f'            <span class="tool-links">(<a href="{colophon_url}">'
                "colophon</a>)</span>\n"
            )
            if tool["summary"]:
                parts.append(
                    f'            <div class="tool-summary">{html.escape(tool["summary"])}</div>\n'
                )
            parts.append("        </li>\n")

        parts.append("    </ul>\n")

    parts.append("</body>\n</html>\n")
    return "".join(parts)


def build_by_month() -> None:
    """Generate ``by-month.html`` listing every tool grouped by creation month.

    Reads the ``pages`` mapping from the gathered-links JSON, groups pages by
    the month of their creation commit, and writes the rendered page to
    ``OUTPUT_PATH``. Prints a message and writes nothing when there are no
    pages.
    """
    pages = _load_gathered_links().get("pages", {})
    if not pages:
        print("No pages found in gathered_links.json")
        return

    tools_by_month = _group_tools_by_month(pages)
    tool_count = sum(len(tools) for tools in tools_by_month.values())

    OUTPUT_PATH.write_text(_render_by_month_html(tools_by_month, tool_count), "utf-8")
    print(f"by-month.html created successfully ({tool_count} tools)")
| 231 | + |
| 232 | + |
# Build the page only when this file is run as a script, not when imported.
if __name__ == "__main__":
    build_by_month()
0 commit comments