diff --git a/README.md b/README.md index e4957497a3d..214fb7c4c47 100644 --- a/README.md +++ b/README.md @@ -134,9 +134,9 @@ Please assign any pull request (PR) against an issue; this helps the docs team t Check out the GitHub docs for a refresher on [how to create a pull request](https://docs.github.com/en/desktop/working-with-your-remote-repository-on-github-or-github-enterprise/creating-an-issue-or-pull-request-from-github-desktop). -### Style guidelines +### Style and contribution guidelines -For documentation style guidelines, see ["Style guide"](/contribute/style-guide.md). +For documentation style guidelines, see ["Style guide"](/contribute/style-guide.md). To check spelling and markdown is correct locally run: diff --git a/code_snippets/ClickStack/config-unstructured-logs-with-processor.yaml b/code_snippets/ClickStack/config-unstructured-logs-with-processor.yaml new file mode 100644 index 00000000000..25059c60598 --- /dev/null +++ b/code_snippets/ClickStack/config-unstructured-logs-with-processor.yaml @@ -0,0 +1,43 @@ +receivers: + filelog: + include: + - /opt/data/logs/access-unstructured.log + start_at: beginning + operators: + - type: regex_parser + regex: '^(?P<ip>[\d.]+)\s+-\s+-\s+\[(?P<timestamp>[^\]]+)\]\s+"(?P<method>[A-Z]+)\s+(?P<url>[^\s]+)\s+HTTP/[^\s]+"\s+(?P<status>\d+)\s+(?P<size>\d+)\s+"(?P<referrer>[^"]*)"\s+"(?P<user_agent>[^"]*)"' + timestamp: + parse_from: attributes.timestamp + layout: '%d/%b/%Y:%H:%M:%S %z' + #22/Jan/2019:03:56:14 +0330 +processors: + batch: + timeout: 1s + send_batch_size: 100 + memory_limiter: + check_interval: 1s + limit_mib: 2048 + spike_limit_mib: 256 +exporters: + # HTTP setup + otlphttp/hdx: + endpoint: 'http://localhost:4318' + headers: + authorization: <YOUR_INGESTION_API_KEY> + compression: gzip + + # gRPC setup (alternative) + otlp/hdx: + endpoint: 'localhost:4317' + headers: + authorization: <YOUR_INGESTION_API_KEY> + compression: gzip +service: + telemetry: + metrics: + address: 0.0.0.0:9888 # Modified as 2 collectors running on same host + pipelines: + logs: + receivers: [filelog] + processors: [batch] + exporters: 
[otlphttp/hdx] diff --git a/contribute/style-guide.md b/contribute/style-guide.md index 02519179d42..60162654227 100644 --- a/contribute/style-guide.md +++ b/contribute/style-guide.md @@ -112,12 +112,73 @@ SELECT * FROM system.contributors; \``` ``` +Note: in the snippet above `\` is used only for formatting purposes in this guide. +You should not include it when you write markdown. + Code blocks: - Should always have a language defined immediately next to the opening 3 backticks, without any space. - Have a title (optional) such as 'Query' or 'Response' - Use language `response` if it is for the result of a query. +#### Importing code from files or URLs + +There are a few additional parameters you can include on a code block if you want +to import code. + +To import from a file use `file=`: + +```text +\```python file=code_snippets/integrations/example.py +Code will be inserted here +\``` +``` + +When `yarn build` is run, the code from the file will be inserted as text into +the code block. + +To import from a url use `url=`: + +```text +\```python url=https://raw.githubusercontent.com/ClickHouse/clickhouse-connect/refs/heads/main/examples/pandas_examples.py +Code will be inserted here +\``` +``` + +You should commit the code inserted to the snippet as we want people (or LLMs) +reading the markdown to be able to see the code. The advantage of importing code +to snippets this way is that you can test your snippets externally or store them +wherever you want. + +If you want to only import a section from a file, surround the section with `docs-start` +and `docs-end` comments, for example: + +```python +a = 200 +b = 33 +#docs-start +if b > a: + print("b is greater than a") +elif a == b: + print("a and b are equal") +else: + print("a is greater than b") +#docs-end +``` + +Only the code between those comments will be pulled. 
+ +If you want to make multiple code snippets from one file then you can use the `snippet` parameter: + +```markdown + +\```python url=https://raw.githubusercontent.com/ClickHouse/clickhouse-connect/refs/heads/main/examples/pandas_examples.py snippet=1 +Code will be inserted here +\``` +``` + +You will then use `docs-start-1`, `docs-end-1` comments for the first snippet, `docs-start-2`, `docs-end-2` for the second snippet and so on. + ### Highlighting You can highlight lines in a code block using the following keywords: diff --git a/docs/use-cases/observability/clickstack/ingesting-data/collector.md b/docs/use-cases/observability/clickstack/ingesting-data/collector.md index 8e61466701c..e55460191a0 100644 --- a/docs/use-cases/observability/clickstack/ingesting-data/collector.md +++ b/docs/use-cases/observability/clickstack/ingesting-data/collector.md @@ -161,8 +161,7 @@ The following configuration shows collection of this [unstructured log file](htt Note the use of operators to extract structure from the log lines (`regex_parser`) and filter events, along with a processor to batch events and limit memory usage. 
/**
 * Matches a fenced code block whose info string carries `file=` or `url=`.
 *
 * Groups: 1 = fence indentation, 2 = language (optional), 3 = `file=…`/`url=…`,
 * 4 = remaining info-string metadata, 5 = current block body.
 *
 * The opening fence must start its line (after optional indentation), so
 * escaped documentation examples such as "\```python file=..." are ignored.
 * The body runs up to the first line that consists only of a closing fence,
 * which lets the body itself contain backticks (template literals, shell).
 */
const CODE_IMPORT_REGEX =
  /^([ \t]*)```(\w+)?[ \t]*((?:file|url)=\S+)([^\n]*)\n([\s\S]*?)^[ \t]*```[ \t]*(?=\r?$)/gm;

/**
 * Fetch the body of an HTTP(S) URL as UTF-8 text.
 *
 * @param {string} url - Absolute URL; `https:` URLs use the https client,
 *   everything else uses the http client.
 * @returns {Promise<string>} Resolves with the response body. Rejects with
 *   `Error("HTTP <status>: <message>")` on any non-200 status, or with the
 *   underlying request/response stream error.
 */
function fetchUrl(url) {
  return new Promise((resolve, reject) => {
    const client = url.startsWith('https:') ? https : http;

    client
      .get(url, (res) => {
        if (res.statusCode !== 200) {
          // Drain the unread body so the underlying socket is released.
          res.resume();
          reject(new Error(`HTTP ${res.statusCode}: ${res.statusMessage}`));
          return;
        }

        // Decode as a stream so a multi-byte character split across two
        // chunks is not corrupted by per-chunk Buffer -> string conversion.
        res.setEncoding('utf8');

        let data = '';
        res.on('data', (chunk) => {
          data += chunk;
        });
        res.on('end', () => resolve(data));
        res.on('error', reject);
      })
      .on('error', reject);
  });
}

/**
 * Report whether `line` contains `marker` as a complete marker, i.e. not as
 * the prefix of a longer one (`#docs-start` inside `#docs-start-1`, or
 * `#docs-start-1` inside `#docs-start-10`).
 *
 * @param {string} line - A single source line.
 * @param {string} marker - Non-empty marker text to look for.
 * @returns {boolean} True when the marker occurs and is not directly
 *   followed by a word character or `-`.
 */
function lineHasMarker(line, marker) {
  let at = line.indexOf(marker);
  while (at !== -1) {
    const following = line[at + marker.length];
    if (following === undefined || !/[\w-]/.test(following)) {
      return true;
    }
    at = line.indexOf(marker, at + 1);
  }
  return false;
}

/**
 * Extract the region between `docs-start` / `docs-end` comment markers.
 *
 * Supported marker styles: `#docs-start`, `//docs-start`, `/*docs-start*\/`
 * and `<!--docs-start-->` (with matching `docs-end` forms). When `snippetId`
 * is given, the markers carry a `-<snippetId>` suffix, e.g. `#docs-start-2`.
 *
 * @param {string} content - Full text of the imported file.
 * @param {?string} [snippetId=null] - Snippet identifier, or null for the
 *   unnumbered markers.
 * @returns {string} The lines strictly between the markers, or the original
 *   `content` when no non-empty marked region is found.
 */
function extractSnippet(content, snippetId = null) {
  const lines = content.split('\n');
  const suffix = snippetId ? `-${snippetId}` : '';

  const markerPairs = [
    // Hash-style comments (Python, Ruby, Shell, YAML, etc.)
    { start: `#docs-start${suffix}`, end: `#docs-end${suffix}` },
    // Double-slash comments (JavaScript, Java, C++, etc.)
    { start: `//docs-start${suffix}`, end: `//docs-end${suffix}` },
    // Block comments (CSS, SQL, etc.)
    { start: `/*docs-start${suffix}*/`, end: `/*docs-end${suffix}*/` },
    // XML/HTML comments
    { start: `<!--docs-start${suffix}-->`, end: `<!--docs-end${suffix}-->` },
  ];

  for (const { start, end } of markerPairs) {
    let startIndex = -1;
    let endIndex = -1;

    for (let i = 0; i < lines.length; i += 1) {
      if (lineHasMarker(lines[i], start)) {
        startIndex = i + 1; // first line after the start marker
      } else if (startIndex !== -1 && lineHasMarker(lines[i], end)) {
        endIndex = i; // slice end is exclusive, so the marker line is dropped
        break;
      }
    }

    if (startIndex !== -1 && endIndex !== -1 && startIndex < endIndex) {
      return lines.slice(startIndex, endIndex).join('\n');
    }
  }

  // No usable markers: import the whole file.
  return content;
}

/**
 * Rewrite every `file=` / `url=` code block in a markdown document.
 *
 * The result is assembled from match offsets rather than with
 * `String.prototype.replace(string, string)`, so `$&`, `$$`, `$1` and similar
 * sequences inside imported code are inserted literally.
 *
 * @param {string} content - Markdown source.
 * @param {(request: {kind: string, target: string, snippetId: ?string}) =>
 *   Promise<?string>} resolveImport - Returns the code to insert, or
 *   null/undefined to leave that block untouched.
 * @returns {Promise<string>} The markdown with resolved blocks replaced.
 */
async function replaceCodeImports(content, resolveImport) {
  const matches = [...content.matchAll(CODE_IMPORT_REGEX)];
  if (matches.length === 0) {
    return content;
  }

  const pieces = [];
  let cursor = 0;

  for (const match of matches) {
    const [fullMatch, indent, lang = '', param, additionalMeta] = match;
    const snippetId = additionalMeta.match(/snippet=(\w+)/)?.[1] ?? null;
    const separator = param.indexOf('=');

    const imported = await resolveImport({
      kind: param.slice(0, separator),
      target: param.slice(separator + 1),
      snippetId,
    });

    pieces.push(content.slice(cursor, match.index));
    pieces.push(
      imported == null
        ? fullMatch
        : `${indent}\`\`\`${lang} ${param}${additionalMeta}\n${imported}\n${indent}\`\`\``,
    );
    cursor = match.index + fullMatch.length;
  }

  pieces.push(content.slice(cursor));
  return pieces.join('');
}

/**
 * Docusaurus plugin that inlines external code into markdown code blocks
 * declared with `file=<path>` or `url=<address>` (optionally `snippet=<id>`).
 *
 * `loadContent` scans `<siteDir>/docs` for `.md`/`.mdx` files and computes the
 * rewritten text; `contentLoaded` writes the changed files back to disk.
 *
 * @param {{siteDir: string}} context - Load context; only `siteDir` is read.
 * @param {object} options - Plugin options (currently unused).
 * @returns {object} Plugin object with `name`, `loadContent`, `contentLoaded`.
 */
function codeImportPlugin(context, options) {
  return {
    name: 'code-import-plugin',

    async loadContent() {
      const docsPath = path.join(context.siteDir, 'docs');

      const markdownFiles = [
        ...glob.sync('**/*.md', { cwd: docsPath, absolute: true }),
        ...glob.sync('**/*.mdx', { cwd: docsPath, absolute: true }),
      ];

      // One download per URL per build, even when several blocks (for
      // example snippet=1 and snippet=2) import from the same address.
      const urlCache = new Map();
      const processedFiles = [];

      for (const filePath of markdownFiles) {
        try {
          const original = fs.readFileSync(filePath, 'utf8');

          const content = await replaceCodeImports(
            original,
            async ({ kind, target, snippetId }) => {
              if (kind === 'url') {
                try {
                  if (!urlCache.has(target)) {
                    urlCache.set(target, fetchUrl(target));
                  }
                  return extractSnippet(await urlCache.get(target), snippetId);
                } catch (urlError) {
                  console.warn(`Could not fetch URL ${target} in ${filePath}: ${urlError.message}`);
                  return null; // keep the committed block when the fetch fails
                }
              }

              try {
                const absoluteImportPath = path.resolve(context.siteDir, target);
                const rawContent = fs.readFileSync(absoluteImportPath, 'utf8');
                return extractSnippet(rawContent, snippetId);
              } catch (error) {
                console.warn(`Could not process ${kind}=${target} in ${filePath}: ${error.message}`);
                return null;
              }
            },
          );

          // Only files whose text actually changed are rewritten.
          if (content !== original) {
            processedFiles.push({
              path: filePath,
              content,
              originalPath: filePath,
            });
          }
        } catch (error) {
          console.warn(`Error processing file ${filePath}: ${error.message}`);
        }
      }

      return { processedFiles };
    },

    async contentLoaded({ content, actions }) {
      const { processedFiles } = content;

      // Write processed files back to disk during build
      for (const file of processedFiles) {
        try {
          fs.writeFileSync(file.path, file.content, 'utf8');
          console.log(`Processed code imports in: ${path.relative(context.siteDir, file.path)}`);
        } catch (error) {
          console.error(`Error writing processed file ${file.path}: ${error.message}`);
        }
      }
    },
  };
}