From ccae52c6884ce4302a2a8fdd94c05542a4fec5f8 Mon Sep 17 00:00:00 2001 From: Andrew Kuny Date: Mon, 2 Mar 2026 10:49:39 -0500 Subject: [PATCH] 1703: return "none-specified" if a response does not have a content-type header --- libs/core-scanner/src/util.spec.ts | 14 +++- libs/core-scanner/src/util.ts | 120 ++++++++++++++++++----------- 2 files changed, 88 insertions(+), 46 deletions(-) diff --git a/libs/core-scanner/src/util.spec.ts b/libs/core-scanner/src/util.spec.ts index 610736e7..65667842 100644 --- a/libs/core-scanner/src/util.spec.ts +++ b/libs/core-scanner/src/util.spec.ts @@ -74,6 +74,13 @@ describe('core-scanner util', () => { const result = getMIMEType(mockResponse); expect(result).toBe('text/html'); }); + + it('defaults to "none-specified" if a Puppeteer HTTPResponse instance does not have a content-type header', () => { + const mockResponse = mock(); + mockResponse.headers.calledWith().mockReturnValue({}); + const result = getMIMEType(mockResponse); + expect(result).toBe('none-specified'); + }); }); describe('getWithSubdomain', () => { @@ -124,7 +131,9 @@ describe('core-scanner util', () => { const url = 'https://poena.inl.gov:7004/console-selfservice/SelfService.do?ThisWillBeRemoved'; const result = getTruncatedUrl(url); - expect(result).toBe('https://poena.inl.gov:7004/console-selfservice/SelfService.do'); + expect(result).toBe( + 'https://poena.inl.gov:7004/console-selfservice/SelfService.do', + ); }); it('does not truncate a url that does not contain query parameters', () => { @@ -133,5 +142,4 @@ describe('core-scanner util', () => { expect(result).toBe('https://gsa.gov'); }); }); - -}); \ No newline at end of file +}); diff --git a/libs/core-scanner/src/util.ts b/libs/core-scanner/src/util.ts index 9b740c6e..4117c7cc 100644 --- a/libs/core-scanner/src/util.ts +++ b/libs/core-scanner/src/util.ts @@ -44,7 +44,7 @@ export const getMIMEType = (res: HTTPResponse): string => { const mimetype = contentType.split(';')[0]; return mimetype; } else { - return 'unknown'; + return 'none-specified'; } }; @@ -78,48 +78,80 @@ export function getTruncatedUrl(url: string): string { } export function createRequestHandlers(page: Page, logger: Logger) { - page.on('console', (message) => logger.debug({sseMessage: message }, `Page Log: ${message.text()}`)); - page.on('error', (error) => logger.warn({ error }, `Page Error: ${error.message}`)); - page.on('response', (response)=> response.status() !== 200 && logger.debug({ sseResponseUrl: response.url(), sseResponseStatus: response.status()}, `A ${response.status()} was returned from: ${getTruncatedUrl(response.url())} `)); - page.on('requestfailed', (request) => logger.warn({ sseRequestUrl: request.url() }, `Request failed: ${getTruncatedUrl(request.url())}`)); -}; - + page.on('console', (message) => + logger.debug({ sseMessage: message }, `Page Log: ${message.text()}`), + ); + page.on('error', (error) => + logger.warn({ error }, `Page Error: ${error.message}`), + ); + page.on( + 'response', + (response) => + response.status() !== 200 && + logger.debug( + { + sseResponseUrl: response.url(), + sseResponseStatus: response.status(), + }, + `A ${response.status()} was returned from: ${getTruncatedUrl(response.url())} `, + ), + ); + page.on('requestfailed', (request) => + logger.warn( + { sseRequestUrl: request.url() }, + `Request failed: ${getTruncatedUrl(request.url())}`, + ), + ); +} export function logRunningProcesses(logger: Logger, scanStage: string): void { exec('ps aux', (error, stdout, stderr) => { - if (error) { - logger.error({ sseRunningProcError: error, sseScanStage: scanStage }, `Error executing ps command: ${error.message}`); - return; - } - if (stderr) { - logger.error({ sseRunningProcError: stderr, sseScanStage: scanStage }, `stderr: ${stderr}`); - return; - } - - const lines = stdout.trim().split('\n'); - const processCount = lines.length - 1; // Subtract 1 to exclude the header - const headers = lines[0].split(/\s+/); // Split by whitespace - - const processes = []; - - for (let i = 1; i < lines.length; i++) { - const values = lines[i].split(/\s+/); - const process = {}; - - for (let j = 0; j < headers.length; j++) { - process[headers[j]] = values[j]; - } - - processes.push(process); + if (error) { + logger.error( + { sseRunningProcError: error, sseScanStage: scanStage }, + `Error executing ps command: ${error.message}`, + ); + return; + } + if (stderr) { + logger.error( + { sseRunningProcError: stderr, sseScanStage: scanStage }, + `stderr: ${stderr}`, + ); + return; + } + + const lines = stdout.trim().split('\n'); + const processCount = lines.length - 1; // Subtract 1 to exclude the header + const headers = lines[0].split(/\s+/); // Split by whitespace + + const processes = []; + + for (let i = 1; i < lines.length; i++) { + const values = lines[i].split(/\s+/); + const process = {}; + + for (let j = 0; j < headers.length; j++) { + process[headers[j]] = values[j]; } - const combinedProcesses = processes.map((process) => process.COMMAND).join(','); - const processJson = []; - processes.forEach((process) => { - processJson.push(process.COMMAND); - }); - - logCount(logger, {}, `${scanStage}.process.count`, `${processCount} processes running at the '${scanStage}' of scan.`, processCount); + processes.push(process); + } + const combinedProcesses = processes + .map((process) => process.COMMAND) + .join(','); + const processJson = []; + processes.forEach((process) => { + processJson.push(process.COMMAND); + }); + + logCount( + logger, + {}, + `${scanStage}.process.count`, + `${processCount} processes running at the '${scanStage}' of scan.`, + processCount, + ); }); } @@ -130,20 +162,22 @@ export function printMemoryUsage(logger: Logger, metadata: any) { const used = process.memoryUsage(); for (const key in used) { const valueMb = Math.round((used[key] / 1024 / 1024) * 100) / 100; - logCount(logger, { + logCount( + logger, + { metricUnit: 'megabytes', metadata, }, `scanner.core.memory.used.${key}.mb`, `Memory used: ${key}: ${valueMb} MB`, - valueMb + valueMb, ); } } /** * Generates an MD5 hash of the HTML source of a page. - * + * * @param page The Puppeteer Page object representing the page to hash. * @returns A promise that resolves to the MD5 hash of the page's HTML source or null if the page source is empty. */ @@ -151,10 +185,10 @@ export async function getPageMd5Hash(page: Page): Promise { const pageSource = await page.content(); const hash = crypto.createHash('md5'); - if(!pageSource) { + if (!pageSource) { return null; } hash.update(pageSource); return hash.digest('hex'); -} \ No newline at end of file +}