diff --git a/core-libs/schematics/src/add-ssr/__snapshots__/index_spec.ts.snap b/core-libs/schematics/src/add-ssr/__snapshots__/index_spec.ts.snap
index aafeafe8194..ee6931f3f20 100644
--- a/core-libs/schematics/src/add-ssr/__snapshots__/index_spec.ts.snap
+++ b/core-libs/schematics/src/add-ssr/__snapshots__/index_spec.ts.snap
@@ -227,6 +227,7 @@ exports[`add-ssr server.ts should be configured properly 1`] = `
 "import { APP_BASE_HREF } from '@angular/common';
 import {
   NgExpressEngineDecorator,
+  createRobotsTxtHandler,
   defaultExpressErrorHandlers,
   ngExpressEngine as engine,
 } from '@spartacus/setup/ssr';
@@ -248,6 +249,11 @@ export function app(): express.Express {
 
   server.set('trust proxy', 'loopback');
 
+  const robotsTxtHandler = createRobotsTxtHandler();
+  if (robotsTxtHandler) {
+    server.get('/robots.txt', robotsTxtHandler);
+  }
+
   server.engine(
     'html',
     ngExpressEngine({
diff --git a/core-libs/schematics/src/add-ssr/files/server.__typescriptExt__ b/core-libs/schematics/src/add-ssr/files/server.__typescriptExt__
index aa07eda12eb..34202867aa2 100644
--- a/core-libs/schematics/src/add-ssr/files/server.__typescriptExt__
+++ b/core-libs/schematics/src/add-ssr/files/server.__typescriptExt__
@@ -1,6 +1,7 @@
 import { APP_BASE_HREF } from '@angular/common';
 import {
   NgExpressEngineDecorator,
+  createRobotsTxtHandler,
   defaultExpressErrorHandlers,
   ngExpressEngine as engine,
 } from '@spartacus/setup/ssr';
@@ -22,6 +23,11 @@ export function app(): express.Express {
 
   server.set('trust proxy', 'loopback');
 
+  const robotsTxtHandler = createRobotsTxtHandler();
+  if (robotsTxtHandler) {
+    server.get('/robots.txt', robotsTxtHandler);
+  }
+
   server.engine(
     'html',
     ngExpressEngine({
diff --git a/core-libs/setup/ssr/public_api.ts b/core-libs/setup/ssr/public_api.ts
index ebe3ac27d8a..07eed391170 100644
--- a/core-libs/setup/ssr/public_api.ts
+++ b/core-libs/setup/ssr/public_api.ts
@@ -11,5 +11,6 @@ export * from './error-handling/index';
 export * from './logger/index';
 export * from './optimized-engine/index';
 export * from './providers/index';
+export * from './robots-txt/index';
 export * from './testing/index';
 export * from './tokens/express.tokens';
diff --git a/core-libs/setup/ssr/robots-txt/index.ts b/core-libs/setup/ssr/robots-txt/index.ts
new file mode 100644
index 00000000000..e854d813147
--- /dev/null
+++ b/core-libs/setup/ssr/robots-txt/index.ts
@@ -0,0 +1,9 @@
+/*
+ * SPDX-FileCopyrightText: 2026 SAP Spartacus team
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+export * from './robots-txt-default-content';
+export * from './robots-txt.handler';
+export * from './robots-txt.model';
diff --git a/core-libs/setup/ssr/robots-txt/robots-txt-default-content.ts b/core-libs/setup/ssr/robots-txt/robots-txt-default-content.ts
new file mode 100644
index 00000000000..884f63a2b73
--- /dev/null
+++ b/core-libs/setup/ssr/robots-txt/robots-txt-default-content.ts
@@ -0,0 +1,72 @@
+/*
+ * SPDX-FileCopyrightText: 2026 SAP Spartacus team
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+/**
+ * Rule lines shared by the crawler groups in the default robots.txt.
+ *
+ * A crawler that matches a named `User-agent` group obeys only that group and
+ * ignores the `*` group (RFC 9309), so named groups must repeat the complete
+ * rule set instead of relying on the wildcard group as a fallback.
+ */
+const DEFAULT_CRAWLER_RULES = [
+  'Allow: /',
+  'Disallow: /cart/',
+  'Disallow: /checkout/',
+  'Disallow: /my-account/',
+  'Disallow: /order/',
+  'Disallow: /admin/',
+  'Disallow: /*?*facet=',
+  'Disallow: /*?*currentPage=',
+  'Disallow: /*?*sortCode=',
+].join('\n');
+
+/**
+ * Default robots.txt content for Spartacus storefronts.
+ *
+ * Disallows transactional/session-specific paths for all crawlers.
+ * Includes explicit entries for major AI crawlers; apart from the group noted
+ * below, each of them repeats the rules of the wildcard group.
+ *
+ * No `Sitemap:` line is emitted because the directive requires a
+ * fully-qualified URL and the storefront origin is not known here. Add it
+ * through the `content` option, e.g. `Sitemap: https://example.com/sitemap.xml`.
+ *
+ * NOTE(review): `Google-Extended` keeps its original allow-all group - confirm
+ * this is the intended default.
+ *
+ * Merchants should override this to set their own AI crawler policy
+ * (e.g. disallowing GPTBot for training, adding Content-Signal directives).
+ */
+export const DEFAULT_ROBOTS_TXT_CONTENT = `# Spartacus default robots.txt
+
+User-agent: *
+${DEFAULT_CRAWLER_RULES}
+
+# AI search / grounding crawlers
+
+User-agent: OAI-SearchBot
+${DEFAULT_CRAWLER_RULES}
+
+User-agent: ChatGPT-User
+${DEFAULT_CRAWLER_RULES}
+
+User-agent: PerplexityBot
+${DEFAULT_CRAWLER_RULES}
+
+User-agent: Claude-SearchBot
+${DEFAULT_CRAWLER_RULES}
+
+# AI training crawlers
+
+User-agent: GPTBot
+${DEFAULT_CRAWLER_RULES}
+
+User-agent: ClaudeBot
+${DEFAULT_CRAWLER_RULES}
+
+User-agent: Google-Extended
+Allow: /
+`;
diff --git a/core-libs/setup/ssr/robots-txt/robots-txt.handler.spec.ts b/core-libs/setup/ssr/robots-txt/robots-txt.handler.spec.ts
new file mode 100644
index 00000000000..b5af8df89ef
--- /dev/null
+++ b/core-libs/setup/ssr/robots-txt/robots-txt.handler.spec.ts
@@ -0,0 +1,111 @@
+/*
+ * SPDX-FileCopyrightText: 2026 SAP Spartacus team
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import type { RequestHandler } from 'express';
+import { createRobotsTxtHandler } from './robots-txt.handler';
+import { DEFAULT_ROBOTS_TXT_CONTENT } from './robots-txt-default-content';
+
+describe('createRobotsTxtHandler', () => {
+  let req: any;
+  let res: any;
+
+  beforeEach(() => {
+    req = {};
+    res = {
+      set: jest.fn().mockReturnThis(),
+      type: jest.fn().mockReturnThis(),
+      send: jest.fn().mockReturnThis(),
+    };
+  });
+
+  describe('when enabled is false', () => {
+    it('should return null', () => {
+      const handler = createRobotsTxtHandler({ enabled: false });
+      expect(handler).toBeNull();
+    });
+  });
+
+  describe('when no options are provided', () => {
+    it('should return a handler', () => {
+      const handler = createRobotsTxtHandler();
+      expect(handler).not.toBeNull();
+    });
+
+    it('should set Cache-Control header', () => {
+      const handler = createRobotsTxtHandler() as RequestHandler;
+      handler(req, res, jest.fn());
+      expect(res.set).toHaveBeenCalledWith(
+        'Cache-Control',
+        'public, max-age=3600'
+      );
+    });
+
+    it('should set content type to text/plain', () => {
+      const handler = createRobotsTxtHandler() as RequestHandler;
+      handler(req, res, jest.fn());
+      expect(res.type).toHaveBeenCalledWith('text/plain');
+    });
+
+    it('should serve the default content', () => {
+      const handler = createRobotsTxtHandler() as RequestHandler;
+      handler(req, res, jest.fn());
+      expect(res.send).toHaveBeenCalledWith(DEFAULT_ROBOTS_TXT_CONTENT);
+    });
+
+    it('should not serve HTML', () => {
+      const handler = createRobotsTxtHandler() as RequestHandler;
+      handler(req, res, jest.fn());
+      const body: string = res.send.mock.calls[0][0];
+      expect(body).not.toContain('<html');
+      expect(body).not.toContain('<body');
+    });
+
+    it('default content should contain User-agent directive', () => {
+      const handler = createRobotsTxtHandler() as RequestHandler;
+      handler(req, res, jest.fn());
+      const body: string = res.send.mock.calls[0][0];
+      expect(body).toContain('User-agent');
+    });
+  });
+
+  describe('when custom content is provided', () => {
+    const customContent = 'User-agent: *\nDisallow: /private/';
+
+    it('should serve the custom content', () => {
+      const handler = createRobotsTxtHandler({
+        content: customContent,
+      }) as RequestHandler;
+      handler(req, res, jest.fn());
+      expect(res.send).toHaveBeenCalledWith(customContent);
+    });
+
+    it('should still set Cache-Control header', () => {
+      const handler = createRobotsTxtHandler({
+        content: customContent,
+      }) as RequestHandler;
+      handler(req, res, jest.fn());
+      expect(res.set).toHaveBeenCalledWith(
+        'Cache-Control',
+        'public, max-age=3600'
+      );
+    });
+
+    it('should still set content type to text/plain', () => {
+      const handler = createRobotsTxtHandler({
+        content: customContent,
+      }) as RequestHandler;
+      handler(req, res, jest.fn());
+      expect(res.type).toHaveBeenCalledWith('text/plain');
+    });
+  });
+
+  describe('when enabled is true explicitly', () => {
+    it('should return a handler', () => {
+      const handler = createRobotsTxtHandler({ enabled: true });
+      expect(handler).not.toBeNull();
+    });
+  });
+});
diff --git a/core-libs/setup/ssr/robots-txt/robots-txt.handler.ts b/core-libs/setup/ssr/robots-txt/robots-txt.handler.ts
new file mode 100644
index 00000000000..9fc79eb25f1
--- /dev/null
+++ b/core-libs/setup/ssr/robots-txt/robots-txt.handler.ts
@@ -0,0 +1,37 @@
+/*
+ * SPDX-FileCopyrightText: 2026 SAP Spartacus team
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import { RequestHandler } from 'express';
+import { DEFAULT_ROBOTS_TXT_CONTENT } from './robots-txt-default-content';
+import { RobotsTxtOptions } from './robots-txt.model';
+
+/**
+ * Creates an Express request handler that serves /robots.txt as plain text.
+ *
+ * Returns null when `enabled` is false so callers can skip route registration.
+ *
+ * Usage in server.ts:
+ * ```ts
+ * const robotsTxtHandler = createRobotsTxtHandler();
+ * if (robotsTxtHandler) {
+ *   server.get('/robots.txt', robotsTxtHandler);
+ * }
+ * ```
+ */
+export function createRobotsTxtHandler(
+  options?: RobotsTxtOptions
+): RequestHandler | null {
+  if (options?.enabled === false) {
+    return null;
+  }
+
+  const content = options?.content ?? DEFAULT_ROBOTS_TXT_CONTENT;
+
+  return (_req, res) => {
+    res.set('Cache-Control', 'public, max-age=3600');
+    res.type('text/plain').send(content);
+  };
+}
diff --git a/core-libs/setup/ssr/robots-txt/robots-txt.model.ts b/core-libs/setup/ssr/robots-txt/robots-txt.model.ts
new file mode 100644
index 00000000000..05bdd01c6f1
--- /dev/null
+++ b/core-libs/setup/ssr/robots-txt/robots-txt.model.ts
@@ -0,0 +1,19 @@
+/*
+ * SPDX-FileCopyrightText: 2026 SAP Spartacus team
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+export interface RobotsTxtOptions {
+  /**
+   * Set to false to skip registering the /robots.txt route entirely.
+   * Default: true.
+   */
+  enabled?: boolean;
+
+  /**
+   * Custom robots.txt content to serve.
+   * When omitted, the Spartacus default content is used.
+   */
+  content?: string;
+}
diff --git a/projects/ssr-tests/src/ssr-robots-txt.spec.ts b/projects/ssr-tests/src/ssr-robots-txt.spec.ts
new file mode 100644
index 00000000000..1e24760e62a
--- /dev/null
+++ b/projects/ssr-tests/src/ssr-robots-txt.spec.ts
@@ -0,0 +1,63 @@
+/*
+ * SPDX-FileCopyrightText: 2026 SAP Spartacus team
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import * as HttpUtils from './utils/http.utils';
+import * as LogUtils from './utils/log.utils';
+import * as SsrUtils from './utils/ssr.utils';
+
+jest.setTimeout(SsrUtils.DEFAULT_SSR_TIMEOUT);
+
+describe('SSR /robots.txt', () => {
+  beforeAll(async () => {
+    await SsrUtils.startSsrServer();
+  });
+
+  afterAll(async () => {
+    await SsrUtils.killSsrServer();
+  });
+
+  it('should return 200 with Content-Type text/plain', async () => {
+    const response = await HttpUtils.sendRequestToSsrServer({
+      path: '/robots.txt',
+    });
+
+    expect(response.statusCode).toEqual(200);
+    expect(response.headers['content-type']).toContain('text/plain');
+  });
+
+  it('should return body containing User-agent directive', async () => {
+    const response = await HttpUtils.sendRequestToSsrServer({
+      path: '/robots.txt',
+    });
+
+    expect(response.body).toContain('User-agent');
+  });
+
+  it('should not return HTML', async () => {
+    const response = await HttpUtils.sendRequestToSsrServer({
+      path: '/robots.txt',
+    });
+
+    expect(response.body).not.toContain('<html');
+    expect(response.body).not.toContain('<body');
+  });
+
+  it('should not trigger Angular SSR rendering', async () => {
+    await HttpUtils.sendRequestToSsrServer({ path: '/robots.txt' });
+
+    const logsMessages = LogUtils.getLogsMessages();
+    expect(logsMessages).not.toContain('Rendering started (/robots.txt)');
+  });
+
+  it('should return Cache-Control: public, max-age=3600', async () => {
+    const response = await HttpUtils.sendRequestToSsrServer({
+      path: '/robots.txt',
+    });
+
+    expect(response.headers['cache-control']).toContain('public');
+    expect(response.headers['cache-control']).toContain('max-age=3600');
+  });
+});
diff --git a/projects/storefrontapp/src/server.ts b/projects/storefrontapp/src/server.ts
index 4eb68d319c1..586b982b98d 100644
--- a/projects/storefrontapp/src/server.ts
+++ b/projects/storefrontapp/src/server.ts
@@ -8,6 +8,7 @@ import { APP_BASE_HREF } from '@angular/common';
 import {
   NgExpressEngineDecorator,
   SsrOptimizationOptions,
+  createRobotsTxtHandler,
   defaultExpressErrorHandlers,
   defaultSsrOptimizationOptions,
   ngExpressEngine as engine,
@@ -37,6 +38,11 @@ export function app(): express.Express {
 
   server.set('trust proxy', 'loopback');
 
+  const robotsTxtHandler = createRobotsTxtHandler();
+  if (robotsTxtHandler) {
+    server.get('/robots.txt', robotsTxtHandler);
+  }
+
   // Our Universal express-engine (found @ https://github.com/angular/universal/tree/master/modules/express-engine)
   server.engine(
     'html',