diff --git a/src/routes/robots[.]txt.ts b/src/routes/robots[.]txt.ts
index c4fcda2..09db74d 100644
--- a/src/routes/robots[.]txt.ts
+++ b/src/routes/robots[.]txt.ts
@@ -4,7 +4,24 @@ import { SITE_URL } from "@/lib/metadata";
 
+/**
+ * Builds the body of `/robots.txt`: a single group that applies to every
+ * crawler, followed by the absolute URL of the sitemap.
+ *
+ * @returns The robots.txt text, ending with a trailing newline.
+ */
 export function buildRobotsTxt() {
   const sitemapUrl = new URL("sitemap.xml", SITE_URL).toString();
-  return ["User-agent: *", "Allow: /", `Sitemap: ${sitemapUrl}`, ""].join("\n");
+  return [
+    "User-agent: *",
+    // Content Signals declare our preferences for how AI systems and search
+    // engines may use this content. The references below write the directive
+    // inside a user-agent group, so it is emitted directly after
+    // `User-agent: *`. See https://contentsignals.org/ and
+    // https://datatracker.ietf.org/doc/draft-romm-aipref-contentsignals/
+    "Content-Signal: search=yes, ai-input=yes, ai-train=yes",
+    "Allow: /",
+    `Sitemap: ${sitemapUrl}`,
+    "",
+  ].join("\n");
 }
 
 export const Route = createFileRoute("/robots.txt")({
diff --git a/src/start.ts b/src/start.ts
index 9171d03..12c961b 100644
--- a/src/start.ts
+++ b/src/start.ts
@@ -132,8 +132,60 @@ const llmMiddleware = createMiddleware().server(({ next, request }) => {
   return next();
 });
 
+/**
+ * Links advertised to agents via the `Link` response header (RFC 8288).
+ *
+ * NOTE(review): RFC 8288 requires each link-value to begin with its target
+ * in angle brackets (`<uri>; rel="..."`). The entries below show no target
+ * in this diff; confirm the targets are present in the committed file.
+ *
+ * Reference: https://www.rfc-editor.org/rfc/rfc8288
+ *            https://www.iana.org/assignments/link-relations/link-relations.xhtml
+ */
+const AGENT_LINK_HEADER = [
+  '; rel="sitemap"; type="application/xml"',
+  '; rel="describedby"; type="text/plain"',
+  '; rel="alternate"; type="text/plain"; title="LLM-friendly full docs"',
+  '; rel="service-doc"; type="text/html"',
+  '; rel="canonical"; type="text/html"',
+].join(", ");
+
+/**
+ * Decides whether a response should carry the agent-discovery `Link` header.
+ *
+ * @param request - The incoming request; only `GET` and `HEAD` qualify.
+ * @param response - The outgoing response; it qualifies when its
+ *   `Content-Type` header contains `text/html` (compared case-insensitively).
+ * @returns `true` when both conditions hold, otherwise `false`.
+ */
+function shouldAnnotateResponse(request: Request, response: Response): boolean {
+  if (request.method !== "GET" && request.method !== "HEAD") return false;
+  const contentType = response.headers.get("Content-Type");
+  if (!contentType) return false;
+  return contentType.toLowerCase().includes("text/html");
+}
+
+/**
+ * Request middleware that runs the rest of the chain first, then adds
+ * `AGENT_LINK_HEADER` to the `Link` header of qualifying responses.
+ * An existing `Link` value is kept and the new links are placed after it.
+ */
+const agentDiscoveryMiddleware = createMiddleware().server(async ({ next, request }) => {
+  const result = await next();
+
+  if (shouldAnnotateResponse(request, result.response)) {
+    const existing = result.response.headers.get("Link");
+    result.response.headers.set(
+      "Link",
+      existing ? `${existing}, ${AGENT_LINK_HEADER}` : AGENT_LINK_HEADER,
+    );
+  }
+
+  return result;
+});
+
 export const startInstance = createStart(() => {
   return {
-    requestMiddleware: [legacyRedirectMiddleware, llmMiddleware],
+    requestMiddleware: [agentDiscoveryMiddleware, legacyRedirectMiddleware, llmMiddleware],
   };
 });