How to Block AI Bots in Node.js Polka
Polka is an ultra-lightweight Node.js HTTP router (~70 lines of source) used by SvelteKit dev adapters, Vite, and other build tools. It accepts Express-compatible (req, res, next) middleware — any Express middleware works in Polka without changes. The key difference from Express: Polka adds no response helpers — there is no res.json(), res.send(), or res.status(). You use raw Node.js http.ServerResponse methods: res.writeHead() and res.end(). For bot blocking this means: res.writeHead(403, {...}) then res.end('Forbidden'), and do not call next() after — the response is already sent.
1. Bot detection
CommonJS module, no dependencies. String.prototype.includes() for literal substring matching. Array.prototype.some() short-circuits on first match.
// bot-utils.js — bot detection, no dependencies
'use strict';
// Lowercase User-Agent fragments that identify known AI crawlers.
// Entries must stay lowercase: isAiBot() lowercases the incoming header
// before comparing, so a mixed-case entry here would never match.
const AI_BOT_PATTERNS = [
  'gptbot',
  'chatgpt-user',
  'claudebot',
  'anthropic-ai',
  'ccbot',
  'google-extended',
  'cohere-ai',
  'meta-externalagent',
  'bytespider',
  'omgili',
  'diffbot',
  'imagesiftbot',
  'magpie-crawler',
  'amazonbot',
  'dataprovider',
  'netcraft',
];

/**
 * Reports whether a User-Agent string belongs to a known AI crawler.
 *
 * Matching is a case-insensitive, literal substring test against
 * AI_BOT_PATTERNS; no regular expressions are involved.
 *
 * @param {string} ua - Raw User-Agent header value. Any value that is not a
 *   non-empty string (undefined, null, '', numbers, objects) yields false
 *   instead of throwing.
 * @returns {boolean} true when at least one pattern occurs in the User-Agent.
 */
function isAiBot(ua) {
  // Guard on type rather than truthiness: a truthy non-string has no
  // toLowerCase() method and would otherwise raise a TypeError.
  if (typeof ua !== 'string' || ua === '') return false;
  const lower = ua.toLowerCase();
  // some() stops at the first pattern that matches.
  return AI_BOT_PATTERNS.some((pattern) => lower.includes(pattern));
}
module.exports = { isAiBot };
2. Middleware — raw Node.js ServerResponse
req.headers is Node.js IncomingMessage.headers — all keys lowercase, values are strings or undefined. Set X-Robots-Tag via res.setHeader() for pass-through (before next()) or in res.writeHead() headers for blocked responses.
// middleware/ai-bot-blocker.js
'use strict';
const { isAiBot } = require('../bot-utils');
/**
 * (req, res, next) middleware that rejects requests from AI crawlers.
 *
 * Behaviour, in order:
 *   1. A request whose req.path is exactly '/robots.txt' is handed straight
 *      to next() and nothing is written to the response.
 *   2. A request whose User-Agent is flagged by isAiBot() receives a
 *      403 text/plain response with the body 'Forbidden'; next() is not called.
 *   3. Every other request gets an X-Robots-Tag header and continues to next().
 *
 * Only raw http.ServerResponse methods are used (writeHead, setHeader, end);
 * no res.status()/res.send()/res.json() helpers are assumed.
 *
 * @param {object} req - Incoming request; reads req.path and req.headers.
 * @param {object} res - Response; written to only on the blocked and
 *   pass-through paths.
 * @param {Function} next - Continuation invoked when the request may proceed.
 * @returns {*} Whatever next() returns for '/robots.txt'; otherwise undefined.
 */
function aiBotBlocker(req, res, next) {
  // robots.txt must stay reachable so crawlers can read the opt-out rules.
  const isRobotsRequest = req.path === '/robots.txt';
  if (isRobotsRequest) {
    return next();
  }

  // Header names are looked up in lowercase; a missing header becomes ''.
  const userAgent = req.headers['user-agent'] || '';

  if (!isAiBot(userAgent)) {
    // The header has to be registered before a downstream handler starts
    // writing the response.
    res.setHeader('X-Robots-Tag', 'noai, noimageai');
    next();
    return;
  }

  // Blocked: status and headers go out together, then the body ends the
  // response. next() is deliberately not called after this point.
  res.writeHead(403, {
    'Content-Type': 'text/plain',
    'X-Robots-Tag': 'noai, noimageai',
  });
  res.end('Forbidden');
}
module.exports = aiBotBlocker;
3. server.js — global .use() registration
app.use(fn) without a path prefix applies middleware to all routes. All response writing uses raw Node.js methods — no Express helpers.
// server.js — Polka application
'use strict';
const polka = require('polka');
const aiBotBlocker = require('./middleware/ai-bot-blocker');
// Application instance. The bot blocker is registered with no path prefix,
// so it is installed for every route declared below.
const app = polka();
app.use(aiBotBlocker);

// Writes a 200 JSON response using only raw ServerResponse methods.
function sendJson(res, payload) {
  res.writeHead(200, { 'Content-Type': 'application/json' });
  res.end(JSON.stringify(payload));
}

// robots.txt body: allow everything by default, then opt the listed
// AI crawlers out of the whole site.
const ROBOTS_TXT = [
  'User-agent: *',
  'Allow: /',
  'User-agent: GPTBot',
  'Disallow: /',
  'User-agent: ClaudeBot',
  'Disallow: /',
  'User-agent: CCBot',
  'Disallow: /',
  'User-agent: Google-Extended',
  'Disallow: /',
].join('\n');

// JSON routes
app.get('/', (req, res) => {
  sendJson(res, { message: 'Hello' });
});
app.get('/api/data', (req, res) => {
  sendJson(res, { data: 'value' });
});

// Plain-text robots.txt route
app.get('/robots.txt', (req, res) => {
  res.writeHead(200, { 'Content-Type': 'text/plain' });
  res.end(ROBOTS_TXT);
});
app.listen(8080, () => {
console.log('Listening on http://localhost:8080');
});
4. Sub-application scoping — protect /api only
Polka supports mounting sub-applications at path prefixes with app.use('/api', apiSubapp). Middleware on the sub-app only fires for requests under that prefix. The parent app's health check and public routes bypass the bot filter entirely.
// Sub-application scoping — apply bot blocker only to /api routes.
// Polka supports mounting sub-applications at path prefixes.
const polka = require('polka');
const aiBotBlocker = require('./middleware/ai-bot-blocker');
// Shared responder: 200 status, JSON content type, serialised payload.
const replyJson = (res, payload) => {
  res.writeHead(200, { 'Content-Type': 'application/json' });
  res.end(JSON.stringify(payload));
};

// API sub-application — the only place the bot blocker is installed.
const api = polka();
api.use(aiBotBlocker);
api.get('/data', (req, res) => replyJson(res, { data: 'value' }));

// Top-level application — its own routes carry no bot blocker.
const app = polka();

// Health check lives on the parent app, so the blocker is not in its chain.
app.get('/health', (req, res) => replyJson(res, { status: 'ok' }));

// Attach the sub-application under the /api prefix; requests below that
// prefix are handled by `api`, including its middleware.
app.use('/api', api);
app.listen(8080);
5. Static files with sirv — robots.txt before middleware
sirv is the recommended static file middleware for Polka. Register it before the bot blocker — sirv serves public/robots.txt at the Polka middleware layer, so the bot blocker never fires for it. The path guard in the middleware is a safety net for other deployment configurations.
// Serving static files with sirv (Polka's recommended static middleware)
// sirv serves the public/ directory — robots.txt at public/robots.txt
// becomes available at /robots.txt BEFORE routes are checked.
const polka = require('polka');
const sirv = require('sirv');
const aiBotBlocker = require('./middleware/ai-bot-blocker');
const app = polka();

// sirv runs in dev mode unless NODE_ENV is exactly 'production'.
const isProduction = process.env.NODE_ENV === 'production';
const serveStatic = sirv('public', { dev: !isProduction });

// Registration order is deliberate: the static handler comes first, the bot
// blocker second. With public/robots.txt served by sirv, the blocker's own
// '/robots.txt' guard only matters in deployments that skip sirv.
app.use(serveStatic);
app.use(aiBotBlocker);

app.get('/api/data', (req, res) => {
  const body = JSON.stringify({ data: 'value' });
  res.writeHead(200, { 'Content-Type': 'application/json' });
  res.end(body);
});
app.listen(8080);
6. ESM variant (Node.js 14+)
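// ESM variant (Node.js 14+, package.json "type": "module")
// With "type": "module", bot-utils.js is loaded as an ES module as well, so it
// must expose isAiBot with `export { isAiBot };` instead of `module.exports`.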
import polka from 'polka';
import { isAiBot } from './bot-utils.js';
/**
 * (req, res, next) middleware that answers AI crawlers with 403 Forbidden.
 *
 * '/robots.txt' is always forwarded to next(). Any other request is checked
 * with isAiBot(): a match ends the response with 403 'Forbidden', a miss
 * gets an X-Robots-Tag header and continues down the chain.
 *
 * @param {object} req - Incoming request; reads req.path and req.headers.
 * @param {object} res - Response, written with raw ServerResponse methods.
 * @param {Function} next - Continuation for requests that may proceed.
 * @returns {*} The result of next() for '/robots.txt'; otherwise undefined.
 */
function aiBotBlocker(req, res, next) {
  if (req.path === '/robots.txt') {
    return next();
  }

  const robotsTag = 'noai, noimageai';
  // An absent User-Agent header is treated as the empty string.
  const userAgent = req.headers['user-agent'] || '';

  if (isAiBot(userAgent)) {
    // Response is finished here, so next() must not run afterwards.
    res.writeHead(403, { 'X-Robots-Tag': robotsTag, 'Content-Type': 'text/plain' });
    res.end('Forbidden');
    return;
  }

  // Set the header before any downstream handler writes the response.
  res.setHeader('X-Robots-Tag', robotsTag);
  next();
}
// Register the blocker with no path prefix, then add a root route that
// ends the response with a plain 'Hello' body.
const app = polka();
app.use(aiBotBlocker);
app.get('/', (_req, res) => {
  res.end('Hello');
});
app.listen(8080);
7. public/robots.txt
# public/robots.txt (served by sirv before routes)
User-agent: *
Allow: /
User-agent: GPTBot
Disallow: /
User-agent: ClaudeBot
Disallow: /
User-agent: CCBot
Disallow: /
User-agent: Google-Extended
Disallow: /
Key points
- No res.json() / res.send() / res.status(): Polka adds zero response helpers. Use raw Node.js: res.writeHead(status, headers) and res.end(body). This is the defining difference from Express — the middleware signature is identical, the response API is not.
- req.headers keys are lowercase — always: Node.js normalises all incoming HTTP header names to lowercase in IncomingMessage.headers. Use req.headers['user-agent'] — never req.headers['User-Agent'] (returns undefined). This matches HTTP/2 header naming.
- Set headers before res.writeHead() or res.end(): Once res.writeHead() or res.end() is called, headers are locked. Use res.setHeader() for pass-through (before next()) or include headers in res.writeHead(status, { headers }) for blocked responses.
- Do not call next() after res.end(): Calling next() after ending the response passes to the next middleware, which may attempt to write additional headers — causing a "Cannot set headers after they are sent" error. Always return immediately after res.end().
- Express middleware is fully compatible: Any Express middleware (body-parser, cors, morgan, etc.) works in Polka without modification. The (req, res, next) contract is identical. Polka is often used as a drop-in Express replacement for performance-critical paths.
- sirv middleware order matters: Register sirv before the bot blocker to let static files through without triggering bot detection. Polka processes middleware in registration order — first registered runs first.
Framework comparison — Node.js HTTP routers
| Framework | Middleware | Block | UA header |
|---|---|---|---|
| Polka | (req, res, next) | res.writeHead(403, {...}); res.end('Forbidden') | req.headers['user-agent'] |
| Express | (req, res, next) (identical) | res.status(403).send('Forbidden') | req.headers['user-agent'] |
| Fastify | addHook('onRequest', async (req, reply)) | reply.code(403).send('Forbidden') | req.headers['user-agent'] |
| Hono (Node) | app.use(async (c, next)) | return c.text('Forbidden', 403) | c.req.header('user-agent') |
Polka and Express share an identical middleware signature — any Express middleware works in Polka. The difference is in the response API: Express wraps res with helpers; Polka uses the raw Node.js ServerResponse. For bot blocking, this means using res.writeHead() and res.end() instead of res.status().send(). The raw API is slightly more verbose but has zero overhead.
Dependencies
npm install polka
npm install sirv # optional static file middleware
# Run
node server.js
# With nodemon for development
npx nodemon server.js
# Polka is used internally by SvelteKit's dev server and Vite.
# It has zero production dependencies and ~70 lines of source.
# Version: @polka/polka (scoped) or polka (unscoped) — both available on npm.