Skip to content

How to Block AI Bots in Node.js Polka

Polka is an ultra-lightweight Node.js HTTP router (~70 lines of source) used by SvelteKit dev adapters, Vite, and other build tools. It accepts Express-compatible (req, res, next) middleware — any Express middleware works in Polka without changes. The key difference from Express: Polka adds no response helpers — there is no res.json(), res.send(), or res.status(). You use raw Node.js http.ServerResponse methods: res.writeHead() and res.end(). For bot blocking this means: res.writeHead(403, {...}) then res.end('Forbidden'), and do not call next() after — the response is already sent.

1. Bot detection

CommonJS module, no dependencies. String.prototype.includes() for literal substring matching. Array.prototype.some() short-circuits on first match.

// bot-utils.js — bot detection, no dependencies
'use strict';

// Known AI-crawler tokens. Every entry is lowercase because the candidate
// User-Agent is lowercased before comparison. Frozen so the shared list
// cannot be mutated at runtime.
const AI_BOT_PATTERNS = Object.freeze([
  'gptbot',
  'chatgpt-user',
  'claudebot',
  'anthropic-ai',
  'ccbot',
  'google-extended',
  'cohere-ai',
  'meta-externalagent',
  'bytespider',
  'omgili',
  'diffbot',
  'imagesiftbot',
  'magpie-crawler',
  'amazonbot',
  'dataprovider',
  'netcraft',
]);

/**
 * Returns true if the User-Agent value matches a known AI crawler.
 *
 * Matching is a case-insensitive, literal substring test against
 * AI_BOT_PATTERNS (no regular expressions). Missing or empty values never
 * match. An array of header values is joined and tested as one string; any
 * other non-string value is treated as "not a bot" instead of throwing.
 *
 * @param {string|string[]|null|undefined} ua - Raw User-Agent header value.
 * @returns {boolean} True when any known pattern occurs in the value.
 */
function isAiBot(ua) {
  // Header values can arrive as string[]; fold them into one searchable string.
  const text = Array.isArray(ua) ? ua.join(' ') : ua;

  // Guard before toLowerCase(): only non-empty strings can match.
  if (typeof text !== 'string' || text === '') return false;

  const lower = text.toLowerCase();
  // some() stops at the first pattern that is found.
  return AI_BOT_PATTERNS.some((pattern) => lower.includes(pattern));
}

module.exports = { isAiBot };

2. Middleware — raw Node.js ServerResponse

req.headers is Node.js IncomingMessage.headers — all keys lowercase, values are strings or undefined. Set X-Robots-Tag via res.setHeader() for pass-through (before next()) or in res.writeHead() headers for blocked responses.

// middleware/ai-bot-blocker.js
'use strict';

const { isAiBot } = require('../bot-utils');

/**
 * Middleware with the (req, res, next) signature that rejects AI crawlers.
 *
 * Behaviour:
 *   - Requests whose req.path is '/robots.txt' are forwarded untouched.
 *   - Requests whose User-Agent is flagged by isAiBot() get a 403 plain-text
 *     'Forbidden' response and the chain stops (next() is not called).
 *   - Everything else gets an 'X-Robots-Tag: noai, noimageai' header and is
 *     forwarded with next().
 *
 * Only raw http.ServerResponse methods are used (writeHead, setHeader, end);
 * Polka does NOT add res.json(), res.send(), res.status() etc.
 */
function aiBotBlocker(req, res, next) {
  // robots.txt stays reachable for every client, crawlers included.
  const isRobotsRequest = req.path === '/robots.txt';
  if (isRobotsRequest) {
    return next();
  }

  // Header keys on req.headers are lowercase; an absent header is undefined,
  // so fall back to an empty string.
  const userAgent = req.headers['user-agent'] || '';

  if (!isAiBot(userAgent)) {
    // The header has to be set before any downstream handler writes the response.
    res.setHeader('X-Robots-Tag', 'noai, noimageai');
    next();
    return;
  }

  // Blocked: status and headers go out in one writeHead() call, then end()
  // sends the body and finishes the response. next() must not run after this.
  res.writeHead(403, {
    'Content-Type': 'text/plain',
    'X-Robots-Tag': 'noai, noimageai',
  });
  res.end('Forbidden');
}

module.exports = aiBotBlocker;

3. server.js — global .use() registration

app.use(fn) without a path prefix applies middleware to all routes. All response writing uses raw Node.js methods — no Express helpers.

// server.js — Polka application
'use strict';

const polka = require('polka');
const aiBotBlocker = require('./middleware/ai-bot-blocker');

/**
 * Writes a 200 response whose body is `payload` serialized as JSON,
 * using only raw ServerResponse methods.
 */
function sendJson(res, payload) {
  res.writeHead(200, { 'Content-Type': 'application/json' });
  res.end(JSON.stringify(payload));
}

// Body of the /robots.txt route, kept verbatim as a single template literal.
const ROBOTS_TXT = `User-agent: *
Allow: /

User-agent: GPTBot
Disallow: /

User-agent: ClaudeBot
Disallow: /

User-agent: CCBot
Disallow: /

User-agent: Google-Extended
Disallow: /`;

const app = polka();

// Registered without a path prefix, so the blocker runs for every request.
app.use(aiBotBlocker);

// Routes
app.get('/', (req, res) => {
  sendJson(res, { message: 'Hello' });
});

app.get('/api/data', (req, res) => {
  sendJson(res, { data: 'value' });
});

app.get('/robots.txt', (req, res) => {
  res.writeHead(200, { 'Content-Type': 'text/plain' });
  res.end(ROBOTS_TXT);
});

// Log the address once the server is accepting connections.
const onListening = () => {
  console.log('Listening on http://localhost:8080');
};

app.listen(8080, onListening);

4. Sub-application scoping — protect /api only

Polka supports mounting sub-applications at path prefixes with app.use('/api', apiSubapp). Middleware on the sub-app only fires for requests under that prefix. The parent app's health check and public routes bypass the bot filter entirely.

// Sub-application scoping — apply bot blocker only to /api routes.
// Polka supports mounting sub-applications at path prefixes.

const polka = require('polka');
const aiBotBlocker = require('./middleware/ai-bot-blocker');

// Sub-application for the /api prefix. The bot blocker is registered on this
// instance only, so it is the only part of the server that runs it.
const api = polka()
  .use(aiBotBlocker)
  .get('/data', (req, res) => {
    res.writeHead(200, { 'Content-Type': 'application/json' });
    res.end(JSON.stringify({ data: 'value' }));
  });

// Main application. Its own routes are registered without the blocker, so
// the health check below is reachable by any client.
const app = polka()
  .get('/health', (req, res) => {
    res.writeHead(200, { 'Content-Type': 'application/json' });
    res.end(JSON.stringify({ status: 'ok' }));
  })
  // Mount the sub-app: every /api/* request is handed to `api`, whose
  // middleware runs before its routes.
  .use('/api', api);

app.listen(8080);

5. Static files with sirv — robots.txt before middleware

sirv is the recommended static file middleware for Polka. Register it before the bot blocker — sirv serves public/robots.txt at the Polka middleware layer, so the bot blocker never fires for it. The path guard in the middleware is a safety net for other deployment configurations.

// Serving static files with sirv (Polka's recommended static middleware)
// sirv serves the public/ directory — robots.txt at public/robots.txt
// becomes available at /robots.txt BEFORE routes are checked.

const polka = require('polka');
const sirv = require('sirv');
const aiBotBlocker = require('./middleware/ai-bot-blocker');

const app = polka();

// sirv serves files from public/; its `dev` option is enabled whenever
// NODE_ENV is anything other than 'production'.
const isDevMode = process.env.NODE_ENV !== 'production';
const serveStatic = sirv('public', { dev: isDevMode });

// Registration order is what matters here: static serving comes first, so a
// request answered by sirv (e.g. public/robots.txt) never reaches the
// blocker. The blocker's own /robots.txt guard remains as a fallback for
// deployments that do not put sirv in front.
app.use(serveStatic);
app.use(aiBotBlocker);

app.get('/api/data', (req, res) => {
  const body = JSON.stringify({ data: 'value' });
  res.writeHead(200, { 'Content-Type': 'application/json' });
  res.end(body);
});

app.listen(8080);

6. ESM variant (Node.js 14+)

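// ESM variant (Node.js 14+, package.json "type": "module")
// NOTE: with "type": "module", ./bot-utils.js is loaded as an ES module too,
// so the CommonJS `module.exports = { isAiBot }` version will not load.
// Either change it to `export function isAiBot(ua) { ... }` or rename it to
// bot-utils.cjs and import from './bot-utils.cjs'.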
import polka from 'polka';
import { isAiBot } from './bot-utils.js';

/**
 * Middleware with the (req, res, next) signature that blocks AI crawlers.
 *
 * '/robots.txt' is always forwarded. A User-Agent flagged by isAiBot() gets
 * a 403 'Forbidden' response; any other request is tagged with
 * 'X-Robots-Tag: noai, noimageai' and passed on via next().
 */
function aiBotBlocker(req, res, next) {
  if (req.path === '/robots.txt') return next();

  // Same header value is used for both the blocked and the allowed outcome.
  const robotsTag = 'noai, noimageai';
  const userAgent = req.headers['user-agent'] || '';
  const blocked = isAiBot(userAgent);

  if (blocked) {
    // Response is finished here, so next() is deliberately not called.
    res.writeHead(403, { 'X-Robots-Tag': robotsTag, 'Content-Type': 'text/plain' });
    res.end('Forbidden');
  } else {
    res.setHeader('X-Robots-Tag', robotsTag);
    next();
  }
}

// Build the app in one chain: the blocker is registered first so it runs
// ahead of the single route.
const app = polka()
  .use(aiBotBlocker)
  .get('/', (req, res) => {
    res.end('Hello');
  });

app.listen(8080);

7. public/robots.txt

# public/robots.txt (served by sirv before routes)
User-agent: *
Allow: /

User-agent: GPTBot
Disallow: /

User-agent: ClaudeBot
Disallow: /

User-agent: CCBot
Disallow: /

User-agent: Google-Extended
Disallow: /

Key points

Framework comparison — Node.js HTTP routers

| Framework | Middleware | Block | UA header |
| --- | --- | --- | --- |
| Polka | `(req, res, next)` | `res.writeHead(403, {...}); res.end('Forbidden')` | `req.headers['user-agent']` |
| Express | `(req, res, next)` (identical) | `res.status(403).send('Forbidden')` | `req.headers['user-agent']` |
| Fastify | `addHook('onRequest', async (req, reply))` | `reply.code(403).send('Forbidden')` | `req.headers['user-agent']` |
| Hono (Node) | `app.use(async (c, next))` | `return c.text('Forbidden', 403)` | `c.req.header('user-agent')` |

Polka and Express share an identical middleware signature — any Express middleware works in Polka. The difference is in the response API: Express wraps res with helpers; Polka uses the raw Node.js ServerResponse. For bot blocking, this means using res.writeHead() and res.end() instead of res.status().send(). The raw API is slightly more verbose but has zero overhead.

Dependencies

npm install polka
npm install sirv        # optional static file middleware

# Run
node server.js

# With nodemon for development
npx nodemon server.js

# Polka is used internally by SvelteKit's dev server and Vite.
# It has zero production dependencies and ~70 lines of source.
# Version: @polka/polka (scoped) or polka (unscoped) — both available on npm.