How to Block AI Bots on CherryPy (Python): Complete 2026 Guide
CherryPy is Python's oldest production web framework — shipping since 2002, still actively maintained. Instead of traditional middleware, CherryPy uses a Tool system — callables attached to lifecycle hook points and activated through config. Raise cherrypy.HTTPError(403) to block; set cherrypy.response.headers for response headers.
Tools, not middleware
CherryPy Tools are config-driven callables attached to hook points. Enable with 'tools.block_bots.on': True in config. Disable per-route with False. Pass parameters: 'tools.block_bots.strict': True. This config-driven activation is unique to CherryPy — no other Python framework uses it.
CherryPy hook points (execution order)
Protection layers
Layer 1: robots.txt
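Use CherryPy's built-in staticfile tool to serve robots.txt. staticfile is itself a before_handler tool (default priority 50), so a blocker enabled on '/' with priority 10 would run first and answer 403 — switch the blocker off for /robots.txt with 'tools.block_bots.on': False so crawlers can still read the rules: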
# static/robots.txt
User-agent: *
Allow: /

User-agent: GPTBot
User-agent: ClaudeBot
User-agent: anthropic-ai
User-agent: Google-Extended
User-agent: CCBot
User-agent: cohere-ai
User-agent: Bytespider
User-agent: Amazonbot
User-agent: PerplexityBot
User-agent: YouBot
User-agent: Diffbot
User-agent: DeepSeekBot
User-agent: MistralBot
User-agent: xAI-Bot
User-agent: AI2Bot
Disallow: /
# Config — serve robots.txt via staticfile tool
config = {
    '/robots.txt': {
        # staticfile hooks before_handler at the default priority (50). A
        # blocker enabled on a parent path with priority 10 would run first
        # and answer 403, so it is switched off explicitly for this path.
        'tools.block_bots.on': False,
        'tools.staticfile.on': True,
        # staticfile requires an absolute path.
        'tools.staticfile.filename': os.path.join(
            os.path.abspath(os.path.dirname(__file__)),
            'static', 'robots.txt'
        ),
    },
}
# staticfile hooks before_handler at the default priority (50), i.e. after a
# priority-10 bot blocker — disable the blocker for this path so robots.txt
# is served instead of a 403.
Layers 2, 3 & 4: custom Tool
Create a class-based Tool by subclassing cherrypy.Tool. The __init__ binds to a hook point; CherryPy calls your method at that hook:
# tools/ai_bot_blocker.py
import cherrypy
# Lowercase substrings looked for in the lowercased User-Agent header.
AI_BOTS = [
    'gptbot',
    'chatgpt-user',
    'claudebot',
    'anthropic-ai',
    'ccbot',
    'cohere-ai',
    'bytespider',
    'amazonbot',
    'applebot-extended',
    'perplexitybot',
    'youbot',
    'diffbot',
    'google-extended',
    'deepseekbot',
    'mistralbot',
    'xai-bot',
    'ai2bot',
    'oai-searchbot',
    'duckassistbot',
]
class AiBotBlocker(cherrypy.Tool):
    """CherryPy Tool that rejects AI crawlers and advertises noai directives.

    The primary callable (``_check``) is bound to ``before_handler``;
    ``_setup`` additionally hooks ``_set_headers`` at ``before_finalize``.
    """

    def __init__(self):
        # Primary hook point: before_handler, ahead of the page handler.
        super().__init__('before_handler', self._check, priority=10)

    def _setup(self):
        """Attach this tool's hooks for the current request."""
        super()._setup()
        # Second hook point, used to emit the X-Robots-Tag header.
        hooks = cherrypy.request.hooks
        hooks.attach('before_finalize', self._set_headers, priority=60)

    def _check(self):
        """before_handler hook: publish the robots value, then reject AI bots.

        Raises:
            cherrypy.HTTPError: 403 when the User-Agent contains an
                ``AI_BOTS`` entry.
        """
        request = cherrypy.request
        # Layer 2: value templates can place in their robots meta tag.
        request.robots = 'noai, noimageai'

        # Layer 4: refuse the request when the User-Agent names an AI bot.
        user_agent = request.headers.get('User-Agent', '').lower()
        for marker in AI_BOTS:
            if marker in user_agent:
                raise cherrypy.HTTPError(
                    403, 'Forbidden: AI crawlers are not permitted.'
                )

    def _set_headers(self):
        """before_finalize hook: set the X-Robots-Tag response header."""
        # Layer 3: header-level noai directive.
        headers = cherrypy.response.headers
        headers['X-Robots-Tag'] = 'noai, noimageai'
# Register the tool globally
cherrypy.tools.block_bots = AiBotBlocker()
Activate in your app config:
# server.py
import os
import cherrypy
# Import to register the tool
import tools.ai_bot_blocker # noqa: F401
class Root:
    """Demo application: an HTML index page and a JSON-style endpoint."""

    @cherrypy.expose
    def index(self):
        """Return the page shell with the robots meta value filled in."""
        # The tool stores request.robots; fall back when it is not set.
        robots = getattr(cherrypy.request, 'robots', 'index, follow')
        page = '<html><head><meta name="robots" content="{}"></head>...</html>'
        return page.format(robots)

    @cherrypy.expose
    def api(self):
        """Return a fixed JSON string."""
        return '{"data": "protected"}'
config = {
    '/': {
        'tools.block_bots.on': True,  # ← Enable globally
    },
    '/robots.txt': {
        # '/robots.txt' inherits the blocker from '/'. staticfile runs at
        # before_handler with the default priority (50), after the
        # priority-10 blocker, so without this override AI crawlers would
        # get a 403 instead of the rules file.
        'tools.block_bots.on': False,
        'tools.staticfile.on': True,
        # staticfile requires an absolute path.
        'tools.staticfile.filename': os.path.join(
            os.path.abspath(os.path.dirname(__file__)),
            'static', 'robots.txt'
        ),
    },
}
cherrypy.quickstart(Root(), '/', config)
raise, not return
CherryPy tools raise cherrypy.HTTPError(403) to block — the exception short-circuits the handler. This matches Falcon (raise HTTPForbidden) and Bottle (abort(403)). Flask and Django return a response. Pyramid returns without calling handler().
Alternative: function-based Tool
For simpler cases, register a plain function as a tool using the @cherrypy.tools.register decorator:
# Function-based tool — simpler, but can only bind to one hook point
@cherrypy.tools.register('before_handler')
def block_bots():
    """Reject AI crawlers; registered as ``cherrypy.tools.block_bots``.

    Raises:
        cherrypy.HTTPError: 403 when the User-Agent contains an
            ``AI_BOTS`` entry.
    """
    request = cherrypy.request
    # Robots value for templates, set before any blocking decision.
    request.robots = 'noai, noimageai'

    user_agent = request.headers.get('User-Agent', '').lower()
    for marker in AI_BOTS:
        if marker in user_agent:
            raise cherrypy.HTTPError(403, 'Forbidden')
# Separate tool for X-Robots-Tag header
@cherrypy.tools.register('before_finalize')
def robots_header():
    """Add the X-Robots-Tag header; registered as ``cherrypy.tools.robots_header``."""
    response_headers = cherrypy.response.headers
    response_headers['X-Robots-Tag'] = 'noai, noimageai'
# Config:
# '/': {
# 'tools.block_bots.on': True,
# 'tools.robots_header.on': True,
# }
Class-based: one tool, multiple hooks (via _setup()), priority control, state.
Function-based: one function per hook point, simpler, no state. Need two tools for block + headers.
Both activate via 'tools.name.on': True in config.
Per-route activation via config
CherryPy config is hierarchical. Enable globally, disable per-path. Or enable only on specific paths:
# Config-based per-path control
config = {
    # Blocker is on for the whole tree ...
    '/': {'tools.block_bots.on': True},
    # ... and switched off again for these subtrees.
    '/public': {'tools.block_bots.on': False},   # /public/*
    '/health': {'tools.block_bots.on': False},   # health check
}
# Or: enable only on specific paths
config = {
'/': {
'tools.block_bots.on': False, # Off by default
},
'/api': {
'tools.block_bots.on': True, # Only protect /api/*
},
'/content': {
'tools.block_bots.on': True, # Only protect /content/*
},
}Or use the decorator for per-method control:
class Root:
@cherrypy.expose
def index(self):
"""Homepage — no bot blocking (inherits config)."""
return 'Welcome'
@cherrypy.expose
@cherrypy.tools.block_bots() # ← Enable on this method only
def api(self):
"""API — bot blocking via decorator."""
return '{"data": "protected"}'
@cherrypy.expose
@cherrypy.tools.block_bots(on=False) # ← Explicitly disable
def health(self):
"""Health check — always accessible."""
return 'OK'Decorator > per-path config > parent path config > root config.
@cherrypy.tools.block_bots(on=False) on a method overrides 'tools.block_bots.on': True in config.
Configurable tool parameters
Any keyword argument your tool callable accepts can be set via config. This is CherryPy's most distinctive feature — tools are fully configurable through the same system used for everything else:
# Tool with configurable parameters
@cherrypy.tools.register('before_handler')
def block_bots(strict=True, custom_bots=None):
    """Reject AI crawlers, with strictness and extra bots set via config.

    Args:
        strict: if True, block every entry in ``AI_BOTS``; if False, only
            block training crawlers and let the AI search bots
            (oai-searchbot, perplexitybot, duckassistbot) through.
        custom_bots: additional User-Agent substrings to block. A single
            string is treated as one entry; empty entries are ignored.

    Raises:
        cherrypy.HTTPError: 403 when the User-Agent contains a blocked entry.
    """
    cherrypy.request.robots = 'noai, noimageai'
    ua = cherrypy.request.headers.get('User-Agent', '').lower()

    if strict:
        bots = list(AI_BOTS)
    else:
        search_bots = ('oai-searchbot', 'perplexitybot', 'duckassistbot')
        bots = [b for b in AI_BOTS if b not in search_bots]

    if custom_bots:
        # A bare string would be iterated character by character, turning
        # 'MyBot' into 'm', 'y', 'b', ... and blocking almost every client.
        if isinstance(custom_bots, str):
            custom_bots = [custom_bots]
        # '' is a substring of every User-Agent, so skip empty entries.
        bots += [b.lower() for b in custom_bots if b]

    if any(bot in ua for bot in bots):
        raise cherrypy.HTTPError(403, 'Forbidden')
# Config — pass parameters:
config = {
    # Whole tree: tool on, every known AI bot blocked.
    '/': {
        'tools.block_bots.on': True,
        'tools.block_bots.strict': True,
    },
    # /public: tool still on, but AI search bots are let through.
    '/public': {
        'tools.block_bots.on': True,
        'tools.block_bots.strict': False,
    },
}
# Decorator — same parameters:
# @cherrypy.tools.block_bots(strict=False, custom_bots=['MyBot'])
Layer 2: noai meta tag
Set attributes on cherrypy.request in the tool. Access in Mako, Jinja2, or Genshi templates:
# In tool (already set above):
cherrypy.request.robots = 'noai, noimageai'
# Mako template (base.html):
# <meta name="robots" content="${cherrypy.request.robots}">
# Jinja2 template (via cherrypy-jinja2 plugin):
# <meta name="robots" content="{{ request.robots }}">
# Handler with Mako — cherrypy.request is available globally:
@cherrypy.expose
def page(self):
    """Render page.html through the template lookup."""
    return lookup.get_template('page.html').render()
# Template accesses cherrypy.request.robots directly
# Per-page override:
@cherrypy.expose
def public_page(self):
cherrypy.request.robots = 'index, follow' # Override noai
return render('public.html')_setup(): multi-hook Tool pattern
The _setup() method is called per-request when the tool is active. Override it to attach to multiple hook points from a single tool:
class AiBotBlocker(cherrypy.Tool):
    """One Tool wired to three hook points: block, add header, log."""

    def __init__(self):
        # Primary hook: before_handler (priority 10 = early).
        super().__init__('before_handler', self._check, priority=10)

    def _setup(self):
        """Attach the primary hook, then the two additional ones."""
        super()._setup()  # Registers _check at before_handler
        hooks = cherrypy.request.hooks
        # Header hook at before_finalize.
        hooks.attach('before_finalize', self._set_headers, priority=60)
        # Logging hook at on_end_request.
        hooks.attach('on_end_request', self._log_blocked, priority=80)

    def _check(self):
        """Flag and reject requests whose User-Agent names an AI bot.

        Raises:
            cherrypy.HTTPError: 403 when the User-Agent contains an
                ``AI_BOTS`` entry.
        """
        request = cherrypy.request
        request.robots = 'noai, noimageai'
        user_agent = request.headers.get('User-Agent', '').lower()
        # Remember the verdict so _log_blocked can report it later.
        request._was_bot = any(marker in user_agent for marker in AI_BOTS)
        if request._was_bot:
            raise cherrypy.HTTPError(403, 'Forbidden')

    def _set_headers(self):
        """Set the X-Robots-Tag response header."""
        cherrypy.response.headers['X-Robots-Tag'] = 'noai, noimageai'

    def _log_blocked(self):
        """Log the User-Agent of a request that _check flagged as a bot."""
        request = cherrypy.request
        if not getattr(request, '_was_bot', False):
            return
        user_agent = request.headers.get("User-Agent", "")
        cherrypy.log(f'Blocked AI bot: {user_agent}')
cherrypy.tools.block_bots = AiBotBlocker()
1–100 scale. Lower = runs first within the same hook point. Built-in tools: sessions=50, caching=60, encoding=70. Bot blocker at 10 runs before all built-in tools at before_handler — including staticfile (default priority 50), so exempt /robots.txt from the blocker explicitly.
Error response customization
cherrypy.HTTPError(403) triggers CherryPy's error page handler. Customize the 403 response:
# Option 1: Custom error page in config
config = {
    '/': {
        'tools.block_bots.on': True,
        # Path of the page used for 403 responses: templates/403.html
        # next to this file.
        'error_page.403': os.path.join(
            os.path.dirname(__file__), 'templates', '403.html'
        ),
    },
}
# Option 2: Custom error handler function
def handle_403(status, message, traceback, version):
    """Build a minimal plain-text body for 403 responses.

    The arguments are accepted but not used; the body is always the same
    short string.
    """
    response_headers = cherrypy.response.headers
    response_headers['Content-Type'] = 'text/plain'
    return 'Access denied.'


config = {
    '/': {
        'tools.block_bots.on': True,
        # A callable instead of a file path.
        'error_page.403': handle_403,
    },
}
# X-Robots-Tag on error responses:
# before_error_response and after_error_response hooks
# are available but before_finalize also fires on errors.
CherryPy vs Flask vs Django vs Pyramid — comparison
CherryPy — Tool with raise
# CherryPy Tool (raise to block)
@cherrypy.tools.register('before_handler')
def block_bots():
    """Raise a 403 for requests whose User-Agent names an AI bot."""
    user_agent = cherrypy.request.headers.get('User-Agent', '').lower()
    for marker in AI_BOTS:
        if marker in user_agent:
            raise cherrypy.HTTPError(403)  # raise to block
# Config: 'tools.block_bots.on': True
Flask — before_request return
# Flask hook (return to block)
@app.before_request
def block_bots():
ua = request.headers.get('User-Agent', '').lower()
if any(b in ua for b in AI_BOTS):
return Response('Forbidden', 403) # return to blockDjango — middleware class __call__
# Django middleware (return to block)
class AiBotBlocker:
def __init__(self, get_response):
self.get_response = get_response
def __call__(self, request):
ua = request.META.get('HTTP_USER_AGENT', '').lower()
if any(b in ua for b in AI_BOTS):
return HttpResponseForbidden() # return to block
return self.get_response(request)Pyramid — tween factory
# Pyramid tween (return without calling handler)
def ai_bot_blocker_factory(handler, registry):
def blocker(request):
ua = request.headers.get('User-Agent', '').lower()
if any(b in ua for b in AI_BOTS):
return Response(status=403) # return to block
return handler(request) # call handler to continue
return blockerCherryPy raises (like Falcon/Bottle). Flask and Django return. Pyramid returns without calling handler(). CherryPy is the only one with config-driven activation: 'tools.block_bots.on': True.
Testing
CherryPy provides a built-in test helper via cherrypy.test.helper:
import cherrypy
from cherrypy.test import helper
class TestAiBotBlocker(helper.CPWebCase):
    """End-to-end checks for the block_bots tool."""

    @staticmethod
    def setup_server():
        """Mount a minimal app with the blocker on and robots.txt exempted."""
        import atexit
        import os
        import shutil
        import tempfile

        import tools.ai_bot_blocker  # noqa: F401

        # staticfile needs an absolute path to a file that exists, so the
        # test writes its own robots.txt instead of using a placeholder path.
        robots_dir = tempfile.mkdtemp()
        atexit.register(shutil.rmtree, robots_dir, ignore_errors=True)
        robots_path = os.path.join(robots_dir, 'robots.txt')
        with open(robots_path, 'w', encoding='utf-8') as fh:
            fh.write('User-agent: GPTBot\nDisallow: /\n')

        class Root:
            @cherrypy.expose
            def index(self):
                return 'Hello'

            @cherrypy.expose
            def api(self):
                return '{"ok": true}'

        config = {
            '/': {'tools.block_bots.on': True},
            '/robots.txt': {
                # The priority-10 blocker runs before staticfile (default
                # priority 50), so it must be off for this path or the
                # request gets a 403.
                'tools.block_bots.on': False,
                'tools.staticfile.on': True,
                'tools.staticfile.filename': robots_path,
            },
        }
        cherrypy.tree.mount(Root(), '/', config)

    def test_blocks_gptbot(self):
        self.getPage('/', headers=[('User-Agent', 'GPTBot/1.0')])
        self.assertStatus(403)

    def test_blocks_claudebot(self):
        self.getPage('/api', headers=[('User-Agent', 'ClaudeBot/2.0')])
        self.assertStatus(403)

    def test_allows_browser(self):
        self.getPage('/', headers=[('User-Agent', 'Mozilla/5.0')])
        self.assertStatus(200)
        self.assertHeader('X-Robots-Tag', 'noai, noimageai')

    def test_robots_txt_accessible(self):
        self.getPage('/robots.txt', headers=[('User-Agent', 'GPTBot/1.0')])
        self.assertStatus(200)  # Blocker is disabled for this path
# Run: python -m pytest test_bot_blocker.py -v
AI bot User-Agent strings (2026)
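CherryPy exposes request headers through its own case-insensitive header map, so cherrypy.request.headers.get('User-Agent', '') finds the header however the client capitalized its name; calling .lower() on the value then makes the substring match against AI_BOTS case-insensitive.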
Is your site protected from AI bots?
Run a free scan to check your robots.txt, meta tags, and overall AI readiness score.