# robots.txt — blogs.cagancalidag.com
# Strategy: allow search engines and social link-preview bots; block AI crawlers and bulk scrapers.

# ── Explicitly allowed crawlers ──────────────────────────────
# Consecutive User-agent lines form one group, so the single "Allow: /"
# that closes each list applies to every agent named in that list.

# Search engine crawlers
User-agent: Googlebot
User-agent: Bingbot
User-agent: Slurp
User-agent: DuckDuckBot
User-agent: Baiduspider
User-agent: YandexBot
Allow: /

# Social link-preview fetchers
User-agent: facebookexternalhit
User-agent: Twitterbot
User-agent: LinkedInBot
Allow: /

# ── AI Crawlers & Bulk Scrapers (blocked) ────────────────────

# OpenAI
# Each token has its own group; "Disallow: /" puts the whole site off-limits to it.
# NOTE(review): ChatGPT-User and OAI-SearchBot look like on-demand fetch and
# search agents rather than training crawlers — confirm blocking them is intended.
User-agent: GPTBot
Disallow: /

User-agent: ChatGPT-User
Disallow: /

User-agent: OAI-SearchBot
Disallow: /

# Anthropic
# All three tokens are denied the entire site, one group per token.
# NOTE(review): anthropic-ai and Claude-Web may be legacy tokens, presumably
# kept so older crawlers stay blocked — verify against Anthropic's crawler docs.
User-agent: anthropic-ai
Disallow: /

User-agent: Claude-Web
Disallow: /

User-agent: ClaudeBot
Disallow: /

# Google AI (Gemini training — separate from Googlebot search)
# Googlebot has its own "Allow: /" group in this file, so this rule only
# denies the Google-Extended token and leaves the Googlebot rules untouched.
User-agent: Google-Extended
Disallow: /

# Meta AI
# FacebookBot and meta-externalagent are denied the whole site. The
# facebookexternalhit token has its own "Allow: /" group elsewhere in this
# file and is not affected by these two rules.
User-agent: FacebookBot
Disallow: /

User-agent: meta-externalagent
Disallow: /

# Apple
# Blocks only the Applebot-Extended token.
# NOTE(review): presumably Apple's AI-training opt-out token, distinct from the
# regular Applebot crawler, which this section does not name — confirm.
User-agent: Applebot-Extended
Disallow: /

# Common AI scrapers
# One group per agent, each denied the entire site with "Disallow: /".
# Groups are kept separate (one User-agent line each) rather than merged.
# NOTE(review): PetalBot, DataForSeoBot and Scrapy look like general search,
# SEO and scraping agents rather than AI-specific crawlers — confirm they
# belong in this list.
User-agent: CCBot
Disallow: /

User-agent: cohere-ai
Disallow: /

User-agent: PerplexityBot
Disallow: /

User-agent: YouBot
Disallow: /

User-agent: Omgilibot
Disallow: /

User-agent: Diffbot
Disallow: /

User-agent: ImagesiftBot
Disallow: /

User-agent: img2dataset
Disallow: /

User-agent: Timpibot
Disallow: /

User-agent: PetalBot
Disallow: /

User-agent: Scrapy
Disallow: /

User-agent: DataForSeoBot
Disallow: /

# ── Default: allow all others ─────────────────────────────────
# Fallback group: it applies only to agents that match no named group in this
# file. A crawler that matches a named group uses that group's rules instead,
# so the Crawl-delay below does not carry over to the explicitly listed bots.
# Crawl-delay is a non-standard extension (not part of RFC 9309); crawlers
# that do not implement it ignore the line.
# NOTE(review): if the explicitly allowed crawlers should also be throttled,
# the directive has to be repeated inside their groups — confirm intent.
User-agent: *
Allow: /
Crawl-delay: 2

# ── Sitemap ───────────────────────────────────────────────────
# Stand-alone directive: it is not part of any User-agent group, so it is
# visible to every crawler that reads this file. The URL must be absolute.
Sitemap: https://blogs.cagancalidag.com/sitemap.xml