{
    "name": "Botcrawl Bot Intelligence Feed",
    "schema_version": "1.0",
    "feed_version": "2026.04.30.1015",
    "generated_at": "2026-04-30T10:15:11Z",
    "records": 808,
    "bots": [
        {
            "id": "2checkout",
            "name": "2checkout",
            "slug": "2checkout",
            "url": "https://botcrawl.com/bots/2checkout/",
            "status": "active",
            "operator": "2checkout",
            "company": "2checkout",
            "family": "2checkout",
            "category": "unknown",
            "kind": "fetcher",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "2checkout"
            ],
            "primary_user_agent": "2checkout",
            "robots_token": "2checkout",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "2Checkout Payment Platform's webhooks integration for notifying websites about payment events. Now operated by Verifone.",
            "short_description": "2Checkout Payment Platform's webhooks integration for notifying websites about payment events.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: 2checkout\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"2checkout\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://knowledgecenter.2checkout.com/API-Integration/Webhooks/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:26Z"
        },
        {
            "id": "360monitoring",
            "name": "360Monitoring",
            "slug": "360monitoring",
            "url": "https://botcrawl.com/bots/360monitoring/",
            "status": "active",
            "operator": "360Monitoring",
            "company": "360Monitoring",
            "family": "360Monitoring",
            "category": "monitoring",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "monitoring360"
            ],
            "primary_user_agent": "monitoring360",
            "robots_token": "monitoring360",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Empowering Developers to Monitor Sites & Servers, Easily",
            "short_description": "Empowering Developers to Monitor Sites & Servers, Easily",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: monitoring360\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"monitoring360\")"
            },
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:26Z"
        },
        {
            "id": "360spider",
            "name": "360Spider",
            "slug": "360spider",
            "url": "https://botcrawl.com/bots/360spider/",
            "status": "active",
            "operator": "Qihoo 360",
            "company": "Qihoo 360",
            "family": "Qihoo 360",
            "category": "search",
            "kind": "unknown",
            "purpose": "indexing",
            "identity_type": "unknown",
            "user_agents": [
                "360Spider",
                "Mozilla/5.0 (compatible; 360Spider; +http://webscan.360.cn)",
                "360 Bot",
                "Qihoo 360 Spider"
            ],
            "primary_user_agent": "360Spider",
            "robots_token": "360Spider",
            "verified": "unknown",
            "respects_robots": "yes",
            "risk": "high",
            "recommended_action": "depends",
            "confidence": "unknown",
            "source_type": "unknown",
            "short_description": "360 Search Chinese search engine web crawler for indexing web content.",
            "verification_method": "No official IP-range or reverse-DNS verification method is documented here. Match the exact user-agent and request behavior against the source documentation at https://webscan.360.cn.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: 360Spider\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"360Spider\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://webscan.360.cn"
                }
            ],
            "updated_at": "2026-04-01T00:55:29Z"
        },
        {
            "id": "accessible-web-bot",
            "name": "Accessible Web Bot",
            "slug": "accessible-web-bot",
            "url": "https://botcrawl.com/bots/accessible-web-bot/",
            "status": "active",
            "operator": "Accessible Web",
            "company": "Accessible Web",
            "family": "Accessible Web",
            "category": "monitoring",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "AccessibleWebBot",
                "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; AccessibleWebBot/1.0; +https://accessibleweb.com/bot/) HeadlessChrome/81.0.4044.0 Safari/537.36",
                "Mozilla/5.0 (compatible; AccessibleWebBot/1.0; +https://accessibleweb.com/bot/)"
            ],
            "primary_user_agent": "AccessibleWebBot",
            "robots_token": "AccessibleWebBot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Accessible Web Bot crawls customer websites to discover pages and monitor for accessibility violations on regular basis. Crawls are initiated for Accessible Web's \"Page Monitoring\" SaaS product.",
            "short_description": "Accessible Web Bot crawls customer websites to discover pages and monitor for accessibility violations on regular basis.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: AccessibleWebBot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"AccessibleWebBot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://accessibleweb.com"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:26Z"
        },
        {
            "id": "accessstatus",
            "name": "AccessStatus",
            "slug": "accessstatus",
            "url": "https://botcrawl.com/bots/accessstatus/",
            "status": "active",
            "operator": "AccessStatus",
            "company": "AccessStatus",
            "family": "AccessStatus",
            "category": "monitoring",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "AccessStatus",
                "AccessStatus/1.0"
            ],
            "primary_user_agent": "AccessStatus",
            "robots_token": "AccessStatus",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "AccessStatus is a bot that checks the HTTP status code of a web page. It is used to determine if a URL is active, redirected, or returning an error.",
            "short_description": "AccessStatus is a bot that checks the HTTP status code of a web page.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: AccessStatus\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"AccessStatus\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.accesslink.fr/page/a-propos-de-accessstatus"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:26Z"
        },
        {
            "id": "activecomply-bot",
            "name": "ActiveComply Bot",
            "slug": "activecomply-bot",
            "url": "https://botcrawl.com/bots/activecomply-bot/",
            "status": "active",
            "operator": "ActiveComply",
            "company": "ActiveComply",
            "family": "ActiveComply",
            "category": "monitoring",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "ActiveComply",
                "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.6045.9 Safari/537.36 (compatible; ActiveComply/2.0; +https://app.activecomply.com/bot)",
                "ActiveComply crawler"
            ],
            "primary_user_agent": "ActiveComply",
            "robots_token": "ActiveComply",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "ActiveComply Bot is a crawler for a service that monitors social media for regulatory compliance. It scans for specific keywords and content for businesses in regulated industries.",
            "short_description": "ActiveComply Bot is a crawler for a service that monitors social media for regulatory compliance.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: ActiveComply\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"ActiveComply\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://app.activecomply.com/bot"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:26Z"
        },
        {
            "id": "adagio-bot",
            "name": "Adagio Bot",
            "slug": "adagio-bot",
            "url": "https://botcrawl.com/bots/adagio-bot/",
            "status": "active",
            "operator": "Adagio",
            "company": "Adagio",
            "family": "Adagio",
            "category": "monitoring",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "Adagiobot",
                "Adagiobot/1.0"
            ],
            "primary_user_agent": "Adagiobot",
            "robots_token": "Adagiobot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Adagio demand optimization solutions help publishers leverage unlimited demand sources at unprecedented revenue level, while improving user experience, SPO and carbon footprint.",
            "short_description": "Adagio demand optimization solutions help publishers leverage unlimited demand sources at unprecedented revenue level, while improving user experience, SPO and carbon…",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Adagiobot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Adagiobot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://adagio.io"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:26Z"
        },
        {
            "id": "adbeat_bot",
            "name": "adbeat_bot",
            "slug": "adbeat_bot",
            "url": "https://botcrawl.com/bots/adbeat_bot/",
            "status": "active",
            "operator": "Adbeat",
            "company": "Adbeat",
            "family": "Adbeat",
            "category": "search",
            "kind": "unknown",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "adbeat_bot",
                "Mozilla/5.0 (compatible; adbeat_bot; +https://www.adbeat.com/operation_policy.htm)",
                "Adbeat Bot"
            ],
            "primary_user_agent": "adbeat_bot",
            "robots_token": "adbeat_bot",
            "verified": "unknown",
            "respects_robots": "unknown",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "unknown",
            "source_type": "unknown",
            "short_description": "Adbeat competitive intelligence crawler that tracks and analyzes digital advertising campaigns.",
            "verification_method": "No official IP-range or reverse-DNS verification method is documented here. Match the exact user-agent and request behavior against the source documentation at https://www.adbeat.com.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: adbeat_bot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"adbeat_bot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.adbeat.com"
                }
            ],
            "updated_at": "2026-04-01T00:55:22Z"
        },
        {
            "id": "addsearchbot",
            "name": "AddSearchBot",
            "slug": "addsearchbot",
            "url": "https://botcrawl.com/bots/addsearchbot/",
            "status": "active",
            "operator": "Addsearch",
            "company": "Addsearch",
            "family": "Addsearch",
            "category": "scraper",
            "kind": "crawler",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "AddSearchBot",
                "Mozilla/5.0 (compatible; AddSearchBot/1.0; +http://www.addsearch.com/bot/)",
                "Mozilla/5.0 (compatible; AddSearchBot/1.0; +http://www.addsearch.com/bot; info@addsearch.com)",
                "AddSearch crawler"
            ],
            "primary_user_agent": "AddSearchBot",
            "robots_token": "AddSearchBot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "As AddSearch adds content from your site to the search, the AddSearch bot gets counted as traffic by most analytics software.",
            "short_description": "As AddSearch adds content from your site to the search, the AddSearch bot gets counted as traffic by most analytics software.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: AddSearchBot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"AddSearchBot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "http://www.addsearch.com/bot"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:26Z"
        },
        {
            "id": "addthis",
            "name": "AddThis",
            "slug": "addthis",
            "url": "https://botcrawl.com/bots/addthis/",
            "status": "active",
            "operator": "AddThis",
            "company": "AddThis",
            "family": "AddThis",
            "category": "scraper",
            "kind": "crawler",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "AddThis",
                "AddThis.com (http://support.addthis.com/)",
                "AddThis bot"
            ],
            "primary_user_agent": "AddThis",
            "robots_token": "AddThis",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "The AddThis bot crawls websites to gather and update content for its website marketing tools. These tools include features like social sharing buttons and content recommendation widgets.",
            "short_description": "The AddThis bot crawls websites to gather and update content for its website marketing tools.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: AddThis\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"AddThis\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://docs.oracle.com/en/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:26Z"
        },
        {
            "id": "adkerneltopiccrawler",
            "name": "AdkernelTopicCrawler",
            "slug": "adkerneltopiccrawler",
            "url": "https://botcrawl.com/bots/adkerneltopiccrawler/",
            "status": "active",
            "operator": "Adkernel",
            "company": "Adkernel",
            "family": "Adkernel",
            "category": "search",
            "kind": "unknown",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "AdkernelTopicCrawler",
                "Mozilla/5.0 (compatible; AdkernelTopicCrawler; +https://adkernel.com)",
                "Adkernel Crawler"
            ],
            "primary_user_agent": "AdkernelTopicCrawler",
            "robots_token": "AdkernelTopicCrawler",
            "verified": "unknown",
            "respects_robots": "unknown",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "unknown",
            "source_type": "unknown",
            "short_description": "Adkernel topic crawler that gathers information to support its ad network and DSP platforms.",
            "verification_method": "No official IP-range or reverse-DNS verification method is documented here. Match the exact user-agent and request behavior against the source documentation at https://adkernel.com.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: AdkernelTopicCrawler\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"AdkernelTopicCrawler\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://adkernel.com"
                }
            ],
            "updated_at": "2026-04-01T00:55:22Z"
        },
        {
            "id": "ads-naver",
            "name": "Ads-Naver",
            "slug": "ads-naver",
            "url": "https://botcrawl.com/bots/ads-naver/",
            "status": "active",
            "operator": "NAVER",
            "company": "NAVER",
            "family": "NAVER",
            "category": "search",
            "kind": "fetcher",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "Ads-Naver"
            ],
            "primary_user_agent": "Ads-Naver",
            "robots_token": "Ads-Naver",
            "verified": "yes",
            "respects_robots": "unknown",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "high",
            "source_type": "unknown",
            "common_use": "Collects additional advertiser-page information for NAVER ad exposure.",
            "short_description": "NAVER specialist robot that collects additional information for advertiser pages.",
            "verification_method": "Verify the user-agent and confirm the source IP reverse-resolves into .naver.com, then confirm forward DNS matches.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify with the operator’s published guidance before allow-listing.",
            "rules": {
                "robots": "User-agent: Ads-Naver\nDisallow: /",
                "apache": "RewriteCond %{HTTP_USER_AGENT} Ads-Naver [NC]\nRewriteRule .* - [F,L]",
                "nginx": "if ($http_user_agent ~* \"Ads-Naver\") { return 403; }",
                "cloudflare": "(http.user_agent contains \"Ads-Naver\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://searchadvisor.naver.com/guide/seo-basic-firewall"
                }
            ],
            "last_checked": "2026-04-03",
            "updated_at": "2026-04-03T08:22:11Z"
        },
        {
            "id": "adsbot-google-mobile",
            "name": "AdsBot-Google-Mobile",
            "slug": "adsbot-google-mobile",
            "url": "https://botcrawl.com/bots/adsbot-google-mobile/",
            "status": "active",
            "operator": "Google",
            "company": "Google",
            "family": "Google",
            "category": "search",
            "kind": "crawler",
            "purpose": "general-crawl",
            "identity_type": "official-documented",
            "user_agents": [
                "AdsBot-Google-Mobile",
                "Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5X Build/MMB29P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/W.X.Y.Z Mobile Safari/537.36 (compatible; AdsBot-Google-Mobile; +http://www.google.com/mobile/adsbot.html)",
                "Google Ads mobile crawler"
            ],
            "primary_user_agent": "AdsBot-Google-Mobile",
            "robots_token": "AdsBot-Google-Mobile",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "high",
            "source_type": "official",
            "short_description": "Google Ads crawler for mobile ad quality checks.",
            "verification_method": "Verify as a Google crawler using reverse DNS and Google's published special-crawler IP ranges.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "ip_ranges_url": "https://developers.google.com/static/search/apis/ipranges/special-crawlers.json",
            "rules": {
                "robots": "User-agent: AdsBot-Google-Mobile\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"AdsBot-Google-Mobile\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://developers.google.com/crawling/docs/crawlers-fetchers/google-special-case-crawlers"
                }
            ],
            "last_verified": "2026-04-01",
            "updated_at": "2026-04-30T09:15:26Z"
        },
        {
            "id": "adsbot-google-mobile-iphone-retired",
            "name": "AdsBot-Google-Mobile (iPhone retired)",
            "slug": "adsbot-google-mobile-iphone-retired",
            "url": "https://botcrawl.com/bots/adsbot-google-mobile-iphone-retired/",
            "status": "active",
            "operator": "Google",
            "company": "Google",
            "category": "unknown",
            "kind": "unknown",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "Mozilla/5.0 (iPhone; CPU iPhone OS 14_7_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.2 Mobile/15E148 Safari/604.1 (compatible; AdsBot-Google-Mobile; +http://www.google.com/mobile/adsbot.html)",
                "AdsBot-Google-Mobile",
                "AdsBot mobile iPhone variant"
            ],
            "primary_user_agent": "Mozilla/5.0 (iPhone; CPU iPhone OS 14_7_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.2 Mobile/15E148 Safari/604.1 (compatible; AdsBot-Google-Mobile; +http://www.google.com/mobile/adsbot.html)",
            "robots_token": "AdsBot-Google-Mobile",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "unknown",
            "source_type": "unknown",
            "short_description": "Retired iPhone-specific AdsBot mobile crawler variant.",
            "verification_method": "Verify via reverse DNS matching Google's special-case crawler hostnames and confirm the IP matches Google's special crawler ranges.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: AdsBot-Google-Mobile\nDisallow: /"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://developers.google.com/crawling/docs/crawlers-fetchers/google-special-case-crawlers"
                }
            ],
            "updated_at": "2026-03-31T21:27:38Z"
        },
        {
            "id": "adsbot-google-mobile-apps",
            "name": "AdsBot-Google-Mobile-Apps",
            "slug": "adsbot-google-mobile-apps",
            "url": "https://botcrawl.com/bots/adsbot-google-mobile-apps/",
            "status": "active",
            "operator": "Google",
            "company": "Google",
            "category": "unknown",
            "kind": "unknown",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "AdsBot-Google-Mobile-Apps",
                "Google Ads app crawler"
            ],
            "primary_user_agent": "AdsBot-Google-Mobile-Apps",
            "robots_token": "AdsBot-Google-Mobile-Apps",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "unknown",
            "source_type": "unknown",
            "short_description": "Retired Google Ads crawler for Android app page ad-quality checks.",
            "verification_method": "Verify via reverse DNS matching Google's special-case crawler hostnames and confirm the IP matches Google's special crawler ranges.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: AdsBot-Google-Mobile-Apps\nDisallow: /"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://developers.google.com/crawling/docs/crawlers-fetchers/google-special-case-crawlers"
                }
            ],
            "updated_at": "2026-03-31T21:27:36Z"
        },
        {
            "id": "adsnaver",
            "name": "adsnaver",
            "slug": "adsnaver",
            "url": "https://botcrawl.com/bots/adsnaver/",
            "status": "active",
            "operator": "Naver",
            "company": "Naver",
            "family": "Naver",
            "category": "search",
            "kind": "crawler",
            "purpose": "indexing",
            "identity_type": "unknown",
            "user_agents": [
                "adsnaver"
            ],
            "primary_user_agent": "adsnaver",
            "robots_token": "adsnaver",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Collects on-page content from registered ad landing pages for ad matching and ranking.",
            "short_description": "Naver's ad crawler for registered landing pages.",
            "verification_method": "Verified on bots.fyi. Naver states it ignores robots.txt for URLs registered in the ad system.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: adsnaver\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"adsnaver\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://bots.fyi/d/adsnaver"
                },
                {
                    "type": "operator",
                    "url": "https://ads.naver.com/"
                }
            ],
            "last_verified": "2026-04-19",
            "last_checked": "2026-04-19",
            "updated_at": "2026-04-30T09:15:26Z"
        },
        {
            "id": "adstxtcrawler",
            "name": "AdsTxtCrawler",
            "slug": "adstxtcrawler",
            "url": "https://botcrawl.com/bots/adstxtcrawler/",
            "status": "active",
            "operator": "OneTag Limited",
            "company": "OneTag Limited",
            "family": "OneTag Limited",
            "category": "scraper",
            "kind": "crawler",
            "purpose": "general-crawl",
            "identity_type": "unknown",
            "user_agents": [
                "InteractiveAdvertisingBureau",
                "AdsTxtCrawler/1.0",
                "+https://github.com/InteractiveAdvertisingBureau/adstxtcrawler"
            ],
            "primary_user_agent": "InteractiveAdvertisingBureau",
            "robots_token": "InteractiveAdvertisingBureau",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "OneTag is required to crawl ads.txt files from our publishers to verify the presence of the onetag.com line",
            "short_description": "OneTag is required to crawl ads.txt files from our publishers to verify the presence of the onetag.com line",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: InteractiveAdvertisingBureau\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"InteractiveAdvertisingBureau\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.onetag.com"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:26Z"
        },
        {
            "id": "advailo-kuma",
            "name": "Advailo Kuma",
            "slug": "advailo-kuma",
            "url": "https://botcrawl.com/bots/advailo-kuma/",
            "status": "active",
            "operator": "Advailo",
            "company": "Advailo",
            "family": "Advailo",
            "category": "monitoring",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "Advailo Kuma"
            ],
            "primary_user_agent": "Advailo Kuma",
            "robots_token": "Advailo Kuma",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "The Advailo Kuma uptime monitor is used to monitor webpage availability and performance.",
            "short_description": "The Advailo Kuma uptime monitor is used to monitor webpage availability and performance.",
            "verification_method": "Validate the published identifier against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or stronger verification when available.",
            "rules": {
                "robots": "User-agent: Advailo Kuma\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Advailo Kuma\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.advailo.com/"
                }
            ],
            "last_verified": "2026-04-19",
            "last_checked": "2026-04-19",
            "updated_at": "2026-04-30T09:15:54Z"
        },
        {
            "id": "adyen",
            "name": "Adyen",
            "slug": "adyen",
            "url": "https://botcrawl.com/bots/adyen/",
            "status": "active",
            "operator": "Adyen",
            "company": "Adyen",
            "family": "Adyen",
            "category": "unknown",
            "kind": "fetcher",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "Adyen",
                "Adyen HttpClient 1.0"
            ],
            "primary_user_agent": "Adyen",
            "robots_token": "Adyen",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "The Adyen webhooks integration sends HTTP requests to inform web servers about payment-related events.",
            "short_description": "The Adyen webhooks integration sends HTTP requests to inform web servers about payment-related events.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Adyen\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Adyen\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://docs.adyen.com/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:26Z"
        },
        {
            "id": "aftership-magento-2-connector",
            "name": "AfterShip Magento 2 Connector",
            "slug": "aftership-magento-2-connector",
            "url": "https://botcrawl.com/bots/aftership-magento-2-connector/",
            "status": "active",
            "operator": "AfterShip",
            "company": "AfterShip",
            "family": "AfterShip",
            "category": "unknown",
            "kind": "fetcher",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "AfterShip Magento-2 Connector"
            ],
            "primary_user_agent": "AfterShip Magento-2 Connector",
            "robots_token": "AfterShip Magento-2 Connector",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Use Magento 2 connector for AfterShip to track your orders and get delivery updates",
            "short_description": "Use Magento 2 connector for AfterShip to track your orders and get delivery updates",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: AfterShip Magento-2 Connector\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"AfterShip Magento-2 Connector\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.aftership.com/integrations/magento-2"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:26Z"
        },
        {
            "id": "agency-analytics-crawler",
            "name": "Agency Analytics Crawler",
            "slug": "agency-analytics-crawler",
            "url": "https://botcrawl.com/bots/agency-analytics-crawler/",
            "status": "active",
            "operator": "Agency Analytics",
            "company": "Agency Analytics",
            "family": "Agency Analytics",
            "category": "scraper",
            "kind": "crawler",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "aa/1.0"
            ],
            "primary_user_agent": "aa/1.0",
            "robots_token": "aa/1.0",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "A web crawler by Agency Analytics that allows their clients to check their own sites for SEO",
            "short_description": "A web crawler by Agency Analytics that allows their clients to check their own sites for SEO",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: aa/1.0\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"aa/1.0\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://agencyanalytics.com/aa-bot"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:27Z"
        },
        {
            "id": "agencytoolspro-uptime-monitoring-bot",
            "name": "AgencyToolsPro Uptime Monitoring Bot",
            "slug": "agencytoolspro-uptime-monitoring-bot",
            "url": "https://botcrawl.com/bots/agencytoolspro-uptime-monitoring-bot/",
            "status": "active",
            "operator": "AgencyToolsPro",
            "company": "AgencyToolsPro",
            "family": "AgencyToolsPro",
            "category": "monitoring",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "AgencyToolsPro",
                "AgencyToolsPro Uptime Monitoring Bot/1.2.7",
                "https://AgencyToolsPro.com/uptime-monitoring-bot"
            ],
            "primary_user_agent": "AgencyToolsPro",
            "robots_token": "AgencyToolsPro",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Website monitoring bot, lets you know that your website is down.",
            "short_description": "Website monitoring bot, lets you know that your website is down.",
            "verification_method": "Validate the published identifier against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or stronger verification when available.",
            "rules": {
                "robots": "User-agent: AgencyToolsPro\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"AgencyToolsPro\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://AgencyToolsPro.com/uptime-monitoring-bot"
                }
            ],
            "last_verified": "2026-04-19",
            "last_checked": "2026-04-19",
            "updated_at": "2026-04-30T09:15:54Z"
        },
        {
            "id": "agi-agent",
            "name": "AGI Agent",
            "slug": "agi-agent",
            "url": "https://botcrawl.com/bots/agi-agent/",
            "status": "active",
            "operator": "The AGI Company",
            "company": "The AGI Company",
            "category": "unknown",
            "kind": "unknown",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "AGI Agent"
            ],
            "primary_user_agent": "AGI Agent",
            "robots_token": "AGI Agent",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "unknown",
            "source_type": "unknown",
            "short_description": "AI assistant agent for taking actions on behalf of users.",
            "verification_method": "Cloudflare verified bot directory mirror; verify against operator agent documentation where needed",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: AGI Agent\nDisallow: /"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.theagi.company/agent"
                }
            ],
            "updated_at": "2026-03-31T13:51:08Z"
        },
        {
            "id": "ahrefs-site-audit",
            "name": "Ahrefs Site Audit",
            "slug": "ahrefs-site-audit",
            "url": "https://botcrawl.com/bots/ahrefs-site-audit/",
            "status": "active",
            "operator": "Ahrefs",
            "company": "Ahrefs",
            "family": "Ahrefs",
            "category": "scraper",
            "kind": "crawler",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "AhrefsSiteAudit",
                "Mozilla/5.0 (compatible",
                "AhrefsSiteAudit/6.1",
                "+http://ahrefs.com/robot/)",
                "Ahrefs audit crawler"
            ],
            "primary_user_agent": "AhrefsSiteAudit",
            "robots_token": "AhrefsSiteAudit",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "AhrefsSiteAudit is used by website owners (paid and free) to look for issues on their websites",
            "short_description": "AhrefsSiteAudit is used by website owners (paid and free) to look for issues on their websites",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "ip_ranges_url": "https://ahrefs.com/robot",
            "rules": {
                "robots": "User-agent: AhrefsSiteAudit\nDisallow: /"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://ahrefs.com/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:27Z"
        },
        {
            "id": "ahrefssiteaudit",
            "name": "Ahrefs Site Audit",
            "slug": "ahrefssiteaudit",
            "url": "https://botcrawl.com/bots/ahrefssiteaudit/",
            "status": "active",
            "operator": "Ahrefs",
            "company": "Ahrefs",
            "family": "Ahrefs",
            "category": "scraper",
            "kind": "crawler",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "AhrefsSiteAudit",
                "Mozilla/5.0 (compatible",
                "AhrefsSiteAudit/6.1",
                "+http://ahrefs.com/robot/)",
                "Ahrefs audit crawler"
            ],
            "primary_user_agent": "AhrefsSiteAudit",
            "robots_token": "AhrefsSiteAudit",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "unknown",
            "common_use": "AhrefsSiteAudit is used by website owners (paid and free) to look for issues on their websites",
            "short_description": "AhrefsSiteAudit is used by website owners (paid and free) to look for issues on their websites",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "ip_ranges_url": "https://ahrefs.com/robot",
            "rules": {
                "robots": "User-agent: AhrefsSiteAudit\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"AhrefsSiteAudit\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://ahrefs.com/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T07:22:53Z"
        },
        {
            "id": "ahrefsbot",
            "name": "AhrefsBot",
            "slug": "ahrefsbot",
            "url": "https://botcrawl.com/bots/ahrefsbot/",
            "status": "active",
            "operator": "Ahrefs",
            "company": "Ahrefs",
            "family": "Ahrefs",
            "category": "scraper",
            "kind": "crawler",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "AhrefsBot",
                "Mozilla/5.0 (compatible",
                "AhrefsBot/7.0",
                "+http://ahrefs.com/robot/)",
                "Ahrefs crawler"
            ],
            "primary_user_agent": "AhrefsBot",
            "robots_token": "AhrefsBot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "AhrefsBot is a Web Crawler that powers the 12 trillion link database for Ahrefs' online marketing toolset. It constantly crawls the web to fill our database with new links and check the status of the previously found ones to provide the most comprehensive and up-to-the-minute data to our users.",
            "short_description": "AhrefsBot is a Web Crawler that powers the 12 trillion link database for Ahrefs online marketing toolset.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "ip_ranges_url": "https://ahrefs.com/robot",
            "rules": {
                "robots": "User-agent: AhrefsBot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"AhrefsBot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "http://ahrefs.com/robot/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:27Z"
        },
        {
            "id": "ai-search",
            "name": "AI Search",
            "slug": "ai-search",
            "url": "https://botcrawl.com/bots/ai-search/",
            "status": "active",
            "operator": "Cloudflare",
            "company": "Cloudflare",
            "family": "Cloudflare",
            "category": "ai",
            "kind": "crawler",
            "purpose": "search",
            "identity_type": "cloudflare-verified-bot",
            "user_agents": [
                "Cloudflare-AI-Search",
                "Cloudflare-AI-Search (https://developers.cloudflare.com/ai-search",
                "ai-search@cloudflare.com)"
            ],
            "primary_user_agent": "Cloudflare-AI-Search",
            "robots_token": "Cloudflare-AI-Search",
            "verified": "yes",
            "respects_robots": "yes",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "high",
            "source_type": "verified-directory",
            "short_description": "Cloudflare AI Search is a managed service that lets you connect your data and build AI-powered search.",
            "verification_method": "Listed by Cloudflare Radar as a verified bot. Follow the operator's own documentation when additional verification details are available.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: Cloudflare-AI-Search\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Cloudflare-AI-Search\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://radar.cloudflare.com/bots/directory/cloudflare-ai-search"
                }
            ],
            "last_verified": "2026-04-01",
            "updated_at": "2026-04-01T23:15:01Z"
        },
        {
            "id": "ai2bot",
            "name": "AI2Bot",
            "slug": "ai2bot",
            "url": "https://botcrawl.com/bots/ai2bot/",
            "status": "active",
            "operator": "Allen Institute for AI",
            "company": "Allen Institute for AI",
            "family": "Allen Institute for AI",
            "category": "ai",
            "kind": "unknown",
            "purpose": "training",
            "identity_type": "unknown",
            "user_agents": [
                "AI2Bot",
                "Mozilla/5.0 (compatible",
                "+https://allenai.org/crawler)",
                "Ai2Bot",
                "AllenAI Bot"
            ],
            "primary_user_agent": "AI2Bot",
            "robots_token": "AI2Bot",
            "verified": "yes",
            "respects_robots": "yes",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "unknown",
            "source_type": "unknown",
            "short_description": "Allen Institute for AI crawler used to collect training data for open language models.",
            "verification_method": "Verify the exact user-agent together with the published IP ranges at https://allenai.org/crawler.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "ip_ranges_url": "https://allenai.org/crawler",
            "rules": {
                "robots": "User-agent: AI2Bot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"AI2Bot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://allenai.org/crawler"
                }
            ],
            "updated_at": "2026-04-01T00:55:14Z"
        },
        {
            "id": "ai2bot-deepresearcheval",
            "name": "AI2Bot-DeepResearchEval",
            "slug": "ai2bot-deepresearcheval",
            "url": "https://botcrawl.com/bots/ai2bot-deepresearcheval/",
            "status": "active",
            "operator": "Allen Institute for AI",
            "company": "Allen Institute for AI",
            "family": "Allen Institute for AI",
            "category": "ai",
            "kind": "unknown",
            "purpose": "training",
            "identity_type": "unknown",
            "user_agents": [
                "AI2Bot-DeepResearchEval",
                "Mozilla/5.0 (compatible",
                "+https://allenai.org/crawler)",
                "Ai2 DeepResearch"
            ],
            "primary_user_agent": "AI2Bot-DeepResearchEval",
            "robots_token": "AI2Bot-DeepResearchEval",
            "verified": "yes",
            "respects_robots": "unknown",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "unknown",
            "source_type": "unknown",
            "short_description": "Allen Institute for AI deep research evaluation crawler.",
            "verification_method": "Verify the exact user-agent against AI2/Allen Institute for AI crawler documentation.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: AI2Bot-DeepResearchEval\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"AI2Bot-DeepResearchEval\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://allenai.org/crawler"
                }
            ],
            "updated_at": "2026-04-01T00:55:14Z"
        },
        {
            "id": "ai2bot-dolma",
            "name": "Ai2Bot-Dolma",
            "slug": "ai2bot-dolma",
            "url": "https://botcrawl.com/bots/ai2bot-dolma/",
            "status": "active",
            "operator": "Allen Institute for AI",
            "company": "Allen Institute for AI",
            "family": "Allen Institute for AI",
            "category": "ai",
            "kind": "unknown",
            "purpose": "training",
            "identity_type": "unknown",
            "user_agents": [
                "Ai2Bot-Dolma",
                "Mozilla/5.0 (compatible",
                "+https://allenai.org/crawler)",
                "AI2Bot Dolma",
                "Dolma Crawler"
            ],
            "primary_user_agent": "Ai2Bot-Dolma",
            "robots_token": "Ai2Bot-Dolma",
            "verified": "yes",
            "respects_robots": "yes",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "unknown",
            "source_type": "unknown",
            "short_description": "Allen Institute for AI crawler that downloads data to train the Dolma open language model.",
            "verification_method": "Verify the exact user-agent together with the published IP ranges at https://allenai.org/crawler.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "ip_ranges_url": "https://allenai.org/crawler",
            "rules": {
                "robots": "User-agent: Ai2Bot-Dolma\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Ai2Bot-Dolma\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://allenai.org/crawler"
                }
            ],
            "updated_at": "2026-04-01T00:55:14Z"
        },
        {
            "id": "aihitbot",
            "name": "aiHitBot",
            "slug": "aihitbot",
            "url": "https://botcrawl.com/bots/aihitbot/",
            "status": "active",
            "operator": "aiHit",
            "company": "aiHit",
            "family": "aiHit",
            "category": "monitoring",
            "kind": "unknown",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "aiHitBot",
                "Mozilla/5.0 (compatible",
                "+https://www.aihitdata.com/about)",
                "aiHit Bot"
            ],
            "primary_user_agent": "aiHitBot",
            "robots_token": "aiHitBot",
            "verified": "unknown",
            "respects_robots": "unknown",
            "risk": "high",
            "recommended_action": "depends",
            "confidence": "unknown",
            "source_type": "unknown",
            "short_description": "aiHit AI/ML automated system that scrapes data for business intelligence purposes.",
            "verification_method": "No official IP-range or reverse-DNS verification method is documented here. Match the exact user-agent and request behavior against the source documentation at https://www.aihitdata.com/about.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: aiHitBot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"aiHitBot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.aihitdata.com/about"
                }
            ],
            "updated_at": "2026-04-01T00:55:27Z"
        },
        {
            "id": "akismet",
            "name": "Akismet",
            "slug": "akismet",
            "url": "https://botcrawl.com/bots/akismet/",
            "status": "active",
            "operator": "Akismet",
            "company": "Akismet",
            "family": "Akismet",
            "category": "unknown",
            "kind": "fetcher",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "Akismet"
            ],
            "primary_user_agent": "Akismet",
            "robots_token": "Akismet",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "The integration API for the Akismet spam filtering service.",
            "short_description": "The integration API for the Akismet spam filtering service.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Akismet\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Akismet\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://akismet.com/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:27Z"
        },
        {
            "id": "alertsite-by-smartbear",
            "name": "Alertsite by Smartbear",
            "slug": "alertsite-by-smartbear",
            "url": "https://botcrawl.com/bots/alertsite-by-smartbear/",
            "status": "active",
            "operator": "Smartbear",
            "company": "Smartbear",
            "family": "Smartbear",
            "category": "monitoring",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "AlertSite",
                "Mozilla/4.0 (compatible",
                "MSIE 6.0",
                "Windows NT 5.0",
                ".NET CLR 1.0.3705",
                ".NET CLR 1.1.4322",
                "AlertSite)"
            ],
            "primary_user_agent": "AlertSite",
            "robots_token": "AlertSite",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Alertsite by Smartbear is the HTTP monitoring probe that monitors its customers websites for availability and performance anomalies.",
            "short_description": "Alertsite by Smartbear is the HTTP monitoring probe that monitors its customers websites for availability and performance anomalies.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: AlertSite\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"AlertSite\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://support.smartbear.com/alertsite/docs/monitors/index.html"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:27Z"
        },
        {
            "id": "algolia",
            "name": "Algolia",
            "slug": "algolia",
            "url": "https://botcrawl.com/bots/algolia/",
            "status": "active",
            "operator": "Algolia",
            "company": "Algolia",
            "family": "Algolia",
            "category": "search",
            "kind": "crawler",
            "purpose": "indexing",
            "identity_type": "unknown",
            "user_agents": [
                "Algolia Crawler",
                "Algolia Crawler/v2.183.0",
                "Algolia Crawler Renderscript"
            ],
            "primary_user_agent": "Algolia Crawler",
            "robots_token": "Algolia Crawler",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Indexes content for Algolia search engine",
            "short_description": "Indexes content for Algolia search engine",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Algolia Crawler\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Algolia Crawler\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.algolia.com/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:27Z"
        },
        {
            "id": "all-africa-crawler",
            "name": "All Africa Crawler",
            "slug": "all-africa-crawler",
            "url": "https://botcrawl.com/bots/all-africa-crawler/",
            "status": "active",
            "operator": "AllAfrica Global Media",
            "company": "AllAfrica Global Media",
            "family": "AllAfrica Global Media",
            "category": "search",
            "kind": "crawler",
            "purpose": "indexing",
            "identity_type": "unknown",
            "user_agents": [
                "AllAfrica",
                "Mozilla/5.0 (compatible",
                "AllAfrica NewsBot/1.0)"
            ],
            "primary_user_agent": "AllAfrica",
            "robots_token": "AllAfrica",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "AllAfrica Global Media produces, aggregates and distributes news from across Africa, relying on agreements with more than 140 news organizations and over 500 other institutions and individuals. The AllAfrica NewsBot scrapes content from sites with whom AllAfrica has written agreements, or whose content is available without licensing restrictions or otherwise freely distributable. In all cases, the author and institution is credited in full.",
            "short_description": "AllAfrica Global Media produces, aggregates and distributes news from across Africa, relying on agreements with more than 140 news organizations and over 500 other…",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: AllAfrica\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"AllAfrica\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://allafrica.com/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:27Z"
        },
        {
            "id": "amazon-adbot",
            "name": "Amazon AdBot",
            "slug": "amazon-adbot",
            "url": "https://botcrawl.com/bots/amazon-adbot/",
            "status": "active",
            "operator": "Amazon",
            "company": "Amazon",
            "family": "Amazon",
            "category": "scraper",
            "kind": "crawler",
            "purpose": "general-crawl",
            "identity_type": "unknown",
            "user_agents": [
                "AmazonAdBot",
                "Mozilla/5.0 (compatible",
                "AmazonAdBot/1.0",
                "+https://adbot.amazon.com)"
            ],
            "primary_user_agent": "AmazonAdBot",
            "robots_token": "AmazonAdBot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Amazon AdBot is a crawler used by Amazon's advertising services. It visits advertiser landing pages to ensure they are compliant with advertising policies.",
            "short_description": "Amazon AdBot is a crawler used by Amazon's advertising services.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: AmazonAdBot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"AmazonAdBot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://adbot.amazon.com/index.html"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:27Z"
        },
        {
            "id": "amazon-bedrock-agentcore-browser-ap-northeast",
            "name": "Amazon Bedrock AgentCore Browser (AP Northeast)",
            "slug": "amazon-bedrock-agentcore-browser-ap-northeast",
            "url": "https://botcrawl.com/bots/amazon-bedrock-agentcore-browser-ap-northeast/",
            "status": "active",
            "operator": "Amazon",
            "company": "Amazon",
            "category": "unknown",
            "kind": "unknown",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "Amazon Bedrock AgentCore Browser (AP Northeast)"
            ],
            "primary_user_agent": "Amazon Bedrock AgentCore Browser (AP Northeast)",
            "robots_token": "Amazon Bedrock AgentCore Browser (AP Northeast)",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "unknown",
            "source_type": "unknown",
            "short_description": "AgentCore Browser provides a secure, cloud-based browser that enables AI agents to interact with websites.",
            "verification_method": "Cloudflare verified bot directory mirror; verify with operator documentation, IP validation, or Web Bot Auth where available",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: Amazon Bedrock AgentCore Browser (AP Northeast)\nDisallow: /"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://docs.aws.amazon.com/bedrock-agentcore/latest/devguide/browser-tool.html"
                }
            ],
            "updated_at": "2026-03-31T14:03:39Z"
        },
        {
            "id": "amazon-bedrock-agentcore-browser-ap-south",
            "name": "Amazon Bedrock AgentCore Browser (AP South)",
            "slug": "amazon-bedrock-agentcore-browser-ap-south",
            "url": "https://botcrawl.com/bots/amazon-bedrock-agentcore-browser-ap-south/",
            "status": "active",
            "operator": "Amazon",
            "company": "Amazon",
            "category": "unknown",
            "kind": "unknown",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "Amazon Bedrock AgentCore Browser (AP South)"
            ],
            "primary_user_agent": "Amazon Bedrock AgentCore Browser (AP South)",
            "robots_token": "Amazon Bedrock AgentCore Browser (AP South)",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "unknown",
            "source_type": "unknown",
            "short_description": "AgentCore Browser provides a secure, cloud-based browser that enables AI agents to interact with websites.",
            "verification_method": "Cloudflare verified bot directory mirror; verify with operator documentation, IP validation, or Web Bot Auth where available",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: Amazon Bedrock AgentCore Browser (AP South)\nDisallow: /"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://docs.aws.amazon.com/bedrock-agentcore/latest/devguide/browser-tool.html"
                }
            ],
            "updated_at": "2026-03-31T14:03:39Z"
        },
        {
            "id": "amazon-bedrock-agentcore-browser-ap-southeast-1",
            "name": "Amazon Bedrock AgentCore Browser (AP Southeast 1)",
            "slug": "amazon-bedrock-agentcore-browser-ap-southeast-1",
            "url": "https://botcrawl.com/bots/amazon-bedrock-agentcore-browser-ap-southeast-1/",
            "status": "active",
            "operator": "Amazon",
            "company": "Amazon",
            "category": "unknown",
            "kind": "unknown",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "Amazon Bedrock AgentCore Browser (AP Southeast 1)"
            ],
            "primary_user_agent": "Amazon Bedrock AgentCore Browser (AP Southeast 1)",
            "robots_token": "Amazon Bedrock AgentCore Browser (AP Southeast 1)",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "unknown",
            "source_type": "unknown",
            "short_description": "AgentCore Browser provides a secure, cloud-based browser that enables AI agents to interact with websites.",
            "verification_method": "Cloudflare verified bot directory mirror; verify with operator documentation, IP validation, or Web Bot Auth where available",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: Amazon Bedrock AgentCore Browser (AP Southeast 1)\nDisallow: /"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://docs.aws.amazon.com/bedrock-agentcore/latest/devguide/browser-tool.html"
                }
            ],
            "updated_at": "2026-03-31T14:03:39Z"
        },
        {
            "id": "amazon-bedrock-agentcore-browser-ap-southeast",
            "name": "Amazon Bedrock AgentCore Browser (AP Southeast)",
            "slug": "amazon-bedrock-agentcore-browser-ap-southeast",
            "url": "https://botcrawl.com/bots/amazon-bedrock-agentcore-browser-ap-southeast/",
            "status": "active",
            "operator": "Amazon",
            "company": "Amazon",
            "category": "unknown",
            "kind": "unknown",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "Amazon Bedrock AgentCore Browser (AP Southeast)"
            ],
            "primary_user_agent": "Amazon Bedrock AgentCore Browser (AP Southeast)",
            "robots_token": "Amazon Bedrock AgentCore Browser (AP Southeast)",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "unknown",
            "source_type": "unknown",
            "short_description": "AgentCore Browser provides a secure, cloud-based browser that enables AI agents to interact with websites.",
            "verification_method": "Verify the user-agent token and validate against the operator documentation or the Cloudflare verified-bot directory when needed.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: Amazon Bedrock AgentCore Browser (AP Southeast)\nDisallow: /"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://radar.cloudflare.com/bots/directory/amazon-bedrock-agentcore-browser-ap-southeast-2"
                }
            ],
            "updated_at": "2026-03-31T14:22:59Z"
        },
        {
            "id": "amazon-bedrock-agentcore-browser-eu-central-1",
            "name": "Amazon Bedrock AgentCore Browser (EU Central 1)",
            "slug": "amazon-bedrock-agentcore-browser-eu-central-1",
            "url": "https://botcrawl.com/bots/amazon-bedrock-agentcore-browser-eu-central-1/",
            "status": "active",
            "operator": "Amazon",
            "company": "Amazon",
            "category": "unknown",
            "kind": "unknown",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "Amazon Bedrock AgentCore Browser (EU Central 1)"
            ],
            "primary_user_agent": "Amazon Bedrock AgentCore Browser (EU Central 1)",
            "robots_token": "Amazon Bedrock AgentCore Browser (EU Central 1)",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "unknown",
            "source_type": "unknown",
            "short_description": "AgentCore Browser provides a secure, cloud-based browser that enables AI agents to interact with websites.",
            "verification_method": "Cloudflare verified bot directory mirror; verify with operator documentation, IP validation, or Web Bot Auth where available",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: Amazon Bedrock AgentCore Browser (EU Central 1)\nDisallow: /"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://docs.aws.amazon.com/bedrock-agentcore/latest/devguide/browser-tool.html"
                }
            ],
            "updated_at": "2026-03-31T14:03:39Z"
        },
        {
            "id": "amazon-bedrock-agentcore-browser-eu-west-1",
            "name": "Amazon Bedrock AgentCore Browser (EU West 1)",
            "slug": "amazon-bedrock-agentcore-browser-eu-west-1",
            "url": "https://botcrawl.com/bots/amazon-bedrock-agentcore-browser-eu-west-1/",
            "status": "active",
            "operator": "Amazon",
            "company": "Amazon",
            "category": "unknown",
            "kind": "unknown",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "Amazon Bedrock AgentCore Browser (EU West 1)"
            ],
            "primary_user_agent": "Amazon Bedrock AgentCore Browser (EU West 1)",
            "robots_token": "Amazon Bedrock AgentCore Browser (EU West 1)",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "unknown",
            "source_type": "unknown",
            "short_description": "AgentCore Browser provides a secure, cloud-based browser that enables AI agents to interact with websites.",
            "verification_method": "Cloudflare verified bot directory mirror; verify with operator documentation, IP validation, or Web Bot Auth where available",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: Amazon Bedrock AgentCore Browser (EU West 1)\nDisallow: /"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://docs.aws.amazon.com/bedrock-agentcore/latest/devguide/browser-tool.html"
                }
            ],
            "updated_at": "2026-03-31T14:03:39Z"
        },
        {
            "id": "amazon-bedrock-agentcore-browser-us-east-1",
            "name": "Amazon Bedrock AgentCore Browser (US East 1)",
            "slug": "amazon-bedrock-agentcore-browser-us-east-1",
            "url": "https://botcrawl.com/bots/amazon-bedrock-agentcore-browser-us-east-1/",
            "status": "active",
            "operator": "Amazon",
            "company": "Amazon",
            "category": "unknown",
            "kind": "unknown",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "Amazon Bedrock AgentCore Browser (US East 1)"
            ],
            "primary_user_agent": "Amazon Bedrock AgentCore Browser (US East 1)",
            "robots_token": "Amazon Bedrock AgentCore Browser (US East 1)",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "unknown",
            "source_type": "unknown",
            "short_description": "AgentCore Browser provides a secure, cloud-based browser that enables AI agents to interact with websites.",
            "verification_method": "Cloudflare verified bot directory mirror; verify with operator documentation, IP validation, or Web Bot Auth where available",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: Amazon Bedrock AgentCore Browser (US East 1)\nDisallow: /"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://docs.aws.amazon.com/bedrock-agentcore/latest/devguide/browser-tool.html"
                }
            ],
            "updated_at": "2026-03-31T14:03:39Z"
        },
        {
            "id": "amazon-bedrock-agentcore-browser-us-east-2",
            "name": "Amazon Bedrock AgentCore Browser (US East 2)",
            "slug": "amazon-bedrock-agentcore-browser-us-east-2",
            "url": "https://botcrawl.com/bots/amazon-bedrock-agentcore-browser-us-east-2/",
            "status": "active",
            "operator": "Amazon",
            "company": "Amazon",
            "category": "unknown",
            "kind": "unknown",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "Amazon Bedrock AgentCore Browser (US East 2)"
            ],
            "primary_user_agent": "Amazon Bedrock AgentCore Browser (US East 2)",
            "robots_token": "Amazon Bedrock AgentCore Browser (US East 2)",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "unknown",
            "source_type": "unknown",
            "short_description": "AgentCore Browser provides a secure, cloud-based browser that enables AI agents to interact with websites.",
            "verification_method": "Cloudflare verified bot directory mirror; verify with operator documentation, IP validation, or Web Bot Auth where available",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: Amazon Bedrock AgentCore Browser (US East 2)\nDisallow: /"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://docs.aws.amazon.com/bedrock-agentcore/latest/devguide/browser-tool.html"
                }
            ],
            "updated_at": "2026-03-31T14:03:40Z"
        },
        {
            "id": "amazon-bedrock-agentcore-browser-us-west-2",
            "name": "Amazon Bedrock AgentCore Browser (US West 2)",
            "slug": "amazon-bedrock-agentcore-browser-us-west-2",
            "url": "https://botcrawl.com/bots/amazon-bedrock-agentcore-browser-us-west-2/",
            "status": "active",
            "operator": "Amazon",
            "company": "Amazon",
            "category": "unknown",
            "kind": "unknown",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "Amazon Bedrock AgentCore Browser (US West 2)"
            ],
            "primary_user_agent": "Amazon Bedrock AgentCore Browser (US West 2)",
            "robots_token": "Amazon Bedrock AgentCore Browser (US West 2)",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "unknown",
            "source_type": "unknown",
            "short_description": "AgentCore Browser provides a secure, cloud-based browser that enables AI agents to interact with websites.",
            "verification_method": "Cloudflare verified bot directory mirror; verify with operator documentation, IP validation, or Web Bot Auth where available",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: Amazon Bedrock AgentCore Browser (US West 2)\nDisallow: /"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://docs.aws.amazon.com/bedrock-agentcore/latest/devguide/browser-tool.html"
                }
            ],
            "updated_at": "2026-03-31T14:03:40Z"
        },
        {
            "id": "amazon-buy-for-me",
            "name": "Amazon Buy For Me",
            "slug": "amazon-buy-for-me",
            "url": "https://botcrawl.com/bots/amazon-buy-for-me/",
            "status": "active",
            "operator": "Amazon",
            "company": "Amazon",
            "family": "Amazon",
            "category": "ai",
            "kind": "fetcher",
            "purpose": "user-triggered",
            "identity_type": "unknown",
            "user_agents": [
                "AmazonBuyForMe",
                "Agent/AmazonBuyForMe"
            ],
            "primary_user_agent": "AmazonBuyForMe",
            "robots_token": "AmazonBuyForMe",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Buy For Me agent places orders on e-commerce websites at the direction of customers.",
            "short_description": "Buy For Me agent places orders on e-commerce websites at the direction of customers.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: AmazonBuyForMe\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"AmazonBuyForMe\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://buyforme.amazon/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:27Z"
        },
        {
            "id": "amazon-contxbot",
            "name": "Amazon Contxbot",
            "slug": "amazon-contxbot",
            "url": "https://botcrawl.com/bots/amazon-contxbot/",
            "status": "active",
            "operator": "Amazon",
            "company": "Amazon",
            "family": "Amazon",
            "category": "scraper",
            "kind": "crawler",
            "purpose": "general-crawl",
            "identity_type": "unknown",
            "user_agents": [
                "contxbot",
                "Mozilla/5.0 (compatible",
                "contxbot/1.0)"
            ],
            "primary_user_agent": "contxbot",
            "robots_token": "contxbot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Amazon uses a crawler, also known as a spider or a bot to process and index the content of webpages. The Amazon crawler visits your site to determine its content in order to provide relevant ads.",
            "short_description": "Amazon uses a crawler, also known as a spider or a bot to process and index the content of webpages.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: contxbot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"contxbot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://affiliate-program.amazon.com.au/help/node/topic/GT98G5PPRERNVZ2C"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:27Z"
        },
        {
            "id": "amazon-kendra",
            "name": "Amazon Kendra",
            "slug": "amazon-kendra",
            "url": "https://botcrawl.com/bots/amazon-kendra/",
            "status": "active",
            "operator": "Amazon",
            "company": "Amazon",
            "family": "Amazon",
            "category": "ai",
            "kind": "crawler",
            "purpose": "general-crawl",
            "identity_type": "unknown",
            "user_agents": [
                "amazon-kendra-",
                "amazon-kendra-customer-id-[id]",
                "amazon-kendra-web-crawler-*"
            ],
            "primary_user_agent": "amazon-kendra-",
            "robots_token": "amazon-kendra-",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Amazon Kendra is a highly accurate intelligent search service that enables your users to search unstructured data using natural language. It returns specific answers to questions, giving users an experience that's close to interacting with a human expert. It is highly scalable and capable of meeting performance demands, tightly integrated with other AWS services such as Amazon S3 and Amazon Lex, and offers enterprise-grade security.",
            "short_description": "Amazon Kendra is a highly accurate intelligent search service that enables your users to search unstructured data using natural language.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: amazon-kendra-\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"amazon-kendra-\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://docs.aws.amazon.com/kendra/latest/dg/what-is-kendra.html"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:27Z"
        },
        {
            "id": "amazon-q",
            "name": "Amazon Q",
            "slug": "amazon-q",
            "url": "https://botcrawl.com/bots/amazon-q/",
            "status": "active",
            "operator": "Amazon",
            "company": "Amazon",
            "family": "Amazon",
            "category": "ai",
            "kind": "agent",
            "purpose": "user-triggered",
            "identity_type": "official-documented",
            "user_agents": [
                "amazon-QBusiness",
                "amazon-q",
                "Amazon Q Business Web Crawler"
            ],
            "primary_user_agent": "amazon-QBusiness",
            "robots_token": "amazon-QBusiness",
            "verified": "yes",
            "respects_robots": "yes",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "high",
            "source_type": "official",
            "common_use": "Amazon Q Business uses a web crawler to index authorized websites for enterprise search and assistant features.",
            "short_description": "Amazon Q Business's web crawler for user-configured website indexing.",
            "verification_method": "Use the documented robots.txt token amazon-QBusiness from AWS documentation. Validate identifiers before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: amazon-QBusiness\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"amazon-QBusiness\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://docs.aws.amazon.com/amazonq/latest/qbusiness-ug/stop-web-crawler.html"
                }
            ],
            "last_verified": "2026-04-19",
            "last_checked": "2026-04-19",
            "updated_at": "2026-04-30T09:15:27Z"
        },
        {
            "id": "amazon-route-53-health-check-service",
            "name": "Amazon Route 53 Health Check Service",
            "slug": "amazon-route-53-health-check-service",
            "url": "https://botcrawl.com/bots/amazon-route-53-health-check-service/",
            "status": "active",
            "operator": "Amazon",
            "company": "Amazon",
            "family": "Amazon",
            "category": "monitoring",
            "kind": "unknown",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "amazon-route-53-health-check-service",
                "Amazon Route 53 Health Check Service"
            ],
            "primary_user_agent": "amazon-route-53-health-check-service",
            "robots_token": "amazon-route-53-health-check-service",
            "verified": "yes",
            "respects_robots": "unknown",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "botsfyi-verified-listing",
            "common_use": "Monitoring",
            "short_description": "Amazon Route 53 Health Check Service monitoring bot.",
            "verification_method": "Public verified bot directory listing",
            "spoofing_risk": "Medium",
            "rules": {
                "cloudflare": "(http.user_agent contains \"amazon-route-53-health-check-service\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://bots.fyi/d/amazon-route-53-health-check-service"
                }
            ],
            "last_verified": "2026-04-19",
            "last_checked": "2026-04-19",
            "updated_at": "2026-04-30T09:15:27Z"
        },
        {
            "id": "amazonbot",
            "name": "Amazonbot",
            "slug": "amazonbot",
            "url": "https://botcrawl.com/bots/amazonbot/",
            "status": "active",
            "operator": "Amazon",
            "company": "Amazon",
            "family": "Amazon",
            "category": "ai",
            "kind": "crawler",
            "purpose": "training",
            "identity_type": "official-documented",
            "user_agents": [
                "Amazonbot",
                "Mozilla/5.0 AppleWebKit/537.36 (KHTML",
                "like Gecko",
                "compatible",
                "Amazonbot/0.1",
                "+https://developer.amazon.com/support/amazonbot) Chrome/119.0.6045.214 Safari/537.36",
                "Amazon AI crawler"
            ],
            "primary_user_agent": "Amazonbot",
            "robots_token": "Amazonbot",
            "verified": "yes",
            "respects_robots": "yes",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "high",
            "source_type": "official",
            "common_use": "Amazonbot is Amazon's web crawler used to improve our services, such as enabling Alexa to answer even more questions for customers. Amazonbot is a polite crawler that respects standard robots.txt rules and robots meta tags.",
            "short_description": "Amazon crawler for product improvement and possible AI model training.",
            "verification_method": "Verify the user-agent together with Amazon's published IP ranges for Amazonbot.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "ip_ranges_url": "https://developer.amazon.com/amazonbot/ip-addresses/",
            "rules": {
                "robots": "User-agent: Amazonbot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Amazonbot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://developer.amazon.com/amazonbot"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:27Z"
        },
        {
            "id": "amzn-searchbot",
            "name": "Amzn-SearchBot",
            "slug": "amzn-searchbot",
            "url": "https://botcrawl.com/bots/amzn-searchbot/",
            "status": "active",
            "operator": "Amazon",
            "company": "Amazon",
            "family": "Amazon",
            "category": "search",
            "kind": "crawler",
            "purpose": "search",
            "identity_type": "official-documented",
            "user_agents": [
                "Amzn-SearchBot",
                "Mozilla/5.0 AppleWebKit/537.36 (KHTML",
                "like Gecko",
                "compatible",
                "Amzn-SearchBot/0.1) Chrome/119.0.6045.214 Safari/537.36",
                "Amazon search bot"
            ],
            "primary_user_agent": "Amzn-SearchBot",
            "robots_token": "Amzn-SearchBot",
            "verified": "yes",
            "respects_robots": "yes",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "high",
            "source_type": "official",
            "short_description": "Amazon search crawler for Alexa, Rufus, and related search experiences.",
            "verification_method": "Verify the user-agent together with Amazon's published IP ranges for Amzn-SearchBot.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "ip_ranges_url": "https://developer.amazon.com/amazonbot/ip-addresses/",
            "rules": {
                "robots": "User-agent: Amzn-SearchBot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Amzn-SearchBot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://developer.amazon.com/amazonbot"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:27Z"
        },
        {
            "id": "amzn-user",
            "name": "Amzn-User",
            "slug": "amzn-user",
            "url": "https://botcrawl.com/bots/amzn-user/",
            "status": "active",
            "operator": "Amazon",
            "company": "Amazon",
            "family": "Amazon",
            "category": "ai",
            "kind": "fetcher",
            "purpose": "user-triggered",
            "identity_type": "official-documented",
            "user_agents": [
                "Amzn-User",
                "Mozilla/5.0 AppleWebKit/537.36 (KHTML",
                "like Gecko",
                "compatible",
                "Amzn-User/0.1) Chrome/119.0.6045.214 Safari/537.36",
                "Amazon user fetcher"
            ],
            "primary_user_agent": "Amzn-User",
            "robots_token": "Amzn-User",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "high",
            "source_type": "official",
            "short_description": "User-requested Amazon fetcher for live answers.",
            "verification_method": "Verify the user-agent together with Amazon's published IP ranges for Amzn-User.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "ip_ranges_url": "https://developer.amazon.com/amazonbot/ip-addresses/",
            "rules": {
                "robots": "User-agent: Amzn-User\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Amzn-User\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://developer.amazon.com/amazonbot"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:27Z"
        },
        {
            "id": "anchor-browser",
            "name": "Anchor Browser",
            "slug": "anchor-browser",
            "url": "https://botcrawl.com/bots/anchor-browser/",
            "status": "active",
            "operator": "Anchor",
            "company": "Anchor",
            "family": "Anchor",
            "category": "ai",
            "kind": "agent",
            "purpose": "user-triggered",
            "identity_type": "unknown",
            "user_agents": [
                "Anchor Browser"
            ],
            "primary_user_agent": "Anchor Browser",
            "robots_token": "Anchor Browser",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Cloud-based browser automation for AI agents.",
            "short_description": "The Web Browser for AI Agents.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Anchor Browser\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Anchor Browser\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://docs.anchorbrowser.io/"
                }
            ],
            "last_verified": "2026-04-19",
            "last_checked": "2026-04-19",
            "updated_at": "2026-04-30T09:15:27Z"
        },
        {
            "id": "apify-website-content-crawler",
            "name": "Apify Website Content Crawler",
            "slug": "apify-website-content-crawler",
            "url": "https://botcrawl.com/bots/apify-website-content-crawler/",
            "status": "active",
            "operator": "Apify",
            "company": "Apify",
            "family": "Apify",
            "category": "ai",
            "kind": "agent",
            "purpose": "user-triggered",
            "identity_type": "unknown",
            "user_agents": [
                "ApifyWebsiteContentCrawler",
                "ApifyWebsiteContentCrawler/1.0 (+https://apify.com/apify/website-content-crawler)",
                "Apify Website Content Crawler"
            ],
            "primary_user_agent": "ApifyWebsiteContentCrawler",
            "robots_token": "ApifyWebsiteContentCrawler",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Website content crawling and extraction for AI applications.",
            "short_description": "Crawl websites and extract content to feed AI apps.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: ApifyWebsiteContentCrawler\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"ApifyWebsiteContentCrawler\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://apify.com/apify/website-content-crawler"
                }
            ],
            "last_verified": "2026-04-19",
            "last_checked": "2026-04-19",
            "updated_at": "2026-04-30T09:15:27Z"
        },
        {
            "id": "apis-google",
            "name": "APIs-Google",
            "slug": "apis-google",
            "url": "https://botcrawl.com/bots/apis-google/",
            "status": "active",
            "operator": "Google",
            "company": "Google",
            "category": "unknown",
            "kind": "unknown",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "APIs-Google (+https://developers.google.com/webmasters/APIs-Google.html)",
                "Google APIs crawler",
                "Official"
            ],
            "primary_user_agent": "APIs-Google (+https://developers.google.com/webmasters/APIs-Google.html)",
            "robots_token": "APIs-Google",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "unknown",
            "source_type": "unknown",
            "short_description": "Google special-case crawler for API push-notification delivery.",
            "verification_method": "Verify via reverse DNS matching Google's special-case crawler hostnames and confirm the IP matches Google's special crawler ranges.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: APIs-Google\nDisallow: /"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://developers.google.com/crawling/docs/crawlers-fetchers/google-special-case-crawlers"
                }
            ],
            "updated_at": "2026-03-31T21:27:36Z"
        },
        {
            "id": "appcastbot",
            "name": "AppcastBot",
            "slug": "appcastbot",
            "url": "https://botcrawl.com/bots/appcastbot/",
            "status": "active",
            "operator": "Appcast",
            "company": "Appcast",
            "family": "Appcast",
            "category": "search",
            "kind": "fetcher",
            "purpose": "indexing",
            "identity_type": "unknown",
            "user_agents": [
                "AppcastParsingBot",
                "AppcastBot"
            ],
            "primary_user_agent": "AppcastParsingBot",
            "robots_token": "AppcastParsingBot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Scrapes client job postings for Appcast services.",
            "short_description": "Used for scraping client job postings.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: AppcastParsingBot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"AppcastParsingBot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.appcast.io"
                }
            ],
            "last_verified": "2026-04-19",
            "last_checked": "2026-04-19",
            "updated_at": "2026-04-30T09:15:27Z"
        },
        {
            "id": "apple-app-site-association",
            "name": "Apple App Site Association",
            "slug": "apple-app-site-association",
            "url": "https://botcrawl.com/bots/apple-app-site-association/",
            "status": "active",
            "operator": "Apple",
            "company": "Apple",
            "family": "Apple",
            "category": "unknown",
            "kind": "unknown",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "AASA-Bot",
                "AASA-Bot/1.0.0",
                "AASA-Bot/"
            ],
            "primary_user_agent": "AASA-Bot",
            "robots_token": "AASA-Bot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "The Apple App Site Association is used to support \"Universal Links\" that can open in native iOS apps. The bot requests a specific path for a given hostname, which returns metadata that associates certain URL patterns with native iOS apps.",
            "short_description": "The Apple App Site Association is used to support \"Universal Links\" that can open in native iOS apps.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: AASA-Bot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"AASA-Bot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://developer.apple.com/library/archive/documentation/General/Conceptual/AppSearch/UniversalLinks.html"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:27Z"
        },
        {
            "id": "apple-podcasts",
            "name": "Apple Podcasts",
            "slug": "apple-podcasts",
            "url": "https://botcrawl.com/bots/apple-podcasts/",
            "status": "active",
            "operator": "Apple",
            "company": "Apple",
            "family": "Apple",
            "category": "unknown",
            "kind": "fetcher",
            "purpose": "user-triggered",
            "identity_type": "official-documented",
            "user_agents": [
                "iTMS",
                "apple-podcasts",
                "Apple Podcasts"
            ],
            "primary_user_agent": "iTMS",
            "robots_token": "iTMS",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "high",
            "source_type": "official",
            "common_use": "Fetches URLs associated with registered Apple Podcasts content.",
            "short_description": "Apple Podcasts crawler for registered podcast content.",
            "verification_method": "Use Apple's documented iTMS user-agent; Apple states it does not follow robots.txt for Apple Podcasts crawling.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: iTMS\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"iTMS\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://support.apple.com/en-us/119829"
                }
            ],
            "last_verified": "2026-04-19",
            "last_checked": "2026-04-19",
            "updated_at": "2026-04-30T09:15:27Z"
        },
        {
            "id": "applebot",
            "name": "Applebot",
            "slug": "applebot",
            "url": "https://botcrawl.com/bots/applebot/",
            "status": "active",
            "operator": "Apple",
            "company": "Apple",
            "family": "Apple",
            "category": "search",
            "kind": "unknown",
            "purpose": "general-crawl",
            "identity_type": "official-documented",
            "user_agents": [
                "Applebot",
                "Mozilla/5.0 (Device",
                "OS_version) AppleWebKit/WebKit_version (KHTML",
                "like Gecko) Version/Safari_version [Mobile/Mobile_version] Safari/WebKit_version (Applebot/Applebot_version",
                "+http://www.apple.com/go/applebot)",
                "Apple search crawler"
            ],
            "primary_user_agent": "Applebot",
            "robots_token": "Applebot",
            "verified": "yes",
            "respects_robots": "yes",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "high",
            "source_type": "official",
            "common_use": "Applebot data is used to power various features, such as the search technology that is integrated into many user experiences in Appleʼs ecosystem including Spotlight, Siri, and Safari.",
            "short_description": "Apple crawler for search and related Apple features.",
            "verification_method": "Verify reverse DNS in the applebot.apple.com domain or match IPs against Apple's Applebot CIDR JSON.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "ip_ranges_url": "https://search.developer.apple.com/applebot.json",
            "rules": {
                "robots": "User-agent: Applebot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Applebot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://support.apple.com/en-us/119829"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:27Z"
        },
        {
            "id": "applebot-extended",
            "name": "Applebot-Extended",
            "slug": "applebot-extended",
            "url": "https://botcrawl.com/bots/applebot-extended/",
            "status": "active",
            "operator": "Apple",
            "company": "Apple",
            "family": "Apple",
            "category": "ai",
            "kind": "control-token",
            "purpose": "control-token",
            "identity_type": "official-documented",
            "user_agents": [
                "Applebot-Extended",
                "Apple AI opt-out token"
            ],
            "primary_user_agent": "Applebot-Extended",
            "robots_token": "Applebot-Extended",
            "verified": "yes",
            "respects_robots": "yes",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "high",
            "source_type": "official",
            "short_description": "Apple control token for foundation-model training opt-out.",
            "verification_method": "Control token only; no separate crawl traffic is expected.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: Applebot-Extended\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Applebot-Extended\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://support.apple.com/en-us/119829"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:28Z"
        },
        {
            "id": "area-360",
            "name": "Area 360",
            "slug": "area-360",
            "url": "https://botcrawl.com/bots/area-360/",
            "status": "active",
            "operator": "Area360",
            "company": "Area360",
            "family": "Area360",
            "category": "monitoring",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "A360-Search",
                "A360-Search/1.0"
            ],
            "primary_user_agent": "A360-Search",
            "robots_token": "A360-Search",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Area 360 property search and analytics",
            "short_description": "Area 360 property search and analytics",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: A360-Search\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"A360-Search\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://area360.uk"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:28Z"
        },
        {
            "id": "arquivo-web-crawler",
            "name": "Arquivo Web Crawler",
            "slug": "arquivo-web-crawler",
            "url": "https://botcrawl.com/bots/arquivo-web-crawler/",
            "status": "active",
            "operator": "Arquivo",
            "company": "Arquivo",
            "family": "Arquivo",
            "category": "scraper",
            "kind": "crawler",
            "purpose": "indexing",
            "identity_type": "unknown",
            "user_agents": [
                "Arquivo-web-crawler",
                "Arquivo-web-crawler (compatible",
                "heritrix/3.4.0-20200304 +https://arquivo.pt/faq-crawling)"
            ],
            "primary_user_agent": "Arquivo-web-crawler",
            "robots_token": "Arquivo-web-crawler",
            "verified": "yes",
            "respects_robots": "yes",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Web crawler archives the Portuguese web",
            "short_description": "Web crawler archives the Portuguese web",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Arquivo-web-crawler\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Arquivo-web-crawler\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://sobre.arquivo.pt/en/help/crawling-and-archiving-web-content/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:28Z"
        },
        {
            "id": "artemis-web-crawler",
            "name": "Artemis Web Crawler",
            "slug": "artemis-web-crawler",
            "url": "https://botcrawl.com/bots/artemis-web-crawler/",
            "status": "active",
            "operator": "Artemis",
            "company": "Artemis",
            "family": "Artemis",
            "category": "unknown",
            "kind": "unknown",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "artemis-web-crawler",
                "Artemis Web Crawler"
            ],
            "primary_user_agent": "artemis-web-crawler",
            "robots_token": "artemis-web-crawler",
            "verified": "yes",
            "respects_robots": "unknown",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "botsfyi-verified-listing",
            "common_use": "Feed Fetcher",
            "short_description": "Artemis is a calm web reader with which you can follow websites and blogs.",
            "verification_method": "Public verified bot directory listing",
            "spoofing_risk": "Medium",
            "rules": {
                "cloudflare": "(http.user_agent contains \"artemis-web-crawler\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://bots.fyi/d/artemis-web-crawler"
                }
            ],
            "last_verified": "2026-04-19",
            "last_checked": "2026-04-19",
            "updated_at": "2026-04-30T09:15:28Z"
        },
        {
            "id": "artemis-web-reader",
            "name": "Artemis Web Reader",
            "slug": "artemis-web-reader",
            "url": "https://botcrawl.com/bots/artemis-web-reader/",
            "status": "active",
            "operator": "capjamesg",
            "company": "capjamesg",
            "family": "capjamesg",
            "category": "search",
            "kind": "fetcher",
            "purpose": "indexing",
            "identity_type": "unknown",
            "user_agents": [
                "artemis web reader/",
                "artemis web reader/1.0 - https://artemis.jamesg.blog/bot"
            ],
            "primary_user_agent": "artemis web reader/",
            "robots_token": "artemis web reader/",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "short_description": "Artemis is a calm, independently-run, free web reader.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: artemis web reader/\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"artemis web reader/\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "http://artemis.jamesg.blog"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:28Z"
        },
        {
            "id": "atlassian-jira-webhooks",
            "name": "Atlassian Jira Webhooks",
            "slug": "atlassian-jira-webhooks",
            "url": "https://botcrawl.com/bots/atlassian-jira-webhooks/",
            "status": "active",
            "operator": "Atlassian",
            "company": "Atlassian",
            "family": "Atlassian",
            "category": "unknown",
            "kind": "unknown",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "atlassian-jira-webhooks",
                "Atlassian Jira Webhooks"
            ],
            "primary_user_agent": "atlassian-jira-webhooks",
            "robots_token": "atlassian-jira-webhooks",
            "verified": "yes",
            "respects_robots": "unknown",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "botsfyi-verified-listing",
            "common_use": "Webhook",
            "short_description": "Delivers webhook notifications from Jira Cloud when issues, projects, or other resources change.",
            "verification_method": "Public verified bot directory listing",
            "spoofing_risk": "Medium",
            "rules": {
                "cloudflare": "(http.user_agent contains \"atlassian-jira-webhooks\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://bots.fyi/d/atlassian-jira-webhooks"
                }
            ],
            "last_verified": "2026-04-19",
            "last_checked": "2026-04-19",
            "updated_at": "2026-04-30T09:15:28Z"
        },
        {
            "id": "atlassian-rovo",
            "name": "Atlassian Rovo",
            "slug": "atlassian-rovo",
            "url": "https://botcrawl.com/bots/atlassian-rovo/",
            "status": "active",
            "operator": "Atlassian",
            "company": "Atlassian",
            "family": "Atlassian",
            "category": "unknown",
            "kind": "unknown",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "rovo",
                "Atlassian Rovo"
            ],
            "primary_user_agent": "rovo",
            "robots_token": "rovo",
            "verified": "yes",
            "respects_robots": "unknown",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "botsfyi-verified-listing",
            "common_use": "AI Crawler",
            "short_description": "Crawls and indexes web content for Atlassian Rovo's AI-powered search, chat, and agents.",
            "verification_method": "Public verified bot directory listing",
            "spoofing_risk": "Medium",
            "rules": {
                "cloudflare": "(http.user_agent contains \"rovo\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://bots.fyi/d/rovo"
                }
            ],
            "last_verified": "2026-04-19",
            "last_checked": "2026-04-19",
            "updated_at": "2026-04-30T09:15:28Z"
        },
        {
            "id": "atlassian-bot",
            "name": "atlassian-bot",
            "slug": "atlassian-bot",
            "url": "https://botcrawl.com/bots/atlassian-bot/",
            "status": "active",
            "operator": "Atlassian",
            "company": "Atlassian",
            "family": "atlassian-bot",
            "category": "ai",
            "kind": "crawler",
            "purpose": "general-crawl",
            "identity_type": "unknown",
            "user_agents": [
                "atlassian-bot"
            ],
            "primary_user_agent": "atlassian-bot",
            "robots_token": "atlassian-bot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "atlassian-bot is a crawler for custom 3P websites that indexes data for rovo search",
            "short_description": "atlassian-bot is a crawler for custom 3P websites that indexes data for rovo search",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: atlassian-bot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"atlassian-bot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://support.atlassian.com/organization-administration/docs/connect-custom-website-to-rovo/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:28Z"
        },
        {
            "id": "attracta",
            "name": "Attracta",
            "slug": "attracta",
            "url": "https://botcrawl.com/bots/attracta/",
            "status": "active",
            "operator": "Attracta",
            "company": "Attracta",
            "family": "Attracta",
            "category": "scraper",
            "kind": "crawler",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "Attracta",
                "Mozilla/5.0 (compatible",
                "Attracta)"
            ],
            "primary_user_agent": "Attracta",
            "robots_token": "Attracta",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "The Attracta bot is analyzes user website content as part of Attracta's SEO services",
            "short_description": "The Attracta bot is analyzes user website content as part of Attracta's SEO services",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Attracta\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Attracta\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.attracta.com/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:28Z"
        },
        {
            "id": "audisto-crawler",
            "name": "Audisto Crawler",
            "slug": "audisto-crawler",
            "url": "https://botcrawl.com/bots/audisto-crawler/",
            "status": "active",
            "operator": "Audisto",
            "company": "Audisto",
            "family": "Audisto",
            "category": "monitoring",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "Audisto Crawler",
                "Audisto Crawler (desktop",
                "+https://audisto.com/bot)",
                "Audisto Crawler (mobile",
                "essential",
                "Audisto crawler"
            ],
            "primary_user_agent": "Audisto Crawler",
            "robots_token": "Audisto Crawler",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Audisto Crawler fetches all accessible URLs of a website. Audisto provides a service to audit and monitor websites for its customers. More information about the crawler is available here: https://audisto.com/bot",
            "short_description": "Audisto Crawler fetches all accessible URLs of a website.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Audisto Crawler\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Audisto Crawler\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://audisto.com/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:28Z"
        },
        {
            "id": "authory",
            "name": "Authory",
            "slug": "authory",
            "url": "https://botcrawl.com/bots/authory/",
            "status": "active",
            "operator": "Authory",
            "company": "Authory",
            "family": "Authory",
            "category": "unknown",
            "kind": "unknown",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "Authory",
                "Mozilla/5.0 (compatible",
                "Authory/1.0",
                "+https://authory.com/)",
                "Authory/"
            ],
            "primary_user_agent": "Authory",
            "robots_token": "Authory",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "The Authory bot visits websites to back up articles on behalf of journalists and other writers who use the service.",
            "short_description": "The Authory bot visits websites to back up articles on behalf of journalists and other writers who use the service.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Authory\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Authory\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "http://authory.com"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:28Z"
        },
        {
            "id": "automaton",
            "name": "Automaton",
            "slug": "automaton",
            "url": "https://botcrawl.com/bots/automaton/",
            "status": "active",
            "operator": "Automaton",
            "company": "Automaton",
            "family": "Automaton",
            "category": "monitoring",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "AutomatonHeadlessChrome"
            ],
            "primary_user_agent": "AutomatonHeadlessChrome",
            "robots_token": "AutomatonHeadlessChrome",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "An end-to-end campaign and integration testing tool created to optimize your marketing, advertising and sales technology stack by ensuring setups are running as they should be.",
            "short_description": "An end-to-end campaign and integration testing tool created to optimize your marketing, advertising and sales technology stack by ensuring setups are running as they…",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: AutomatonHeadlessChrome\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"AutomatonHeadlessChrome\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.automatoninc.com"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:28Z"
        },
        {
            "id": "awariobot",
            "name": "AwarioBot",
            "slug": "awariobot",
            "url": "https://botcrawl.com/bots/awariobot/",
            "status": "active",
            "operator": "Awario",
            "company": "Awario",
            "family": "Awario",
            "category": "monitoring",
            "kind": "unknown",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "AwarioBot",
                "Mozilla/5.0 (compatible",
                "AwarioBot/1.0",
                "+https://awario.com/bots.html)",
                "Awario Crawler"
            ],
            "primary_user_agent": "AwarioBot",
            "robots_token": "AwarioBot",
            "verified": "unknown",
            "respects_robots": "unknown",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "unknown",
            "source_type": "unknown",
            "short_description": "Awario brand monitoring crawler that crawls billions of web pages daily to gather brand mentions.",
            "verification_method": "No official IP-range or reverse-DNS verification method is documented here. Match the exact user-agent and request behavior against the source documentation at https://awario.com/bots.html.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: AwarioBot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"AwarioBot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://awario.com/bots.html"
                }
            ],
            "updated_at": "2026-04-01T00:55:22Z"
        },
        {
            "id": "awariosmartbot",
            "name": "AwarioSmartBot",
            "slug": "awariosmartbot",
            "url": "https://botcrawl.com/bots/awariosmartbot/",
            "status": "active",
            "operator": "Awario",
            "company": "Awario",
            "family": "Awario",
            "category": "ai",
            "kind": "crawler",
            "purpose": "general-crawl",
            "identity_type": "unknown",
            "user_agents": [
                "Awario",
                "AwarioSmartBot/1.0 (+https://awario.com/bots.html",
                "bots@awario.com)",
                "AwarioRssBot/1.0 (+https://awario.com/bots.html",
                "AwarioRssBot",
                "Awario crawler"
            ],
            "primary_user_agent": "Awario",
            "robots_token": "Awario",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "AwarioSmartBot is a web crawlers sent by Awario to discover and collect new and updated web data (that is further used by Internet marketers from all over the world).",
            "short_description": "AwarioSmartBot is a web crawlers sent by Awario to discover and collect new and updated web data (that is further used by Internet marketers from all over the world).",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Awario\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Awario\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://awario.com/bots.html"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:28Z"
        },
        {
            "id": "azureai-searchbot",
            "name": "AzureAI-SearchBot",
            "slug": "azureai-searchbot",
            "url": "https://botcrawl.com/bots/azureai-searchbot/",
            "status": "active",
            "operator": "Microsoft",
            "company": "Microsoft",
            "family": "Microsoft",
            "category": "ai",
            "kind": "unknown",
            "purpose": "search",
            "identity_type": "unknown",
            "user_agents": [
                "AzureAI-SearchBot",
                "Mozilla/5.0 (compatible",
                "AzureAI-SearchBot/1.0",
                "+https://azure.microsoft.com/en-us/products/ai-services/ai-search/)",
                "Azure Search Bot",
                "Microsoft AI Search"
            ],
            "primary_user_agent": "AzureAI-SearchBot",
            "robots_token": "AzureAI-SearchBot",
            "verified": "yes",
            "respects_robots": "unknown",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "unknown",
            "source_type": "unknown",
            "short_description": "Microsoft Azure AI search crawler that indexes web content for Azure AI search services.",
            "verification_method": "Verify the exact user-agent and confirm Microsoft ownership using Bing's published bot verification guidance.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: AzureAI-SearchBot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"AzureAI-SearchBot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://azure.microsoft.com/en-us/products/ai-services/ai-search/"
                }
            ],
            "updated_at": "2026-04-01T00:55:18Z"
        },
        {
            "id": "baidu-ads-server-proxy",
            "name": "Baidu ADS Server Proxy",
            "slug": "baidu-ads-server-proxy",
            "url": "https://botcrawl.com/bots/baidu-ads-server-proxy/",
            "status": "active",
            "operator": "Baidu",
            "company": "Baidu",
            "family": "Baidu",
            "category": "search",
            "kind": "crawler",
            "purpose": "indexing",
            "identity_type": "unknown",
            "user_agents": [
                "Baidu-ADS"
            ],
            "primary_user_agent": "Baidu-ADS",
            "robots_token": "Baidu-ADS",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Baidu's scrubbing proxy.",
            "short_description": "Baidu's scrubbing proxy.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Baidu-ADS\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Baidu-ADS\")"
            },
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:28Z"
        },
        {
            "id": "baiduspider",
            "name": "Baiduspider",
            "slug": "baiduspider",
            "url": "https://botcrawl.com/bots/baiduspider/",
            "status": "active",
            "operator": "Baidu",
            "company": "Baidu",
            "family": "Baidu",
            "category": "search",
            "kind": "crawler",
            "purpose": "indexing",
            "identity_type": "unknown",
            "user_agents": [
                "Baiduspider",
                "Mozilla/5.0 (iPhone",
                "CPU iPhone OS 9_1 like Mac OS X) AppleWebKit/601.1.46 (KHTML",
                "like Gecko) Version/9.0 Mobile/13B143 Safari/601.1 (compatible",
                "Baiduspider-render/2.0",
                "+http://www.baidu.com/search/spider.html)"
            ],
            "primary_user_agent": "Baiduspider",
            "robots_token": "Baiduspider",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Baiduspider is the search engine crawler for the search engine Baidu.",
            "short_description": "Baiduspider is the search engine crawler for the search engine Baidu.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Baiduspider\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Baiduspider\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "http://help.baidu.com/question?prod_id=99&amp;class=0&amp;id=3001"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:28Z"
        },
        {
            "id": "baiduspider-favo",
            "name": "Baiduspider-favo",
            "slug": "baiduspider-favo",
            "url": "https://botcrawl.com/bots/baiduspider-favo/",
            "status": "active",
            "operator": "Baidu",
            "company": "Baidu",
            "family": "Baidu",
            "category": "search",
            "kind": "unknown",
            "purpose": "search",
            "identity_type": "unknown",
            "user_agents": [
                "Baiduspider-favo",
                "Mozilla/5.0 (compatible",
                "Baiduspider-favo/2.0",
                "+http://www.baidu.com/search/spider.html)",
                "Baidu Favicon Bot"
            ],
            "primary_user_agent": "Baiduspider-favo",
            "robots_token": "Baiduspider-favo",
            "verified": "yes",
            "respects_robots": "yes",
            "risk": "high",
            "recommended_action": "depends",
            "confidence": "unknown",
            "source_type": "unknown",
            "short_description": "Baidu favicon crawler that downloads site favicons for Baidu search results.",
            "verification_method": "Verify the exact user-agent and confirm reverse DNS/ownership against Baidu's published crawler guidance where available.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: Baiduspider-favo\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Baiduspider-favo\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.baidu.com/search/spider.html"
                }
            ],
            "updated_at": "2026-04-01T00:55:31Z"
        },
        {
            "id": "baiduspider-image",
            "name": "Baiduspider-image",
            "slug": "baiduspider-image",
            "url": "https://botcrawl.com/bots/baiduspider-image/",
            "status": "active",
            "operator": "Baidu",
            "company": "Baidu",
            "family": "Baidu",
            "category": "scraper",
            "kind": "unknown",
            "purpose": "indexing",
            "identity_type": "unknown",
            "user_agents": [
                "Baiduspider-image",
                "Mozilla/5.0 (compatible",
                "Baiduspider-image/2.0",
                "+http://www.baidu.com/search/spider.html)",
                "Baidu Image Bot"
            ],
            "primary_user_agent": "Baiduspider-image",
            "robots_token": "Baiduspider-image",
            "verified": "yes",
            "respects_robots": "yes",
            "risk": "high",
            "recommended_action": "depends",
            "confidence": "unknown",
            "source_type": "unknown",
            "short_description": "Baidu image search crawler that indexes images for Baidu Image Search.",
            "verification_method": "Verify the exact user-agent and confirm reverse DNS/ownership against Baidu's published crawler guidance where available.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: Baiduspider-image\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Baiduspider-image\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.baidu.com/search/spider.html"
                }
            ],
            "updated_at": "2026-04-01T00:55:31Z"
        },
        {
            "id": "baiduspider-news",
            "name": "Baiduspider-news",
            "slug": "baiduspider-news",
            "url": "https://botcrawl.com/bots/baiduspider-news/",
            "status": "active",
            "operator": "Baidu",
            "company": "Baidu",
            "family": "Baidu",
            "category": "search",
            "kind": "unknown",
            "purpose": "indexing",
            "identity_type": "unknown",
            "user_agents": [
                "Baiduspider-news",
                "Mozilla/5.0 (compatible",
                "Baiduspider-news/2.0",
                "+http://www.baidu.com/search/spider.html)",
                "Baidu News Bot"
            ],
            "primary_user_agent": "Baiduspider-news",
            "robots_token": "Baiduspider-news",
            "verified": "yes",
            "respects_robots": "yes",
            "risk": "high",
            "recommended_action": "depends",
            "confidence": "unknown",
            "source_type": "unknown",
            "short_description": "Baidu news crawler that indexes news articles for Baidu News.",
            "verification_method": "Verify the exact user-agent and confirm reverse DNS/ownership against Baidu's published crawler guidance where available.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: Baiduspider-news\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Baiduspider-news\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.baidu.com/search/spider.html"
                }
            ],
            "updated_at": "2026-04-01T00:55:31Z"
        },
        {
            "id": "baiduspider-render",
            "name": "Baiduspider-render",
            "slug": "baiduspider-render",
            "url": "https://botcrawl.com/bots/baiduspider-render/",
            "status": "active",
            "operator": "Baidu",
            "company": "Baidu",
            "family": "Baidu",
            "category": "search",
            "kind": "unknown",
            "purpose": "indexing",
            "identity_type": "unknown",
            "user_agents": [
                "Baiduspider-render",
                "Mozilla/5.0 (compatible",
                "Baiduspider-render/2.0",
                "+http://www.baidu.com/search/spider.html)",
                "Baidu Render Bot"
            ],
            "primary_user_agent": "Baiduspider-render",
            "robots_token": "Baiduspider-render",
            "verified": "yes",
            "respects_robots": "yes",
            "risk": "high",
            "recommended_action": "depends",
            "confidence": "unknown",
            "source_type": "unknown",
            "short_description": "Baidu rendering crawler that renders JavaScript-heavy pages for Baidu indexing.",
            "verification_method": "Verify the exact user-agent and confirm reverse DNS/ownership against Baidu's published crawler guidance where available.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: Baiduspider-render\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Baiduspider-render\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.baidu.com/search/spider.html"
                }
            ],
            "updated_at": "2026-04-01T00:55:31Z"
        },
        {
            "id": "baiduspider-video",
            "name": "Baiduspider-video",
            "slug": "baiduspider-video",
            "url": "https://botcrawl.com/bots/baiduspider-video/",
            "status": "active",
            "operator": "Baidu",
            "company": "Baidu",
            "family": "Baidu",
            "category": "search",
            "kind": "unknown",
            "purpose": "indexing",
            "identity_type": "unknown",
            "user_agents": [
                "Baiduspider-video",
                "Mozilla/5.0 (compatible",
                "Baiduspider-video/2.0",
                "+http://www.baidu.com/search/spider.html)",
                "Baidu Video Bot"
            ],
            "primary_user_agent": "Baiduspider-video",
            "robots_token": "Baiduspider-video",
            "verified": "yes",
            "respects_robots": "yes",
            "risk": "high",
            "recommended_action": "depends",
            "confidence": "unknown",
            "source_type": "unknown",
            "short_description": "Baidu video search crawler that indexes video content for Baidu Video Search.",
            "verification_method": "Verify the exact user-agent and confirm reverse DNS/ownership against Baidu's published crawler guidance where available.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: Baiduspider-video\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Baiduspider-video\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.baidu.com/search/spider.html"
                }
            ],
            "updated_at": "2026-04-01T00:55:31Z"
        },
        {
            "id": "barkrowler",
            "name": "Barkrowler",
            "slug": "barkrowler",
            "url": "https://botcrawl.com/bots/barkrowler/",
            "status": "active",
            "operator": "Babbar",
            "company": "Babbar",
            "family": "Babbar",
            "category": "scraper",
            "kind": "crawler",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "Barkrowler",
                "Mozilla/5.0 (compatible",
                "Barkrowler/0.9",
                "+https://babbar.tech/crawler)",
                "Barkrowler/"
            ],
            "primary_user_agent": "Barkrowler",
            "robots_token": "Barkrowler",
            "verified": "yes",
            "respects_robots": "yes",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "SEO web crawler used to identify web page popularity and themes.",
            "short_description": "SEO web crawler used to identify web page popularity and themes.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Barkrowler\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Barkrowler\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://babbar.tech/crawler"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:28Z"
        },
        {
            "id": "bazqux",
            "name": "Bazqux",
            "slug": "bazqux",
            "url": "https://botcrawl.com/bots/bazqux/",
            "status": "active",
            "operator": "Bazqux",
            "company": "Bazqux",
            "family": "Bazqux",
            "category": "search",
            "kind": "fetcher",
            "purpose": "indexing",
            "identity_type": "unknown",
            "user_agents": [
                "BazQux",
                "Mozilla/5.0 (compatible",
                "BazQux/2.4",
                "+https://bazqux.com/fetcher",
                "1 subscribers)"
            ],
            "primary_user_agent": "BazQux",
            "robots_token": "BazQux",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "The Bazqux Fetcher is how BazQux Reader grabs RSS/Atom feeds and comments when users choose to subscribe to your blog in BazQux Reader. Fetcher collects and periodically refreshes these user-initiated feeds.",
            "short_description": "The Bazqux Fetcher is how BazQux Reader grabs RSS/Atom feeds and comments when users choose to subscribe to your blog in BazQux Reader.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: BazQux\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"BazQux\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://bazqux.com/fetcher"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:28Z"
        },
        {
            "id": "bestchange-bot",
            "name": "BestChange Bot",
            "slug": "bestchange-bot",
            "url": "https://botcrawl.com/bots/bestchange-bot/",
            "status": "active",
            "operator": "BestChange",
            "company": "BestChange",
            "family": "BestChange",
            "category": "scraper",
            "kind": "crawler",
            "purpose": "indexing",
            "identity_type": "unknown",
            "user_agents": [
                "BestChange",
                "BestChange Bot",
                "https://www.bestchange.com/"
            ],
            "primary_user_agent": "BestChange",
            "robots_token": "BestChange",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "The BestChange bot downloads exchange rate information from 600 websites every 5 seconds.",
            "short_description": "The BestChange bot downloads exchange rate information from 600 websites every 5 seconds.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: BestChange\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"BestChange\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.bestchange.com/wiki/rates.html"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:28Z"
        },
        {
            "id": "better-uptime",
            "name": "Better Uptime",
            "slug": "better-uptime",
            "url": "https://botcrawl.com/bots/better-uptime/",
            "status": "active",
            "operator": "Better Stack",
            "company": "Better Stack",
            "family": "Better Stack",
            "category": "monitoring",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "Better Uptime Bot",
                "Better Uptime Bot Mozilla/5.0 (Windows NT 10.0",
                "Win64",
                "x64) AppleWebKit/537.36 (KHTML",
                "like Gecko) Chrome/74.0.3729.169 Safari/537.36",
                "Better Stack monitoring bot"
            ],
            "primary_user_agent": "Better Uptime Bot",
            "robots_token": "Better Uptime Bot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "The bot is used for monitoring infrastructure platforms.",
            "short_description": "The bot is used for monitoring infrastructure platforms.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Better Uptime Bot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Better Uptime Bot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://betteruptime.com/faq"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:28Z"
        },
        {
            "id": "bibliotheque-nacional-de-france-crawler",
            "name": "Bibliotheque Nacional de France Crawler",
            "slug": "bibliotheque-nacional-de-france-crawler",
            "url": "https://botcrawl.com/bots/bibliotheque-nacional-de-france-crawler/",
            "status": "active",
            "operator": "Bibliothèque nationale de France",
            "company": "Bibliothèque nationale de France",
            "family": "Bibliothèque nationale de France",
            "category": "scraper",
            "kind": "crawler",
            "purpose": "general-crawl",
            "identity_type": "unknown",
            "user_agents": [
                "bnf.fr_bot",
                "Mozilla/5.0 (compatible",
                "+https://www.bnf.fr/fr/capture-de-votre-site-web-par-le-robot-de-la-bnf)",
                "Bibliothèque nationale de France crawler",
                "BnF crawler"
            ],
            "primary_user_agent": "bnf.fr_bot",
            "robots_token": "bnf.fr_bot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Bibliothèque nationale de France's mission is to collect, catalog, preserve, enrich and communicate the national documentary heritage.",
            "short_description": "Bibliothèque nationale de France's mission is to collect, catalog, preserve, enrich and communicate the national documentary heritage.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: bnf.fr_bot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"bnf.fr_bot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.bnf.fr"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:28Z"
        },
        {
            "id": "big-sur-ai",
            "name": "Big Sur AI",
            "slug": "big-sur-ai",
            "url": "https://botcrawl.com/bots/big-sur-ai/",
            "status": "active",
            "operator": "Big Sur AI",
            "company": "Big Sur AI",
            "family": "Big Sur AI",
            "category": "ai",
            "kind": "crawler",
            "purpose": "general-crawl",
            "identity_type": "unknown",
            "user_agents": [
                "bigsur.ai",
                "bigsur.ai (+https://www.bigsur.ai)",
                "Big Sur AI crawler"
            ],
            "primary_user_agent": "bigsur.ai",
            "robots_token": "bigsur.ai",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Big Sur AI Crawler crawls users' websites to enable AI-infused experiences.",
            "short_description": "Big Sur AI Crawler crawls users' websites to enable AI-infused experiences.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: bigsur.ai\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"bigsur.ai\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://docs.google.com/document/d/1eZXudiuWBY0LnTyNcXkI7QnTYuoRY2NeOfTjMMJ-QzI/edit?tab=t.0"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:28Z"
        },
        {
            "id": "bigscootsmonitor",
            "name": "BigScootsMonitor",
            "slug": "bigscootsmonitor",
            "url": "https://botcrawl.com/bots/bigscootsmonitor/",
            "status": "active",
            "operator": "BigScoots",
            "company": "BigScoots",
            "family": "BigScootsMonitor",
            "category": "monitoring",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "BigScootsMonitor",
                "BigScootsMonitor Mozilla/5.0 (Windows NT 10.0",
                "Win64",
                "x64) AppleWebKit/537.36 (KHTML",
                "like Gecko) Chrome/74.0.3729.169 Safari/537.36"
            ],
            "primary_user_agent": "BigScootsMonitor",
            "robots_token": "BigScootsMonitor",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "BigScoots Managed Services Monitor - Uptime",
            "short_description": "BigScoots Managed Services Monitor - Uptime",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: BigScootsMonitor\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"BigScootsMonitor\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://monitor.bigscoots.dev"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:29Z"
        },
        {
            "id": "bigupdata-bot",
            "name": "BigUpData Bot",
            "slug": "bigupdata-bot",
            "url": "https://botcrawl.com/bots/bigupdata-bot/",
            "status": "active",
            "operator": "BigUpData",
            "company": "BigUpData",
            "family": "BigUpData",
            "category": "scraper",
            "kind": "crawler",
            "purpose": "general-crawl",
            "identity_type": "unknown",
            "user_agents": [
                "BigUpDataBot"
            ],
            "primary_user_agent": "BigUpDataBot",
            "robots_token": "BigUpDataBot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Automates the pipeline of active product deals to advertisements.",
            "short_description": "Automates the pipeline of active product deals to advertisements.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: BigUpDataBot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"BigUpDataBot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://bigupdata.co.uk/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:29Z"
        },
        {
            "id": "binarycanary",
            "name": "BinaryCanary",
            "slug": "binarycanary",
            "url": "https://botcrawl.com/bots/binarycanary/",
            "status": "active",
            "operator": "Binary Canary",
            "company": "Binary Canary",
            "family": "Binary Canary",
            "category": "monitoring",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "http://www.binarycanary.com",
                "Mozilla/5.0 (compatible",
                "Chirp/1.0",
                "+http://www.binarycanary.com)",
                "Chirp"
            ],
            "primary_user_agent": "http://www.binarycanary.com",
            "robots_token": "http://www.binarycanary.com",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "BinaryCanary monitors websites for availability and performance issues.",
            "short_description": "BinaryCanary monitors websites for availability and performance issues.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: http://www.binarycanary.com\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"http://www.binarycanary.com\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.binarycanary.com/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:29Z"
        },
        {
            "id": "bing-ads",
            "name": "Bing Ads",
            "slug": "bing-ads",
            "url": "https://botcrawl.com/bots/bing-ads/",
            "status": "active",
            "operator": "Microsoft",
            "company": "Microsoft",
            "family": "Microsoft",
            "category": "scraper",
            "kind": "crawler",
            "purpose": "general-crawl",
            "identity_type": "unknown",
            "user_agents": [
                "adidxbot",
                "Mozilla/5.0 (compatible",
                "adidxbot/2.0",
                "+http://www.bing.com/bingbot.htm)",
                "Mozilla/5.0 (iPhone",
                "CPU iPhone OS 7_0 like Mac OS X) AppleWebKit/537.51.1 (KHTML",
                "like Gecko) Version/7.0 Mobile/11A465 Safari/9537.53 (compatible",
                "Bing Ads crawler"
            ],
            "primary_user_agent": "adidxbot",
            "robots_token": "adidxbot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "AdIdxBot is the crawler used by Bing Ads. AdIdxBot crawls ads and follows the websites from those ads for quality control. Just like Bingbot, AdIdxBot has both “desktop” and “mobile” variants.",
            "short_description": "AdIdxBot is the crawler used by Bing Ads.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: adidxbot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"adidxbot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.bing.com/webmaster/help/which-crawlers-does-bing-use-8c184ec0"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:29Z"
        },
        {
            "id": "bing-preview",
            "name": "Bing Preview",
            "slug": "bing-preview",
            "url": "https://botcrawl.com/bots/bing-preview/",
            "status": "active",
            "operator": "Microsoft",
            "company": "Microsoft",
            "family": "Microsoft",
            "category": "scraper",
            "kind": "preview",
            "purpose": "general-crawl",
            "identity_type": "unknown",
            "user_agents": [
                "BingPreview",
                "Mozilla/5.0 (Windows NT 6.1",
                "WOW64) AppleWebKit/534+ (KHTML",
                "like Gecko) BingPreview/1.0b",
                "Mozilla/5.0 (Windows Phone 8.1",
                "ARM",
                "Trident/7.0",
                "Touch",
                "rv:11.0",
                "IEMobile/11.0",
                "NOKIA",
                "Lumia 530) like Gecko BingPreview/1.0b"
            ],
            "primary_user_agent": "BingPreview",
            "robots_token": "BingPreview",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "BingPreview generates page snapshots for Bing. Note that BingPreview has desktop and mobile variants.",
            "short_description": "BingPreview generates page snapshots for Bing.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: BingPreview\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"BingPreview\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.bing.com/webmaster/help/which-crawlers-does-bing-use-8c184ec0"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:29Z"
        },
        {
            "id": "bingbot",
            "name": "BingBot",
            "slug": "bingbot",
            "url": "https://botcrawl.com/bots/bingbot/",
            "status": "active",
            "operator": "Microsoft",
            "company": "Microsoft",
            "family": "Microsoft",
            "category": "search",
            "kind": "crawler",
            "purpose": "indexing",
            "identity_type": "unknown",
            "user_agents": [
                "bingbot",
                "Mozilla/5.0 (compatible",
                "bingbot/2.0",
                "+http://www.bing.com/bingbot.htm)",
                "Mozilla/5.3 (compatible",
                "bingbot/2.1",
                "+http://www.bing.com/bot.html)",
                "bingbot/"
            ],
            "primary_user_agent": "bingbot",
            "robots_token": "bingbot",
            "verified": "yes",
            "respects_robots": "yes",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Bingbot is Bing's main crawler and handles most of Bing's crawling needs each day.",
            "short_description": "Bingbot is Bing's main crawler and handles most of Bing's crawling needs each day.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "ip_ranges_url": "https://www.bing.com/toolbox/bingbot.json",
            "rules": {
                "robots": "User-agent: bingbot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"bingbot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.bing.com/bingbot.htm"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:29Z"
        },
        {
            "id": "bitbucket",
            "name": "Bitbucket",
            "slug": "bitbucket",
            "url": "https://botcrawl.com/bots/bitbucket/",
            "status": "active",
            "operator": "Atlassian",
            "company": "Atlassian",
            "family": "Atlassian",
            "category": "unknown",
            "kind": "fetcher",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "Bitbucket-Webhooks",
                "Bitbucket-Webhooks/2.0"
            ],
            "primary_user_agent": "Bitbucket-Webhooks",
            "robots_token": "Bitbucket-Webhooks",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Bitbucket Webhooks for CI/CD",
            "short_description": "Bitbucket Webhooks for CI/CD",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Bitbucket-Webhooks\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Bitbucket-Webhooks\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://support.atlassian.com/bitbucket-cloud/docs/what-are-the-bitbucket-cloud-ip-addresses-i-should-use-to-configure-my-corporate-firewall/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:29Z"
        },
        {
            "id": "bl-uk_lddc_bot",
            "name": "bl.uk_lddc_bot",
            "slug": "bl-uk_lddc_bot",
            "url": "https://botcrawl.com/bots/bl-uk_lddc_bot/",
            "status": "active",
            "operator": "British Library",
            "company": "British Library",
            "family": "British Library",
            "category": "scraper",
            "kind": "unknown",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "bl.uk_lddc_bot",
                "Mozilla/5.0 (compatible",
                "+https://www.bl.uk/legal-deposit)",
                "British Library Bot"
            ],
            "primary_user_agent": "bl.uk_lddc_bot",
            "robots_token": "bl.uk_lddc_bot",
            "verified": "yes",
            "respects_robots": "yes",
            "risk": "neutral",
            "recommended_action": "allow",
            "confidence": "unknown",
            "source_type": "unknown",
            "short_description": "British Library legal deposit web archiving crawler that preserves UK web content.",
            "verification_method": "Treat this entry as verified only when the exact user-agent matches the operator documentation at https://www.bl.uk/legal-deposit.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: bl.uk_lddc_bot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"bl.uk_lddc_bot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.bl.uk/legal-deposit"
                }
            ],
            "updated_at": "2026-04-01T00:55:21Z"
        },
        {
            "id": "black-duck-fast-dynamic",
            "name": "Black Duck Fast Dynamic",
            "slug": "black-duck-fast-dynamic",
            "url": "https://botcrawl.com/bots/black-duck-fast-dynamic/",
            "status": "active",
            "operator": "Black Duck",
            "company": "Black Duck",
            "family": "Black Duck",
            "category": "security",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "FastDAST",
                "Mozilla/5.0 (Macintosh",
                "Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML",
                "like Gecko) Chrome/132.0.0.0 Safari/537.36 FastDAST/1.0"
            ],
            "primary_user_agent": "FastDAST",
            "robots_token": "FastDAST",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Fast Dynamic DAST security scanning by Black Duck Software",
            "short_description": "Fast Dynamic DAST security scanning by Black Duck Software",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: FastDAST\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"FastDAST\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://polaris.blackduck.com/developer/default/documentation/t_ip-ranges"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:29Z"
        },
        {
            "id": "blexbot",
            "name": "BLEXBot",
            "slug": "blexbot",
            "url": "https://botcrawl.com/bots/blexbot/",
            "status": "active",
            "operator": "WebMeUp",
            "company": "WebMeUp",
            "family": "WebMeUp",
            "category": "scraper",
            "kind": "crawler",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "BLEXBot",
                "Mozilla/5.0 (compatible",
                "BLEXBot/1.0",
                "+http://webmeup-crawler.com/)"
            ],
            "primary_user_agent": "BLEXBot",
            "robots_token": "BLEXBot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "SEO PowerSuite Link Explorer (webmeup.com) is the world's freshest backlink index, and the primary source of backlink-related data for the SEO PowerSuite tools. We're dedicated to providing SEOs with the most comprehensive, up-to-date backlink data on the Web.",
            "short_description": "SEO PowerSuite Link Explorer (webmeup.com) is the world's freshest backlink index, and the primary source of backlink-related data for the SEO PowerSuite tools.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: BLEXBot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"BLEXBot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "http://webmeup.com/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:29Z"
        },
        {
            "id": "bling-erp",
            "name": "Bling ERP",
            "slug": "bling-erp",
            "url": "https://botcrawl.com/bots/bling-erp/",
            "status": "active",
            "operator": "Bling",
            "company": "Bling",
            "family": "Bling",
            "category": "unknown",
            "kind": "unknown",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "BlingERP"
            ],
            "primary_user_agent": "BlingERP",
            "robots_token": "BlingERP",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Bling is an online ERP system that integrates with ecommerce platforms.",
            "short_description": "Bling is an online ERP system that integrates with ecommerce platforms.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: BlingERP\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"BlingERP\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://bling.com.br"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:29Z"
        },
        {
            "id": "blockaid",
            "name": "Blockaid",
            "slug": "blockaid",
            "url": "https://botcrawl.com/bots/blockaid/",
            "status": "active",
            "operator": "Blockaid",
            "company": "Blockaid",
            "family": "Blockaid",
            "category": "security",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "36b1546a5700e52eb2972b3f92b314fa",
                "Mozilla/5.0 (Windows NT 10.0",
                "Win64",
                "x64) AppleWebKit/537.36 (KHTML",
                "like Gecko) Chrome/110.0.0.0 Safari/537.36 36b1546a5700e52eb2972b3f92b314fa"
            ],
            "primary_user_agent": "36b1546a5700e52eb2972b3f92b314fa",
            "robots_token": "36b1546a5700e52eb2972b3f92b314fa",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Scanning the internet to find malicious sites that scam crypto users into draining their wallets",
            "short_description": "Scanning the internet to find malicious sites that scam crypto users into draining their wallets",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: 36b1546a5700e52eb2972b3f92b314fa\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"36b1546a5700e52eb2972b3f92b314fa\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.blockaid.io/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:29Z"
        },
        {
            "id": "blogtrottr",
            "name": "Blogtrottr",
            "slug": "blogtrottr",
            "url": "https://botcrawl.com/bots/blogtrottr/",
            "status": "active",
            "operator": "Blogtrottr",
            "company": "Blogtrottr",
            "family": "Blogtrottr",
            "category": "search",
            "kind": "fetcher",
            "purpose": "indexing",
            "identity_type": "unknown",
            "user_agents": [
                "Blogtrottr",
                "Blogtrottr/2.0"
            ],
            "primary_user_agent": "Blogtrottr",
            "robots_token": "Blogtrottr",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Blogtrottr delivers updates from all of your favourite news, feeds, and blogs directly to your email inbox, giving you the flexibility to stay updated whilst on the go.",
            "short_description": "Blogtrottr delivers updates from all of your favourite news, feeds, and blogs directly to your email inbox, giving you the flexibility to stay updated whilst on the go.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Blogtrottr\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Blogtrottr\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://blogtrottr.com/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:29Z"
        },
        {
            "id": "blogvault",
            "name": "BlogVault",
            "slug": "blogvault",
            "url": "https://botcrawl.com/bots/blogvault/",
            "status": "active",
            "operator": "BlogVault",
            "company": "BlogVault",
            "family": "BlogVault",
            "category": "monitoring",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "BlogVault",
                "BlogVault/1.0 (+https://blogvault.net)",
                "BlogVault/"
            ],
            "primary_user_agent": "BlogVault",
            "robots_token": "BlogVault",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "WordPress services like backup, security, monitoring etc.",
            "short_description": "WordPress services like backup, security, monitoring etc.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: BlogVault\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"BlogVault\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://blogvault.net/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:29Z"
        },
        {
            "id": "blueno",
            "name": "Blueno",
            "slug": "blueno",
            "url": "https://botcrawl.com/bots/blueno/",
            "status": "active",
            "operator": "NAVER",
            "company": "NAVER",
            "family": "NAVER",
            "category": "unknown",
            "kind": "fetcher",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "Blueno"
            ],
            "primary_user_agent": "Blueno",
            "robots_token": "Blueno",
            "verified": "yes",
            "respects_robots": "unknown",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "high",
            "source_type": "unknown",
            "common_use": "Collects summary preview information for links inserted in NAVER products.",
            "short_description": "NAVER preview robot that collects link summary information in editors and similar workflows.",
            "verification_method": "Verify the user-agent and confirm the source IP reverse-resolves into .naver.com, then confirm forward DNS matches.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify with the operator’s published guidance before allow-listing.",
            "rules": {
                "robots": "User-agent: Blueno\nDisallow: /",
                "apache": "RewriteCond %{HTTP_USER_AGENT} Blueno [NC]\nRewriteRule .* - [F,L]",
                "nginx": "if ($http_user_agent ~* \"Blueno\") { return 403; }",
                "cloudflare": "(http.user_agent contains \"Blueno\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://searchadvisor.naver.com/guide/seo-basic-firewall"
                }
            ],
            "last_checked": "2026-04-03",
            "updated_at": "2026-04-03T08:22:11Z"
        },
        {
            "id": "bluesky-link-preview-service",
            "name": "Bluesky Link Preview Service",
            "slug": "bluesky-link-preview-service",
            "url": "https://botcrawl.com/bots/bluesky-link-preview-service/",
            "status": "active",
            "operator": "Bluesky",
            "company": "Bluesky",
            "family": "Bluesky",
            "category": "scraper",
            "kind": "preview",
            "purpose": "general-crawl",
            "identity_type": "unknown",
            "user_agents": [
                "Bluesky",
                "Bluesky/"
            ],
            "primary_user_agent": "Bluesky",
            "robots_token": "Bluesky",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Bluesky social pulls links in advance to render webpage previews.",
            "short_description": "Bluesky social pulls links in advance to render webpage previews.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Bluesky\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Bluesky\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://blueskyweb.xyz/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:29Z"
        },
        {
            "id": "bne-es_bot",
            "name": "bne.es_bot",
            "slug": "bne-es_bot",
            "url": "https://botcrawl.com/bots/bne-es_bot/",
            "status": "active",
            "operator": "Biblioteca Nacional de España",
            "company": "Biblioteca Nacional de España",
            "family": "Biblioteca Nacional de España",
            "category": "scraper",
            "kind": "unknown",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "bne.es_bot",
                "Mozilla/5.0 (compatible",
                "+https://www.bne.es)",
                "BNE Bot",
                "Spanish National Library Bot"
            ],
            "primary_user_agent": "bne.es_bot",
            "robots_token": "bne.es_bot",
            "verified": "yes",
            "respects_robots": "yes",
            "risk": "neutral",
            "recommended_action": "allow",
            "confidence": "unknown",
            "source_type": "unknown",
            "short_description": "Biblioteca Nacional de España web archiving crawler.",
            "verification_method": "Treat this entry as verified only when the exact user-agent matches the operator documentation at https://www.bne.es.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: bne.es_bot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"bne.es_bot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.bne.es"
                }
            ],
            "updated_at": "2026-04-01T00:55:21Z"
        },
        {
            "id": "boardgameprices-bot",
            "name": "BoardGamePrices Bot",
            "slug": "boardgameprices-bot",
            "url": "https://botcrawl.com/bots/boardgameprices-bot/",
            "status": "active",
            "operator": "KP Software Consult ApS",
            "company": "KP Software Consult ApS",
            "family": "KP Software Consult ApS",
            "category": "scraper",
            "kind": "crawler",
            "purpose": "indexing",
            "identity_type": "unknown",
            "user_agents": [
                "BoardGamePrices Bot"
            ],
            "primary_user_agent": "BoardGamePrices Bot",
            "robots_token": "BoardGamePrices Bot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Price comparison site for board games. Need to crawl store pages for participating stores. All stores give permission to be crawled.",
            "short_description": "Price comparison site for board games.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: BoardGamePrices Bot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"BoardGamePrices Bot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://boardgameprices.eu"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:29Z"
        },
        {
            "id": "botify",
            "name": "Botify",
            "slug": "botify",
            "url": "https://botcrawl.com/bots/botify/",
            "status": "active",
            "operator": "Botify",
            "company": "Botify",
            "family": "Botify",
            "category": "scraper",
            "kind": "crawler",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "botify",
                "Desktop: Mozilla/5.0 (compatible",
                "http://botify.com)",
                "SiteCrawler",
                "Botify crawler"
            ],
            "primary_user_agent": "botify",
            "robots_token": "botify",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "SiteCrawler, part of the Botify Analytics suite, gives enterprise SEO teams the power to evaluate the structure and content of their websites just like a search engine",
            "short_description": "SiteCrawler, part of the Botify Analytics suite, gives enterprise SEO teams the power to evaluate the structure and content of their websites just like a search engine",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: botify\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"botify\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.botify.com/platform/botify-analytics/sitecrawler"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:29Z"
        },
        {
            "id": "brandwatch",
            "name": "Brandwatch",
            "slug": "brandwatch",
            "url": "https://botcrawl.com/bots/brandwatch/",
            "status": "active",
            "operator": "Brandwatch",
            "company": "Brandwatch",
            "family": "Brandwatch",
            "category": "ai",
            "kind": "crawler",
            "purpose": "general-crawl",
            "identity_type": "unknown",
            "user_agents": [
                "magpie-crawler",
                "magpie-crawler/1.1 (U",
                "Linux amd64",
                "en-GB",
                "+http://www.brandwatch.net)",
                "Magpie Crawler"
            ],
            "primary_user_agent": "magpie-crawler",
            "robots_token": "magpie-crawler",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "The Magpie Crawler indexes content for its social media monitoring solution.",
            "short_description": "The Magpie Crawler indexes content for its social media monitoring solution.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: magpie-crawler\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"magpie-crawler\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.brandwatch.com/legal/magpie-crawler/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:29Z"
        },
        {
            "id": "bravebot",
            "name": "Bravebot",
            "slug": "bravebot",
            "url": "https://botcrawl.com/bots/bravebot/",
            "status": "active",
            "operator": "Brave Software, Inc.",
            "company": "Brave Software, Inc.",
            "family": "Brave Software, Inc.",
            "category": "ai",
            "kind": "crawler",
            "purpose": "search",
            "identity_type": "unknown",
            "user_agents": [
                "Bravebot",
                "Mozilla/5.0 AppleWebKit/537.36 (KHTML",
                "like Gecko",
                "compatible",
                "Bravebot/1.0",
                "+https://search.brave.com/help/brave-search-crawler) Chrome/W.X.Y.Z Safari/537.36"
            ],
            "primary_user_agent": "Bravebot",
            "robots_token": "Bravebot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Brave search has a crawler to discover new pages and index their content.",
            "short_description": "Brave search has a crawler to discover new pages and index their content.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Bravebot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Bravebot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://search.brave.com/help/brave-search-crawler"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:29Z"
        },
        {
            "id": "brightbot",
            "name": "Brightbot",
            "slug": "brightbot",
            "url": "https://botcrawl.com/bots/brightbot/",
            "status": "active",
            "operator": "Bright Data",
            "company": "Bright Data",
            "family": "Bright Data",
            "category": "monitoring",
            "kind": "unknown",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "brightbot"
            ],
            "primary_user_agent": "brightbot",
            "robots_token": "brightbot",
            "verified": "yes",
            "respects_robots": "unknown",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "botsfyi-verified-listing",
            "common_use": "Monitoring",
            "short_description": "Brightbot is Bright Data's crawler layer that monitors website health and enforces ethical data collection guardrails.",
            "verification_method": "Public verified bot directory listing",
            "spoofing_risk": "Medium",
            "rules": {
                "cloudflare": "(http.user_agent contains \"brightbot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://bots.fyi/d/brightbot"
                }
            ],
            "last_verified": "2026-04-19",
            "last_checked": "2026-04-19",
            "updated_at": "2026-04-30T09:15:29Z"
        },
        {
            "id": "brightedge-bot",
            "name": "BrightEdge Bot",
            "slug": "brightedge-bot",
            "url": "https://botcrawl.com/bots/brightedge-bot/",
            "status": "active",
            "operator": "BrightEdge",
            "company": "BrightEdge",
            "family": "BrightEdge",
            "category": "scraper",
            "kind": "crawler",
            "purpose": "general-crawl",
            "identity_type": "unknown",
            "user_agents": [
                "BrightEdge Crawler",
                "BrightEdge Crawler/1.0 (crawler@brightedge.com)"
            ],
            "primary_user_agent": "BrightEdge Crawler",
            "robots_token": "BrightEdge Crawler",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Autopilot is an SEO marketing automation tool that includes features for internal linking and image optimization. We crawl customer sites so that we can determine the best links to use on the site and to find images that need to be optimized.",
            "short_description": "Autopilot is an SEO marketing automation tool that includes features for internal linking and image optimization.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: BrightEdge Crawler\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"BrightEdge Crawler\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "http://www.brightedge.com"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:29Z"
        },
        {
            "id": "browserbase",
            "name": "Browserbase",
            "slug": "browserbase",
            "url": "https://botcrawl.com/bots/browserbase/",
            "status": "active",
            "operator": "Browserbase",
            "company": "Browserbase",
            "family": "Browserbase",
            "category": "ai",
            "kind": "agent",
            "purpose": "user-triggered",
            "identity_type": "unknown",
            "user_agents": [
                "browserbase"
            ],
            "primary_user_agent": "browserbase",
            "robots_token": "browserbase",
            "verified": "yes",
            "respects_robots": "unknown",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Browser automation for scraping, form submission, and testing on behalf of customers.",
            "short_description": "Runs headless browser automation on behalf of Browserbase customers.",
            "verification_method": "Verified on bots.fyi. Exact browser user-agent strings can vary, so validate traffic context before hard allow-listing.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: browserbase\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"browserbase\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://bots.fyi/d/browserbase"
                },
                {
                    "type": "operator",
                    "url": "https://docs.browserbase.com/"
                }
            ],
            "last_verified": "2026-04-19",
            "last_checked": "2026-04-19",
            "updated_at": "2026-04-30T09:15:30Z"
        },
        {
            "id": "buffer-link-preview-bot",
            "name": "Buffer Link Preview Bot",
            "slug": "buffer-link-preview-bot",
            "url": "https://botcrawl.com/bots/buffer-link-preview-bot/",
            "status": "active",
            "operator": "buffer.com",
            "company": "buffer.com",
            "family": "buffer.com",
            "category": "scraper",
            "kind": "preview",
            "purpose": "general-crawl",
            "identity_type": "unknown",
            "user_agents": [
                "BufferLinkPreviewBot",
                "BufferLinkPreviewBot/1.0 (+https://scraper.buffer.com/about/bots/link-preview-bot)"
            ],
            "primary_user_agent": "BufferLinkPreviewBot",
            "robots_token": "BufferLinkPreviewBot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "When Buffer users share links in their social media posts, their scraper helps create engaging previews.",
            "short_description": "When Buffer users share links in their social media posts, their scraper helps create engaging previews.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: BufferLinkPreviewBot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"BufferLinkPreviewBot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://scraper.buffer.com/about/bots/link-preview-bot"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:30Z"
        },
        {
            "id": "bugsnag",
            "name": "BugsNag",
            "slug": "bugsnag",
            "url": "https://botcrawl.com/bots/bugsnag/",
            "status": "active",
            "operator": "Bugs Nag",
            "company": "Bugs Nag",
            "family": "Bugs Nag",
            "category": "monitoring",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "BugsNag",
                "Insight Hub monitoring bot"
            ],
            "primary_user_agent": "BugsNag",
            "robots_token": "BugsNag",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "BugsNag integration service, now Insight Hub, is used for error and performance monitoring of web applications.",
            "short_description": "BugsNag integration service, now Insight Hub, is used for error and performance monitoring of web applications.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: BugsNag\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"BugsNag\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.bugsnag.com/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:30Z"
        },
        {
            "id": "bushbaby",
            "name": "Bushbaby",
            "slug": "bushbaby",
            "url": "https://botcrawl.com/bots/bushbaby/",
            "status": "active",
            "operator": "Cloudflare",
            "company": "Cloudflare",
            "family": "Cloudflare",
            "category": "security",
            "kind": "crawler",
            "purpose": "unknown",
            "identity_type": "cloudflare-verified-bot",
            "user_agents": [
                "bushbaby/",
                "Cloudflare Bushbaby"
            ],
            "primary_user_agent": "bushbaby/",
            "robots_token": "bushbaby/",
            "verified": "yes",
            "respects_robots": "yes",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "high",
            "source_type": "verified-directory",
            "short_description": "Bushbaby is an internal Cloudflare bot used to manage and renew SSL certificates for websites using Cloudflare services.",
            "verification_method": "Listed by Cloudflare Radar as a verified bot; match the published robots.txt token and user-agent pattern.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: bushbaby/\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"bushbaby/\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://radar.cloudflare.com/bots/directory/bushbaby"
                }
            ],
            "last_verified": "2026-04-01",
            "updated_at": "2026-04-01T23:15:15Z"
        },
        {
            "id": "buttondown-rss-feed-parser",
            "name": "Buttondown RSS-Feed-Parser",
            "slug": "buttondown-rss-feed-parser",
            "url": "https://botcrawl.com/bots/buttondown-rss-feed-parser/",
            "status": "active",
            "operator": "Buttondown",
            "company": "Buttondown",
            "family": "Buttondown",
            "category": "search",
            "kind": "fetcher",
            "purpose": "indexing",
            "identity_type": "unknown",
            "user_agents": [
                "Buttondown",
                "Buttondown RSS-Feed-Parser/1.0 (https://buttondown.com)"
            ],
            "primary_user_agent": "Buttondown",
            "robots_token": "Buttondown",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "The bot fetches RSS feeds to import into Buttondown newsletters.",
            "short_description": "The bot fetches RSS feeds to import into Buttondown newsletters.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Buttondown\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Buttondown\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://docs.buttondown.com/rss-to-email#buttondown-says-it-cant-reach-my-rss-feed-but-i-can-access-it-just-fine-in-the-browser"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:30Z"
        },
        {
            "id": "caliberbot",
            "name": "CaliberBot",
            "slug": "caliberbot",
            "url": "https://botcrawl.com/bots/caliberbot/",
            "status": "active",
            "operator": "Conductor",
            "company": "Conductor",
            "family": "Conductor",
            "category": "scraper",
            "kind": "crawler",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "Caliperbot/1.0",
                "Caliperbot/1.0 (+http://www.conductor.com/caliperbot)"
            ],
            "primary_user_agent": "Caliperbot/1.0",
            "robots_token": "Caliperbot/1.0",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Caliperbot crawls Conductor clients' and prospects' websites for HTML feature extraction to power Content Analytics features within our Searchlight web application.",
            "short_description": "Caliperbot crawls Conductor clients' and prospects' websites for HTML feature extraction to power Content Analytics features within our Searchlight web application.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Caliperbot/1.0\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Caliperbot/1.0\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.conductor.com/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:30Z"
        },
        {
            "id": "capital-one-bot",
            "name": "Capital One Bot",
            "slug": "capital-one-bot",
            "url": "https://botcrawl.com/bots/capital-one-bot/",
            "status": "active",
            "operator": "Capital One",
            "company": "Capital One",
            "family": "Capital One",
            "category": "scraper",
            "kind": "crawler",
            "purpose": "indexing",
            "identity_type": "unknown",
            "user_agents": [
                "CapitalOneBot",
                "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.5112.79 Safari/537.36 CapitalOneBot"
            ],
            "primary_user_agent": "CapitalOneBot",
            "robots_token": "CapitalOneBot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Capital One Bot crawls dealer websites for getting the usage information for Capital One lead navigator button.",
            "short_description": "Capital One Bot crawls dealer websites for getting the usage information for Capital One lead navigator button.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: CapitalOneBot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"CapitalOneBot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.capitalone.com/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:30Z"
        },
        {
            "id": "cartai-agentic-commerce-as-a-service",
            "name": "CartAI Agentic Commerce as a Service",
            "slug": "cartai-agentic-commerce-as-a-service",
            "url": "https://botcrawl.com/bots/cartai-agentic-commerce-as-a-service/",
            "status": "active",
            "operator": "Unknown / Unspecified",
            "company": "Unknown / Unspecified",
            "category": "ai",
            "kind": "unknown",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "CartAI"
            ],
            "primary_user_agent": "CartAI",
            "robots_token": "CartAI",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "unknown",
            "source_type": "unknown",
            "short_description": "CartAI B2B rails combine execution, payments, loyalty and affiliate networks to facilitate agentic commerce",
            "verification_method": "Cloudflare verified bot directory mirror; verify with operator documentation, IP validation, or Web Bot Auth where available",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: CartAI\nDisallow: /"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://docs.cartai.ai/docs/introduction"
                }
            ],
            "updated_at": "2026-03-31T13:55:20Z"
        },
        {
            "id": "catchpoint",
            "name": "catchpoint",
            "slug": "catchpoint",
            "url": "https://botcrawl.com/bots/catchpoint/",
            "status": "active",
            "operator": "Catchpoint",
            "company": "Catchpoint",
            "family": "Catchpoint",
            "category": "monitoring",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "Catchpoint",
                "Mozilla/4.0 (compatible; Catchpoint)",
                "Mozilla/5.0 (Linux; Android 4.1.1; Nexus 7 Build/JRO03D; Catchpoint) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.166 Safari/535.19",
                "Mozilla/5.0 (X11; Linux x86_64; Catchpoint) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.90 Safari/537.36"
            ],
            "primary_user_agent": "Catchpoint",
            "robots_token": "Catchpoint",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Our mission has remained the same from day one: to prioritize monitoring and observability from the end-user perspective.",
            "short_description": "Our mission has remained the same from day one: to prioritize monitoring and observability from the end-user perspective.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Catchpoint\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Catchpoint\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.catchpoint.com"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:30Z"
        },
        {
            "id": "ccbot",
            "name": "CCBot",
            "slug": "ccbot",
            "url": "https://botcrawl.com/bots/ccbot/",
            "status": "active",
            "operator": "Common Crawl",
            "company": "Common Crawl",
            "family": "Common Crawl",
            "category": "scraper",
            "kind": "crawler",
            "purpose": "indexing",
            "identity_type": "official-documented",
            "user_agents": [
                "CCBot",
                "CCBot/2.0 (https://commoncrawl.org/faq/)",
                "Common Crawl crawler"
            ],
            "primary_user_agent": "CCBot",
            "robots_token": "CCBot",
            "verified": "yes",
            "respects_robots": "yes",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "high",
            "source_type": "official",
            "short_description": "Common Crawl dataset crawler with published IP ranges and reverse-DNS verification.",
            "verification_method": "Verify reverse DNS in crawl.commoncrawl.org and match IPs against Common Crawl's published JSON ranges.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "ip_ranges_url": "https://index.commoncrawl.org/ccbot.json",
            "rules": {
                "robots": "User-agent: CCBot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"CCBot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://commoncrawl.org/faq"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:30Z"
        },
        {
            "id": "cert-chief",
            "name": "Cert Chief",
            "slug": "cert-chief",
            "url": "https://botcrawl.com/bots/cert-chief/",
            "status": "active",
            "operator": "Chief Tools",
            "company": "Chief Tools",
            "family": "Chief Tools",
            "category": "security",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "CertChief",
                "CertChief/8714fb40 (+https://cert.chief.app)",
                "CertChief/"
            ],
            "primary_user_agent": "CertChief",
            "robots_token": "CertChief",
            "verified": "yes",
            "respects_robots": "yes",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Cert Chief is a certificate monitoring tool that periodically crawls web properties to check their configuration and reports problems and changes when they are detected.",
            "short_description": "Cert Chief is a certificate monitoring tool that periodically crawls web properties to check their configuration and reports problems and changes when they are detected.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: CertChief\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"CertChief\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://cert.chief.app"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:30Z"
        },
        {
            "id": "channable",
            "name": "Channable",
            "slug": "channable",
            "url": "https://botcrawl.com/bots/channable/",
            "status": "active",
            "operator": "Channable",
            "company": "Channable",
            "family": "Channable",
            "category": "scraper",
            "kind": "crawler",
            "purpose": "general-crawl",
            "identity_type": "unknown",
            "user_agents": [
                "channable/importer",
                "channable/importer/"
            ],
            "primary_user_agent": "channable/importer",
            "robots_token": "channable/importer",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: channable/importer\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"channable/importer\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "http://www.channable.com"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:30Z"
        },
        {
            "id": "channel3bot",
            "name": "Channel3Bot",
            "slug": "channel3bot",
            "url": "https://botcrawl.com/bots/channel3bot/",
            "status": "active",
            "operator": "Channel3",
            "company": "Channel3",
            "family": "Channel3",
            "category": "scraper",
            "kind": "crawler",
            "purpose": "indexing",
            "identity_type": "unknown",
            "user_agents": [
                "channel3bot",
                "Channel3/1.0 (+https://trychannel3.com/channel3bot)",
                "Mozilla/5.0 (compatible; Channel3Bot/1.0; +https://trychannel3.com/channel3bot)"
            ],
            "primary_user_agent": "channel3bot",
            "robots_token": "channel3bot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Channel3Bot visits public product pages to index their content, with the aim of driving traffic back to those websites.",
            "short_description": "Channel3Bot visits public product pages to index their content, with the aim of driving traffic back to those websites.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: channel3bot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"channel3bot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://trychannel3.com/channel3bot"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:30Z"
        },
        {
            "id": "chargebee",
            "name": "ChargeBee",
            "slug": "chargebee",
            "url": "https://botcrawl.com/bots/chargebee/",
            "status": "active",
            "operator": "ChargeBee",
            "company": "ChargeBee",
            "family": "ChargeBee",
            "category": "unknown",
            "kind": "fetcher",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "ChargeBee"
            ],
            "primary_user_agent": "ChargeBee",
            "robots_token": "ChargeBee",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Chargebee provides a webhooks integration to notify web servers of payment events.",
            "short_description": "Chargebee provides a webhooks integration to notify web servers of payment events.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: ChargeBee\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"ChargeBee\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.chargebee.com/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:30Z"
        },
        {
            "id": "chatglm-spider",
            "name": "ChatGLM-Spider",
            "slug": "chatglm-spider",
            "url": "https://botcrawl.com/bots/chatglm-spider/",
            "status": "active",
            "operator": "Zhipu AI",
            "company": "Zhipu AI",
            "family": "Zhipu AI",
            "category": "ai",
            "kind": "unknown",
            "purpose": "training",
            "identity_type": "unknown",
            "user_agents": [
                "ChatGLM-Spider",
                "Mozilla/5.0 (compatible; ChatGLM-Spider/1.0; +https://chatglm.cn/)",
                "Zhipu Spider",
                "ChatGLM Crawler"
            ],
            "primary_user_agent": "ChatGLM-Spider",
            "robots_token": "ChatGLM-Spider",
            "verified": "unknown",
            "respects_robots": "unknown",
            "risk": "high",
            "recommended_action": "block",
            "confidence": "unknown",
            "source_type": "unknown",
            "short_description": "Zhipu AI web crawler used for training the ChatGLM large language model.",
            "verification_method": "No official IP-range or reverse-DNS verification method is documented here. Match the exact user-agent and request behavior against the source documentation at https://chatglm.cn.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: ChatGLM-Spider\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"ChatGLM-Spider\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://chatglm.cn"
                }
            ],
            "updated_at": "2026-04-01T00:55:14Z"
        },
        {
            "id": "chatgpt-agent",
            "name": "ChatGPT agent",
            "slug": "chatgpt-agent",
            "url": "https://botcrawl.com/bots/chatgpt-agent/",
            "status": "active",
            "operator": "OpenAI",
            "company": "OpenAI",
            "category": "unknown",
            "kind": "unknown",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "ChatGPT agent"
            ],
            "primary_user_agent": "ChatGPT agent",
            "robots_token": "ChatGPT agent",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "unknown",
            "source_type": "unknown",
            "short_description": "Agent that can use its own browser to perform tasks for the user.",
            "verification_method": "Cloudflare verified bot directory mirror; verify with operator documentation, IP validation, or Web Bot Auth where available",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: ChatGPT agent\nDisallow: /"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://help.openai.com/en/articles/11752874-chatgpt-agent"
                }
            ],
            "updated_at": "2026-03-31T14:03:40Z"
        },
        {
            "id": "chatgpt-operator",
            "name": "ChatGPT-Operator",
            "slug": "chatgpt-operator",
            "url": "https://botcrawl.com/bots/chatgpt-operator/",
            "status": "active",
            "operator": "OpenAI",
            "company": "OpenAI",
            "family": "OpenAI",
            "category": "unknown",
            "kind": "unknown",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "chatgpt-operator"
            ],
            "primary_user_agent": "chatgpt-operator",
            "robots_token": "chatgpt-operator",
            "verified": "yes",
            "respects_robots": "unknown",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "botsfyi-verified-listing",
            "common_use": "AI Assistant",
            "short_description": "Handles user-initiated requests from ChatGPT Operator accessing external content. It is not used for automated crawling or AI training.",
            "verification_method": "Public verified bot directory listing",
            "spoofing_risk": "Medium",
            "rules": {
                "cloudflare": "(http.user_agent contains \"chatgpt-operator\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://bots.fyi/d/chatgpt-operator"
                }
            ],
            "last_verified": "2026-04-19",
            "last_checked": "2026-04-19",
            "updated_at": "2026-04-30T09:15:30Z"
        },
        {
            "id": "chatgpt-user",
            "name": "ChatGPT-User",
            "slug": "chatgpt-user",
            "url": "https://botcrawl.com/bots/chatgpt-user/",
            "status": "active",
            "operator": "OpenAI",
            "company": "OpenAI",
            "family": "OpenAI",
            "category": "ai",
            "kind": "fetcher",
            "purpose": "user-triggered",
            "identity_type": "official-documented",
            "user_agents": [
                "ChatGPT-User",
                "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko); compatible; ChatGPT-User/1.0; +https://openai.com/bot",
                "OpenAI user fetcher"
            ],
            "primary_user_agent": "ChatGPT-User",
            "robots_token": "ChatGPT-User",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "high",
            "source_type": "official",
            "common_use": "ChatGPT-User is for user actions in ChatGPT and Custom GPTs. When users ask ChatGPT or a CustomGPT a question, it may visit a web page to help answer and include a link to the source in its response.",
            "short_description": "User-initiated OpenAI fetcher for ChatGPT browsing and actions.",
            "verification_method": "Verify the user-agent together with OpenAI's published IP ranges for ChatGPT-User.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "ip_ranges_url": "https://openai.com/chatgpt-user.json",
            "rules": {
                "robots": "User-agent: ChatGPT-User\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"ChatGPT-User\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://developers.openai.com/api/docs/bots/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:30Z"
        },
        {
            "id": "checkly-bot",
            "name": "Checkly Bot",
            "slug": "checkly-bot",
            "url": "https://botcrawl.com/bots/checkly-bot/",
            "status": "active",
            "operator": "Checkly",
            "company": "Checkly",
            "family": "Checkly",
            "category": "monitoring",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "Checkly",
                "Checkly/1.0",
                "Checkly monitor"
            ],
            "primary_user_agent": "Checkly",
            "robots_token": "Checkly",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Checkly is a high-programmability active monitoring solution. We support users in monitoring their websites and APIs. Puppeteer and Playwright (both supported) are browser automation tools that can be used for a variety of tasks. For testing, they really are about E2E/component testing, not unit testing.",
            "short_description": "Checkly is a high-programmability active monitoring solution.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Checkly\nDisallow: /"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.checklyhq.com"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:30Z"
        },
        {
            "id": "checkly",
            "name": "Checkly Bot",
            "slug": "checkly",
            "url": "https://botcrawl.com/bots/checkly/",
            "status": "active",
            "operator": "Checkly",
            "company": "Checkly",
            "family": "Checkly",
            "category": "monitoring",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "Checkly",
                "Checkly/1.0",
                "Checkly monitor"
            ],
            "primary_user_agent": "Checkly",
            "robots_token": "Checkly",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "unknown",
            "common_use": "Checkly is a high-programmability active monitoring solution. We support users in monitoring their websites and APIs. Puppeteer and Playwright (both supported) are browser automation tools that can be used for a variety of tasks. For testing, they really are about E2E/component testing, not unit testing.",
            "short_description": "Checkly is a high-programmability active monitoring solution.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Checkly\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Checkly\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.checklyhq.com"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T07:22:57Z"
        },
        {
            "id": "checkmarknetwork",
            "name": "CheckMarkNetwork",
            "slug": "checkmarknetwork",
            "url": "https://botcrawl.com/bots/checkmarknetwork/",
            "status": "active",
            "operator": "CheckMark Network",
            "company": "CheckMark Network",
            "family": "CheckMark Network",
            "category": "security",
            "kind": "unknown",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "CheckMarkNetwork",
                "Mozilla/5.0 (compatible; CheckMarkNetwork; +https://checkmarknetwork.com)",
                "CheckMark Bot"
            ],
            "primary_user_agent": "CheckMarkNetwork",
            "robots_token": "CheckMarkNetwork",
            "verified": "unknown",
            "respects_robots": "unknown",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "unknown",
            "source_type": "unknown",
            "short_description": "CheckMark Network brand protection crawler that tracks trademarks and counterfeit goods online.",
            "verification_method": "No official IP-range or reverse-DNS verification method is documented here. Match the exact user-agent and request behavior against the source documentation at https://checkmarknetwork.com.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: CheckMarkNetwork\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"CheckMarkNetwork\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://checkmarknetwork.com"
                }
            ],
            "updated_at": "2026-04-01T00:55:22Z"
        },
        {
            "id": "checkview",
            "name": "CheckView",
            "slug": "checkview",
            "url": "https://botcrawl.com/bots/checkview/",
            "status": "active",
            "operator": "CheckView.io",
            "company": "CheckView.io",
            "family": "CheckView.io",
            "category": "monitoring",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "CheckView",
                "checkview-24"
            ],
            "primary_user_agent": "CheckView",
            "robots_token": "CheckView",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "WordPress automated testing platform",
            "short_description": "WordPress automated testing platform",
            "verification_method": "Validate the published identifier against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or stronger verification when available.",
            "rules": {
                "robots": "User-agent: CheckView\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"CheckView\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://checkview.io/"
                }
            ],
            "last_verified": "2026-04-19",
            "last_checked": "2026-04-19",
            "updated_at": "2026-04-30T09:15:54Z"
        },
        {
            "id": "chinasospider",
            "name": "ChinasoSpider",
            "slug": "chinasospider",
            "url": "https://botcrawl.com/bots/chinasospider/",
            "status": "active",
            "operator": "Chinaso",
            "company": "Chinaso",
            "family": "Chinaso",
            "category": "search",
            "kind": "unknown",
            "purpose": "general-crawl",
            "identity_type": "unknown",
            "user_agents": [
                "ChinasoSpider"
            ],
            "primary_user_agent": "ChinasoSpider",
            "robots_token": "ChinasoSpider",
            "verified": "unknown",
            "respects_robots": "unknown",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "unknown",
            "source_type": "unknown",
            "short_description": "Chinaso crawler token surfaced in Baidu property robots.txt.",
            "verification_method": "This token is surfaced in a Baidu-owned robots.txt file, but the source used for this entry does not publish a dedicated verification method. Confirm behavior conservatively and do not rely on the user-agent string alone.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: ChinasoSpider\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"ChinasoSpider\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://map.baidu.com/robots.txt"
                }
            ],
            "updated_at": "2026-04-01T01:04:49Z"
        },
        {
            "id": "chrome-privacy-preserving-prefetch-proxy",
            "name": "Chrome Privacy Preserving Prefetch Proxy",
            "slug": "chrome-privacy-preserving-prefetch-proxy",
            "url": "https://botcrawl.com/bots/chrome-privacy-preserving-prefetch-proxy/",
            "status": "active",
            "operator": "Google",
            "company": "Google",
            "family": "Google",
            "category": "unknown",
            "kind": "unknown",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "chrome-privacy-preserving-prefetch-proxy",
                "Chrome Privacy Preserving Prefetch Proxy"
            ],
            "primary_user_agent": "chrome-privacy-preserving-prefetch-proxy",
            "robots_token": "chrome-privacy-preserving-prefetch-proxy",
            "verified": "yes",
            "respects_robots": "unknown",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "botsfyi-verified-listing",
            "common_use": "Preview",
            "short_description": "Chrome's privacy-preserving prefetch proxy service that fetches traffic-advice resources to enable prefetch hints.",
            "verification_method": "Public verified bot directory listing",
            "spoofing_risk": "Medium",
            "rules": {
                "cloudflare": "(http.user_agent contains \"chrome-privacy-preserving-prefetch-proxy\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://bots.fyi/d/chrome-privacy-preserving-prefetch-proxy"
                }
            ],
            "last_verified": "2026-04-19",
            "last_checked": "2026-04-19",
            "updated_at": "2026-04-30T09:15:30Z"
        },
        {
            "id": "chrome-lighthouse",
            "name": "Chrome-Lighthouse",
            "slug": "chrome-lighthouse",
            "url": "https://botcrawl.com/bots/chrome-lighthouse/",
            "status": "active",
            "operator": "Google",
            "company": "Google",
            "family": "Google",
            "category": "scraper",
            "kind": "crawler",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "Chrome-Lighthouse",
                "Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3464.0 Mobile Safari/537.36 Chrome-Lighthouse"
            ],
            "primary_user_agent": "Chrome-Lighthouse",
            "robots_token": "Chrome-Lighthouse",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Chrome-Lighthouse is an automated, open-source tool for auditing web page quality and does not operate as a traditional web crawler. It runs a series of audits against a given page to generate a report on performance and accessibility.",
            "short_description": "Chrome-Lighthouse is an automated, open-source tool for auditing web page quality and does not operate as a traditional web crawler.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Chrome-Lighthouse\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Chrome-Lighthouse\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://developers.google.com/web/tools/lighthouse"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:30Z"
        },
        {
            "id": "citibotsitecrawler",
            "name": "CitibotSiteCrawler",
            "slug": "citibotsitecrawler",
            "url": "https://botcrawl.com/bots/citibotsitecrawler/",
            "status": "active",
            "operator": "Citibot",
            "company": "Citibot",
            "family": "CitibotSiteCrawler",
            "category": "ai",
            "kind": "crawler",
            "purpose": "general-crawl",
            "identity_type": "unknown",
            "user_agents": [
                "CitibotSiteCrawler",
                "CitibotSiteCrawler/1.0"
            ],
            "primary_user_agent": "CitibotSiteCrawler",
            "robots_token": "CitibotSiteCrawler",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "CitibotSiteCrawler collects public data from government websites to power Citibot’s AI civic engagement tools.",
            "short_description": "CitibotSiteCrawler collects public data from government websites to power Citibot’s AI civic engagement tools.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: CitibotSiteCrawler\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"CitibotSiteCrawler\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.citibot.io/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:30Z"
        },
        {
            "id": "claude-searchbot",
            "name": "Claude-SearchBot",
            "slug": "claude-searchbot",
            "url": "https://botcrawl.com/bots/claude-searchbot/",
            "status": "active",
            "operator": "Anthropic",
            "company": "Anthropic",
            "family": "Anthropic",
            "category": "search",
            "kind": "crawler",
            "purpose": "search",
            "identity_type": "official-documented",
            "user_agents": [
                "Claude-SearchBot",
                "Anthropic search bot"
            ],
            "primary_user_agent": "Claude-SearchBot",
            "robots_token": "Claude-SearchBot",
            "verified": "yes",
            "respects_robots": "yes",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "high",
            "source_type": "official",
            "short_description": "Anthropic crawler for Claude search indexing and search quality.",
            "verification_method": "Anthropic does not currently publish dedicated IP ranges; use the official token and robots.txt controls.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: Claude-SearchBot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Claude-SearchBot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://privacy.claude.com/en/articles/8896518-does-anthropic-crawl-data-from-the-web-and-how-can-site-owners-block-the-crawler"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:30Z"
        },
        {
            "id": "claude-user",
            "name": "Claude-User",
            "slug": "claude-user",
            "url": "https://botcrawl.com/bots/claude-user/",
            "status": "active",
            "operator": "Anthropic",
            "company": "Anthropic",
            "family": "Anthropic",
            "category": "ai",
            "kind": "fetcher",
            "purpose": "user-triggered",
            "identity_type": "official-documented",
            "user_agents": [
                "Claude-User",
                "Anthropic user fetcher"
            ],
            "primary_user_agent": "Claude-User",
            "robots_token": "Claude-User",
            "verified": "yes",
            "respects_robots": "yes",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "high",
            "source_type": "official",
            "short_description": "User-triggered Anthropic fetcher for Claude browsing.",
            "verification_method": "Anthropic does not currently publish dedicated IP ranges; use the official token and robots.txt controls.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: Claude-User\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Claude-User\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://privacy.claude.com/en/articles/8896518-does-anthropic-crawl-data-from-the-web-and-how-can-site-owners-block-the-crawler"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:30Z"
        },
        {
            "id": "claudebot",
            "name": "ClaudeBot",
            "slug": "claudebot",
            "url": "https://botcrawl.com/bots/claudebot/",
            "status": "active",
            "operator": "Anthropic",
            "company": "Anthropic",
            "family": "Anthropic",
            "category": "ai",
            "kind": "crawler",
            "purpose": "training",
            "identity_type": "official-documented",
            "user_agents": [
                "ClaudeBot",
                "Anthropic training crawler"
            ],
            "primary_user_agent": "ClaudeBot",
            "robots_token": "ClaudeBot",
            "verified": "yes",
            "respects_robots": "yes",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "high",
            "source_type": "official",
            "short_description": "Anthropic crawler used for model-development data collection.",
            "verification_method": "Anthropic says it does not currently publish IP ranges and may use service-provider public IPs; rely on the official bot token and robots.txt controls.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: ClaudeBot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"ClaudeBot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://privacy.claude.com/en/articles/8896518-does-anthropic-crawl-data-from-the-web-and-how-can-site-owners-block-the-crawler"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:30Z"
        },
        {
            "id": "clearscopebot",
            "name": "ClearscopeBot",
            "slug": "clearscopebot",
            "url": "https://botcrawl.com/bots/clearscopebot/",
            "status": "active",
            "operator": "Mushi Labs",
            "company": "Mushi Labs",
            "family": "Mushi Labs",
            "category": "scraper",
            "kind": "crawler",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "Clearscopebot",
                "Mozilla/5.0 (compatible; Clearscopebot/1.0)",
                "Clearscopebot/"
            ],
            "primary_user_agent": "Clearscopebot",
            "robots_token": "Clearscopebot",
            "verified": "yes",
            "respects_robots": "yes",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Clearscope is an AI-driven SEO content optimization platform developed by Mushi Labs. It assists content creators, marketers, and SEO professionals in producing high-quality, search-optimized content by providing real-time keyword recommendations, content grading, and insights into search intent. By analyzing top-performing content, Clearscope offers actionable suggestions to enhance content relevance and visibility in search engine results.",
            "short_description": "Clearscope is an AI-driven SEO content optimization platform developed by Mushi Labs.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Clearscopebot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Clearscopebot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.clearscope.io/support/articles/clearscopebot"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:30Z"
        },
        {
            "id": "cledara-saas-management-agent",
            "name": "Cledara SaaS Management Agent",
            "slug": "cledara-saas-management-agent",
            "url": "https://botcrawl.com/bots/cledara-saas-management-agent/",
            "status": "active",
            "operator": "Cledara",
            "company": "Cledara",
            "family": "Cledara",
            "category": "ai",
            "kind": "agent",
            "purpose": "user-triggered",
            "identity_type": "unknown",
            "user_agents": [
                "CledaraBot",
                "CledaraBot/1.0"
            ],
            "primary_user_agent": "CledaraBot",
            "robots_token": "CledaraBot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Cledara’s agent automates customer-approved SaaS admin tasks, including invoice collection and user management.",
            "short_description": "Cledara’s agent automates customer-approved SaaS admin tasks, including invoice collection and user management.",
            "verification_method": "Validate the published identifier against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or stronger verification when available.",
            "rules": {
                "robots": "User-agent: CledaraBot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"CledaraBot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.cledara.com"
                }
            ],
            "last_verified": "2026-04-19",
            "last_checked": "2026-04-19",
            "updated_at": "2026-04-30T09:15:54Z"
        },
        {
            "id": "clickagy",
            "name": "Clickagy",
            "slug": "clickagy",
            "url": "https://botcrawl.com/bots/clickagy/",
            "status": "active",
            "operator": "Clickagy",
            "company": "Clickagy",
            "family": "Clickagy",
            "category": "scraper",
            "kind": "crawler",
            "purpose": "general-crawl",
            "identity_type": "unknown",
            "user_agents": [
                "Clickagy Intelligence Bot v2"
            ],
            "primary_user_agent": "Clickagy Intelligence Bot v2",
            "robots_token": "Clickagy Intelligence Bot v2",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "The Clickagy Intelligence Bot is an ad verification bot for Clickagy.",
            "short_description": "The Clickagy Intelligence Bot is an ad verification bot for Clickagy.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Clickagy Intelligence Bot v2\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Clickagy Intelligence Bot v2\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.clickagy.com/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:30Z"
        },
        {
            "id": "clickagy-intelligence-bot",
            "name": "Clickagy",
            "slug": "clickagy-intelligence-bot",
            "url": "https://botcrawl.com/bots/clickagy-intelligence-bot/",
            "status": "active",
            "operator": "Clickagy",
            "company": "Clickagy",
            "family": "Clickagy",
            "category": "scraper",
            "kind": "crawler",
            "purpose": "general-crawl",
            "identity_type": "unknown",
            "user_agents": [
                "Clickagy Intelligence Bot v2"
            ],
            "primary_user_agent": "Clickagy Intelligence Bot v2",
            "robots_token": "Clickagy Intelligence Bot v2",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "unknown",
            "common_use": "The Clickagy Intelligence Bot is an ad verification bot for Clickagy.",
            "short_description": "The Clickagy Intelligence Bot is an ad verification bot for Clickagy.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Clickagy Intelligence Bot v2\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Clickagy Intelligence Bot v2\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.clickagy.com/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T07:22:58Z"
        },
        {
            "id": "cloudflare-browser-rendering",
            "name": "Cloudflare Browser Rendering",
            "slug": "cloudflare-browser-rendering",
            "url": "https://botcrawl.com/bots/cloudflare-browser-rendering/",
            "status": "active",
            "operator": "Cloudflare",
            "company": "Cloudflare",
            "family": "Cloudflare",
            "category": "ai",
            "kind": "unknown",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "Cloudflare"
            ],
            "primary_user_agent": "Cloudflare",
            "robots_token": "Cloudflare",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "unknown",
            "source_type": "unknown",
            "short_description": "Renders web pages in headless browsers for Cloudflare customers. Used for browser automation such as screenshots and PDF generation.",
            "verification_method": "Cloudflare verified bot directory mirror; verify with operator documentation, IP validation, or Web Bot Auth where available",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: Cloudflare\nDisallow: /"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://cloudflare.com"
                }
            ],
            "updated_at": "2026-03-31T13:55:20Z"
        },
        {
            "id": "cloudflare-crawler",
            "name": "Cloudflare Crawler",
            "slug": "cloudflare-crawler",
            "url": "https://botcrawl.com/bots/cloudflare-crawler/",
            "status": "active",
            "operator": "Cloudflare",
            "company": "Cloudflare",
            "family": "Cloudflare",
            "category": "ai",
            "kind": "fetcher",
            "purpose": "site-owner-fetch",
            "identity_type": "cloudflare-verified-bot",
            "user_agents": [
                "CloudflareBrowserRenderingCrawler/1.0",
                "CloudflareBrowserRenderingCrawler"
            ],
            "primary_user_agent": "CloudflareBrowserRenderingCrawler/1.0",
            "robots_token": "CloudflareBrowserRenderingCrawler/1.0",
            "verified": "yes",
            "respects_robots": "yes",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "high",
            "source_type": "verified-directory",
            "short_description": "The Cloudflare Crawler is a well-behaved crawler used by Cloudflare customers via the Browser Rendering /crawl endpoint.",
            "verification_method": "Listed by Cloudflare Radar as a verified bot; match the published robots.txt token and user-agent pattern.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: CloudflareBrowserRenderingCrawler/1.0\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"CloudflareBrowserRenderingCrawler/1.0\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://radar.cloudflare.com/bots/directory/cloudflare-browser-rendering-crawler"
                }
            ],
            "last_verified": "2026-04-01",
            "updated_at": "2026-04-01T23:15:13Z"
        },
        {
            "id": "cloudflare-csup",
            "name": "Cloudflare CSUP",
            "slug": "cloudflare-csup",
            "url": "https://botcrawl.com/bots/cloudflare-csup/",
            "status": "active",
            "operator": "Cloudflare",
            "company": "Cloudflare",
            "family": "Cloudflare",
            "category": "scraper",
            "kind": "crawler",
            "purpose": "general-crawl",
            "identity_type": "cloudflare-verified-bot",
            "user_agents": [
                "cloudflare-csup"
            ],
            "primary_user_agent": "cloudflare-csup",
            "robots_token": "cloudflare-csup",
            "verified": "yes",
            "respects_robots": "yes",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "high",
            "source_type": "verified-directory",
            "short_description": "Cloudflare CSUP is used by Cloudflare customer support for diagnostic purposes. It is not a general web crawler and is used to investigate technical issues with customer websites.",
            "verification_method": "Listed by Cloudflare Radar as a verified bot. Follow the operator's own documentation when additional verification details are available.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: cloudflare-csup\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"cloudflare-csup\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://radar.cloudflare.com/bots/directory/cloudflarecsup"
                }
            ],
            "last_verified": "2026-04-01",
            "updated_at": "2026-04-01T23:15:01Z"
        },
        {
            "id": "cloudflare-custom-error-page-crawler",
            "name": "Cloudflare Custom Error Page Crawler",
            "slug": "cloudflare-custom-error-page-crawler",
            "url": "https://botcrawl.com/bots/cloudflare-custom-error-page-crawler/",
            "status": "active",
            "operator": "Cloudflare",
            "company": "Cloudflare",
            "family": "Cloudflare",
            "category": "search",
            "kind": "fetcher",
            "purpose": "site-owner-fetch",
            "identity_type": "cloudflare-verified-bot",
            "user_agents": [
                "Collapsify/",
                "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10; rv:60.0) Gecko/20100101 Firefox/60.0 Collapsify/0.6.0 node/v10.16.0"
            ],
            "primary_user_agent": "Collapsify/",
            "robots_token": "Collapsify/",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "high",
            "source_type": "verified-directory",
            "short_description": "Cloudflare internal service that crawls customer error pages in order to serve them directly from Cloudflare's edge network.",
            "verification_method": "Listed by Cloudflare Radar as a verified bot; match the published robots.txt token and user-agent pattern.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: Collapsify/\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Collapsify/\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://radar.cloudflare.com/bots/directory/cloudflare-collapsify"
                }
            ],
            "last_verified": "2026-04-01",
            "updated_at": "2026-04-01T23:15:29Z"
        },
        {
            "id": "cloudflare-custom-hostname-verification",
            "name": "Cloudflare Custom Hostname Verification",
            "slug": "cloudflare-custom-hostname-verification",
            "url": "https://botcrawl.com/bots/cloudflare-custom-hostname-verification/",
            "status": "active",
            "operator": "Cloudflare",
            "company": "Cloudflare",
            "family": "Cloudflare",
            "category": "search",
            "kind": "fetcher",
            "purpose": "site-owner-fetch",
            "identity_type": "cloudflare-verified-bot",
            "user_agents": [
                "Cloudflare Custom Hostname Verification"
            ],
            "primary_user_agent": "Cloudflare Custom Hostname Verification",
            "robots_token": "Cloudflare Custom Hostname Verification",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "high",
            "source_type": "verified-directory",
            "short_description": "Cloudflare Custom Hostname Verification service.",
            "verification_method": "Listed by Cloudflare Radar as a verified bot; match the published robots.txt token and user-agent pattern.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: Cloudflare Custom Hostname Verification\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Cloudflare Custom Hostname Verification\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://radar.cloudflare.com/bots/directory/cloudflare-chv"
                }
            ],
            "last_verified": "2026-04-01",
            "updated_at": "2026-04-01T23:15:29Z"
        },
        {
            "id": "cloudflare-diagnostics",
            "name": "Cloudflare Diagnostics",
            "slug": "cloudflare-diagnostics",
            "url": "https://botcrawl.com/bots/cloudflare-diagnostics/",
            "status": "active",
            "operator": "Cloudflare",
            "company": "Cloudflare",
            "family": "Cloudflare",
            "category": "monitoring",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "cloudflare-verified-bot",
            "user_agents": [
                "CloudflareDiagnostics",
                "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36 CloudflareDiagnostics/1.0"
            ],
            "primary_user_agent": "CloudflareDiagnostics",
            "robots_token": "CloudflareDiagnostics",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "high",
            "source_type": "verified-directory",
            "short_description": "Cloudflare system bot that performs health checks and diagnostic tests",
            "verification_method": "Listed by Cloudflare Radar as a verified bot; match the published robots.txt token and user-agent pattern.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: CloudflareDiagnostics\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"CloudflareDiagnostics\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://radar.cloudflare.com/bots/directory/cloudflare-diagnostics"
                }
            ],
            "last_verified": "2026-04-01",
            "updated_at": "2026-04-01T23:15:29Z"
        },
        {
            "id": "cloudflare-digicert-dcv",
            "name": "Cloudflare Digicert DCV",
            "slug": "cloudflare-digicert-dcv",
            "url": "https://botcrawl.com/bots/cloudflare-digicert-dcv/",
            "status": "active",
            "operator": "Cloudflare",
            "company": "Cloudflare",
            "family": "Cloudflare",
            "category": "search",
            "kind": "fetcher",
            "purpose": "site-owner-fetch",
            "identity_type": "official-documented",
            "user_agents": [
                "Digicert DCV"
            ],
            "primary_user_agent": "Digicert DCV",
            "robots_token": "Digicert DCV",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "high",
            "source_type": "official",
            "short_description": "Cloudflare Digicert DCV service.",
            "verification_method": "Listed by Cloudflare Radar as a verified bot; match the published robots.txt token and user-agent pattern. Cross-check the operator documentation linked in the source field when available.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: Digicert DCV\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Digicert DCV\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://cloudflare.com"
                }
            ],
            "last_verified": "2026-04-01",
            "updated_at": "2026-04-01T23:15:29Z"
        },
        {
            "id": "cloudflare-healthchecks",
            "name": "Cloudflare Healthchecks",
            "slug": "cloudflare-healthchecks",
            "url": "https://botcrawl.com/bots/cloudflare-healthchecks/",
            "status": "active",
            "operator": "Cloudflare",
            "company": "Cloudflare",
            "family": "Cloudflare",
            "category": "search",
            "kind": "fetcher",
            "purpose": "site-owner-fetch",
            "identity_type": "cloudflare-verified-bot",
            "user_agents": [
                "Cloudflare-Healthchecks"
            ],
            "primary_user_agent": "Cloudflare-Healthchecks",
            "robots_token": "Cloudflare-Healthchecks",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "high",
            "source_type": "verified-directory",
            "short_description": "Cloudflare Healthchecks service",
            "verification_method": "Listed by Cloudflare Radar as a verified bot; match the published robots.txt token and user-agent pattern.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: Cloudflare-Healthchecks\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Cloudflare-Healthchecks\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://radar.cloudflare.com/bots/directory/cloudflare-health-checks"
                }
            ],
            "last_verified": "2026-04-01",
            "updated_at": "2026-04-01T23:15:29Z"
        },
        {
            "id": "cloudflare-observatory-curly",
            "name": "Cloudflare Observatory Curly",
            "slug": "cloudflare-observatory-curly",
            "url": "https://botcrawl.com/bots/cloudflare-observatory-curly/",
            "status": "active",
            "operator": "Cloudflare",
            "company": "Cloudflare",
            "family": "Cloudflare",
            "category": "monitoring",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "official-documented",
            "user_agents": [
                "curly-",
                "curly-*"
            ],
            "primary_user_agent": "curly-",
            "robots_token": "curly-",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "high",
            "source_type": "official",
            "short_description": "Synthetic network probes for HTTP timing measurements (TCP, TLS, TTFB). Measures connection timing for customer-owned URLs.",
            "verification_method": "Listed by Cloudflare Radar as a verified bot; match the published robots.txt token and user-agent pattern. Cross-check the operator documentation linked in the source field when available.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: curly-\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"curly-\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://developers.cloudflare.com/speed/observatory/"
                }
            ],
            "last_verified": "2026-04-01",
            "updated_at": "2026-04-01T23:15:29Z"
        },
        {
            "id": "cloudflare-prefetch",
            "name": "Cloudflare Prefetch",
            "slug": "cloudflare-prefetch",
            "url": "https://botcrawl.com/bots/cloudflare-prefetch/",
            "status": "active",
            "operator": "Cloudflare",
            "company": "Cloudflare",
            "family": "Cloudflare",
            "category": "search",
            "kind": "fetcher",
            "purpose": "site-owner-fetch",
            "identity_type": "official-documented",
            "user_agents": [
                "CloudFlare-Prefetch",
                "Mozilla/5.0 (compatible; CloudFlare-Prefetch/0.1; +http://www.cloudflare.com/)"
            ],
            "primary_user_agent": "CloudFlare-Prefetch",
            "robots_token": "CloudFlare-Prefetch",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "high",
            "source_type": "official",
            "short_description": "URL prefetching means that Cloudflare pre-populates the cache with content a visitor is likely to request next. This setting leads to a higher cache hit rate...",
            "verification_method": "Listed by Cloudflare Radar as a verified bot; match the published robots.txt token and user-agent pattern. Cross-check the operator documentation linked in the source field when available.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: CloudFlare-Prefetch\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"CloudFlare-Prefetch\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.cloudflare.com/"
                }
            ],
            "last_verified": "2026-04-01",
            "updated_at": "2026-04-01T23:15:29Z"
        },
        {
            "id": "cloudflare-purge",
            "name": "Cloudflare Purge",
            "slug": "cloudflare-purge",
            "url": "https://botcrawl.com/bots/cloudflare-purge/",
            "status": "active",
            "operator": "Cloudflare",
            "company": "Cloudflare",
            "family": "Cloudflare",
            "category": "monitoring",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "cloudflare-verified-bot",
            "user_agents": [
                "Cloudflare-Purge",
                "Cloudflare-Purge/2.0",
                "Cloudflare purge bot"
            ],
            "primary_user_agent": "Cloudflare-Purge",
            "robots_token": "Cloudflare-Purge/",
            "verified": "yes",
            "respects_robots": "yes",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "high",
            "source_type": "verified-directory",
            "short_description": "Cloudflare bot for the purge service.",
            "verification_method": "Listed by Cloudflare Radar as a verified bot.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: Cloudflare-Purge/\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Cloudflare-Purge\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://radar.cloudflare.com/bots/directory/cloudflare-purge"
                }
            ],
            "last_verified": "2026-04-01",
            "updated_at": "2026-04-01T23:15:06Z"
        },
        {
            "id": "cloudflare-radar-url-scanner",
            "name": "Cloudflare Radar URL Scanner",
            "slug": "cloudflare-radar-url-scanner",
            "url": "https://botcrawl.com/bots/cloudflare-radar-url-scanner/",
            "status": "active",
            "operator": "Cloudflare",
            "company": "Cloudflare",
            "family": "Cloudflare",
            "category": "security",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "cloudflare-verified-bot",
            "user_agents": [
                "Cloudflare Radar URL Scanner"
            ],
            "primary_user_agent": "Cloudflare Radar URL Scanner",
            "robots_token": "Cloudflare Radar URL Scanner",
            "verified": "yes",
            "respects_robots": "unknown",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "high",
            "source_type": "verified-directory",
            "short_description": "Cloudflare Radar URL Scanner.",
            "verification_method": "Listed by Cloudflare Radar as a verified bot. Follow the operator's own documentation when additional verification details are available.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: Cloudflare Radar URL Scanner\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Cloudflare Radar URL Scanner\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://radar.cloudflare.com/bots/directory/cloudflare-radar-url-scanner"
                }
            ],
            "last_verified": "2026-04-01",
            "updated_at": "2026-04-01T23:15:01Z"
        },
        {
            "id": "cloudflare-speedtest",
            "name": "Cloudflare SpeedTest",
            "slug": "cloudflare-speedtest",
            "url": "https://botcrawl.com/bots/cloudflare-speedtest/",
            "status": "active",
            "operator": "Cloudflare",
            "company": "Cloudflare",
            "family": "Cloudflare",
            "category": "monitoring",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "cloudflare-verified-bot",
            "user_agents": [
                "Cloudflare SpeedTest",
                "Cloudflare speed test bot"
            ],
            "primary_user_agent": "Cloudflare SpeedTest",
            "robots_token": "Cloudflare SpeedTest",
            "verified": "yes",
            "respects_robots": "yes",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "high",
            "source_type": "verified-directory",
            "short_description": "Cloudflare bot for the SpeedTest service.",
            "verification_method": "Listed by Cloudflare Radar as a verified bot.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: Cloudflare SpeedTest\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Cloudflare SpeedTest\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://radar.cloudflare.com/bots/directory/cloudflare-speedtest"
                }
            ],
            "last_verified": "2026-04-01",
            "updated_at": "2026-04-01T23:15:06Z"
        },
        {
            "id": "cloudflare-ssldetector",
            "name": "Cloudflare SSLDetector",
            "slug": "cloudflare-ssldetector",
            "url": "https://botcrawl.com/bots/cloudflare-ssldetector/",
            "status": "active",
            "operator": "Cloudflare",
            "company": "Cloudflare",
            "family": "Cloudflare",
            "category": "security",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "cloudflare-verified-bot",
            "user_agents": [
                "Cloudflare-SSLDetector",
                "Cloudflare SSL detector"
            ],
            "primary_user_agent": "Cloudflare-SSLDetector",
            "robots_token": "Cloudflare-SSLDetector",
            "verified": "yes",
            "respects_robots": "yes",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "high",
            "source_type": "verified-directory",
            "short_description": "Cloudflare bot for SSL/TLS detection checks.",
            "verification_method": "Listed by Cloudflare Radar as a verified bot.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: Cloudflare-SSLDetector\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Cloudflare-SSLDetector\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://radar.cloudflare.com/bots/directory/cloudflare-ssl-detector"
                }
            ],
            "last_verified": "2026-04-01",
            "updated_at": "2026-04-01T23:15:06Z"
        },
        {
            "id": "cloudflare-stream-webhook",
            "name": "Cloudflare Stream Webhook",
            "slug": "cloudflare-stream-webhook",
            "url": "https://botcrawl.com/bots/cloudflare-stream-webhook/",
            "status": "active",
            "operator": "Cloudflare",
            "company": "Cloudflare",
            "family": "Cloudflare",
            "category": "monitoring",
            "kind": "agent",
            "purpose": "site-owner-fetch",
            "identity_type": "cloudflare-verified-bot",
            "user_agents": [
                "Cloudflare Stream Webhook",
                "Cloudflare stream webhook bot"
            ],
            "primary_user_agent": "Cloudflare Stream Webhook",
            "robots_token": "Cloudflare Stream Webhook",
            "verified": "yes",
            "respects_robots": "yes",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "high",
            "source_type": "verified-directory",
            "short_description": "Cloudflare bot for the Stream Webhook service.",
            "verification_method": "Listed by Cloudflare Radar as a verified bot.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: Cloudflare Stream Webhook\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Cloudflare Stream Webhook\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://radar.cloudflare.com/bots/directory/cloudflare-stream-webhook"
                }
            ],
            "last_verified": "2026-04-01",
            "updated_at": "2026-04-01T23:15:06Z"
        },
        {
            "id": "cloudflare-validator",
            "name": "Cloudflare Validator",
            "slug": "cloudflare-validator",
            "url": "https://botcrawl.com/bots/cloudflare-validator/",
            "status": "active",
            "operator": "Cloudflare",
            "company": "Cloudflare",
            "family": "Cloudflare",
            "category": "scraper",
            "kind": "crawler",
            "purpose": "general-crawl",
            "identity_type": "cloudflare-verified-bot",
            "user_agents": [
                "Cloudflare-Validator",
                "Cloudflare-Validator/1.0"
            ],
            "primary_user_agent": "Cloudflare-Validator",
            "robots_token": "Cloudflare-Validator",
            "verified": "yes",
            "respects_robots": "yes",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "high",
            "source_type": "verified-directory",
            "short_description": "Cloudflare Validator makes requests to verify IPs for the Cloudflare Bots Directory.",
            "verification_method": "Listed by Cloudflare Radar as a verified bot. Follow the operator's own documentation when additional verification details are available.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: Cloudflare-Validator\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Cloudflare-Validator\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://radar.cloudflare.com/bots/directory/cloudflare-validator"
                }
            ],
            "last_verified": "2026-04-01",
            "updated_at": "2026-04-01T23:15:01Z"
        },
        {
            "id": "cloudflare-autorag",
            "name": "Cloudflare-AutoRAG",
            "slug": "cloudflare-autorag",
            "url": "https://botcrawl.com/bots/cloudflare-autorag/",
            "status": "active",
            "operator": "Cloudflare",
            "company": "Cloudflare",
            "family": "Cloudflare",
            "category": "ai",
            "kind": "unknown",
            "purpose": "indexing",
            "identity_type": "unknown",
            "user_agents": [
                "Cloudflare-AutoRAG",
                "Mozilla/5.0 (compatible",
                "+https://developers.cloudflare.com/autorag/)",
                "Cloudflare AutoRAG Bot"
            ],
            "primary_user_agent": "Cloudflare-AutoRAG",
            "robots_token": "Cloudflare-AutoRAG",
            "verified": "yes",
            "respects_robots": "yes",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "unknown",
            "source_type": "unknown",
            "short_description": "Cloudflare AutoRAG web crawler that indexes websites for Cloudflare AI Search.",
            "verification_method": "Verify the exact user-agent against Cloudflare's published bot documentation and confirmed ownership details.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: Cloudflare-AutoRAG\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Cloudflare-AutoRAG\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://developers.cloudflare.com/autorag/"
                }
            ],
            "updated_at": "2026-04-01T00:55:18Z"
        },
        {
            "id": "cloudflare-radar",
            "name": "Cloudflare-Radar",
            "slug": "cloudflare-radar",
            "url": "https://botcrawl.com/bots/cloudflare-radar/",
            "status": "active",
            "operator": "Cloudflare",
            "company": "Cloudflare",
            "family": "Cloudflare",
            "category": "monitoring",
            "kind": "unknown",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "Cloudflare-Radar",
                "Mozilla/5.0 (compatible",
                "+https://radar.cloudflare.com)",
                "Cloudflare Radar Bot"
            ],
            "primary_user_agent": "Cloudflare-Radar",
            "robots_token": "Cloudflare-Radar",
            "verified": "yes",
            "respects_robots": "yes",
            "risk": "neutral",
            "recommended_action": "allow",
            "confidence": "unknown",
            "source_type": "unknown",
            "short_description": "Cloudflare Radar crawler that collects data for Cloudflare's internet intelligence platform.",
            "verification_method": "Verify the exact user-agent against Cloudflare's published bot documentation and confirmed ownership details.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: Cloudflare-Radar\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Cloudflare-Radar\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://radar.cloudflare.com"
                }
            ],
            "updated_at": "2026-04-01T00:55:35Z"
        },
        {
            "id": "cloudflare-traffic-manager",
            "name": "Cloudflare-Traffic-Manager",
            "slug": "cloudflare-traffic-manager",
            "url": "https://botcrawl.com/bots/cloudflare-traffic-manager/",
            "status": "active",
            "operator": "Cloudflare",
            "company": "Cloudflare",
            "family": "Cloudflare",
            "category": "monitoring",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "cloudflare-verified-bot",
            "user_agents": [
                "Cloudflare-Traffic-Manager",
                "Cloudflare traffic manager bot"
            ],
            "primary_user_agent": "Cloudflare-Traffic-Manager",
            "robots_token": "Cloudflare-Traffic-Manager",
            "verified": "yes",
            "respects_robots": "yes",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "high",
            "source_type": "verified-directory",
            "short_description": "Cloudflare bot for the Traffic Manager service.",
            "verification_method": "Listed by Cloudflare Radar as a verified bot.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: Cloudflare-Traffic-Manager\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Cloudflare-Traffic-Manager\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://radar.cloudflare.com/bots/directory/cloudflare-traffic-manager"
                }
            ],
            "last_verified": "2026-04-01",
            "updated_at": "2026-04-01T23:15:06Z"
        },
        {
            "id": "cloudtrellis",
            "name": "Cloudtrellis",
            "slug": "cloudtrellis",
            "url": "https://botcrawl.com/bots/cloudtrellis/",
            "status": "active",
            "operator": "Cloudtrellis",
            "company": "Cloudtrellis",
            "family": "Cloudtrellis",
            "category": "monitoring",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "Cloudtrellis",
                "Cloudtrellis/X.X",
                "Cloudtrellis/1.0",
                "Mozilla/5.0 AppleWebKit/537.36 (KHTML",
                "like Gecko",
                "compatible",
                "+https://www.cloudtrellis.com/robot) Chrome/126.0.6478.126 Safari/537.36",
                "Cloudtrellis/1.0) Chrome/126.0.6478.126 Safari/537.36"
            ],
            "primary_user_agent": "Cloudtrellis",
            "robots_token": "Cloudtrellis",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Cloudtrellis automatically scans your entire site for broken links, accessibility issues, and potential SEO improvements.",
            "short_description": "Cloudtrellis automatically scans your entire site for broken links, accessibility issues, and potential SEO improvements.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Cloudtrellis\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Cloudtrellis\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.cloudtrellis.com/robot"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:31Z"
        },
        {
            "id": "cludo",
            "name": "Cludo",
            "slug": "cludo",
            "url": "https://botcrawl.com/bots/cludo/",
            "status": "active",
            "operator": "Cludo",
            "company": "Cludo",
            "family": "Cludo",
            "category": "monitoring",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "cludo.com",
                "Mozilla/5.0 (Windows NT 10.0",
                "cludo.com bot) AppleWebKit/537.36 (KHTML",
                "like Gecko) Chrome/61.0.3133.0 Safari/537.36",
                "Cludobot"
            ],
            "primary_user_agent": "cludo.com",
            "robots_token": "cludo.com",
            "verified": "yes",
            "respects_robots": "yes",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Cludobot crawls websites to facilitate and provide search and analytics solutions for its customers.",
            "short_description": "Cludobot crawls websites to facilitate and provide search and analytics solutions for its customers.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: cludo.com\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"cludo.com\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.cludo.com/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:31Z"
        },
        {
            "id": "coc-coc",
            "name": "Cốc Cốc",
            "slug": "coc-coc",
            "url": "https://botcrawl.com/bots/coc-coc/",
            "status": "active",
            "operator": "Coccoc",
            "company": "Coccoc",
            "family": "Coccoc",
            "category": "search",
            "kind": "crawler",
            "purpose": "indexing",
            "identity_type": "unknown",
            "user_agents": [
                "coccocbot",
                "Mozilla/5.0 (compatible",
                "coccocbot-fast/1.0",
                "+http://help.coccoc.com/searchengine)"
            ],
            "primary_user_agent": "coccocbot",
            "robots_token": "coccocbot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Coccocbot scrapes websites that are requested by the Vietnamese search engine Coc Coc.",
            "short_description": "Coccocbot scrapes websites that are requested by the Vietnamese search engine Coc Coc.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: coccocbot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"coccocbot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "http://help.coccoc.com/searchengine"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:32Z"
        },
        {
            "id": "coccocbot-fast",
            "name": "coccocbot-fast",
            "slug": "coccocbot-fast",
            "url": "https://botcrawl.com/bots/coccocbot-fast/",
            "status": "active",
            "operator": "Coccoc",
            "company": "Coccoc",
            "family": "Coccoc",
            "category": "search",
            "kind": "crawler",
            "purpose": "search",
            "identity_type": "unknown",
            "user_agents": [
                "coccocbot-fast",
                "Mozilla/5.0 (compatible",
                "coccocbot-fast/1.0",
                "+http://help.coccoc.com/searchengine)"
            ],
            "primary_user_agent": "coccocbot-fast",
            "robots_token": "coccocbot",
            "verified": "yes",
            "respects_robots": "unknown",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "high",
            "source_type": "directory",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: coccocbot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"coccocbot-fast\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "http://help.coccoc.com/searchengine"
                }
            ],
            "last_verified": "2026-04-01",
            "updated_at": "2026-04-30T09:15:31Z"
        },
        {
            "id": "cognitiveseo-crawler",
            "name": "cognitiveSEO Crawler",
            "slug": "cognitiveseo-crawler",
            "url": "https://botcrawl.com/bots/cognitiveseo-crawler/",
            "status": "active",
            "operator": "cognitiveSEO Internet Marketing Tools",
            "company": "cognitiveSEO Internet Marketing Tools",
            "family": "cognitiveSEO Internet Marketing Tools",
            "category": "scraper",
            "kind": "crawler",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "James BOT",
                "Mozilla/5.0 (X11",
                "Linux x86_64) AppleWebKit/537.36 (KHTML",
                "like Gecko) Chrome/76.0.3809.87 Safari/537.36 - James BOT - WebCrawler http://cognitiveseo.com/bot.html"
            ],
            "primary_user_agent": "James BOT",
            "robots_token": "James BOT",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "CognitiveSEO is an SEO toolset that crawls the web and analyzes links.",
            "short_description": "CognitiveSEO is an SEO toolset that crawls the web and analyzes links.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: James BOT\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"James BOT\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "http://cognitiveseo.com"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:31Z"
        },
        {
            "id": "cohere-ai",
            "name": "cohere-ai",
            "slug": "cohere-ai",
            "url": "https://botcrawl.com/bots/cohere-ai/",
            "status": "active",
            "operator": "Cohere",
            "company": "Cohere",
            "family": "Cohere",
            "category": "ai",
            "kind": "crawler",
            "purpose": "training",
            "identity_type": "unknown",
            "user_agents": [
                "cohere-ai",
                "Cohere AI crawler"
            ],
            "primary_user_agent": "cohere-ai",
            "robots_token": "cohere-ai",
            "verified": "yes",
            "respects_robots": "unknown",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "short_description": "Cohere-associated AI crawler identifier.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or reverse DNS/IP verification when available.",
            "rules": {
                "robots": "User-agent: cohere-ai\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"cohere-ai\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://crawlercheck.com/directory/ai-bots/cohere-ai"
                }
            ],
            "last_verified": "2026-04-29",
            "last_checked": "2026-04-29",
            "updated_at": "2026-04-30T09:15:55Z"
        },
        {
            "id": "cohere-training-data-crawler",
            "name": "cohere-training-data-crawler",
            "slug": "cohere-training-data-crawler",
            "url": "https://botcrawl.com/bots/cohere-training-data-crawler/",
            "status": "active",
            "operator": "Cohere",
            "company": "Cohere",
            "family": "Cohere",
            "category": "ai",
            "kind": "crawler",
            "purpose": "training",
            "identity_type": "unknown",
            "user_agents": [
                "cohere-training-data-crawler",
                "Cohere training data crawler"
            ],
            "primary_user_agent": "cohere-training-data-crawler",
            "robots_token": "cohere-training-data-crawler",
            "verified": "yes",
            "respects_robots": "unknown",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "short_description": "Cohere-associated training-data crawler identifier.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or reverse DNS/IP verification when available.",
            "rules": {
                "robots": "User-agent: cohere-training-data-crawler\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"cohere-training-data-crawler\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://crawlercheck.com/directory/ai-bots/cohere-training-data-crawler"
                }
            ],
            "last_verified": "2026-04-29",
            "last_checked": "2026-04-29",
            "updated_at": "2026-04-30T09:15:55Z"
        },
        {
            "id": "coinbase-webhooks",
            "name": "Coinbase Webhooks",
            "slug": "coinbase-webhooks",
            "url": "https://botcrawl.com/bots/coinbase-webhooks/",
            "status": "active",
            "operator": "Coinbase",
            "company": "Coinbase",
            "family": "Coinbase",
            "category": "unknown",
            "kind": "fetcher",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "weipay-webhooks",
                "weipay-webhooks/"
            ],
            "primary_user_agent": "weipay-webhooks",
            "robots_token": "weipay-webhooks",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Coinbase Webhooks are automated messages sent from the Coinbase platform to a user's server, used for notifying users about events such as receiving crypto payments.",
            "short_description": "Coinbase Webhooks are automated messages sent from the Coinbase platform to a user's server, used for notifying users about events such as receiving crypto payments.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: weipay-webhooks\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"weipay-webhooks\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://commerce.coinbase.com"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:31Z"
        },
        {
            "id": "consentcheck-bot",
            "name": "ConsentCheck Bot",
            "slug": "consentcheck-bot",
            "url": "https://botcrawl.com/bots/consentcheck-bot/",
            "status": "active",
            "operator": "ConsentCheck",
            "company": "ConsentCheck",
            "family": "ConsentCheck",
            "category": "unknown",
            "kind": "unknown",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "consentcheck",
                "Mozilla/5.0 (compatible",
                "ConsentCheckBot/1.0",
                "+https://consentcheck.site/bot)"
            ],
            "primary_user_agent": "consentcheck",
            "robots_token": "consentcheck",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "verification_method": "Validate the published identifier against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or stronger verification when available.",
            "rules": {
                "robots": "User-agent: consentcheck\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"consentcheck\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://consentcheck.site/"
                }
            ],
            "last_verified": "2026-04-19",
            "last_checked": "2026-04-19",
            "updated_at": "2026-04-30T09:15:54Z"
        },
        {
            "id": "contentking",
            "name": "ContentKing",
            "slug": "contentking",
            "url": "https://botcrawl.com/bots/contentking/",
            "status": "active",
            "operator": "ContentKing",
            "company": "ContentKing",
            "family": "ContentKing",
            "category": "monitoring",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "contentking",
                "(+https://whatis.contentkingapp.com)"
            ],
            "primary_user_agent": "contentking",
            "robots_token": "contentking",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "ContentKing is a cloud-based service that monitors websites from a digital marketing perspective. We monitor the websites for customers such as Netflix, Atlassian, FedEx and IBM and alert their digital marketing teams whenever a technical issue or content change is detected.",
            "short_description": "ContentKing is a cloud-based service that monitors websites from a digital marketing perspective.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: contentking\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"contentking\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.contentkingapp.com/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:31Z"
        },
        {
            "id": "contextualbot",
            "name": "ContextualBot",
            "slug": "contextualbot",
            "url": "https://botcrawl.com/bots/contextualbot/",
            "status": "active",
            "operator": "Outcomes",
            "company": "Outcomes",
            "family": "Outcomes",
            "category": "scraper",
            "kind": "crawler",
            "purpose": "general-crawl",
            "identity_type": "unknown",
            "user_agents": [
                "ContextualBot",
                "Mozilla/5.0 (compatible",
                "ContextualBot/ 1.0",
                "+http://outcomes.net)."
            ],
            "primary_user_agent": "ContextualBot",
            "robots_token": "ContextualBot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Accesses page content to create contextual segments for targeting within several DSP & SSP platforms.",
            "short_description": "Accesses page content to create contextual segments for targeting within several DSP & SSP platforms.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: ContextualBot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"ContextualBot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.outcomes.net/contextual-intelligence"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:31Z"
        },
        {
            "id": "convermaxbot",
            "name": "ConvermaxBot",
            "slug": "convermaxbot",
            "url": "https://botcrawl.com/bots/convermaxbot/",
            "status": "active",
            "operator": "Convermax Corp.",
            "company": "Convermax Corp.",
            "family": "Convermax Corp.",
            "category": "search",
            "kind": "fetcher",
            "purpose": "indexing",
            "identity_type": "unknown",
            "user_agents": [
                "Convermax",
                "Convermax/1.0 (+https://docs.convermax.com/indexer)",
                "Convermax/"
            ],
            "primary_user_agent": "Convermax",
            "robots_token": "Convermax",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Convermax Site Search Indexer",
            "short_description": "Convermax Site Search Indexer",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Convermax\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Convermax\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://docs.convermax.com/indexer/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:31Z"
        },
        {
            "id": "cookie-hub",
            "name": "Cookie Hub",
            "slug": "cookie-hub",
            "url": "https://botcrawl.com/bots/cookie-hub/",
            "status": "active",
            "operator": "CookieHub",
            "company": "CookieHub",
            "family": "CookieHub",
            "category": "security",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "CookieHub",
                "Mozilla/5.0 (Windows NT 10.0",
                "Win64",
                "x64) AppleWebKit/537.36 (KHTML",
                "like Gecko) Chrome/93.0.4577.82 Safari/537.36 CookieHubScan/1.1",
                "like Gecko) Chrome/93.0.4577.82 Safari/537.36 CookieHubVerify/1.1"
            ],
            "primary_user_agent": "CookieHub",
            "robots_token": "CookieHub",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Used to detect cookies set by websites (for CookieHub clients) and verify if user consents are respected",
            "short_description": "Used to detect cookies set by websites (for CookieHub clients) and verify if user consents are respected",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: CookieHub\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"CookieHub\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.cookiehub.com"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:31Z"
        },
        {
            "id": "cookie-maestro",
            "name": "cookie maestro",
            "slug": "cookie-maestro",
            "url": "https://botcrawl.com/bots/cookie-maestro/",
            "status": "active",
            "operator": "Cookie Maestro",
            "company": "Cookie Maestro",
            "family": "Cookie Maestro",
            "category": "security",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "cookie-maestro"
            ],
            "primary_user_agent": "cookie-maestro",
            "robots_token": "cookie-maestro",
            "verified": "yes",
            "respects_robots": "yes",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Within the GDPR legislation it is mandatory to ask a visitor for permission before placing so-called marketing or tracking cookies. Many websites contain a cookie notice, but what is not clear to everyone is that those cookies may not be placed before the visitor has given explicit permission. Cookie Maestro searches for all cookies that your website places in your visitors browser.",
            "short_description": "Within the GDPR legislation it is mandatory to ask a visitor for permission before placing so-called marketing or tracking cookies.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: cookie-maestro\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"cookie-maestro\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.cookiemaestro.com/documentatie/limit-cookie-maestro-using-robots-txt"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:31Z"
        },
        {
            "id": "cookiebot",
            "name": "Cookiebot",
            "slug": "cookiebot",
            "url": "https://botcrawl.com/bots/cookiebot/",
            "status": "active",
            "operator": "Cybot A/S",
            "company": "Cybot A/S",
            "family": "Cybot A/S",
            "category": "security",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "Cookiebot",
                "Mozilla/5.0 (Windows NT 10.0",
                "WOW64) AppleWebKit/537.36 (KHTML",
                "like Gecko",
                "compatible",
                "Cookiebot/1.0",
                "+http://cookiebot.com/) Chrome/101.0.4951.54 Safari/537.36"
            ],
            "primary_user_agent": "Cookiebot",
            "robots_token": "Cookiebot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Cookiebot scans the website for cookies and trackers to gather and provide the information on a cookie banner.",
            "short_description": "Cookiebot scans the website for cookies and trackers to gather and provide the information on a cookie banner.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Cookiebot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Cookiebot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.cookiebot.com/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:31Z"
        },
        {
            "id": "cookieyesbot",
            "name": "CookieYesBot",
            "slug": "cookieyesbot",
            "url": "https://botcrawl.com/bots/cookieyesbot/",
            "status": "active",
            "operator": "CookieYes",
            "company": "CookieYes",
            "family": "CookieYes",
            "category": "security",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "CookieYesbot",
                "Mozilla/5.0 (Windows NT 10.0",
                "Win64",
                "x64) AppleWebKit/537.36 (KHTML",
                "like Gecko",
                "CookieYesbot/1.0",
                "+http://www.cookieyes.com/documentation/cookieyesbot) Chrome/131.0.6778.0 Safari/537.36"
            ],
            "primary_user_agent": "CookieYesbot",
            "robots_token": "CookieYesbot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "CookieYesbot scans and identify cookies and related information on websites that use CookieYes platform",
            "short_description": "CookieYesbot scans and identify cookies and related information on websites that use CookieYes platform",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: CookieYesbot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"CookieYesbot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.cookieyes.com/documentation/cookieyesbot"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:31Z"
        },
        {
            "id": "cotoyogi",
            "name": "Cotoyogi",
            "slug": "cotoyogi",
            "url": "https://botcrawl.com/bots/cotoyogi/",
            "status": "active",
            "operator": "Research Organization of Information and Systems",
            "company": "Research Organization of Information and Systems",
            "family": "Research Organization of Information and Systems",
            "category": "ai",
            "kind": "crawler",
            "purpose": "general-crawl",
            "identity_type": "unknown",
            "user_agents": [
                "Cotoyogi",
                "Mozilla/5.0 (compatible",
                "Cotoyogi/4.0",
                "+https://ds.rois.ac.jp/center8/crawler/)",
                "Cotoyogi/"
            ],
            "primary_user_agent": "Cotoyogi",
            "robots_token": "Cotoyogi",
            "verified": "yes",
            "respects_robots": "yes",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "By providing a centralized management environment for content and models, we will build a framework that promotes both data utilization and research and development of AI technology, and realize the following functions as Japan's future information infrastructure.",
            "short_description": "By providing a centralized management environment for content and models, we will build a framework that promotes both data utilization and research and development of…",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Cotoyogi\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Cotoyogi\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://ds.rois.ac.jp/center8/crawler/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:31Z"
        },
        {
            "id": "coveo-bot",
            "name": "Coveo Bot",
            "slug": "coveo-bot",
            "url": "https://botcrawl.com/bots/coveo-bot/",
            "status": "active",
            "operator": "Coveo",
            "company": "Coveo",
            "family": "Coveo",
            "category": "search",
            "kind": "crawler",
            "purpose": "indexing",
            "identity_type": "unknown",
            "user_agents": [
                "Coveobot",
                "Mozilla/5.0 AppleWebKit/537.36 (KHTML",
                "like Gecko) (compatible",
                "Coveobot/2.0",
                "+http://www.coveo.com/bot.html)"
            ],
            "primary_user_agent": "Coveobot",
            "robots_token": "Coveobot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Coveo provide services to website, customer service and commerce solutions so they can feature relevant experiences to their end users; said services are based on a unified index which crawls websites when configured so by our customers.",
            "short_description": "Coveo provide services to website, customer service and commerce solutions so they can feature relevant experiences to their end users; said services are based on a…",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Coveobot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Coveobot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://platform.cloud.coveo.com/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:31Z"
        },
        {
            "id": "crawlson",
            "name": "Crawlson",
            "slug": "crawlson",
            "url": "https://botcrawl.com/bots/crawlson/",
            "status": "active",
            "operator": "Crawlson",
            "company": "Crawlson",
            "family": "Crawlson",
            "category": "search",
            "kind": "crawler",
            "purpose": "indexing",
            "identity_type": "unknown",
            "user_agents": [
                "Crawlson",
                "Mozilla/5.0 (compatible",
                "Crawlson/1.0",
                "+https://www.crawlson.com/domain)",
                "Crawlson/"
            ],
            "primary_user_agent": "Crawlson",
            "robots_token": "Crawlson",
            "verified": "yes",
            "respects_robots": "yes",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Crawlson is a search engine crawler for the crawlson.com search engine.",
            "short_description": "Crawlson is a search engine crawler for the crawlson.com search engine.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Crawlson\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Crawlson\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.crawlson.com"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:31Z"
        },
        {
            "id": "crazy-egg-bot",
            "name": "Crazy Egg Bot",
            "slug": "crazy-egg-bot",
            "url": "https://botcrawl.com/bots/crazy-egg-bot/",
            "status": "active",
            "operator": "CrazyEgg",
            "company": "CrazyEgg",
            "family": "CrazyEgg",
            "category": "monitoring",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "Crazy Egg",
                "Crazy Egg Bot",
                "Crazy Egg Bot (Mobile)",
                "Crazy Egg Bot (Tablet)"
            ],
            "primary_user_agent": "Crazy Egg",
            "robots_token": "Crazy Egg",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Crazy Egg bot that takes screenshots of pages, collects assets, tests script installation.",
            "short_description": "Crazy Egg bot that takes screenshots of pages, collects assets, tests script installation.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Crazy Egg\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Crazy Egg\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://support.crazyegg.com/hc/en-us/articles/22650794175251"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:31Z"
        },
        {
            "id": "criteobot",
            "name": "CriteoBot",
            "slug": "criteobot",
            "url": "https://botcrawl.com/bots/criteobot/",
            "status": "active",
            "operator": "Criteo",
            "company": "Criteo",
            "family": "Criteo",
            "category": "scraper",
            "kind": "crawler",
            "purpose": "general-crawl",
            "identity_type": "unknown",
            "user_agents": [
                "CriteoBot",
                "CriteoBot/0.1 (+https://www.criteo.com/criteo-crawler/)",
                "CriteoBot/"
            ],
            "primary_user_agent": "CriteoBot",
            "robots_token": "CriteoBot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Criteo Crawler is a software that visits web pages and analyzes its content to serve relevant ads on them.",
            "short_description": "Criteo Crawler is a software that visits web pages and analyzes its content to serve relevant ads on them.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: CriteoBot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"CriteoBot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.criteo.com/criteo-crawler/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:31Z"
        },
        {
            "id": "critical-css-bot",
            "name": "Critical CSS Bot",
            "slug": "critical-css-bot",
            "url": "https://botcrawl.com/bots/critical-css-bot/",
            "status": "active",
            "operator": "Critical CSS",
            "company": "Critical CSS",
            "family": "Critical CSS",
            "category": "unknown",
            "kind": "unknown",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "Criticalcss.com/",
                "Mozilla/5.0 (X11",
                "Linux x86_64) AppleWebKit/537.36 (KHTML",
                "like Gecko) Chrome/79.0.3945.0 Safari/537.36 Criticalcss.com/2.0.0"
            ],
            "primary_user_agent": "Criticalcss.com/",
            "robots_token": "Criticalcss.com/",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "short_description": "We operate a SASS for website optimisations, we have thousands of customers.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Criticalcss.com/\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Criticalcss.com/\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://criticalcss.com"
                }
            ],
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:31Z"
        },
        {
            "id": "cron-job-org",
            "name": "cron-job.org",
            "slug": "cron-job-org",
            "url": "https://botcrawl.com/bots/cron-job-org/",
            "status": "active",
            "operator": "cron-job.org",
            "company": "cron-job.org",
            "family": "cron-job.org",
            "category": "unknown",
            "kind": "fetcher",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "cron-job.org",
                "http://cron-job.org/"
            ],
            "primary_user_agent": "cron-job.org",
            "robots_token": "cron-job.org",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Scheduled execution of your websites and scripts.",
            "short_description": "Scheduled execution of your websites and scripts.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: cron-job.org\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"cron-job.org\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://cron-job.org/en/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:31Z"
        },
        {
            "id": "customer-io-webhooks",
            "name": "Customer.io Webhooks",
            "slug": "customer-io-webhooks",
            "url": "https://botcrawl.com/bots/customer-io-webhooks/",
            "status": "active",
            "operator": "Customer.io",
            "company": "Customer.io",
            "family": "Customer.io",
            "category": "unknown",
            "kind": "unknown",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "customerio-webhooks",
                "Customer.io Webhooks"
            ],
            "primary_user_agent": "customerio-webhooks",
            "robots_token": "customerio-webhooks",
            "verified": "yes",
            "respects_robots": "unknown",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "botsfyi-verified-listing",
            "common_use": "Webhook",
            "short_description": "Customer.io's webhook service for event-driven marketing automation and customer data platform workflows.",
            "verification_method": "Public verified bot directory listing",
            "spoofing_risk": "Medium",
            "rules": {
                "cloudflare": "(http.user_agent contains \"customerio-webhooks\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://bots.fyi/d/customerio-webhooks"
                }
            ],
            "last_verified": "2026-04-19",
            "last_checked": "2026-04-19",
            "updated_at": "2026-04-30T09:15:31Z"
        },
        {
            "id": "cxense",
            "name": "Cxense",
            "slug": "cxense",
            "url": "https://botcrawl.com/bots/cxense/",
            "status": "active",
            "operator": "Cxense",
            "company": "Cxense",
            "family": "Cxense",
            "category": "scraper",
            "kind": "crawler",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "cXensebot",
                "Mozilla/5.0 (Macintosh",
                "U",
                "Intel Mac OS X",
                "en-US) AppleWebKit/537.36 (KHTML",
                "like Gecko) cXensebot/2.0",
                "+http://www.cxense.com/bot.html Safari/533.3 3615 155430223 0.03793830424547195 clarin.com 5.9.190.107 bot-5-9-190-107.cxensebot.com",
                "cXensebot/"
            ],
            "primary_user_agent": "cXensebot",
            "robots_token": "cXensebot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "The Cxensebot performs SEO monitoring and analysis of customer webpages.",
            "short_description": "The Cxensebot performs SEO monitoring and analysis of customer webpages.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: cXensebot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"cXensebot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.cxense.com"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:31Z"
        },
        {
            "id": "cybaa-agent",
            "name": "Cybaa Agent",
            "slug": "cybaa-agent",
            "url": "https://botcrawl.com/bots/cybaa-agent/",
            "status": "active",
            "operator": "Cybaa",
            "company": "Cybaa",
            "family": "Cybaa",
            "category": "unknown",
            "kind": "unknown",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "cybaa-agent",
                "Cybaa Agent"
            ],
            "primary_user_agent": "cybaa-agent",
            "robots_token": "cybaa-agent",
            "verified": "yes",
            "respects_robots": "unknown",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "botsfyi-verified-listing",
            "common_use": "Verification",
            "short_description": "Performs user-initiated security checks on behalf of Cybaa customers, validating domain and website security controls.",
            "verification_method": "Public verified bot directory listing",
            "spoofing_risk": "Medium",
            "rules": {
                "cloudflare": "(http.user_agent contains \"cybaa-agent\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://bots.fyi/d/cybaa-agent"
                }
            ],
            "last_verified": "2026-04-19",
            "last_checked": "2026-04-19",
            "updated_at": "2026-04-30T09:15:32Z"
        },
        {
            "id": "cybaa-bot",
            "name": "Cybaa Bot",
            "slug": "cybaa-bot",
            "url": "https://botcrawl.com/bots/cybaa-bot/",
            "status": "active",
            "operator": "Cybaa",
            "company": "Cybaa",
            "family": "Cybaa Bot",
            "category": "security",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "CybaaBot",
                "Mozilla/5.0 (X11",
                "Linux x86_64) AppleWebKit/537.36 (KHTML",
                "like Gecko) Chrome/140.0.0.0 Safari/537.36 CybaaBot"
            ],
            "primary_user_agent": "CybaaBot",
            "robots_token": "CybaaBot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Performs broad security research by crawling the most popular domains obtained from Crawler.Ninja.",
            "short_description": "Performs broad security research by crawling the most popular domains obtained from Crawler.Ninja.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: CybaaBot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"CybaaBot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://cybaa.io/bot-policy"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:32Z"
        },
        {
            "id": "daric2",
            "name": "Daric2",
            "slug": "daric2",
            "url": "https://botcrawl.com/bots/daric2/",
            "status": "active",
            "operator": "Cloudflare",
            "company": "Cloudflare",
            "family": "Cloudflare",
            "category": "ai",
            "kind": "crawler",
            "purpose": "general-crawl",
            "identity_type": "official-documented",
            "user_agents": [
                "Daric2"
            ],
            "primary_user_agent": "Daric2",
            "robots_token": "Daric2",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "high",
            "source_type": "official",
            "short_description": "Cloudflare AI Crawl Control bot 2",
            "verification_method": "Listed by Cloudflare Radar as a verified bot; match the published robots.txt token and user-agent pattern. Cross-check the operator documentation linked in the source field when available.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: Daric2\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Daric2\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://developers.cloudflare.com/ai-crawl-control/"
                }
            ],
            "last_verified": "2026-04-01",
            "updated_at": "2026-04-01T23:15:31Z"
        },
        {
            "id": "daric3",
            "name": "Daric3",
            "slug": "daric3",
            "url": "https://botcrawl.com/bots/daric3/",
            "status": "active",
            "operator": "Cloudflare",
            "company": "Cloudflare",
            "family": "Cloudflare",
            "category": "ai",
            "kind": "crawler",
            "purpose": "general-crawl",
            "identity_type": "official-documented",
            "user_agents": [
                "Daric3"
            ],
            "primary_user_agent": "Daric3",
            "robots_token": "Daric3",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "high",
            "source_type": "official",
            "short_description": "Cloudflare AI Crawl Control bot 3",
            "verification_method": "Listed by Cloudflare Radar as a verified bot; match the published robots.txt token and user-agent pattern. Cross-check the operator documentation linked in the source field when available.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: Daric3\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Daric3\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://developers.cloudflare.com/ai-crawl-control/"
                }
            ],
            "last_verified": "2026-04-01",
            "updated_at": "2026-04-01T23:15:31Z"
        },
        {
            "id": "daric4",
            "name": "Daric4",
            "slug": "daric4",
            "url": "https://botcrawl.com/bots/daric4/",
            "status": "active",
            "operator": "Cloudflare",
            "company": "Cloudflare",
            "family": "Cloudflare",
            "category": "ai",
            "kind": "crawler",
            "purpose": "general-crawl",
            "identity_type": "official-documented",
            "user_agents": [
                "Daric4"
            ],
            "primary_user_agent": "Daric4",
            "robots_token": "Daric4",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "high",
            "source_type": "official",
            "short_description": "Cloudflare AI Crawl Control bot 4",
            "verification_method": "Listed by Cloudflare Radar as a verified bot; match the published robots.txt token and user-agent pattern. Cross-check the operator documentation linked in the source field when available.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: Daric4\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Daric4\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://developers.cloudflare.com/ai-crawl-control/"
                }
            ],
            "last_verified": "2026-04-01",
            "updated_at": "2026-04-01T23:15:31Z"
        },
        {
            "id": "dash0-synthetic-monitoring",
            "name": "Dash0 Synthetic Monitoring",
            "slug": "dash0-synthetic-monitoring",
            "url": "https://botcrawl.com/bots/dash0-synthetic-monitoring/",
            "status": "active",
            "operator": "Dash0",
            "company": "Dash0",
            "family": "Dash0",
            "category": "monitoring",
            "kind": "unknown",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "dash0-synthetic",
                "Dash0 Synthetic Monitoring"
            ],
            "primary_user_agent": "dash0-synthetic",
            "robots_token": "dash0-synthetic",
            "verified": "yes",
            "respects_robots": "unknown",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "botsfyi-verified-listing",
            "common_use": "Monitoring",
            "short_description": "Dash0 synthetic monitoring provides proactive automated checks for website and API availability and performance.",
            "verification_method": "Public verified bot directory listing",
            "spoofing_risk": "Medium",
            "rules": {
                "cloudflare": "(http.user_agent contains \"dash0-synthetic\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://bots.fyi/d/dash0-synthetic"
                }
            ],
            "last_verified": "2026-04-19",
            "last_checked": "2026-04-19",
            "updated_at": "2026-04-30T09:15:32Z"
        },
        {
            "id": "dataforseo",
            "name": "DataForSEO",
            "slug": "dataforseo",
            "url": "https://botcrawl.com/bots/dataforseo/",
            "status": "active",
            "operator": "DataForSEO",
            "company": "DataForSEO",
            "family": "DataForSEO",
            "category": "scraper",
            "kind": "crawler",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "RSiteAuditor",
                "Mozilla/5.0 (compatible",
                "RSiteAuditor)"
            ],
            "primary_user_agent": "RSiteAuditor",
            "robots_token": "RSiteAuditor",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "DataForSEO is using RSiteAuditor to scan websites for critical on-site SEO errors and provides aggregated data in a structured form to its customer through a RESTful API.",
            "short_description": "DataForSEO is using RSiteAuditor to scan websites for critical on-site SEO errors and provides aggregated data in a structured form to its customer through a RESTful API.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: RSiteAuditor\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"RSiteAuditor\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://dataforseo.com/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:32Z"
        },
        {
            "id": "dataforseo-bot",
            "name": "DataForSEO Bot",
            "slug": "dataforseo-bot",
            "url": "https://botcrawl.com/bots/dataforseo-bot/",
            "status": "active",
            "operator": "DataForSEO",
            "company": "DataForSEO",
            "family": "DataForSEO",
            "category": "scraper",
            "kind": "crawler",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "DataForSeoBot",
                "Mozilla/5.0 (compatible",
                "DataForSeoBot/1.0",
                "+https://dataforseo.com/dataforseo-bot)"
            ],
            "primary_user_agent": "DataForSeoBot",
            "robots_token": "DataForSeoBot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "DataForSEO Bot is a driving force of our leading product - Backlinks API, which has been developed with a single purpose: providing website owners, webmasters, and SEO professionals with opportunities to analyze the key component of website optimization – backlink analytics. You can learn more about the DataForSEO Bot on this dedicated page: https://dataforseo.com/dataforseo-bot",
            "short_description": "DataForSEO Bot is a driving force of our leading product - Backlinks API, which has been developed with a single purpose: providing website owners, webmasters, and SEO…",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: DataForSeoBot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"DataForSeoBot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://dataforseo.com/"
                }
            ],
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:32Z"
        },
        {
            "id": "dataprovider-com",
            "name": "Dataprovider.com",
            "slug": "dataprovider-com",
            "url": "https://botcrawl.com/bots/dataprovider-com/",
            "status": "active",
            "operator": "Dataprovider.com",
            "company": "Dataprovider.com",
            "family": "Dataprovider.com",
            "category": "search",
            "kind": "crawler",
            "purpose": "indexing",
            "identity_type": "unknown",
            "user_agents": [
                "Dataprovider",
                "Mozilla/5.0 (compatible",
                "Dataprovider.com)"
            ],
            "primary_user_agent": "Dataprovider",
            "robots_token": "Dataprovider",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Dataprovider.com indexes the web and structures the data.",
            "short_description": "Dataprovider.com indexes the web and structures the data.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Dataprovider\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Dataprovider\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.dataprovider.com/spider/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:32Z"
        },
        {
            "id": "datenbank-crawler",
            "name": "Datenbank Crawler",
            "slug": "datenbank-crawler",
            "url": "https://botcrawl.com/bots/datenbank-crawler/",
            "status": "active",
            "operator": "netEstate",
            "company": "netEstate",
            "family": "netEstate",
            "category": "search",
            "kind": "unknown",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "Datenbank Crawler",
                "Mozilla/5.0 (compatible",
                "+https://www.netestate.de/crawler.html)",
                "netEstate Crawler"
            ],
            "primary_user_agent": "Datenbank Crawler",
            "robots_token": "Datenbank Crawler",
            "verified": "unknown",
            "respects_robots": "unknown",
            "risk": "high",
            "recommended_action": "depends",
            "confidence": "unknown",
            "source_type": "unknown",
            "short_description": "netEstate German crawler for collecting and selling website data for business intelligence.",
            "verification_method": "No official IP-range or reverse-DNS verification method is documented here. Match the exact user-agent and request behavior against the source documentation at https://www.netestate.de.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: Datenbank Crawler\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Datenbank Crawler\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.netestate.de"
                }
            ],
            "updated_at": "2026-04-01T00:55:26Z"
        },
        {
            "id": "daum",
            "name": "Daum",
            "slug": "daum",
            "url": "https://botcrawl.com/bots/daum/",
            "status": "active",
            "operator": "Daum",
            "company": "Daum",
            "family": "Daum",
            "category": "search",
            "kind": "crawler",
            "purpose": "indexing",
            "identity_type": "unknown",
            "user_agents": [
                "Daum/4.1",
                "Mozilla/5.0 (Unknown",
                "Linux x86_64) AppleWebKit/538.1 (KHTML",
                "like Gecko) Safari/538.1 Daum/4.1"
            ],
            "primary_user_agent": "Daum/4.1",
            "robots_token": "Daum/4.1",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Korean search engine crawler",
            "short_description": "Korean search engine crawler",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Daum/4.1\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Daum/4.1\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://cs.daum.net/faq/15/4118.html?faqId=28966"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:32Z"
        },
        {
            "id": "deadlinkchecker",
            "name": "deadlinkchecker",
            "slug": "deadlinkchecker",
            "url": "https://botcrawl.com/bots/deadlinkchecker/",
            "status": "active",
            "operator": "DLC Websites",
            "company": "DLC Websites",
            "family": "DLC Websites",
            "category": "monitoring",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "deadlinkchecker",
                "www.deadlinkchecker.com Mozilla/5.0 (Windows NT 6.1",
                "WOW64) AppleWebKit/537.36 (KHTML",
                "like Gecko) Chrome/46.0.2490.86 Safari/537.36"
            ],
            "primary_user_agent": "deadlinkchecker",
            "robots_token": "deadlinkchecker",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Dead Link Checker is a service which crawls a customer's website reporting on any broken links (404, 500 etc) it finds",
            "short_description": "Dead Link Checker is a service which crawls a customer's website reporting on any broken links (404, 500 etc) it finds",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: deadlinkchecker\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"deadlinkchecker\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.deadlinkchecker.com/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:32Z"
        },
        {
            "id": "deepcrawl",
            "name": "Deepcrawl",
            "slug": "deepcrawl",
            "url": "https://botcrawl.com/bots/deepcrawl/",
            "status": "active",
            "operator": "Lumar",
            "company": "Lumar",
            "family": "Lumar",
            "category": "monitoring",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "lumar",
                "Lumar",
                "Deepcrawl"
            ],
            "primary_user_agent": "lumar",
            "robots_token": "lumar",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "The DeepCrawl bot crawls the websites of its customers to collect performance analytics and suggest SEO optimizations.",
            "short_description": "The DeepCrawl bot crawls the websites of its customers to collect performance analytics and suggest SEO optimizations.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: lumar\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"lumar\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.deepcrawl.com"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:32Z"
        },
        {
            "id": "detectify",
            "name": "Detectify",
            "slug": "detectify",
            "url": "https://botcrawl.com/bots/detectify/",
            "status": "active",
            "operator": "Detectify",
            "company": "Detectify",
            "family": "Detectify",
            "category": "security",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "Detectify",
                "Mozilla/5.0 (compatible",
                "Detectify) +https://detectify.com/bot/3e03814aef1db3cec70f42dd847e9ef5b37f4c8e",
                "Detectify)",
                "compatible"
            ],
            "primary_user_agent": "Detectify",
            "robots_token": "Detectify",
            "verified": "yes",
            "respects_robots": "yes",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "high",
            "source_type": "directory",
            "short_description": "Detectify analyzes the security level of web applications after a user proves control of the target site.",
            "verification_method": "Follow the operator's own documentation when additional verification details are available.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: Detectify\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Detectify\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://detectify.com/what-is-detectify"
                }
            ],
            "last_verified": "2026-04-01",
            "updated_at": "2026-04-30T09:15:32Z"
        },
        {
            "id": "devin",
            "name": "Devin",
            "slug": "devin",
            "url": "https://botcrawl.com/bots/devin/",
            "status": "active",
            "operator": "Devin AI",
            "company": "Devin AI",
            "family": "Devin AI",
            "category": "ai",
            "kind": "fetcher",
            "purpose": "user-triggered",
            "identity_type": "unknown",
            "user_agents": [
                "Devin",
                "Mozilla/5.0 (X11",
                "Linux x86_64) AppleWebKit/537.36 (KHTML",
                "like Gecko) Chrome/137.0.0.0 Safari/537.36",
                "Devin/1.0",
                "+https://devin.ai"
            ],
            "primary_user_agent": "Devin",
            "robots_token": "Devin",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Devin is a collaborative AI teammate built to help ambitious engineering teams achieve more.",
            "short_description": "Devin is a collaborative AI teammate built to help ambitious engineering teams achieve more.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Devin\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Devin\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://docs.devin.ai/get-started/devin-intro"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:32Z"
        },
        {
            "id": "diffbot",
            "name": "Diffbot",
            "slug": "diffbot",
            "url": "https://botcrawl.com/bots/diffbot/",
            "status": "active",
            "operator": "Diffbot",
            "company": "Diffbot",
            "family": "Diffbot",
            "category": "scraper",
            "kind": "crawler",
            "purpose": "general-crawl",
            "identity_type": "official-documented",
            "user_agents": [
                "Diffbot",
                "Diffbot crawler"
            ],
            "primary_user_agent": "Diffbot",
            "robots_token": "Diffbot",
            "verified": "yes",
            "respects_robots": "yes",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "high",
            "source_type": "official",
            "short_description": "Diffbot crawler for Knowledge Graph and web-search discovery.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or reverse DNS/IP verification when available.",
            "rules": {
                "robots": "User-agent: Diffbot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Diffbot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://docs.diffbot.com/docs/does-crawl-respect-robotstxt"
                }
            ],
            "last_verified": "2026-04-29",
            "last_checked": "2026-04-29",
            "updated_at": "2026-04-30T09:15:55Z"
        },
        {
            "id": "diffbot-user",
            "name": "Diffbot-User",
            "slug": "diffbot-user",
            "url": "https://botcrawl.com/bots/diffbot-user/",
            "status": "active",
            "operator": "Diffbot",
            "company": "Diffbot",
            "family": "Diffbot",
            "category": "scraper",
            "kind": "fetcher",
            "purpose": "user-triggered",
            "identity_type": "official-documented",
            "user_agents": [
                "Diffbot-User",
                "Diffbot user fetcher"
            ],
            "primary_user_agent": "Diffbot-User",
            "robots_token": "Diffbot-User",
            "verified": "yes",
            "respects_robots": "yes",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "high",
            "source_type": "official",
            "short_description": "Diffbot fetcher for user-requested URL browsing.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or reverse DNS/IP verification when available.",
            "rules": {
                "robots": "User-agent: Diffbot-User\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Diffbot-User\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://docs.diffbot.com/docs/does-crawl-respect-robotstxt"
                }
            ],
            "last_verified": "2026-04-29",
            "last_checked": "2026-04-29",
            "updated_at": "2026-04-30T09:15:55Z"
        },
        {
            "id": "digicert-dcv",
            "name": "DigiCert DCV",
            "slug": "digicert-dcv",
            "url": "https://botcrawl.com/bots/digicert-dcv/",
            "status": "active",
            "operator": "DigiCert",
            "company": "DigiCert",
            "family": "DigiCert",
            "category": "scraper",
            "kind": "crawler",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "DigiCert DCV Bot",
                "DigiCert DCV Bot/1.0",
                "DigiCert DCV Bot/1.1"
            ],
            "primary_user_agent": "DigiCert DCV Bot",
            "robots_token": "DigiCert DCV Bot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "DigiCert DCV service.",
            "short_description": "DigiCert DCV service.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: DigiCert DCV Bot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"DigiCert DCV Bot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.digicert.com/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:32Z"
        },
        {
            "id": "direqt-anomura",
            "name": "Direqt Anomura",
            "slug": "direqt-anomura",
            "url": "https://botcrawl.com/bots/direqt-anomura/",
            "status": "active",
            "operator": "Direqt",
            "company": "Direqt",
            "family": "Direqt Anomura",
            "category": "ai",
            "kind": "crawler",
            "purpose": "search",
            "identity_type": "unknown",
            "user_agents": [
                "Anomura",
                "Anomura/1.2 (+https://www.direqt.ai)"
            ],
            "primary_user_agent": "Anomura",
            "robots_token": "Anomura",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Anomura is Direqt’s search crawler; it discovers and indexes pages on their customers’ websites.",
            "short_description": "Anomura is Direqt’s search crawler; it discovers and indexes pages on their customers’ websites.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Anomura\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Anomura\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://dashboarddocs.direqt.ai/direqt-bots/direqt-crawlers-and-user-agents"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:32Z"
        },
        {
            "id": "discordbot",
            "name": "discordBot",
            "slug": "discordbot",
            "url": "https://botcrawl.com/bots/discordbot/",
            "status": "active",
            "operator": "Discord, Inc.",
            "company": "Discord, Inc.",
            "family": "Discord, Inc.",
            "category": "scraper",
            "kind": "preview",
            "purpose": "general-crawl",
            "identity_type": "unknown",
            "user_agents": [
                "Discordbot",
                "Mozilla/5.0 (compatible",
                "Discordbot/2.0",
                "+https://discordapp.com)",
                "Discordbot/"
            ],
            "primary_user_agent": "Discordbot",
            "robots_token": "Discordbot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "The discordBot scrapes URLs that are shared within the Discord chat platform. This is done to generate contextual previews of the content, including titles, descriptions, and images.",
            "short_description": "The discordBot scrapes URLs that are shared within the Discord chat platform.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Discordbot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Discordbot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://discord.com"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:32Z"
        },
        {
            "id": "doctom-monitor",
            "name": "Dotcom-Monitor",
            "slug": "doctom-monitor",
            "url": "https://botcrawl.com/bots/doctom-monitor/",
            "status": "active",
            "operator": "Dotcom-Monitor",
            "company": "Dotcom-Monitor",
            "family": "Dotcom-Monitor",
            "category": "monitoring",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "DMBrowser",
                "DMBrowser/"
            ],
            "primary_user_agent": "DMBrowser",
            "robots_token": "DMBrowser",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "The Dotcom-Monitor bot checks websites for uptime and performance issues.",
            "short_description": "The Dotcom-Monitor bot checks websites for uptime and performance issues.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: DMBrowser\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"DMBrowser\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.dotcom-monitor.com/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:32Z"
        },
        {
            "id": "dr-link-check",
            "name": "Dr. Link Check",
            "slug": "dr-link-check",
            "url": "https://botcrawl.com/bots/dr-link-check/",
            "status": "active",
            "operator": "Dr. Link Check",
            "company": "Dr. Link Check",
            "family": "Dr. Link Check",
            "category": "monitoring",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "Dlc",
                "Mozilla/5.0 (Windows NT 10.0",
                "Win64",
                "x64) AppleWebKit/537.36 (KHTML",
                "like Gecko) Chrome/100.0.4896.127 Safari/537.36 Dlc/2.0.1",
                "Dlc/"
            ],
            "primary_user_agent": "Dlc",
            "robots_token": "Dlc",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Dr. Link Check crawls websites to help their owners identify and fix broken links.",
            "short_description": "Dr. Link Check crawls websites to help their owners identify and fix broken links.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Dlc\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Dlc\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.drlinkcheck.com/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:32Z"
        },
        {
            "id": "drata-autopilot",
            "name": "Drata Autopilot",
            "slug": "drata-autopilot",
            "url": "https://botcrawl.com/bots/drata-autopilot/",
            "status": "active",
            "operator": "Drata",
            "company": "Drata",
            "family": "Drata",
            "category": "security",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "Dratabot",
                "Dratabot (+https://dratabot.com)"
            ],
            "primary_user_agent": "Dratabot",
            "robots_token": "Dratabot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "The Drata Autopilot bot continuously monitors the security posture of customer domains.",
            "short_description": "The Drata Autopilot bot continuously monitors the security posture of customer domains.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Dratabot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Dratabot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://drata.com/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:32Z"
        },
        {
            "id": "duckassistbot",
            "name": "DuckAssistBot",
            "slug": "duckassistbot",
            "url": "https://botcrawl.com/bots/duckassistbot/",
            "status": "active",
            "operator": "DuckDuckGo",
            "company": "DuckDuckGo",
            "family": "DuckDuckGo",
            "category": "search",
            "kind": "crawler",
            "purpose": "search",
            "identity_type": "official-documented",
            "user_agents": [
                "DuckAssistBot",
                "DuckAssistBot/1.2",
                "(+http://duckduckgo.com/duckassistbot.html)",
                "DuckDuckGo AI answers crawler"
            ],
            "primary_user_agent": "DuckAssistBot",
            "robots_token": "DuckAssistBot",
            "verified": "yes",
            "respects_robots": "yes",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "high",
            "source_type": "official",
            "common_use": "DuckAssistBot is a web crawler for DuckDuckGo.",
            "short_description": "DuckDuckGo crawler for AI-assisted answers with source citations.",
            "verification_method": "Verify the user-agent together with DuckDuckGo's published DuckAssistBot IP list.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "ip_ranges_url": "https://duckduckgo.com/duckassistbot.json",
            "rules": {
                "robots": "User-agent: DuckAssistBot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"DuckAssistBot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://duckduckgo.com/duckduckgo-help-pages/results/duckassistbot"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:32Z"
        },
        {
            "id": "duckduckbot",
            "name": "DuckDuckBot",
            "slug": "duckduckbot",
            "url": "https://botcrawl.com/bots/duckduckbot/",
            "status": "active",
            "operator": "DuckDuckGo",
            "company": "DuckDuckGo",
            "family": "DuckDuckGo",
            "category": "search",
            "kind": "crawler",
            "purpose": "indexing",
            "identity_type": "unknown",
            "user_agents": [
                "DuckDuckBot",
                "DuckDuckBot/1.0",
                "(+http://duckduckgo.com/duckduckbot.html)",
                "DuckDuckGo search crawler"
            ],
            "primary_user_agent": "DuckDuckBot",
            "robots_token": "DuckDuckBot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "DuckDuckBot is the search engine crawler for the DuckDuckGo search engine.",
            "short_description": "DuckDuckBot is the search engine crawler for the DuckDuckGo search engine.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "ip_ranges_url": "https://duckduckgo.com/duckduckbot.json",
            "rules": {
                "robots": "User-agent: DuckDuckBot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"DuckDuckBot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "http://duckduckgo.com/duckduckbot.html"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:32Z"
        },
        {
            "id": "duplexweb-google",
            "name": "DuplexWeb-Google",
            "slug": "duplexweb-google",
            "url": "https://botcrawl.com/bots/duplexweb-google/",
            "status": "active",
            "operator": "Google",
            "company": "Google",
            "category": "unknown",
            "kind": "unknown",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "Mozilla/5.0 (Linux; Android 11; Pixel 2; DuplexWeb-Google/1.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.193 Mobile Safari/537.36",
                "Mozilla/5.0 (Linux",
                "Android 11",
                "Pixel 2",
                "DuplexWeb-Google/1.0) AppleWebKit/537.36 (KHTML",
                "like Gecko) Chrome/86.0.4240.193 Mobile Safari/537.36",
                "Duplex on the web",
                "Official"
            ],
            "primary_user_agent": "Mozilla/5.0 (Linux; Android 11; Pixel 2; DuplexWeb-Google/1.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.193 Mobile Safari/537.36",
            "robots_token": "DuplexWeb-Google",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "unknown",
            "source_type": "unknown",
            "short_description": "Retired Google special-case user agent for Duplex on the web.",
            "verification_method": "Verify via reverse DNS matching Google's special-case crawler hostnames and confirm the IP matches Google's special crawler ranges.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: DuplexWeb-Google\nDisallow: /"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://developers.google.com/crawling/docs/crawlers-fetchers/google-special-case-crawlers"
                }
            ],
            "updated_at": "2026-03-31T21:27:36Z"
        },
        {
            "id": "dvbot",
            "name": "DVbot",
            "slug": "dvbot",
            "url": "https://botcrawl.com/bots/dvbot/",
            "status": "active",
            "operator": "DoubleVerify",
            "company": "DoubleVerify",
            "family": "DoubleVerify",
            "category": "search",
            "kind": "unknown",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "DVbot",
                "Mozilla/5.0 (compatible",
                "+https://doubleverify.com)",
                "DoubleVerify Bot"
            ],
            "primary_user_agent": "DVbot",
            "robots_token": "DVbot",
            "verified": "unknown",
            "respects_robots": "unknown",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "unknown",
            "source_type": "unknown",
            "short_description": "DoubleVerify digital advertising verification crawler that monitors ad quality and brand safety.",
            "verification_method": "No official IP-range or reverse-DNS verification method is documented here. Match the exact user-agent and request behavior against the source documentation at https://doubleverify.com.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: DVbot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"DVbot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://doubleverify.com"
                }
            ],
            "updated_at": "2026-04-01T00:55:22Z"
        },
        {
            "id": "easouspider",
            "name": "EasouSpider",
            "slug": "easouspider",
            "url": "https://botcrawl.com/bots/easouspider/",
            "status": "active",
            "operator": "Easou",
            "company": "Easou",
            "family": "Easou",
            "category": "search",
            "kind": "unknown",
            "purpose": "general-crawl",
            "identity_type": "unknown",
            "user_agents": [
                "EasouSpider"
            ],
            "primary_user_agent": "EasouSpider",
            "robots_token": "EasouSpider",
            "verified": "unknown",
            "respects_robots": "unknown",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "unknown",
            "source_type": "unknown",
            "short_description": "Easou crawler token surfaced in baidu property robots.txt.",
            "verification_method": "This token is surfaced in a Baidu-owned robots.txt file, but the source used for this entry does not publish a dedicated verification method. Confirm behavior conservatively and do not rely on the user-agent string alone.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: EasouSpider\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"EasouSpider\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://map.baidu.com/robots.txt"
                }
            ],
            "updated_at": "2026-04-01T01:04:50Z"
        },
        {
            "id": "easybill-import-manager",
            "name": "EasyBill Import Manager",
            "slug": "easybill-import-manager",
            "url": "https://botcrawl.com/bots/easybill-import-manager/",
            "status": "active",
            "operator": "easybill.de",
            "company": "easybill.de",
            "family": "easybill.de",
            "category": "unknown",
            "kind": "unknown",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "easybill-ImportManager",
                "easybill-ImportManager/ShopwareClient"
            ],
            "primary_user_agent": "easybill-ImportManager",
            "robots_token": "easybill-ImportManager",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "EasyBill Import Manager is a tool that synchronizes order data from external systems to EasyBill.",
            "short_description": "EasyBill Import Manager is a tool that synchronizes order data from external systems to EasyBill.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: easybill-ImportManager\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"easybill-ImportManager\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.easybill.de/api/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:33Z"
        },
        {
            "id": "easycron",
            "name": "EasyCron",
            "slug": "easycron",
            "url": "https://botcrawl.com/bots/easycron/",
            "status": "active",
            "operator": "EasyCron",
            "company": "EasyCron",
            "family": "EasyCron",
            "category": "unknown",
            "kind": "fetcher",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "EasyCron",
                "EasyCron/1.0 (https://www.easycron.com/)",
                "EasyCron/"
            ],
            "primary_user_agent": "EasyCron",
            "robots_token": "EasyCron",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "EasyCron is an online cron job service. Users can schedule an HTTP request to be made at a specific date and time.",
            "short_description": "EasyCron is an online cron job service.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: EasyCron\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"EasyCron\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.easycron.com/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:33Z"
        },
        {
            "id": "easydns",
            "name": "Easydns",
            "slug": "easydns",
            "url": "https://botcrawl.com/bots/easydns/",
            "status": "active",
            "operator": "EasyDNS",
            "company": "EasyDNS",
            "family": "EasyDNS",
            "category": "monitoring",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "http://easyurl.net/monitoring",
                "easyDNS Monitoring ( http://easyurl.net/monitoring )",
                "easyDNS Monitoring"
            ],
            "primary_user_agent": "http://easyurl.net/monitoring",
            "robots_token": "http://easyurl.net/monitoring",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "EasyDNS' uptime monitoring probe.",
            "short_description": "EasyDNS' uptime monitoring probe.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: http://easyurl.net/monitoring\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"http://easyurl.net/monitoring\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://easydns.com/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:33Z"
        },
        {
            "id": "easyscan",
            "name": "EasyScan",
            "slug": "easyscan",
            "url": "https://botcrawl.com/bots/easyscan/",
            "status": "active",
            "operator": "codire GmbH",
            "company": "codire GmbH",
            "family": "codire",
            "category": "ai",
            "kind": "agent",
            "purpose": "user-triggered",
            "identity_type": "unknown",
            "user_agents": [
                "EasyScan",
                "EasyScan/1.0"
            ],
            "primary_user_agent": "EasyScan",
            "robots_token": "EasyScan",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Automated end-user content scanning for legal risk review.",
            "short_description": "Automated scanning service that reviews online content on behalf of end users to identify potential legal issues.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: EasyScan\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"EasyScan\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.it-recht-kanzlei.de/website-scanner-fuer-mandanten.php"
                }
            ],
            "last_verified": "2026-04-19",
            "last_checked": "2026-04-19",
            "updated_at": "2026-04-30T09:15:33Z"
        },
        {
            "id": "echobot-bot",
            "name": "Echobot Bot",
            "slug": "echobot-bot",
            "url": "https://botcrawl.com/bots/echobot-bot/",
            "status": "active",
            "operator": "Echobox",
            "company": "Echobox",
            "family": "Echobox",
            "category": "ai",
            "kind": "crawler",
            "purpose": "general-crawl",
            "identity_type": "unknown",
            "user_agents": [
                "w4mwnpbXf3MFAbxOkJRw",
                "Mozilla/5.0 (compatible",
                "EchoboxBot/1.0",
                "hash/w4mwnpbXf3MFAbxOkJRw",
                "+http://www.echobox.com)"
            ],
            "primary_user_agent": "w4mwnpbXf3MFAbxOkJRw",
            "robots_token": "w4mwnpbXf3MFAbxOkJRw",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "We scrape full article/page content to ensure we can optimally automate the content distribution for the digital publishers we work with. Every single article a publisher releases will get scraped approx. 2-4 times by independent services.",
            "short_description": "We scrape full article/page content to ensure we can optimally automate the content distribution for the digital publishers we work with.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: w4mwnpbXf3MFAbxOkJRw\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"w4mwnpbXf3MFAbxOkJRw\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.echobox.com/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:33Z"
        },
        {
            "id": "ecovadissustainabilitybot",
            "name": "EcoVadisSustainabilityBot",
            "slug": "ecovadissustainabilitybot",
            "url": "https://botcrawl.com/bots/ecovadissustainabilitybot/",
            "status": "active",
            "operator": "EcoVadis",
            "company": "EcoVadis",
            "family": "EcoVadis",
            "category": "search",
            "kind": "unknown",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "EcoVadisSustainabilityBot",
                "Mozilla/5.0 (compatible",
                "+https://ecovadis.com)",
                "EcoVadis Bot"
            ],
            "primary_user_agent": "EcoVadisSustainabilityBot",
            "robots_token": "EcoVadisSustainabilityBot",
            "verified": "unknown",
            "respects_robots": "unknown",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "unknown",
            "source_type": "unknown",
            "short_description": "EcoVadis sustainability intelligence crawler that assesses ESG performance of companies.",
            "verification_method": "No official IP-range or reverse-DNS verification method is documented here. Match the exact user-agent and request behavior against the source documentation at https://ecovadis.com.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: EcoVadisSustainabilityBot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"EcoVadisSustainabilityBot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://ecovadis.com"
                }
            ],
            "updated_at": "2026-04-01T00:55:22Z"
        },
        {
            "id": "elastic-crawler",
            "name": "Elastic Crawler",
            "slug": "elastic-crawler",
            "url": "https://botcrawl.com/bots/elastic-crawler/",
            "status": "active",
            "operator": "Elastic",
            "company": "Elastic",
            "family": "Elastic",
            "category": "search",
            "kind": "unknown",
            "purpose": "indexing",
            "identity_type": "unknown",
            "user_agents": [
                "Elastic Crawler",
                "Mozilla/5.0 (compatible; Elastic Crawler; +https://www.elastic.co/app-search/)",
                "Elastic Bot"
            ],
            "primary_user_agent": "Elastic Crawler",
            "robots_token": "Elastic Crawler",
            "verified": "unknown",
            "respects_robots": "yes",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "unknown",
            "source_type": "unknown",
            "short_description": "Elastic App Search crawler that indexes website content for Elastic's search platform.",
            "verification_method": "No official IP-range or reverse-DNS verification method is documented here. Match the exact user-agent and request behavior against the source documentation at https://www.elastic.co/app-search/.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: Elastic Crawler\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Elastic Crawler\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.elastic.co/app-search/"
                }
            ],
            "updated_at": "2026-04-01T00:55:37Z"
        },
        {
            "id": "elmah-io-uptime-monitoring",
            "name": "elmah.io Uptime Monitoring",
            "slug": "elmah-io-uptime-monitoring",
            "url": "https://botcrawl.com/bots/elmah-io-uptime-monitoring/",
            "status": "active",
            "operator": "elmah.io",
            "company": "elmah.io",
            "family": "elmah.io",
            "category": "monitoring",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "elmahio-uptimebot",
                "elmahio-uptimebot/2.0"
            ],
            "primary_user_agent": "elmahio-uptimebot",
            "robots_token": "elmahio-uptimebot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "elmah.io Uptime Monitoring bot is a heartbeat tool that monitors the availability of their users' websites.",
            "short_description": "elmah.io Uptime Monitoring bot is a heartbeat tool that monitors the availability of their users' websites.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: elmahio-uptimebot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"elmahio-uptimebot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://elmah.io"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:33Z"
        },
        {
            "id": "embedly",
            "name": "Embedly",
            "slug": "embedly",
            "url": "https://botcrawl.com/bots/embedly/",
            "status": "active",
            "operator": "Embedly",
            "company": "Embedly",
            "family": "Embedly",
            "category": "scraper",
            "kind": "unknown",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "Embedly",
                "Mozilla/5.0 (compatible; Embedly/0.2; +http://support.embed.ly/)",
                "Embedly Bot"
            ],
            "primary_user_agent": "Embedly",
            "robots_token": "Embedly",
            "verified": "unknown",
            "respects_robots": "yes",
            "risk": "neutral",
            "recommended_action": "allow",
            "confidence": "unknown",
            "source_type": "unknown",
            "short_description": "Embedly link preview service that fetches URLs to generate rich media previews.",
            "verification_method": "No official IP-range or reverse-DNS verification method is documented here. Match the exact user-agent and request behavior against the source documentation at https://embed.ly.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: Embedly\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Embedly\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://embed.ly"
                }
            ],
            "updated_at": "2026-04-01T00:55:35Z"
        },
        {
            "id": "emoney-advisor",
            "name": "eMoney Advisor",
            "slug": "emoney-advisor",
            "url": "https://botcrawl.com/bots/emoney-advisor/",
            "status": "active",
            "operator": "eMoney Advisor",
            "company": "eMoney Advisor",
            "family": "eMoney Advisor",
            "category": "scraper",
            "kind": "crawler",
            "purpose": "indexing",
            "identity_type": "unknown",
            "user_agents": [
                "eMoneyBot",
                "eMoneyBot/1.0 (+https://emoneyadvisor.com/DataAggregationNotice/)",
                "eMoneyBot/"
            ],
            "primary_user_agent": "eMoneyBot",
            "robots_token": "eMoneyBot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Collects raw financial data that can later be used for financial planning and analysis",
            "short_description": "Collects raw financial data that can later be used for financial planning and analysis",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: eMoneyBot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"eMoneyBot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://emoneyadvisor.com/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:33Z"
        },
        {
            "id": "epivoz-crawler",
            "name": "Epivoz Crawler",
            "slug": "epivoz-crawler",
            "url": "https://botcrawl.com/bots/epivoz-crawler/",
            "status": "active",
            "operator": "Techmeme",
            "company": "Techmeme",
            "family": "Techmeme",
            "category": "scraper",
            "kind": "crawler",
            "purpose": "indexing",
            "identity_type": "unknown",
            "user_agents": [
                "EpivozCrawler",
                "EpivozCrawler/1.7",
                "EpivozCrawler/"
            ],
            "primary_user_agent": "EpivozCrawler",
            "robots_token": "EpivozCrawler",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "News aggregator needs to crawl news/blog articles to generate short summaries for page preview of attributed links.",
            "short_description": "News aggregator needs to crawl news/blog articles to generate short summaries for page preview of attributed links.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: EpivozCrawler\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"EpivozCrawler\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.techmeme.com/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:33Z"
        },
        {
            "id": "erepublik-tools",
            "name": "eRepublik.tools",
            "slug": "erepublik-tools",
            "url": "https://botcrawl.com/bots/erepublik-tools/",
            "status": "active",
            "operator": "Sebastian Foth - Software Solutions",
            "company": "Sebastian Foth - Software Solutions",
            "family": "Sebastian Foth - Software Solutions",
            "category": "monitoring",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "eRepublik.tools",
                "eRepublik.tools - Multithreaded Crawler - Organization Profile - v0.0.1",
                "eRepublik.tools - Multithreaded Crawler - Leaderboard - v0.1.0",
                "eRepublik.tools Crawler/1.0 (Sebastian Foth - Software Solutions)"
            ],
            "primary_user_agent": "eRepublik.tools",
            "robots_token": "eRepublik.tools",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Search engine providing gaming statistics and tools for the browser game \"eRepublik\".",
            "short_description": "Search engine providing gaming statistics and tools for the browser game \"eRepublik\".",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: eRepublik.tools\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"eRepublik.tools\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://erepublik.tools"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:33Z"
        },
        {
            "id": "etaospider",
            "name": "EtaoSpider",
            "slug": "etaospider",
            "url": "https://botcrawl.com/bots/etaospider/",
            "status": "active",
            "operator": "Etao",
            "company": "Etao",
            "family": "Etao",
            "category": "search",
            "kind": "unknown",
            "purpose": "general-crawl",
            "identity_type": "unknown",
            "user_agents": [
                "EtaoSpider"
            ],
            "primary_user_agent": "EtaoSpider",
            "robots_token": "EtaoSpider",
            "verified": "unknown",
            "respects_robots": "unknown",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "unknown",
            "source_type": "unknown",
            "short_description": "Etao crawler token surfaced in Sogou robots.txt.",
            "verification_method": "This token is surfaced in a Sogou-owned robots.txt file, but the source used for this entry does not publish a dedicated verification method. Confirm behavior conservatively and do not rely on the user-agent string alone.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: EtaoSpider\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"EtaoSpider\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://weixin.sogou.com/robots.txt"
                }
            ],
            "updated_at": "2026-04-01T01:04:49Z"
        },
        {
            "id": "evouptimebot",
            "name": "EvoUptimeBot",
            "slug": "evouptimebot",
            "url": "https://botcrawl.com/bots/evouptimebot/",
            "status": "active",
            "operator": "Evo Agency Ltd.",
            "company": "Evo Agency Ltd.",
            "family": "Evo Agency Ltd.",
            "category": "monitoring",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "EvoUptimeBot",
                "EvoUptimeBot/1.0",
                "EvoUptimeBot/"
            ],
            "primary_user_agent": "EvoUptimeBot",
            "robots_token": "EvoUptimeBot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Built-in uptime monitoring for the EvoCommerce platform",
            "short_description": "Built-in uptime monitoring for the EvoCommerce platform",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: EvoUptimeBot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"EvoUptimeBot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.evo.agency"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:33Z"
        },
        {
            "id": "exabot",
            "name": "ExaBot",
            "slug": "exabot",
            "url": "https://botcrawl.com/bots/exabot/",
            "status": "active",
            "operator": "Exa",
            "company": "Exa",
            "family": "Exa",
            "category": "ai",
            "kind": "unknown",
            "purpose": "indexing",
            "identity_type": "unknown",
            "user_agents": [
                "ExaBot",
                "Mozilla/5.0 (compatible; ExaBot/1.0; +https://exa.ai/crawler)",
                "Exa Search Bot"
            ],
            "primary_user_agent": "ExaBot",
            "robots_token": "ExaBot",
            "verified": "yes",
            "respects_robots": "yes",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "unknown",
            "source_type": "unknown",
            "short_description": "Exa AI search crawler that indexes web content to power semantic search APIs.",
            "verification_method": "Treat this entry as verified only when the exact user-agent matches the operator documentation at https://exa.ai/crawler.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: ExaBot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"ExaBot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://exa.ai/crawler"
                }
            ],
            "updated_at": "2026-04-01T00:55:18Z"
        },
        {
            "id": "exodus",
            "name": "Exodus",
            "slug": "exodus",
            "url": "https://botcrawl.com/bots/exodus/",
            "status": "active",
            "operator": "Exodus",
            "company": "Exodus",
            "family": "Exodus",
            "category": "unknown",
            "kind": "fetcher",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "ExodusMovement",
                "ExodusMovement/1.0 GlobalCoinHeight/1.0"
            ],
            "primary_user_agent": "ExodusMovement",
            "robots_token": "ExodusMovement",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "A crypto wallet application to manage cryptocurrencies like Bitcoin, Ethereum, Ripple, and more. Secure.",
            "short_description": "A crypto wallet application to manage cryptocurrencies like Bitcoin, Ethereum, Ripple, and more.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: ExodusMovement\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"ExodusMovement\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.exodus.io/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:33Z"
        },
        {
            "id": "ezoicbot",
            "name": "EzoicBot",
            "slug": "ezoicbot",
            "url": "https://botcrawl.com/bots/ezoicbot/",
            "status": "active",
            "operator": "Ezoic Inc",
            "company": "Ezoic Inc",
            "family": "Ezoic Inc",
            "category": "scraper",
            "kind": "crawler",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "http://www.ezoic.com/bot.html",
                "Mozilla/5.0 (Linux; Android 8.0; Pixel 2 Build/OPD3.170816.012) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.121 Mobile Safari/537.36 (compatible; EzLynx/0.1; +http://www.ezoic.com/bot.html)"
            ],
            "primary_user_agent": "http://www.ezoic.com/bot.html",
            "robots_token": "http://www.ezoic.com/bot.html",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Ezoic is a technology platform for digital publishers. You can learn more about what Ezoic does here. EzoicBot is our web crawler designed to extract valuable information about how the internet, search engines, and websites all work together. EzoicBot can help publishers better understand how their sites work. This includes the ability for search engines, like Google, to index and rank their content.",
            "short_description": "Ezoic is a technology platform for digital publishers.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: http://www.ezoic.com/bot.html\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"http://www.ezoic.com/bot.html\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.ezoic.com/bot/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:33Z"
        },
        {
            "id": "facebook-webhooks",
            "name": "Facebook Webhooks",
            "slug": "facebook-webhooks",
            "url": "https://botcrawl.com/bots/facebook-webhooks/",
            "status": "active",
            "operator": "Meta",
            "company": "Meta",
            "family": "Meta",
            "category": "unknown",
            "kind": "unknown",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "facebook-webhooks",
                "Facebook Webhooks"
            ],
            "primary_user_agent": "facebook-webhooks",
            "robots_token": "facebook-webhooks",
            "verified": "yes",
            "respects_robots": "unknown",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "botsfyi-verified-listing",
            "common_use": "Webhook",
            "short_description": "Facebook's webhook service that delivers real-time event notifications for Meta platform events and changes.",
            "verification_method": "Public verified bot directory listing",
            "spoofing_risk": "Medium",
            "rules": {
                "cloudflare": "(http.user_agent contains \"facebook-webhooks\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://bots.fyi/d/facebook-webhooks"
                }
            ],
            "last_verified": "2026-04-19",
            "last_checked": "2026-04-19",
            "updated_at": "2026-04-30T09:15:33Z"
        },
        {
            "id": "facebookbot",
            "name": "FacebookBot",
            "slug": "facebookbot",
            "url": "https://botcrawl.com/bots/facebookbot/",
            "status": "active",
            "operator": "Meta",
            "company": "Meta",
            "family": "Meta",
            "category": "scraper",
            "kind": "crawler",
            "purpose": "general-crawl",
            "identity_type": "unknown",
            "user_agents": [
                "FacebookBot",
                "Facebook Bot",
                "Meta FacebookBot"
            ],
            "primary_user_agent": "FacebookBot",
            "robots_token": "FacebookBot",
            "verified": "yes",
            "respects_robots": "yes",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "short_description": "Meta crawler identifier for Facebook ecosystem traffic.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or reverse DNS/IP verification when available.",
            "rules": {
                "robots": "User-agent: FacebookBot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"FacebookBot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://crawlercheck.com/directory/social-bots/facebookbot"
                }
            ],
            "last_verified": "2026-04-29",
            "last_checked": "2026-04-29",
            "updated_at": "2026-04-30T09:15:55Z"
        },
        {
            "id": "facebookexternalhit",
            "name": "FacebookExternalHit",
            "slug": "facebookexternalhit",
            "url": "https://botcrawl.com/bots/facebookexternalhit/",
            "status": "active",
            "operator": "Meta",
            "company": "Meta",
            "family": "Meta",
            "category": "scraper",
            "kind": "preview",
            "purpose": "general-crawl",
            "identity_type": "unknown",
            "user_agents": [
                "facebookexternalhit",
                "facebookexternalhit/1.1 (+http://www.facebook.com/externalhit_uatext.php)",
                "facebookexternalhit/1.1",
                "facebookcatalog/1.0",
                "Meta preview crawler",
                "facebookcatalog"
            ],
            "primary_user_agent": "facebookexternalhit",
            "robots_token": "facebookexternalhit",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "The primary purpose of FacebookExternalHit is to crawl the content of an app or website that was shared on one of Meta’s family of apps, such as Facebook, Instagram, or Messenger. The link might have been shared by copying and pasting or by using the Facebook social plugin. This crawler gathers, caches, and displays information about the app or website such as its title, description, and thumbnail image.",
            "short_description": "The primary purpose of FacebookExternalHit is to crawl the content of an app or website that was shared on one of Meta’s family of apps, such as Facebook, Instagram, or…",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: facebookexternalhit\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"facebookexternalhit\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://developers.facebook.com/docs/sharing/webmasters/web-crawlers"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:33Z"
        },
        {
            "id": "facebot",
            "name": "Facebot",
            "slug": "facebot",
            "url": "https://botcrawl.com/bots/facebot/",
            "status": "active",
            "operator": "Meta",
            "company": "Meta",
            "family": "Meta",
            "category": "scraper",
            "kind": "unknown",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "facebot",
                "Facebook Bot",
                "Facebook Crawler"
            ],
            "primary_user_agent": "facebot",
            "robots_token": "facebot",
            "verified": "unknown",
            "respects_robots": "yes",
            "risk": "neutral",
            "recommended_action": "allow",
            "confidence": "unknown",
            "source_type": "unknown",
            "short_description": "Facebook's link preview bot that fetches URLs shared on Facebook.",
            "verification_method": "Verify the exact user-agent against Meta's crawler documentation and confirm Meta ownership where possible.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: facebot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"facebot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://developers.facebook.com/docs/sharing/webmasters/crawler"
                }
            ],
            "updated_at": "2026-04-01T00:55:35Z"
        },
        {
            "id": "factset_spyderbot",
            "name": "Factset_spyderbot",
            "slug": "factset_spyderbot",
            "url": "https://botcrawl.com/bots/factset_spyderbot/",
            "status": "active",
            "operator": "Factset",
            "company": "Factset",
            "family": "Factset",
            "category": "ai",
            "kind": "crawler",
            "purpose": "general-crawl",
            "identity_type": "unknown",
            "user_agents": [
                "factset_spyderbot",
                "Mozilla/5.0(windows NT 10.0; Win64; x64) AppleWebkit/537.36(KHTML, like Gecko) Chrome/90.0.4430.72 Safari/537.36 factset_spyderbot"
            ],
            "primary_user_agent": "factset_spyderbot",
            "robots_token": "factset_spyderbot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Factset uses a Python Selenium Crawler for web scraping to deliver reliable, current financial data.",
            "short_description": "Factset uses a Python Selenium Crawler for web scraping to deliver reliable, current financial data.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: factset_spyderbot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"factset_spyderbot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.factset.com/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:33Z"
        },
        {
            "id": "falbot",
            "name": "FalBot",
            "slug": "falbot",
            "url": "https://botcrawl.com/bots/falbot/",
            "status": "active",
            "operator": "fal.ai",
            "company": "fal.ai",
            "family": "fal.ai",
            "category": "unknown",
            "kind": "unknown",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "falbot"
            ],
            "primary_user_agent": "falbot",
            "robots_token": "falbot",
            "verified": "yes",
            "respects_robots": "unknown",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "botsfyi-verified-listing",
            "common_use": "Webhook",
            "short_description": "fal.ai's webhook service that delivers asynchronous notifications for AI model processing and generation tasks.",
            "verification_method": "Public verified bot directory listing",
            "spoofing_risk": "Medium",
            "rules": {
                "cloudflare": "(http.user_agent contains \"falbot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://bots.fyi/d/falbot"
                }
            ],
            "last_verified": "2026-04-19",
            "last_checked": "2026-04-19",
            "updated_at": "2026-04-30T09:15:33Z"
        },
        {
            "id": "fastmail-bot",
            "name": "Fastmail Bot",
            "slug": "fastmail-bot",
            "url": "https://botcrawl.com/bots/fastmail-bot/",
            "status": "active",
            "operator": "Fastmail",
            "company": "Fastmail",
            "family": "Fastmail",
            "category": "scraper",
            "kind": "preview",
            "purpose": "general-crawl",
            "identity_type": "unknown",
            "user_agents": [
                "FastmailUA",
                "FastmailUA/1.0",
                "Mozilla/5.0 (X11",
                "Linux x86_64) AppleWebKit/537.36 (KHTML",
                "like Gecko) Chrome/118.0.5993.117 Safari/537.36 FastmailUA/1.0"
            ],
            "primary_user_agent": "FastmailUA",
            "robots_token": "FastmailUA",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Fastmail fetch and image proxy bot",
            "short_description": "Fastmail fetch and image proxy bot",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: FastmailUA\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"FastmailUA\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.fastmail.com/policies/bots/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:33Z"
        },
        {
            "id": "fdl-stats-bots",
            "name": "FDL Stats Bots",
            "slug": "fdl-stats-bots",
            "url": "https://botcrawl.com/bots/fdl-stats-bots/",
            "status": "active",
            "operator": "FTW Entertainment LLC",
            "company": "FTW Entertainment LLC",
            "family": "FTW Entertainment LLC",
            "category": "monitoring",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "FDL Stats Bot"
            ],
            "primary_user_agent": "FDL Stats Bot",
            "robots_token": "FDL Stats Bot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "FDL Stats Bot is used to generate analytical data around Rocket League player information. The bot attempts to crawl various platforms that provide publicly available player data to build stats about player participation.",
            "short_description": "FDL Stats Bot is used to generate analytical data around Rocket League player information.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: FDL Stats Bot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"FDL Stats Bot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://ftwentertainment.com"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:33Z"
        },
        {
            "id": "fedicabot",
            "name": "Fedicabot",
            "slug": "fedicabot",
            "url": "https://botcrawl.com/bots/fedicabot/",
            "status": "active",
            "operator": "Fedica",
            "company": "Fedica",
            "family": "Fedica",
            "category": "scraper",
            "kind": "crawler",
            "purpose": "general-crawl",
            "identity_type": "unknown",
            "user_agents": [
                "Fedica",
                "Fedicabot",
                "FedicaApp"
            ],
            "primary_user_agent": "Fedica",
            "robots_token": "Fedica",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Fedica\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Fedica\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://fedica.com/info/fedicabot"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:33Z"
        },
        {
            "id": "fedreporter-bot-for-ffiec",
            "name": "FedReporter Bot for FFIEC",
            "slug": "fedreporter-bot-for-ffiec",
            "url": "https://botcrawl.com/bots/fedreporter-bot-for-ffiec/",
            "status": "active",
            "operator": "Fed Reporter, Inc.",
            "company": "Fed Reporter, Inc.",
            "family": "Fed Reporter, Inc.",
            "category": "scraper",
            "kind": "crawler",
            "purpose": "indexing",
            "identity_type": "unknown",
            "user_agents": [
                "FedReporterDataBot",
                "FedReporterDataBot/1.0 (+https://fedreporter.net/FedReporterBotDocumentation",
                "contact: support@fedreporter.net)",
                "FedReporterDataBot-Testing/0.1 (+https://dev-pipe1.fedreporter.net/FedReporterBotDocumentation"
            ],
            "primary_user_agent": "FedReporterDataBot",
            "robots_token": "FedReporterDataBot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Bot to download data from the FFIEC: Active/Closed/Branches File, Holding Company Data, 002 Data",
            "short_description": "Bot to download data from the FFIEC: Active/Closed/Branches File, Holding Company Data, 002 Data",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: FedReporterDataBot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"FedReporterDataBot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://fedreporter.net/FedReporterBotDocumentation/Readme.txt"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:33Z"
        },
        {
            "id": "feedbin",
            "name": "Feedbin",
            "slug": "feedbin",
            "url": "https://botcrawl.com/bots/feedbin/",
            "status": "active",
            "operator": "Feedbin",
            "company": "Feedbin",
            "family": "Feedbin",
            "category": "search",
            "kind": "fetcher",
            "purpose": "indexing",
            "identity_type": "unknown",
            "user_agents": [
                "Feedbin",
                "Feedbin 3",
                "Feedbin feed-id:311 - 1 subscribers"
            ],
            "primary_user_agent": "Feedbin",
            "robots_token": "Feedbin",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Feedbin's RSS reader service.",
            "short_description": "Feedbin's RSS reader service.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Feedbin\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Feedbin\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://feedbin.com/help"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:33Z"
        },
        {
            "id": "feeder",
            "name": "Feeder",
            "slug": "feeder",
            "url": "https://botcrawl.com/bots/feeder/",
            "status": "active",
            "operator": "Really Simple AB",
            "company": "Really Simple AB",
            "family": "Really Simple AB",
            "category": "search",
            "kind": "fetcher",
            "purpose": "indexing",
            "identity_type": "unknown",
            "user_agents": [
                "feeder.co",
                "Mozilla/5.0 (feeder.co",
                "Macintosh",
                "Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML",
                "like Gecko) Chrome/106.0.0.0 Safari/537.36"
            ],
            "primary_user_agent": "feeder.co",
            "robots_token": "feeder.co",
            "verified": "yes",
            "respects_robots": "yes",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "A cloud-based RSS reader with over 500 000 users subscribed to over 3 million feed URLs",
            "short_description": "A cloud-based RSS reader with over 500 000 users subscribed to over 3 million feed URLs",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: feeder.co\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"feeder.co\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://feeder.co"
                }
            ],
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:33Z"
        },
        {
            "id": "feedfetcher-google",
            "name": "FeedFetcher-Google",
            "slug": "feedfetcher-google",
            "url": "https://botcrawl.com/bots/feedfetcher-google/",
            "status": "active",
            "operator": "Google",
            "company": "Google",
            "family": "Google",
            "category": "search",
            "kind": "crawler",
            "purpose": "user-triggered",
            "identity_type": "official-documented",
            "user_agents": [
                "FeedFetcher-Google",
                "(+http://www.google.com/feedfetcher.html)",
                "Google feed fetcher"
            ],
            "primary_user_agent": "FeedFetcher-Google",
            "robots_token": "FeedFetcher-Google",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "high",
            "source_type": "official",
            "short_description": "Google fetcher for RSS and Atom feeds.",
            "verification_method": "Verify as a Google fetcher using reverse DNS and Google's published user-triggered fetcher IP ranges.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "ip_ranges_url": "https://developers.google.com/static/search/apis/ipranges/user-triggered-fetchers-google.json",
            "rules": {
                "robots": "Serve a 404 or 410 response to FeedFetcher-Google requests if you need to stop feed retrieval.",
                "cloudflare": "(http.user_agent contains \"FeedFetcher-Google\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://developers.google.com/crawling/docs/crawlers-fetchers/feedfetcher"
                },
                {
                    "type": "operator",
                    "url": "https://developers.google.com/crawling/docs/crawlers-fetchers/google-common-crawlers"
                }
            ],
            "last_verified": "2026-04-01",
            "updated_at": "2026-04-30T09:15:34Z"
        },
        {
            "id": "feedfetcher-mojeek",
            "name": "FeedFetcher-Mojeek",
            "slug": "feedfetcher-mojeek",
            "url": "https://botcrawl.com/bots/feedfetcher-mojeek/",
            "status": "active",
            "operator": "Mojeek",
            "company": "Mojeek",
            "family": "Mojeek",
            "category": "search",
            "kind": "unknown",
            "purpose": "indexing",
            "identity_type": "unknown",
            "user_agents": [
                "FeedFetcher-Mojeek",
                "Mojeek feed crawler"
            ],
            "primary_user_agent": "FeedFetcher-Mojeek",
            "robots_token": "FeedFetcher-Mojeek",
            "verified": "yes",
            "respects_robots": "unknown",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "unknown",
            "source_type": "unknown",
            "short_description": "Mojeek RSS/Atom feed crawler used for Mojeek news search.",
            "verification_method": "Verify with a reverse DNS lookup that resolves to mojeek.com, then confirm with a forward lookup back to the original IP as described in Mojeek's documentation.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: FeedFetcher-Mojeek\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"FeedFetcher-Mojeek\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.mojeek.com/support/search/crawling/feedfetcher.html"
                }
            ],
            "updated_at": "2026-04-01T01:04:46Z"
        },
        {
            "id": "feedly",
            "name": "Feedly",
            "slug": "feedly",
            "url": "https://botcrawl.com/bots/feedly/",
            "status": "active",
            "operator": "Feedly",
            "company": "Feedly",
            "family": "Feedly",
            "category": "search",
            "kind": "fetcher",
            "purpose": "indexing",
            "identity_type": "unknown",
            "user_agents": [
                "Feedly/1",
                "Feedly/1.0 (+http://www.feedly.com/fetcher.html",
                "1 subscribers",
                "like FeedFetcher-Google)"
            ],
            "primary_user_agent": "Feedly/1",
            "robots_token": "Feedly/1",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Feedly RSS fetcher service.",
            "short_description": "Feedly RSS fetcher service.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Feedly/1\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Feedly/1\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.feedly.com/fetcher.html"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:34Z"
        },
        {
            "id": "feedotter",
            "name": "FeedOtter",
            "slug": "feedotter",
            "url": "https://botcrawl.com/bots/feedotter/",
            "status": "active",
            "operator": "FeedOtter",
            "company": "FeedOtter",
            "family": "FeedOtter",
            "category": "unknown",
            "kind": "fetcher",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "FeedOtter",
                "Mozilla/5.0 (Windows NT 10.0",
                "Win64",
                "x64) AppleWebKit/537.36 (KHTML",
                "like Gecko",
                "compatible",
                "FeedOtter/2.1",
                "+http://www.feedotter.com/privacy) Chrome/126.0.0.0 Safari/537.36"
            ],
            "primary_user_agent": "FeedOtter",
            "robots_token": "FeedOtter",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "FeedOtter is a feed fetcher bot that retrieves RSS and Atom feeds for content aggregation and distribution.",
            "short_description": "FeedOtter is a feed fetcher bot that retrieves RSS and Atom feeds for content aggregation and distribution.",
            "verification_method": "Validate the published identifier against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or stronger verification when available.",
            "rules": {
                "robots": "User-agent: FeedOtter\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"FeedOtter\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "http://www.feedotter.com/privacy"
                }
            ],
            "last_verified": "2026-04-19",
            "last_checked": "2026-04-19",
            "updated_at": "2026-04-30T09:15:54Z"
        },
        {
            "id": "feedwind-crawler",
            "name": "FeedWind Crawler",
            "slug": "feedwind-crawler",
            "url": "https://botcrawl.com/bots/feedwind-crawler/",
            "status": "active",
            "operator": "Mikle",
            "company": "Mikle",
            "family": "Mikle",
            "category": "search",
            "kind": "fetcher",
            "purpose": "indexing",
            "identity_type": "unknown",
            "user_agents": [
                "Feedwind",
                "Mozilla/5.0 (compatible",
                "Feedwind/3.0",
                "+https://feed.mikle.com/support/description/)",
                "Feedwind/"
            ],
            "primary_user_agent": "Feedwind",
            "robots_token": "Feedwind",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Accesses feed sources to ensure feed widget is up to date (crawls every 5 minutes to 5 hours depending on a user's plan).",
            "short_description": "Accesses feed sources to ensure feed widget is up to date (crawls every 5 minutes to 5 hours depending on a user's plan).",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Feedwind\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Feedwind\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://feed.mikle.com"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:34Z"
        },
        {
            "id": "firecrawlagent",
            "name": "FirecrawlAgent",
            "slug": "firecrawlagent",
            "url": "https://botcrawl.com/bots/firecrawlagent/",
            "status": "active",
            "operator": "Firecrawl",
            "company": "Firecrawl",
            "family": "Firecrawl",
            "category": "ai",
            "kind": "unknown",
            "purpose": "user-triggered",
            "identity_type": "unknown",
            "user_agents": [
                "FirecrawlAgent",
                "Mozilla/5.0 (compatible",
                "FirecrawlAgent/1.0",
                "+https://firecrawl.dev)",
                "Firecrawl Bot"
            ],
            "primary_user_agent": "FirecrawlAgent",
            "robots_token": "FirecrawlAgent",
            "verified": "unknown",
            "respects_robots": "unknown",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "unknown",
            "source_type": "unknown",
            "short_description": "Firecrawl web crawler that extracts web content and converts it into structured data for AI.",
            "verification_method": "No official IP-range or reverse-DNS verification method is documented here. Match the exact user-agent and request behavior against the source documentation at https://firecrawl.dev.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: FirecrawlAgent\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"FirecrawlAgent\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://firecrawl.dev"
                }
            ],
            "updated_at": "2026-04-01T00:55:15Z"
        },
        {
            "id": "fishbot",
            "name": "FishBot",
            "slug": "fishbot",
            "url": "https://botcrawl.com/bots/fishbot/",
            "status": "active",
            "operator": "FishBot",
            "company": "FishBot",
            "family": "FishBot",
            "category": "ai",
            "kind": "crawler",
            "purpose": "general-crawl",
            "identity_type": "unknown",
            "user_agents": [
                "FishBot"
            ],
            "primary_user_agent": "FishBot",
            "robots_token": "FishBot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "FishBot crawls webpages to deliver Open Source AI for All",
            "short_description": "FishBot crawls webpages to deliver Open Source AI for All",
            "verification_method": "Validate the published identifier against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or stronger verification when available.",
            "rules": {
                "robots": "User-agent: FishBot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"FishBot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://fish.audio"
                }
            ],
            "last_verified": "2026-04-19",
            "last_checked": "2026-04-19",
            "updated_at": "2026-04-30T09:15:54Z"
        },
        {
            "id": "fit-feeds-fetcher",
            "name": "FIT Feeds Fetcher",
            "slug": "fit-feeds-fetcher",
            "url": "https://botcrawl.com/bots/fit-feeds-fetcher/",
            "status": "active",
            "operator": "follow.it",
            "company": "follow.it",
            "family": "FIT Feeds Fetcher",
            "category": "search",
            "kind": "fetcher",
            "purpose": "indexing",
            "identity_type": "unknown",
            "user_agents": [
                "follow.it",
                "FeedBurner/1.0 (http://www.FeedBurner.com) Mozilla/5.0 (Macintosh",
                "Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML",
                "like Gecko) Chrome/63.0.3239.132 Safari/537.36 Specificfeeds- http://www.specificfeeds.com https://follow.it",
                "Mozilla/5.0 (Macintosh"
            ],
            "primary_user_agent": "follow.it",
            "robots_token": "follow.it",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "This bot is used to fetch the RSS feed content of the websites owned by rightful publishers at follow.it",
            "short_description": "This bot is used to fetch the RSS feed content of the websites owned by rightful publishers at follow.it",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: follow.it\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"follow.it\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://follow.it"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:34Z"
        },
        {
            "id": "flashy-php-1-0-0",
            "name": "Flashy-PHP/1.0.0",
            "slug": "flashy-php-1-0-0",
            "url": "https://botcrawl.com/bots/flashy-php-1-0-0/",
            "status": "active",
            "operator": "Flashy-PHP/1.0.0",
            "company": "Flashy-PHP/1.0.0",
            "family": "Flashy-PHP/1.0.0",
            "category": "unknown",
            "kind": "unknown",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "Flashy-PHP",
                "Flashy-PHP/1.0.0"
            ],
            "primary_user_agent": "Flashy-PHP",
            "robots_token": "Flashy-PHP",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "All-In-One Marketing Automation Platform",
            "short_description": "All-In-One Marketing Automation Platform",
            "verification_method": "Validate the published identifier against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or stronger verification when available.",
            "rules": {
                "robots": "User-agent: Flashy-PHP\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Flashy-PHP\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://flashy.app/help/integrations/troubleshooting/ip-address-whitelisting/"
                }
            ],
            "last_verified": "2026-04-19",
            "last_checked": "2026-04-19",
            "updated_at": "2026-04-30T09:15:54Z"
        },
        {
            "id": "flipboardproxy",
            "name": "FlipboardProxy",
            "slug": "flipboardproxy",
            "url": "https://botcrawl.com/bots/flipboardproxy/",
            "status": "active",
            "operator": "Flipboard",
            "company": "Flipboard",
            "family": "Flipboard",
            "category": "scraper",
            "kind": "preview",
            "purpose": "general-crawl",
            "identity_type": "unknown",
            "user_agents": [
                "FlipboardProxy",
                "Mozilla/5.0 (Macintosh",
                "Intel Mac OS X 10.11",
                "rv:49.0) Gecko/20100101 Firefox/49.0 (FlipboardProxy/1.2",
                "+http://flipboard.com/browserproxy)"
            ],
            "primary_user_agent": "FlipboardProxy",
            "robots_token": "FlipboardProxy",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "We work with publishers/partners to obtain their content so it is formatted for the Flipboard app.",
            "short_description": "We work with publishers/partners to obtain their content so it is formatted for the Flipboard app.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: FlipboardProxy\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"FlipboardProxy\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://about.flipboard.com/browserproxy/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:34Z"
        },
        {
            "id": "flipboardrss",
            "name": "FlipboardRSS",
            "slug": "flipboardrss",
            "url": "https://botcrawl.com/bots/flipboardrss/",
            "status": "active",
            "operator": "Flipboard",
            "company": "Flipboard",
            "family": "Flipboard",
            "category": "search",
            "kind": "fetcher",
            "purpose": "indexing",
            "identity_type": "unknown",
            "user_agents": [
                "FlipboardRSS",
                "Mozilla/5.0 (compatible",
                "FlipboardRSS/1.2",
                "+http://flipboard.com/browserproxy)"
            ],
            "primary_user_agent": "FlipboardRSS",
            "robots_token": "FlipboardRSS",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Flipboard will use visitor's RSS feed to discover articles and generate article summaries",
            "short_description": "Flipboard will use visitor's RSS feed to discover articles and generate article summaries",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: FlipboardRSS\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"FlipboardRSS\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://about.flipboard.com/rss-guidelines/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:34Z"
        },
        {
            "id": "flockwp",
            "name": "FlockWP",
            "slug": "flockwp",
            "url": "https://botcrawl.com/bots/flockwp/",
            "status": "active",
            "operator": "FlockWP",
            "company": "FlockWP",
            "family": "FlockWP",
            "category": "ai",
            "kind": "agent",
            "purpose": "user-triggered",
            "identity_type": "unknown",
            "user_agents": [
                "FlockWP",
                "Mozilla/5.0 (compatible",
                "FlockWP/1.0",
                "+https://flockwp.com)"
            ],
            "primary_user_agent": "FlockWP",
            "robots_token": "FlockWP",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Manages WordPress sites on behalf of authenticated owners. Syncs data, applies updates, and monitors uptime.",
            "short_description": "Manages WordPress sites on behalf of authenticated owners. Syncs data, applies updates, and monitors uptime.",
            "verification_method": "Validate the published identifier against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or stronger verification when available.",
            "rules": {
                "robots": "User-agent: FlockWP\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"FlockWP\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://flockwp.com"
                }
            ],
            "last_verified": "2026-04-19",
            "last_checked": "2026-04-19",
            "updated_at": "2026-04-30T09:15:54Z"
        },
        {
            "id": "foregenix-threatview-webscan",
            "name": "Foregenix ThreatView/WebScan",
            "slug": "foregenix-threatview-webscan",
            "url": "https://botcrawl.com/bots/foregenix-threatview-webscan/",
            "status": "active",
            "operator": "Foregenix Limited",
            "company": "Foregenix Limited",
            "family": "Foregenix Limited",
            "category": "monitoring",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "Foregenix",
                "Mozilla/5.0 (Windows NT 10.0",
                "Win64",
                "x64) AppleWebKit/537.36 (KHTML",
                "like Gecko",
                "Foregenix) Chrome/91.0.4472.77 Safari/537.36",
                "Foregenix ThreatView Security Auditor (threatview.app)"
            ],
            "primary_user_agent": "Foregenix",
            "robots_token": "Foregenix",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Foregenix perform security and risk scanning on the web sites of eCommerce merchants for a number of banks and card brands globally. The service assists these organisations in controlling and identifying fraud and financial losses, with a particular focus on trying to identify compromised merchants before they end up in the card brand's compromise investigation process. Early detection (prior to fraud losses escalating) can save the banks and merchants alike considerable sums. The solution has two primary modes of operation Scanning for active malware, this normally entails pulling a very limited number of pages within a sandboxed context for analysis at various stages of DOM initialisation. From the target sites perspective, the operation is simply another browser requesting a small number of pages as normal. Scanning for known publicly exploitable vulnerabilities and outdated software solutions as these attributes are frequently exploited by threat actors to introduce malware targeting financial information. Typically a complete scan comprises less than one hundred requests and is already rate limited on our side. Scanning is always \"passive\" in nature, relying on GET, HEAD and OPTIONS requests only. The scanning heads by default abide by the \"robots.txt\" file but this can be overridden by the scan initiator (usually one of our banking clients). This override, to force a scan/assessment is not actioned all that frequently.",
            "short_description": "Foregenix perform security and risk scanning on the web sites of eCommerce merchants for a number of banks and card brands globally.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Foregenix\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Foregenix\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.foregenix.com"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:34Z"
        },
        {
            "id": "foureff",
            "name": "foureff",
            "slug": "foureff",
            "url": "https://botcrawl.com/bots/foureff/",
            "status": "active",
            "operator": "BITS - Benedikt IT Services e.U.",
            "company": "BITS - Benedikt IT Services e.U.",
            "family": "BITS",
            "category": "ai",
            "kind": "agent",
            "purpose": "user-triggered",
            "identity_type": "unknown",
            "user_agents": [
                "4f.at/crawler"
            ],
            "primary_user_agent": "4f.at/crawler",
            "robots_token": "4f.at/crawler",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "4F operates a web crawler on behalf of its users. It continuously monitors sites for broken links and failing assets.",
            "short_description": "4F operates a web crawler on behalf of its users. It continuously monitors sites for broken links and failing assets.",
            "verification_method": "Validate the published identifier against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or stronger verification when available.",
            "rules": {
                "robots": "User-agent: 4f.at/crawler\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"4f.at/crawler\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.4f.at"
                }
            ],
            "last_verified": "2026-04-19",
            "last_checked": "2026-04-19",
            "updated_at": "2026-04-30T09:15:54Z"
        },
        {
            "id": "freespoke",
            "name": "Freespoke",
            "slug": "freespoke",
            "url": "https://botcrawl.com/bots/freespoke/",
            "status": "active",
            "operator": "Freespoke",
            "company": "Freespoke",
            "family": "Freespoke",
            "category": "search",
            "kind": "crawler",
            "purpose": "indexing",
            "identity_type": "unknown",
            "user_agents": [
                "crawler.freespoke.com",
                "Mozilla/5.0 (compatible",
                "Freespoke/2.0",
                "+https://crawler.freespoke.com)"
            ],
            "primary_user_agent": "crawler.freespoke.com",
            "robots_token": "crawler.freespoke.com",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Freespoke is a search engine that believes in free speech and shows you all viewpoints.",
            "short_description": "Freespoke is a search engine that believes in free speech and shows you all viewpoints.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: crawler.freespoke.com\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"crawler.freespoke.com\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://docs.freespoke.com/search/bot/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:34Z"
        },
        {
            "id": "freshbot",
            "name": "FreshBot",
            "slug": "freshbot",
            "url": "https://botcrawl.com/bots/freshbot/",
            "status": "active",
            "operator": "Seznam",
            "company": "Seznam",
            "family": "Seznam",
            "category": "search",
            "kind": "unknown",
            "purpose": "indexing",
            "identity_type": "unknown",
            "user_agents": [
                "FreshBot",
                "Seznam FreshBot"
            ],
            "primary_user_agent": "FreshBot",
            "robots_token": "FreshBot",
            "verified": "unknown",
            "respects_robots": "unknown",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "unknown",
            "source_type": "unknown",
            "short_description": "Seznam fast-refresh crawler that revisits news pages and RSS sources where new content appears.",
            "verification_method": "Match the FreshBot token with Seznam’s crawler documentation and confirm Seznam ownership where possible. Seznam’s published crawler guidance is the primary source for this entry.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: FreshBot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"FreshBot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://o-seznam.cz/napoveda/vyhledavani/seznambot/"
                }
            ],
            "updated_at": "2026-04-01T01:04:45Z"
        },
        {
            "id": "freshping",
            "name": "Freshping",
            "slug": "freshping",
            "url": "https://botcrawl.com/bots/freshping/",
            "status": "active",
            "operator": "freshping",
            "company": "freshping",
            "family": "freshping",
            "category": "monitoring",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "Freshping",
                "FreshpingBot/1.0 (+https://freshping.io/)",
                "FreshpingBot/1.0",
                "FreshpingBot"
            ],
            "primary_user_agent": "Freshping",
            "robots_token": "Freshping",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Check website is online and issue an alert when its down",
            "short_description": "Check website is online and issue an alert when its down",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Freshping\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Freshping\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://support.freshping.io/en/support/solutions/articles/50000003709-freshping-api-documentation"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:34Z"
        },
        {
            "id": "fullfact-ingest",
            "name": "FullFact Ingest",
            "slug": "fullfact-ingest",
            "url": "https://botcrawl.com/bots/fullfact-ingest/",
            "status": "active",
            "operator": "FullFact",
            "company": "FullFact",
            "family": "FullFact",
            "category": "unknown",
            "kind": "fetcher",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "FullFact",
                "FullFact (FullFact Media Ingest. https://fullfact.ai)"
            ],
            "primary_user_agent": "FullFact",
            "robots_token": "FullFact",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "It pulls user-submitted RSS feeds every hour to process, extract and identify claims to analyse for misinformation.",
            "short_description": "It pulls user-submitted RSS feeds every hour to process, extract and identify claims to analyse for misinformation.",
            "verification_method": "Validate the published identifier against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or stronger verification when available.",
            "rules": {
                "robots": "User-agent: FullFact\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"FullFact\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://fullfact.ai"
                }
            ],
            "last_verified": "2026-04-19",
            "last_checked": "2026-04-19",
            "updated_at": "2026-04-30T09:15:54Z"
        },
        {
            "id": "fullstory",
            "name": "FullStory",
            "slug": "fullstory",
            "url": "https://botcrawl.com/bots/fullstory/",
            "status": "active",
            "operator": "Full Story",
            "company": "Full Story",
            "family": "Full Story",
            "category": "monitoring",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "FullStoryBot",
                "FullStoryBot/1.0"
            ],
            "primary_user_agent": "FullStoryBot",
            "robots_token": "FullStoryBot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "FullStory is your digital experience analytics platform for on-the-fly funnels, pixel-perfect replay, custom events, heat maps, advanced search, Dev Tools, and more. FullStoryBot’s fetches and stores assets required to rebuild sites when viewing recorded sessions.",
            "short_description": "FullStory is your digital experience analytics platform for on-the-fly funnels, pixel-perfect replay, custom events, heat maps, advanced search, Dev Tools, and more.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: FullStoryBot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"FullStoryBot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://help.fullstory.com/spp-ref/343521-what-is-the-fullstorybot"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:34Z"
        },
        {
            "id": "funnelback",
            "name": "Funnelback",
            "slug": "funnelback",
            "url": "https://botcrawl.com/bots/funnelback/",
            "status": "active",
            "operator": "Squiz - FunnelBack",
            "company": "Squiz - FunnelBack",
            "family": "Squiz - FunnelBack",
            "category": "search",
            "kind": "crawler",
            "purpose": "indexing",
            "identity_type": "unknown",
            "user_agents": [
                "Funnelback"
            ],
            "primary_user_agent": "Funnelback",
            "robots_token": "Funnelback",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Funnelback is an enterprise search platform, and its crawler indexes content from an organization's websites and data repositories. This powers the organization's internal search function.",
            "short_description": "Funnelback is an enterprise search platform, and its crawler indexes content from an organization's websites and data repositories.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Funnelback\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Funnelback\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://docs.squiz.net/funnelback/docs/latest/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:34Z"
        },
        {
            "id": "gatus",
            "name": "Gatus",
            "slug": "gatus",
            "url": "https://botcrawl.com/bots/gatus/",
            "status": "active",
            "operator": "Gatus",
            "company": "Gatus",
            "family": "Gatus",
            "category": "monitoring",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "Gatus",
                "Gatus/1.0"
            ],
            "primary_user_agent": "Gatus",
            "robots_token": "Gatus",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Endpoint and uptime monitoring.",
            "short_description": "Gatus.io's advanced endpoint monitoring bot.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Gatus\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Gatus\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://gatus.io"
                }
            ],
            "last_verified": "2026-04-19",
            "last_checked": "2026-04-19",
            "updated_at": "2026-04-30T09:15:34Z"
        },
        {
            "id": "geedoproductsearchbot",
            "name": "GeedoProductSearchBot",
            "slug": "geedoproductsearchbot",
            "url": "https://botcrawl.com/bots/geedoproductsearchbot/",
            "status": "active",
            "operator": "Geedo",
            "company": "Geedo",
            "family": "Geedo",
            "category": "unknown",
            "kind": "unknown",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "geedoproductsearchbot"
            ],
            "primary_user_agent": "geedoproductsearchbot",
            "robots_token": "geedoproductsearchbot",
            "verified": "yes",
            "respects_robots": "unknown",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "botsfyi-verified-listing",
            "common_use": "E-commerce",
            "short_description": "Indexes product information from e-commerce websites for product discovery.",
            "verification_method": "Public verified bot directory listing",
            "spoofing_risk": "Medium",
            "rules": {
                "cloudflare": "(http.user_agent contains \"geedoproductsearchbot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://bots.fyi/d/geedoproductsearchbot"
                }
            ],
            "last_verified": "2026-04-19",
            "last_checked": "2026-04-19",
            "updated_at": "2026-04-30T09:15:34Z"
        },
        {
            "id": "gemini-deep-research",
            "name": "Gemini Deep Research",
            "slug": "gemini-deep-research",
            "url": "https://botcrawl.com/bots/gemini-deep-research/",
            "status": "active",
            "operator": "Google",
            "company": "Google",
            "family": "Google",
            "category": "unknown",
            "kind": "unknown",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "gemini-deep-research",
                "Gemini Deep Research"
            ],
            "primary_user_agent": "gemini-deep-research",
            "robots_token": "gemini-deep-research",
            "verified": "yes",
            "respects_robots": "unknown",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "botsfyi-verified-listing",
            "common_use": "AI Assistant",
            "short_description": "Google's AI-powered research tool that performs multi-step research on complex topics using web content.",
            "verification_method": "Public verified bot directory listing",
            "spoofing_risk": "Medium",
            "rules": {
                "cloudflare": "(http.user_agent contains \"gemini-deep-research\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://bots.fyi/d/gemini-deep-research"
                }
            ],
            "last_verified": "2026-04-19",
            "last_checked": "2026-04-19",
            "updated_at": "2026-04-30T09:15:34Z"
        },
        {
            "id": "ghost-inspector",
            "name": "Ghost Inspector",
            "slug": "ghost-inspector",
            "url": "https://botcrawl.com/bots/ghost-inspector/",
            "status": "active",
            "operator": "Ghost Inspector",
            "company": "Ghost Inspector",
            "family": "Ghost Inspector",
            "category": "monitoring",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "Ghost Inspector",
                "Mozilla/5.0 (X11",
                "Linux x86_64) AppleWebKit/537.36 (KHTML",
                "like Gecko) Chrome/107.0 Safari/537.36 Ghost Inspector (63c85ddbde52d6697f57c623)"
            ],
            "primary_user_agent": "Ghost Inspector",
            "robots_token": "Ghost Inspector",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Ghost Inspector is an automated browser testing framework.",
            "short_description": "Ghost Inspector is an automated browser testing framework.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Ghost Inspector\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Ghost Inspector\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://ghostinspector.com"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:34Z"
        },
        {
            "id": "gigabot",
            "name": "Gigabot",
            "slug": "gigabot",
            "url": "https://botcrawl.com/bots/gigabot/",
            "status": "active",
            "operator": "Gigablast",
            "company": "Gigablast",
            "family": "Gigablast",
            "category": "search",
            "kind": "crawler",
            "purpose": "indexing",
            "identity_type": "unknown",
            "user_agents": [
                "Gigabot"
            ],
            "primary_user_agent": "Gigabot",
            "robots_token": "Gigabot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Gigablast is the only non-Big Tech search engine in the U.S. that uses its own search index and algorithms.",
            "short_description": "Gigablast is the only non-Big Tech search engine in the U.S.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Gigabot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Gigabot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.gigablast.com/?c=main"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:34Z"
        },
        {
            "id": "github-camo",
            "name": "GitHub Camo",
            "slug": "github-camo",
            "url": "https://botcrawl.com/bots/github-camo/",
            "status": "active",
            "operator": "GitHub",
            "company": "GitHub",
            "family": "GitHub",
            "category": "unknown",
            "kind": "unknown",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "github-camo",
                "GitHub Camo"
            ],
            "primary_user_agent": "github-camo",
            "robots_token": "github-camo",
            "verified": "yes",
            "respects_robots": "unknown",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "botsfyi-verified-listing",
            "common_use": "Preview",
            "short_description": "GitHub's image proxy service.",
            "verification_method": "Public verified bot directory listing",
            "spoofing_risk": "Medium",
            "rules": {
                "cloudflare": "(http.user_agent contains \"github-camo\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://bots.fyi/d/github-camo"
                }
            ],
            "last_verified": "2026-04-19",
            "last_checked": "2026-04-19",
            "updated_at": "2026-04-30T09:15:34Z"
        },
        {
            "id": "github-hookshot",
            "name": "GitHub Hookshot",
            "slug": "github-hookshot",
            "url": "https://botcrawl.com/bots/github-hookshot/",
            "status": "active",
            "operator": "GitHub",
            "company": "GitHub",
            "family": "GitHub",
            "category": "unknown",
            "kind": "unknown",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "github-hookshot",
                "GitHub Hookshot"
            ],
            "primary_user_agent": "github-hookshot",
            "robots_token": "github-hookshot",
            "verified": "yes",
            "respects_robots": "unknown",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "botsfyi-verified-listing",
            "common_use": "Webhook",
            "short_description": "GitHub's webhook delivery service for repository and application events.",
            "verification_method": "Public verified bot directory listing",
            "spoofing_risk": "Medium",
            "rules": {
                "cloudflare": "(http.user_agent contains \"github-hookshot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://bots.fyi/d/github-hookshot"
                }
            ],
            "last_verified": "2026-04-19",
            "last_checked": "2026-04-19",
            "updated_at": "2026-04-30T09:15:34Z"
        },
        {
            "id": "google-admob-reward-verification",
            "name": "Google AdMob Reward Verification",
            "slug": "google-admob-reward-verification",
            "url": "https://botcrawl.com/bots/google-admob-reward-verification/",
            "status": "active",
            "operator": "Google",
            "company": "Google",
            "family": "Google",
            "category": "unknown",
            "kind": "fetcher",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "google-admob-reward-verification"
            ],
            "primary_user_agent": "google-admob-reward-verification",
            "robots_token": "google-admob-reward-verification",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Confirms completed rewarded ad views using server-side callback validation.",
            "short_description": "Server-side verification callbacks for rewarded AdMob ad views.",
            "verification_method": "Verified on bots.fyi. Treat as a webhook/service identifier rather than a browser-style crawl signature.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: google-admob-reward-verification\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"google-admob-reward-verification\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://bots.fyi/d/google-admob-reward-verification"
                },
                {
                    "type": "operator",
                    "url": "https://support.google.com/admob/answer/9603226?hl=en"
                }
            ],
            "last_verified": "2026-04-19",
            "last_checked": "2026-04-19",
            "updated_at": "2026-04-30T09:15:34Z"
        },
        {
            "id": "google-ads-creatives-assistant",
            "name": "Google Ads Creatives Assistant",
            "slug": "google-ads-creatives-assistant",
            "url": "https://botcrawl.com/bots/google-ads-creatives-assistant/",
            "status": "active",
            "operator": "Google",
            "company": "Google",
            "family": "Google",
            "category": "ai",
            "kind": "agent",
            "purpose": "user-triggered",
            "identity_type": "unknown",
            "user_agents": [
                "Google-Ads-Creatives-Assistant",
                "google-ads-creatives-assistant"
            ],
            "primary_user_agent": "Google-Ads-Creatives-Assistant",
            "robots_token": "Google-Ads-Creatives-Assistant",
            "verified": "yes",
            "respects_robots": "unknown",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "User-triggered content retrieval for Google Ads creative generation.",
            "short_description": "Fetches content for Google Ads creative generation and enhancement tools.",
            "verification_method": "Verified on bots.fyi. Community snippets indicate the Google-Ads-Creatives-Assistant token; validate before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Google-Ads-Creatives-Assistant\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Google-Ads-Creatives-Assistant\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://bots.fyi/d/google-ads-creatives-assistant"
                },
                {
                    "type": "operator",
                    "url": "https://support.google.com/google-ads/"
                }
            ],
            "last_verified": "2026-04-19",
            "last_checked": "2026-04-19",
            "updated_at": "2026-04-30T09:15:34Z"
        },
        {
            "id": "google-adsbot",
            "name": "Google AdsBot",
            "slug": "google-adsbot",
            "url": "https://botcrawl.com/bots/google-adsbot/",
            "status": "active",
            "operator": "Google",
            "company": "Google",
            "family": "Google",
            "category": "scraper",
            "kind": "crawler",
            "purpose": "general-crawl",
            "identity_type": "unknown",
            "user_agents": [
                "AdsBot-Google",
                "AdsBot-Google (+http://www.google.com/adsbot.html)",
                "Google Ads crawler"
            ],
            "primary_user_agent": "AdsBot-Google",
            "robots_token": "AdsBot-Google",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Google AdsBot is a special-case crawler that monitors websites where Google Ads are served.",
            "short_description": "Google AdsBot is a special-case crawler that monitors websites where Google Ads are served.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "ip_ranges_url": "https://developers.google.com/static/search/apis/ipranges/special-crawlers.json",
            "rules": {
                "robots": "User-agent: AdsBot-Google\nDisallow: /"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://support.google.com/webmasters/answer/1061943?hl=en"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:35Z"
        },
        {
            "id": "adsbot-google",
            "name": "Google AdsBot",
            "slug": "adsbot-google",
            "url": "https://botcrawl.com/bots/adsbot-google/",
            "status": "active",
            "operator": "Google",
            "company": "Google",
            "family": "Google",
            "category": "scraper",
            "kind": "crawler",
            "purpose": "general-crawl",
            "identity_type": "unknown",
            "user_agents": [
                "AdsBot-Google",
                "AdsBot-Google (+http://www.google.com/adsbot.html)",
                "Google Ads crawler"
            ],
            "primary_user_agent": "AdsBot-Google",
            "robots_token": "AdsBot-Google",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "unknown",
            "common_use": "Google AdsBot is a special-case crawler that monitors websites where Google Ads are served.",
            "short_description": "Google AdsBot is a special-case crawler that monitors websites where Google Ads are served.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "ip_ranges_url": "https://developers.google.com/static/search/apis/ipranges/special-crawlers.json",
            "rules": {
                "robots": "User-agent: AdsBot-Google\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"AdsBot-Google\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://support.google.com/webmasters/answer/1061943?hl=en"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T07:23:06Z"
        },
        {
            "id": "google-adsense-mediapartners-google",
            "name": "Google Adsense: Mediapartners-Google",
            "slug": "google-adsense-mediapartners-google",
            "url": "https://botcrawl.com/bots/google-adsense-mediapartners-google/",
            "status": "active",
            "operator": "Google",
            "company": "Google",
            "family": "Google",
            "category": "scraper",
            "kind": "crawler",
            "purpose": "general-crawl",
            "identity_type": "unknown",
            "user_agents": [
                "Mediapartners-Google",
                "Google AdSense crawler"
            ],
            "primary_user_agent": "Mediapartners-Google",
            "robots_token": "Mediapartners-Google",
            "verified": "yes",
            "respects_robots": "yes",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "The Google AdSense bot monitors the content of websites using Google AdSense.",
            "short_description": "The Google AdSense bot monitors the content of websites using Google AdSense.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "ip_ranges_url": "https://developers.google.com/static/search/apis/ipranges/special-crawlers.json",
            "rules": {
                "robots": "User-agent: Mediapartners-Google\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Mediapartners-Google\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://support.google.com/webmasters/answer/1061943?hl=en"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:35Z"
        },
        {
            "id": "google-api",
            "name": "Google API",
            "slug": "google-api",
            "url": "https://botcrawl.com/bots/google-api/",
            "status": "active",
            "operator": "Google",
            "company": "Google",
            "family": "Google",
            "category": "unknown",
            "kind": "fetcher",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "APIs-Google",
                "APIs-Google (+https://developers.google.com/webmasters/APIs-Google.html)",
                "Google API push notification agent"
            ],
            "primary_user_agent": "APIs-Google",
            "robots_token": "APIs-Google",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "APIs-Google is the user agent used by Google APIs to deliver push notification messages. Application developers can request these notifications to avoid the need for continually polling Google's servers to find out if the resources they are interested in have changed. To make sure nobody abuses this service, Google requires developers to prove that they own the domain before allowing them to register a URL with a domain as the location where they want to receive messages.",
            "short_description": "APIs-Google is the user agent used by Google APIs to deliver push notification messages.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: APIs-Google\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"APIs-Google\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://support.google.com/webmasters/answer/1061943?hl=en"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:35Z"
        },
        {
            "id": "google-business-link-verification",
            "name": "Google Business Link Verification",
            "slug": "google-business-link-verification",
            "url": "https://botcrawl.com/bots/google-business-link-verification/",
            "status": "active",
            "operator": "Google",
            "company": "Google",
            "family": "Google",
            "category": "unknown",
            "kind": "fetcher",
            "purpose": "user-triggered",
            "identity_type": "official-documented",
            "user_agents": [
                "Google-BusinessLinkVerification",
                "google-businesslink-verification"
            ],
            "primary_user_agent": "Google-BusinessLinkVerification",
            "robots_token": "Google-BusinessLinkVerification",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "high",
            "source_type": "official",
            "common_use": "Verifies that Business Profile links are reachable, relevant, and return valid responses.",
            "short_description": "Google Business Profile link verification crawler.",
            "verification_method": "Use the documented Google-BusinessLinkVerification user-agent. Google states these verification crawlers do not follow robots.txt.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Google-BusinessLinkVerification\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Google-BusinessLinkVerification\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://support.google.com/business/answer/13769188?hl=en"
                }
            ],
            "last_verified": "2026-04-19",
            "last_checked": "2026-04-19",
            "updated_at": "2026-04-30T09:15:35Z"
        },
        {
            "id": "google-digital-asset-links",
            "name": "Google Digital Asset Links",
            "slug": "google-digital-asset-links",
            "url": "https://botcrawl.com/bots/google-digital-asset-links/",
            "status": "active",
            "operator": "Google",
            "company": "Google",
            "family": "Google",
            "category": "scraper",
            "kind": "crawler",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "GoogleAssociationService",
                "GoogleAssociationService/"
            ],
            "primary_user_agent": "GoogleAssociationService",
            "robots_token": "GoogleAssociationService",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "The Digital Asset Links bot verifies statements lists made by website operators.",
            "short_description": "The Digital Asset Links bot verifies statements lists made by website operators.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: GoogleAssociationService\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"GoogleAssociationService\")"
            },
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:35Z"
        },
        {
            "id": "google-docs",
            "name": "Google Docs",
            "slug": "google-docs",
            "url": "https://botcrawl.com/bots/google-docs/",
            "status": "active",
            "operator": "Google",
            "company": "Google",
            "family": "Google",
            "category": "unknown",
            "kind": "unknown",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "google-docs",
                "Google Docs"
            ],
            "primary_user_agent": "google-docs",
            "robots_token": "google-docs",
            "verified": "yes",
            "respects_robots": "unknown",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "botsfyi-verified-listing",
            "common_use": "Preview",
            "short_description": "Fetches images and page content when users insert links into Google Docs.",
            "verification_method": "Public verified bot directory listing",
            "spoofing_risk": "Medium",
            "rules": {
                "cloudflare": "(http.user_agent contains \"google-docs\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://bots.fyi/d/google-docs"
                }
            ],
            "last_verified": "2026-04-19",
            "last_checked": "2026-04-19",
            "updated_at": "2026-04-30T09:15:35Z"
        },
        {
            "id": "google-favicon",
            "name": "Google Favicon",
            "slug": "google-favicon",
            "url": "https://botcrawl.com/bots/google-favicon/",
            "status": "active",
            "operator": "Google",
            "company": "Google",
            "category": "unknown",
            "kind": "unknown",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.75 Safari/537.36 Google Favicon",
                "Favicon fetcher"
            ],
            "primary_user_agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.75 Safari/537.36 Google Favicon",
            "robots_token": "Googlebot-Image",
            "verified": "yes",
            "respects_robots": "yes",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "unknown",
            "source_type": "unknown",
            "short_description": "Retired Google favicon fetcher.",
            "verification_method": "Verify via reverse DNS matching Google's special-case crawler hostnames and confirm the IP matches Google's special crawler ranges.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: Googlebot-Image\nDisallow: /"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://developers.google.com/crawling/docs/crawlers-fetchers/google-special-case-crawlers"
                }
            ],
            "updated_at": "2026-03-31T21:27:36Z"
        },
        {
            "id": "google-feed-fetcher",
            "name": "Google Feed Fetcher",
            "slug": "google-feed-fetcher",
            "url": "https://botcrawl.com/bots/google-feed-fetcher/",
            "status": "active",
            "operator": "Google",
            "company": "Google",
            "family": "Google",
            "category": "search",
            "kind": "fetcher",
            "purpose": "indexing",
            "identity_type": "unknown",
            "user_agents": [
                "google.com/feedfetcher",
                "Feedfetcher-Google",
                "(+http://www.google.com/feedfetcher.html)"
            ],
            "primary_user_agent": "google.com/feedfetcher",
            "robots_token": "google.com/feedfetcher",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Google FeedFetcher is the RSS reader for Google.",
            "short_description": "Google FeedFetcher is the RSS reader for Google.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: google.com/feedfetcher\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"google.com/feedfetcher\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://support.google.com/webmasters/answer/1061943?hl=en"
                }
            ],
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:35Z"
        },
        {
            "id": "google-firebase",
            "name": "Google Firebase",
            "slug": "google-firebase",
            "url": "https://botcrawl.com/bots/google-firebase/",
            "status": "active",
            "operator": "Google",
            "company": "Google",
            "family": "Google",
            "category": "scraper",
            "kind": "crawler",
            "purpose": "general-crawl",
            "identity_type": "unknown",
            "user_agents": [
                "Google-Firebase"
            ],
            "primary_user_agent": "Google-Firebase",
            "robots_token": "Google-Firebase",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Google-Firebase\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Google-Firebase\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://support.google.com/webmasters/answer/1061943?hl=en"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:35Z"
        },
        {
            "id": "google-image-proxy",
            "name": "Google Image Proxy",
            "slug": "google-image-proxy",
            "url": "https://botcrawl.com/bots/google-image-proxy/",
            "status": "active",
            "operator": "Google",
            "company": "Google",
            "family": "Google",
            "category": "scraper",
            "kind": "preview",
            "purpose": "general-crawl",
            "identity_type": "unknown",
            "user_agents": [
                "GoogleImageProxy",
                "Mozilla/5.0 (Windows NT 5.1; rv:11.0) Gecko Firefox/11.0 (via ggpht.com GoogleImageProxy)"
            ],
            "primary_user_agent": "GoogleImageProxy",
            "robots_token": "GoogleImageProxy",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "The Google Image Proxy bot is used to render link content sent via email in Gmail.",
            "short_description": "The Google Image Proxy bot is used to render link content sent via email in Gmail.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: GoogleImageProxy\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"GoogleImageProxy\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://support.google.com/webmasters/answer/1061943?hl=en"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:35Z"
        },
        {
            "id": "google-images",
            "name": "Google Images",
            "slug": "google-images",
            "url": "https://botcrawl.com/bots/google-images/",
            "status": "active",
            "operator": "Google",
            "company": "Google",
            "family": "Google",
            "category": "search",
            "kind": "crawler",
            "purpose": "indexing",
            "identity_type": "unknown",
            "user_agents": [
                "Googlebot-Image",
                "Googlebot-Image/1.0",
                "Google image crawler"
            ],
            "primary_user_agent": "Googlebot-Image",
            "robots_token": "Googlebot-Image",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "The Google Images bot is the search engine crawler for Google Images Search.",
            "short_description": "The Google Images bot is the search engine crawler for Google Images Search.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Googlebot-Image\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Googlebot-Image\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://support.google.com/webmasters/answer/1061943?hl=en"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:35Z"
        },
        {
            "id": "google-inspection-tool",
            "name": "Google Inspection Tool",
            "slug": "google-inspection-tool",
            "url": "https://botcrawl.com/bots/google-inspection-tool/",
            "status": "active",
            "operator": "Google",
            "company": "Google",
            "family": "Google",
            "category": "security",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "Google-InspectionTool",
                "Mozilla/5.0 (Linux",
                "Android 6.0.1",
                "Nexus 5X Build/MMB29P) AppleWebKit/537.36 (KHTML",
                "like Gecko) Chrome/113.0.5672.92 Mobile Safari/537.36 (compatible",
                "Google-InspectionTool/1.0",
                ")",
                "Google Search testing fetcher"
            ],
            "primary_user_agent": "Google-InspectionTool",
            "robots_token": "Google-InspectionTool",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Google-InspectionTool is the crawler used by Search testing tools such as the Rich Result Test and URL inspection in Search Console. Apart from the user agent and user agent token, it mimics Googlebot.",
            "short_description": "Google-InspectionTool is the crawler used by Search testing tools such as the Rich Result Test and URL inspection in Search Console.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "ip_ranges_url": "https://developers.google.com/static/search/apis/ipranges/common-crawlers.json",
            "rules": {
                "robots": "User-agent: Google-InspectionTool\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Google-InspectionTool\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://developers.google.com/search/docs/crawling-indexing/overview-google-crawlers#google-inspectiontool"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:35Z"
        },
        {
            "id": "google-page-renderer",
            "name": "Google Page Renderer",
            "slug": "google-page-renderer",
            "url": "https://botcrawl.com/bots/google-page-renderer/",
            "status": "active",
            "operator": "Google",
            "company": "Google",
            "family": "Google",
            "category": "scraper",
            "kind": "preview",
            "purpose": "general-crawl",
            "identity_type": "unknown",
            "user_agents": [
                "Google-PageRenderer",
                "Mozilla/5.0 (X11",
                "Linux x86_64) AppleWebKit/537.36 (KHTML",
                "like Gecko) Chrome/56.0.2924.87 Safari/537.36 Google-PageRenderer Google (+https://developers.google.com/+/web/snippet/)",
                "Google Web Snippet"
            ],
            "primary_user_agent": "Google-PageRenderer",
            "robots_token": "Google-PageRenderer",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Tools and product functions where the end user triggers a fetch.",
            "short_description": "Tools and product functions where the end user triggers a fetch.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Google-PageRenderer\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Google-PageRenderer\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://developers.google.com/search/docs/crawling-indexing/verifying-googlebot"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:35Z"
        },
        {
            "id": "google-publisher-center",
            "name": "Google Publisher Center",
            "slug": "google-publisher-center",
            "url": "https://botcrawl.com/bots/google-publisher-center/",
            "status": "active",
            "operator": "Google",
            "company": "Google",
            "family": "Google",
            "category": "search",
            "kind": "fetcher",
            "purpose": "indexing",
            "identity_type": "unknown",
            "user_agents": [
                "GoogleProducer",
                "(+https://developers.google.com/search/docs/crawling-indexing/google-producer)",
                "Google Publisher Center feed fetcher"
            ],
            "primary_user_agent": "GoogleProducer",
            "robots_token": "GoogleProducer",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Fetches and processes feeds that publishers explicitly supplied for use in Google News landing pages.",
            "short_description": "Fetches and processes feeds that publishers explicitly supplied for use in Google News landing pages.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "ip_ranges_url": "https://developers.google.com/static/search/apis/ipranges/user-triggered-fetchers-google.json",
            "rules": {
                "robots": "User-agent: GoogleProducer\nDisallow: /"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://developers.google.com/search/docs/crawling-indexing/google-user-triggered-fetchers#googleproducer"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:35Z"
        },
        {
            "id": "googleproducer",
            "name": "Google Publisher Center",
            "slug": "googleproducer",
            "url": "https://botcrawl.com/bots/googleproducer/",
            "status": "active",
            "operator": "Google",
            "company": "Google",
            "family": "Google",
            "category": "search",
            "kind": "fetcher",
            "purpose": "indexing",
            "identity_type": "unknown",
            "user_agents": [
                "GoogleProducer",
                "(+https://developers.google.com/search/docs/crawling-indexing/google-producer)",
                "Google Publisher Center feed fetcher"
            ],
            "primary_user_agent": "GoogleProducer",
            "robots_token": "GoogleProducer",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "unknown",
            "common_use": "Fetches and processes feeds that publishers explicitly supplied for use in Google News landing pages.",
            "short_description": "Fetches and processes feeds that publishers explicitly supplied for use in Google News landing pages.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "ip_ranges_url": "https://developers.google.com/static/search/apis/ipranges/user-triggered-fetchers-google.json",
            "rules": {
                "robots": "User-agent: GoogleProducer\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"GoogleProducer\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://developers.google.com/search/docs/crawling-indexing/google-user-triggered-fetchers#googleproducer"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T07:23:07Z"
        },
        {
            "id": "google-read-aloud",
            "name": "Google Read Aloud",
            "slug": "google-read-aloud",
            "url": "https://botcrawl.com/bots/google-read-aloud/",
            "status": "active",
            "operator": "Google",
            "company": "Google",
            "family": "Google",
            "category": "monitoring",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "Google-Read-Aloud",
                "Mozilla/5.0 (X11",
                "Linux x86_64) AppleWebKit/537.36 (KHTML",
                "like Gecko) Chrome/41.0.2272.118 Safari/537.36 (compatible",
                "+https://support.google.com/webmasters/answer/1061943)",
                "Google TTS fetcher",
                "google-speakr"
            ],
            "primary_user_agent": "Google-Read-Aloud",
            "robots_token": "Google-Read-Aloud",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Google Read Aloud service enables reading web pages using text-to-speech (TTS).",
            "short_description": "Google Read Aloud service enables reading web pages using text-to-speech (TTS).",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "ip_ranges_url": "https://developers.google.com/static/search/apis/ipranges/user-triggered-fetchers-google.json",
            "rules": {
                "robots": "User-agent: Google-Read-Aloud\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Google-Read-Aloud\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://support.google.com/webmasters/answer/1061943?hl=en"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:35Z"
        },
        {
            "id": "google-safety",
            "name": "Google Safety",
            "slug": "google-safety",
            "url": "https://botcrawl.com/bots/google-safety/",
            "status": "active",
            "operator": "Google",
            "company": "Google",
            "family": "Google",
            "category": "security",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "Google-Safety",
                "Mozilla/5.0 (Windows NT 10.0",
                "Win64",
                "x64) AppleWebKit/537.36 (KHTML",
                "like Gecko) Chrome/60.0.3112.113 Safari/537.36 (compatible",
                "+http://www.google.com/bot.html)",
                "Mozilla/5.0 (Linux",
                "Android 6.0.1",
                "Nexus 5X Build/MMB29P) AppleWebKit/537.36 (KHTML",
                "like Gecko) Chrome/41.0.2272.96 Mobile Safari/537.36 (compatible"
            ],
            "primary_user_agent": "Google-Safety",
            "robots_token": "Google-Safety",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "The Google-Safety user agent handles abuse-specific crawling, such as malware discovery for publicly posted links on Google properties",
            "short_description": "The Google-Safety user agent handles abuse-specific crawling, such as malware discovery for publicly posted links on Google properties",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Google-Safety\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Google-Safety\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.google.com/bot.html"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:35Z"
        },
        {
            "id": "google-schema-markup-testing-tool",
            "name": "Google Schema Markup Testing Tool",
            "slug": "google-schema-markup-testing-tool",
            "url": "https://botcrawl.com/bots/google-schema-markup-testing-tool/",
            "status": "active",
            "operator": "Google",
            "company": "Google",
            "family": "Google",
            "category": "monitoring",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "Google-Structured-Data-Testing",
                "Mozilla/5.0 (compatible",
                "Google-Structured-Data-Testing-Tool +https://search.google.com/structured-data/testing-tool)"
            ],
            "primary_user_agent": "Google-Structured-Data-Testing",
            "robots_token": "Google-Structured-Data-Testing",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "The Google Schema Markup Testing Tool bot, now part of the Rich Results Test, crawls pages to validate their structured data. This helps webmasters check if their schema markup is correctly implemented for Google Search.",
            "short_description": "The Google Schema Markup Testing Tool bot, now part of the Rich Results Test, crawls pages to validate their structured data.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Google-Structured-Data-Testing\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Google-Structured-Data-Testing\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://support.google.com/webmasters/answer/1061943?hl=en"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:35Z"
        },
        {
            "id": "google-scholar",
            "name": "Google Scholar",
            "slug": "google-scholar",
            "url": "https://botcrawl.com/bots/google-scholar/",
            "status": "active",
            "operator": "Google",
            "company": "Google",
            "family": "Google",
            "category": "search",
            "kind": "crawler",
            "purpose": "indexing",
            "identity_type": "unknown",
            "user_agents": [
                "Googlebot-IA",
                "Googlebot-IA/2.1",
                "Googlebot-IA/"
            ],
            "primary_user_agent": "Googlebot-IA",
            "robots_token": "Googlebot-IA",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Google Scholar uses a bot to crawl and index scholarly literature from academic publishers, repositories, and university websites. This populates its academic search engine.",
            "short_description": "Google Scholar uses a bot to crawl and index scholarly literature from academic publishers, repositories, and university websites.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Googlebot-IA\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Googlebot-IA\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://scholar.google.com/intl/en/scholar/libraries.html"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:35Z"
        },
        {
            "id": "google-site-verification",
            "name": "Google Site Verification",
            "slug": "google-site-verification",
            "url": "https://botcrawl.com/bots/google-site-verification/",
            "status": "active",
            "operator": "Google",
            "company": "Google",
            "family": "Google",
            "category": "security",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "Google-Site-Verification",
                "Mozilla/5.0 (compatible",
                "Google-Site-Verification/1.0)",
                "Google Site Verifier"
            ],
            "primary_user_agent": "Google-Site-Verification",
            "robots_token": "Google-Site-Verification",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Verification is the process of proving that you own the property that you claim to own. Search Console needs to verify ownership because verified owners have access to sensitive Google Search data for a site, and can affect a site's presence and behavior on Google Search and other Google properties. A verified owner can grant full or view access to other people.",
            "short_description": "Verification is the process of proving that you own the property that you claim to own.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "ip_ranges_url": "https://developers.google.com/static/search/apis/ipranges/user-triggered-fetchers-google.json",
            "rules": {
                "robots": "User-agent: Google-Site-Verification\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Google-Site-Verification\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://support.google.com/webmasters/answer/9008080?hl=en"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:35Z"
        },
        {
            "id": "google-storebot",
            "name": "Google StoreBot",
            "slug": "google-storebot",
            "url": "https://botcrawl.com/bots/google-storebot/",
            "status": "active",
            "operator": "Google",
            "company": "Google",
            "family": "Google",
            "category": "scraper",
            "kind": "crawler",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "Storebot-Google",
                "Mozilla/5.0 (X11",
                "Linux x86_64",
                "Storebot-Google/1.0) AppleWebKit/537.36 (KHTML",
                "like Gecko) Chrome/79.0.3945.88 Safari/537.36",
                "Mozilla/5.0 (Linux",
                "Android 8.0",
                "Pixel 2 Build/OPD3.170816.012",
                "like Gecko) Chrome/81.0.4044.138 Mobile Safari/537.36",
                "Google Shopping crawler"
            ],
            "primary_user_agent": "Storebot-Google",
            "robots_token": "Storebot-Google",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "The Google StoreBot is a search-engine-based program that automatically 'crawls' through web pages to gather and analyse data. Google uses crawlers that go through product pages and checkout processes using machine learning algorithms to fill in forms with information such as delivery addresses, and help compile other information on price, delivery, payments and more.",
            "short_description": "The Google StoreBot is a search-engine-based program that automatically 'crawls' through web pages to gather and analyse data.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "ip_ranges_url": "https://developers.google.com/crawling/ipranges/common-crawlers.json",
            "rules": {
                "robots": "User-agent: Storebot-Google\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Storebot-Google\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://developers.google.com/search/docs/advanced/crawling/overview-google-crawlers"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:36Z"
        },
        {
            "id": "google-trust-services-dcv-check",
            "name": "Google Trust Services (DCV Check)",
            "slug": "google-trust-services-dcv-check",
            "url": "https://botcrawl.com/bots/google-trust-services-dcv-check/",
            "status": "active",
            "operator": "Google",
            "company": "Google",
            "family": "Google",
            "category": "security",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "Google-Trust-Services",
                "Mozilla/5.0 (compatible",
                "Google-Trust-Services/2.0",
                "http://pki.goog/)"
            ],
            "primary_user_agent": "Google-Trust-Services",
            "robots_token": "Google-Trust-Services",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Helping build a safer Internet by providing a transparent, trusted, and reliable Certificate Authority.",
            "short_description": "Helping build a safer Internet by providing a transparent, trusted, and reliable Certificate Authority.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Google-Trust-Services\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Google-Trust-Services\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://pki.goog/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:36Z"
        },
        {
            "id": "google-videos",
            "name": "Google Videos",
            "slug": "google-videos",
            "url": "https://botcrawl.com/bots/google-videos/",
            "status": "active",
            "operator": "Google",
            "company": "Google",
            "family": "Google",
            "category": "search",
            "kind": "crawler",
            "purpose": "indexing",
            "identity_type": "unknown",
            "user_agents": [
                "Googlebot-Video",
                "Googlebot-Video/1.0",
                "Google video crawler"
            ],
            "primary_user_agent": "Googlebot-Video",
            "robots_token": "Googlebot-Video",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "The Google Videos bot is the search engine crawler for Google Video Search.",
            "short_description": "The Google Videos bot is the search engine crawler for Google Video Search.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Googlebot-Video\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Googlebot-Video\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://support.google.com/webmasters/answer/1061943?hl=en"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:36Z"
        },
        {
            "id": "google-web-snippet",
            "name": "Google Web Snippet",
            "slug": "google-web-snippet",
            "url": "https://botcrawl.com/bots/google-web-snippet/",
            "status": "active",
            "operator": "Google",
            "company": "Google",
            "category": "unknown",
            "kind": "unknown",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "Google Web Snippet",
                "Google-PageRenderer Google (+https://developers.google.com/+/web/snippet/)",
                "Google-PageRenderer"
            ],
            "primary_user_agent": "Google Web Snippet",
            "robots_token": "Google-PageRenderer",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "unknown",
            "source_type": "unknown",
            "short_description": "This user agent belongs to Google Web Snippet. Google Inc developed this Bot. This Bot run on Linux.",
            "verification_method": "Verify the published Google token/user-agent and cross-check with Google crawler documentation if needed.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "sources": [
                {
                    "type": "source",
                    "url": "https://developers.google.com/search/docs/crawling-indexing/overview-google-crawlers"
                }
            ],
            "updated_at": "2026-03-31T14:10:48Z"
        },
        {
            "id": "google-adwords-express",
            "name": "Google-AdWords-Express",
            "slug": "google-adwords-express",
            "url": "https://botcrawl.com/bots/google-adwords-express/",
            "status": "active",
            "operator": "Google",
            "company": "Google",
            "family": "Google",
            "category": "scraper",
            "kind": "crawler",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "Google-AdWords-Express"
            ],
            "primary_user_agent": "Google-AdWords-Express",
            "robots_token": "Google-AdWords-Express",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Google-AdWords-Express is a bot for a Google Ads product aimed at small businesses. It crawls advertiser websites to assist with ad creation and to verify site information.",
            "short_description": "Google-AdWords-Express is a bot for a Google Ads product aimed at small businesses.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Google-AdWords-Express\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Google-AdWords-Express\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://developers.google.com/search/docs/crawling-indexing/google-user-triggered-fetchers"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:36Z"
        },
        {
            "id": "google-adwords-instant",
            "name": "Google-Adwords-Instant",
            "slug": "google-adwords-instant",
            "url": "https://botcrawl.com/bots/google-adwords-instant/",
            "status": "active",
            "operator": "Google",
            "company": "Google",
            "family": "Google",
            "category": "scraper",
            "kind": "crawler",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "Google-Adwords-Instant",
                "Google-Adwords-Instant (+http://www.google.com/adsbot.html)"
            ],
            "primary_user_agent": "Google-Adwords-Instant",
            "robots_token": "Google-Adwords-Instant",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Google-Adwords-Instant is a bot connected to the Google Ads platform. It visits advertiser landing pages to perform verification and quality checks.",
            "short_description": "Google-Adwords-Instant is a bot connected to the Google Ads platform.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Google-Adwords-Instant\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Google-Adwords-Instant\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.google.com/adsbot.html"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:36Z"
        },
        {
            "id": "google-agent",
            "name": "Google-Agent",
            "slug": "google-agent",
            "url": "https://botcrawl.com/bots/google-agent/",
            "status": "active",
            "operator": "Google",
            "company": "Google",
            "family": "Google",
            "category": "ai",
            "kind": "fetcher",
            "purpose": "user-triggered",
            "identity_type": "official-documented",
            "user_agents": [
                "Google-Agent",
                "Mozilla/5.0 AppleWebKit/537.36 (KHTML",
                "like Gecko",
                "compatible",
                "+https://developers.google.com/crawling/docs/crawlers-fetchers/google-agent) Chrome/W.X.Y.Z Safari/537.36",
                "Google hosted agent fetcher"
            ],
            "primary_user_agent": "Google-Agent",
            "robots_token": "Google-Agent",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "high",
            "source_type": "official",
            "short_description": "Google-hosted browsing agent triggered by user requests.",
            "verification_method": "Verify as a Google user-triggered agent using reverse DNS and Google's published user-triggered agent IP ranges.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "ip_ranges_url": "https://developers.google.com/static/search/apis/ipranges/user-triggered-agents.json",
            "rules": {
                "robots": "No general robots.txt block is guaranteed; this is a user-triggered agent and may require server-side access controls.",
                "cloudflare": "(http.user_agent contains \"Google-Agent\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://developers.google.com/crawling/docs/crawlers-fetchers/google-user-triggered-fetchers"
                }
            ],
            "last_verified": "2026-04-01",
            "updated_at": "2026-04-30T09:15:36Z"
        },
        {
            "id": "google-cloudvertexbot",
            "name": "Google-CloudVertexBot",
            "slug": "google-cloudvertexbot",
            "url": "https://botcrawl.com/bots/google-cloudvertexbot/",
            "status": "active",
            "operator": "Google",
            "company": "Google",
            "family": "Google",
            "category": "ai",
            "kind": "unknown",
            "purpose": "site-owner-fetch",
            "identity_type": "official-documented",
            "user_agents": [
                "Google-CloudVertexBot",
                "CloudVertexBot",
                "Vertex AI crawler"
            ],
            "primary_user_agent": "Google-CloudVertexBot",
            "robots_token": "Google-CloudVertexBot",
            "verified": "yes",
            "respects_robots": "yes",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "high",
            "source_type": "official",
            "short_description": "Google crawler used on owner request for Vertex AI Agents.",
            "verification_method": "Use Google's official crawler verification guidance because user-agent strings can be spoofed.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: Google-CloudVertexBot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Google-CloudVertexBot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://developers.google.com/crawling/docs/crawlers-fetchers/google-common-crawlers"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:36Z"
        },
        {
            "id": "google-cws",
            "name": "Google-CWS",
            "slug": "google-cws",
            "url": "https://botcrawl.com/bots/google-cws/",
            "status": "active",
            "operator": "Google",
            "company": "Google",
            "family": "Google",
            "category": "search",
            "kind": "fetcher",
            "purpose": "user-triggered",
            "identity_type": "official-documented",
            "user_agents": [
                "Google-CWS",
                "Mozilla/5.0 (compatible",
                "Google-CWS)",
                "Chrome Web Store fetcher"
            ],
            "primary_user_agent": "Google-CWS",
            "robots_token": "Google-CWS",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "high",
            "source_type": "official",
            "short_description": "Google-CWS is the Chrome Web Store user-triggered fetcher for URLs developers provide in extension and theme metadata.",
            "verification_method": "Published Google crawler documentation and Google crawler/fetcher verification guidance",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: Google-CWS\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Google-CWS\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://developers.google.com/crawling/docs/crawlers-fetchers/google-user-triggered-fetchers"
                }
            ],
            "last_verified": "2026-04-01",
            "updated_at": "2026-04-30T09:15:36Z"
        },
        {
            "id": "google-display-ads-bot",
            "name": "Google-Display-Ads-Bot",
            "slug": "google-display-ads-bot",
            "url": "https://botcrawl.com/bots/google-display-ads-bot/",
            "status": "active",
            "operator": "Google",
            "company": "Google",
            "family": "Google",
            "category": "scraper",
            "kind": "crawler",
            "purpose": "general-crawl",
            "identity_type": "unknown",
            "user_agents": [
                "Google-Display-Ads-Bot"
            ],
            "primary_user_agent": "Google-Display-Ads-Bot",
            "robots_token": "Google-Display-Ads-Bot",
            "verified": "yes",
            "respects_robots": "yes",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Google AdSense uses a crawler called Google-Display-Ads-Bot to verify your site when you add a site to AdSense.",
            "short_description": "Google AdSense uses a crawler called Google-Display-Ads-Bot to verify your site when you add a site to AdSense.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Google-Display-Ads-Bot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Google-Display-Ads-Bot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://support.google.com/adsense/answer/99376"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:36Z"
        },
        {
            "id": "google-extended",
            "name": "Google-Extended",
            "slug": "google-extended",
            "url": "https://botcrawl.com/bots/google-extended/",
            "status": "active",
            "operator": "Google",
            "company": "Google",
            "family": "Google",
            "category": "ai",
            "kind": "control-token",
            "purpose": "control-token",
            "identity_type": "official-documented",
            "user_agents": [
                "Google-Extended",
                "Google AI opt-out token"
            ],
            "primary_user_agent": "Google-Extended",
            "robots_token": "Google-Extended",
            "verified": "yes",
            "respects_robots": "yes",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "high",
            "source_type": "official",
            "short_description": "Google control token for Gemini training and grounding permissions.",
            "verification_method": "Control token only; it relies on existing Google crawler traffic rather than a separate HTTP user-agent.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: Google-Extended\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Google-Extended\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://developers.google.com/crawling/docs/crawlers-fetchers/google-common-crawlers"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:36Z"
        },
        {
            "id": "google-notebooklm",
            "name": "Google-NotebookLM",
            "slug": "google-notebooklm",
            "url": "https://botcrawl.com/bots/google-notebooklm/",
            "status": "active",
            "operator": "Google",
            "company": "Google",
            "family": "Google",
            "category": "ai",
            "kind": "fetcher",
            "purpose": "user-triggered",
            "identity_type": "official-documented",
            "user_agents": [
                "Google-NotebookLM",
                "NotebookLM",
                "NotebookLM source fetcher"
            ],
            "primary_user_agent": "Google-NotebookLM",
            "robots_token": "Google-NotebookLM",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "high",
            "source_type": "official",
            "short_description": "NotebookLM fetcher for user-supplied sources.",
            "verification_method": "Verify as a Google fetcher using reverse DNS and Google's published user-triggered fetcher IP ranges.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "ip_ranges_url": "https://developers.google.com/static/search/apis/ipranges/user-triggered-fetchers-google.json",
            "rules": {
                "robots": "No general robots.txt block is guaranteed; this is a user-triggered fetcher and may require server-side access controls.",
                "cloudflare": "(http.user_agent contains \"Google-NotebookLM\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://developers.google.com/crawling/docs/crawlers-fetchers/google-user-triggered-fetchers"
                }
            ],
            "last_verified": "2026-04-01",
            "updated_at": "2026-04-30T09:15:36Z"
        },
        {
            "id": "google-pinpoint",
            "name": "Google-Pinpoint",
            "slug": "google-pinpoint",
            "url": "https://botcrawl.com/bots/google-pinpoint/",
            "status": "active",
            "operator": "Google",
            "company": "Google",
            "family": "Google",
            "category": "search",
            "kind": "fetcher",
            "purpose": "user-triggered",
            "identity_type": "official-documented",
            "user_agents": [
                "Google-Pinpoint",
                "Google Pinpoint source fetcher"
            ],
            "primary_user_agent": "Google-Pinpoint",
            "robots_token": "Google-Pinpoint",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "high",
            "source_type": "official",
            "short_description": "Google Pinpoint fetcher for user-supplied sources.",
            "verification_method": "Verify as a Google fetcher using reverse DNS and Google's published user-triggered fetcher IP ranges.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "ip_ranges_url": "https://developers.google.com/static/search/apis/ipranges/user-triggered-fetchers-google.json",
            "rules": {
                "robots": "No general robots.txt block is guaranteed; this is a user-triggered fetcher and may require server-side access controls.",
                "cloudflare": "(http.user_agent contains \"Google-Pinpoint\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://developers.google.com/crawling/docs/crawlers-fetchers/google-user-triggered-fetchers"
                }
            ],
            "last_verified": "2026-04-01",
            "updated_at": "2026-04-30T09:15:36Z"
        },
        {
            "id": "googlebot",
            "name": "Googlebot",
            "slug": "googlebot",
            "url": "https://botcrawl.com/bots/googlebot/",
            "status": "active",
            "operator": "Google",
            "company": "Google",
            "family": "Google",
            "category": "search",
            "kind": "crawler",
            "purpose": "search",
            "identity_type": "official-documented",
            "user_agents": [
                "Googlebot",
                "Mozilla/5.0 AppleWebKit/537.36 (KHTML",
                "like Gecko",
                "compatible",
                "Googlebot/2.1",
                "+http://www.google.com/bot.html) Chrome/W.X.Y.Z Safari/537.36",
                "Google Search crawler"
            ],
            "primary_user_agent": "Googlebot",
            "robots_token": "Googlebot",
            "verified": "yes",
            "respects_robots": "yes",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "high",
            "source_type": "official",
            "common_use": "Googlebot is the search engine crawler for Google Search.",
            "short_description": "Google's main crawler for Search and related surfaces.",
            "verification_method": "Use Google's official crawler verification guidance because user-agent strings can be spoofed.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "ip_ranges_url": "https://developers.google.com/crawling/ipranges/common-crawlers.json",
            "rules": {
                "robots": "User-agent: Googlebot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Googlebot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://developers.google.com/crawling/docs/crawlers-fetchers/google-common-crawlers"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:36Z"
        },
        {
            "id": "googlebot-desktop",
            "name": "Googlebot-Desktop",
            "slug": "googlebot-desktop",
            "url": "https://botcrawl.com/bots/googlebot-desktop/",
            "status": "active",
            "operator": "Google",
            "company": "Google",
            "family": "Google",
            "category": "search",
            "kind": "unknown",
            "purpose": "indexing",
            "identity_type": "unknown",
            "user_agents": [
                "Googlebot-Desktop",
                "Mozilla/5.0 (compatible",
                "Googlebot/2.1",
                "+http://www.google.com/bot.html)",
                "Google Desktop Bot"
            ],
            "primary_user_agent": "Googlebot-Desktop",
            "robots_token": "Googlebot-Desktop",
            "verified": "yes",
            "respects_robots": "yes",
            "risk": "neutral",
            "recommended_action": "allow",
            "confidence": "unknown",
            "source_type": "unknown",
            "short_description": "Google desktop web crawler that indexes desktop versions of web pages.",
            "verification_method": "Verify the exact user-agent together with the published IP ranges at https://developers.google.com/static/search/apis/ipranges/googlebot.json.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "ip_ranges_url": "https://developers.google.com/static/search/apis/ipranges/googlebot.json",
            "rules": {
                "robots": "User-agent: Googlebot-Desktop\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Googlebot-Desktop\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://developers.google.com/search/docs/crawling-indexing/googlebot"
                }
            ],
            "updated_at": "2026-04-01T00:55:31Z"
        },
        {
            "id": "googlebot-news",
            "name": "Googlebot-News",
            "slug": "googlebot-news",
            "url": "https://botcrawl.com/bots/googlebot-news/",
            "status": "active",
            "operator": "Google",
            "company": "Google",
            "family": "Google",
            "category": "search",
            "kind": "crawler",
            "purpose": "search",
            "identity_type": "official-documented",
            "user_agents": [
                "Googlebot-News",
                "Uses various Googlebot HTTP request user-agent strings",
                "no separate HTTP request string.",
                "Google News crawler token"
            ],
            "primary_user_agent": "Googlebot-News",
            "robots_token": "Googlebot-News",
            "verified": "yes",
            "respects_robots": "yes",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "high",
            "source_type": "official",
            "short_description": "Google's robots token for Google News crawling preferences.",
            "verification_method": "Treat this as a Google-specific robots token and verify underlying traffic as Googlebot traffic.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: Googlebot-News\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Googlebot-News\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://developers.google.com/crawling/docs/crawlers-fetchers/google-common-crawlers"
                }
            ],
            "last_verified": "2026-04-01",
            "updated_at": "2026-04-30T09:15:36Z"
        },
        {
            "id": "googlebot-smartphone",
            "name": "Googlebot-Smartphone",
            "slug": "googlebot-smartphone",
            "url": "https://botcrawl.com/bots/googlebot-smartphone/",
            "status": "active",
            "operator": "Google",
            "company": "Google",
            "family": "Google",
            "category": "search",
            "kind": "unknown",
            "purpose": "user-triggered",
            "identity_type": "unknown",
            "user_agents": [
                "Googlebot-Smartphone",
                "Mozilla/5.0 (Linux",
                "Android 6.0.1",
                "Nexus 5X Build/MMB29P) AppleWebKit/537.36 (KHTML",
                "like Gecko) Chrome/W.X.Y.Z Mobile Safari/537.36 (compatible",
                "Googlebot/2.1",
                "+http://www.google.com/bot.html)",
                "Google Smartphone Bot"
            ],
            "primary_user_agent": "Googlebot-Smartphone",
            "robots_token": "Googlebot-Smartphone",
            "verified": "yes",
            "respects_robots": "yes",
            "risk": "neutral",
            "recommended_action": "allow",
            "confidence": "unknown",
            "source_type": "unknown",
            "short_description": "Google Smartphone crawler that indexes mobile web pages using a smartphone user agent.",
            "verification_method": "Verify the exact user-agent together with the published IP ranges at https://developers.google.com/static/search/apis/ipranges/googlebot.json.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "ip_ranges_url": "https://developers.google.com/static/search/apis/ipranges/googlebot.json",
            "rules": {
                "robots": "User-agent: Googlebot-Smartphone\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Googlebot-Smartphone\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://developers.google.com/search/docs/crawling-indexing/googlebot"
                }
            ],
            "updated_at": "2026-04-01T00:55:35Z"
        },
        {
            "id": "googlemessages",
            "name": "GoogleMessages",
            "slug": "googlemessages",
            "url": "https://botcrawl.com/bots/googlemessages/",
            "status": "active",
            "operator": "Google",
            "company": "Google",
            "family": "Google",
            "category": "search",
            "kind": "fetcher",
            "purpose": "user-triggered",
            "identity_type": "official-documented",
            "user_agents": [
                "GoogleMessages",
                "Google Messages link preview fetcher"
            ],
            "primary_user_agent": "GoogleMessages",
            "robots_token": "GoogleMessages",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "high",
            "source_type": "official",
            "short_description": "Google Messages link-preview fetcher.",
            "verification_method": "Verify as a Google fetcher using reverse DNS and Google's published user-triggered fetcher IP ranges.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "ip_ranges_url": "https://developers.google.com/static/search/apis/ipranges/user-triggered-fetchers-google.json",
            "rules": {
                "robots": "No general robots.txt block is guaranteed; this is a user-triggered fetcher and may require server-side access controls.",
                "cloudflare": "(http.user_agent contains \"GoogleMessages\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://developers.google.com/crawling/docs/crawlers-fetchers/google-user-triggered-fetchers"
                }
            ],
            "last_verified": "2026-04-01",
            "updated_at": "2026-04-30T09:15:36Z"
        },
        {
            "id": "googleother",
            "name": "GoogleOther",
            "slug": "googleother",
            "url": "https://botcrawl.com/bots/googleother/",
            "status": "active",
            "operator": "Google",
            "company": "Google",
            "family": "Google",
            "category": "ai",
            "kind": "unknown",
            "purpose": "general-crawl",
            "identity_type": "official-documented",
            "user_agents": [
                "GoogleOther",
                "Mozilla/5.0 AppleWebKit/537.36 (KHTML",
                "like Gecko",
                "compatible",
                "GoogleOther) Chrome/W.X.Y.Z Safari/537.36",
                "Google generic crawler"
            ],
            "primary_user_agent": "GoogleOther",
            "robots_token": "GoogleOther",
            "verified": "yes",
            "respects_robots": "yes",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "high",
            "source_type": "official",
            "common_use": "Generic crawler that may be used by various product teams for fetching publicly accessible content from sites. For example, it may be used for one-off crawls for internal research and development. https://developers.google.com/search/docs/crawling-indexing/overview-google-crawlers#googleother",
            "short_description": "Generic Google crawler for public-content fetches outside core Search.",
            "verification_method": "Use Google's official crawler verification guidance because user-agent strings can be spoofed.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "ip_ranges_url": "https://developers.google.com/crawling/ipranges/common-crawlers.json",
            "rules": {
                "robots": "User-agent: GoogleOther\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"GoogleOther\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://developers.google.com/crawling/docs/crawlers-fetchers/google-common-crawlers"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:36Z"
        },
        {
            "id": "googleother-image",
            "name": "GoogleOther-Image",
            "slug": "googleother-image",
            "url": "https://botcrawl.com/bots/googleother-image/",
            "status": "active",
            "operator": "Google",
            "company": "Google",
            "family": "Google",
            "category": "ai",
            "kind": "crawler",
            "purpose": "general-crawl",
            "identity_type": "official-documented",
            "user_agents": [
                "GoogleOther-Image",
                "GoogleOther-Image/1.0"
            ],
            "primary_user_agent": "GoogleOther-Image",
            "robots_token": "GoogleOther-Image",
            "verified": "yes",
            "respects_robots": "yes",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "high",
            "source_type": "official",
            "short_description": "GoogleOther-Image is GoogleOther optimized for fetching publicly accessible image URLs.",
            "verification_method": "Use Google's official crawler verification guidance because user-agent strings can be spoofed.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "ip_ranges_url": "https://developers.google.com/crawling/ipranges/common-crawlers.json",
            "rules": {
                "robots": "User-agent: GoogleOther\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"GoogleOther-Image\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://developers.google.com/crawling/docs/crawlers-fetchers/google-common-crawlers"
                }
            ],
            "last_verified": "2026-04-01",
            "updated_at": "2026-04-30T09:15:36Z"
        },
        {
            "id": "googleother-video",
            "name": "GoogleOther-Video",
            "slug": "googleother-video",
            "url": "https://botcrawl.com/bots/googleother-video/",
            "status": "active",
            "operator": "Google",
            "company": "Google",
            "family": "Google",
            "category": "ai",
            "kind": "crawler",
            "purpose": "general-crawl",
            "identity_type": "official-documented",
            "user_agents": [
                "GoogleOther-Video",
                "GoogleOther-Video/1.0"
            ],
            "primary_user_agent": "GoogleOther-Video",
            "robots_token": "GoogleOther-Video",
            "verified": "yes",
            "respects_robots": "yes",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "high",
            "source_type": "official",
            "short_description": "GoogleOther-Video is GoogleOther optimized for fetching publicly accessible video URLs.",
            "verification_method": "Use Google's official crawler verification guidance because user-agent strings can be spoofed.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "ip_ranges_url": "https://developers.google.com/crawling/ipranges/common-crawlers.json",
            "rules": {
                "robots": "User-agent: GoogleOther\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"GoogleOther-Video\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://developers.google.com/crawling/docs/crawlers-fetchers/google-common-crawlers"
                }
            ],
            "last_verified": "2026-04-01",
            "updated_at": "2026-04-30T09:15:36Z"
        },
        {
            "id": "googleweblight",
            "name": "googleweblight",
            "slug": "googleweblight",
            "url": "https://botcrawl.com/bots/googleweblight/",
            "status": "active",
            "operator": "Google",
            "company": "Google",
            "category": "unknown",
            "kind": "unknown",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "Mozilla/5.0 (Linux; Android 4.2.1; en-us; Nexus 5 Build/JOP40D) AppleWebKit/535.19 (KHTML, like Gecko; googleweblight) Chrome/38.0.1025.166 Mobile Safari/535.19",
                "Mozilla/5.0 (Linux",
                "Android 4.2.1",
                "en-us",
                "Nexus 5 Build/JOP40D) AppleWebKit/535.19 (KHTML",
                "like Gecko",
                "googleweblight) Chrome/38.0.1025.166 Mobile Safari/535.19",
                "Web Light",
                "Official"
            ],
            "primary_user_agent": "Mozilla/5.0 (Linux; Android 4.2.1; en-us; Nexus 5 Build/JOP40D) AppleWebKit/535.19 (KHTML, like Gecko; googleweblight) Chrome/38.0.1025.166 Mobile Safari/535.19",
            "robots_token": "googleweblight",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "unknown",
            "source_type": "unknown",
            "short_description": "Retired Google Web Light user agent for lightweight page delivery.",
            "verification_method": "Verify via reverse DNS matching Google's special-case crawler hostnames and confirm the IP matches Google's special crawler ranges.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: googleweblight\nDisallow: /"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://developers.google.com/crawling/docs/crawlers-fetchers/google-special-case-crawlers"
                }
            ],
            "updated_at": "2026-03-31T21:27:37Z"
        },
        {
            "id": "gopay",
            "name": "GoPay",
            "slug": "gopay",
            "url": "https://botcrawl.com/bots/gopay/",
            "status": "active",
            "operator": "GoPay.cz",
            "company": "GoPay.cz",
            "family": "GoPay.cz",
            "category": "unknown",
            "kind": "fetcher",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "GoPay"
            ],
            "primary_user_agent": "GoPay",
            "robots_token": "GoPay",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "GoPay payment getaway http notification service",
            "short_description": "GoPay payment getaway http notification service",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: GoPay\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"GoPay\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://doc.gopay.com/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:36Z"
        },
        {
            "id": "gpt-actions",
            "name": "GPT-Actions",
            "slug": "gpt-actions",
            "url": "https://botcrawl.com/bots/gpt-actions/",
            "status": "active",
            "operator": "OpenAI",
            "company": "OpenAI",
            "family": "OpenAI",
            "category": "ai",
            "kind": "agent",
            "purpose": "user-triggered",
            "identity_type": "unknown",
            "user_agents": [
                "GPT-Actions"
            ],
            "primary_user_agent": "GPT-Actions",
            "robots_token": "GPT-Actions",
            "verified": "yes",
            "respects_robots": "unknown",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Lets ChatGPT use actions to retrieve live information and interact with external APIs on user request.",
            "short_description": "OpenAI actions layer for user-initiated API interactions and real-time retrieval.",
            "verification_method": "Verified on bots.fyi. Treat this as a product/service identifier for user-initiated action traffic, not an automated training crawler.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: GPT-Actions\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"GPT-Actions\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://bots.fyi/d/gpt-actions"
                },
                {
                    "type": "operator",
                    "url": "https://developers.openai.com/api/docs/actions/introduction"
                }
            ],
            "last_verified": "2026-04-19",
            "last_checked": "2026-04-19",
            "updated_at": "2026-04-30T09:15:36Z"
        },
        {
            "id": "gptbot",
            "name": "GPTBot",
            "slug": "gptbot",
            "url": "https://botcrawl.com/bots/gptbot/",
            "status": "active",
            "operator": "OpenAI",
            "company": "OpenAI",
            "family": "OpenAI",
            "category": "ai",
            "kind": "crawler",
            "purpose": "training",
            "identity_type": "official-documented",
            "user_agents": [
                "GPTBot",
                "Mozilla/5.0 AppleWebKit/537.36 (KHTML",
                "like Gecko)",
                "compatible",
                "GPTBot/1.3",
                "+https://openai.com/gptbot",
                "OpenAI training crawler"
            ],
            "primary_user_agent": "GPTBot",
            "robots_token": "GPTBot",
            "verified": "yes",
            "respects_robots": "yes",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "high",
            "source_type": "official",
            "common_use": "GPTBot is used to crawl content that may be used in training OpenAI's generative AI foundation models",
            "short_description": "OpenAI crawler used for training generative AI foundation models.",
            "verification_method": "Verify the user-agent together with OpenAI's published IP ranges for GPTBot.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "ip_ranges_url": "https://openai.com/gptbot.json",
            "rules": {
                "robots": "User-agent: GPTBot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"GPTBot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://developers.openai.com/api/docs/bots/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:36Z"
        },
        {
            "id": "grafanas-synthetic-monitoring",
            "name": "Grafana’s Synthetic Monitoring",
            "slug": "grafanas-synthetic-monitoring",
            "url": "https://botcrawl.com/bots/grafanas-synthetic-monitoring/",
            "status": "active",
            "operator": "Grafana Labs",
            "company": "Grafana Labs",
            "family": "Grafana Labs",
            "category": "monitoring",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "grafana/synthetic-monitoring",
                "synthetic-monitoring-agent/v0.20.1-0-g66af84ad (linux amd64",
                "66af84ad6a8755895d4b69606281ca7354c1589a",
                "2024-02-12 16:50:07+00:00",
                "+https://github.com/grafana/synthetic-monitoring-agent)"
            ],
            "primary_user_agent": "grafana/synthetic-monitoring",
            "robots_token": "grafana/synthetic-monitoring",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Grafana's Synthetic Monitoring agent provides probe functionality and executes network checks for monitoring remote targets.",
            "short_description": "Grafana's Synthetic Monitoring agent provides probe functionality and executes network checks for monitoring remote targets.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: grafana/synthetic-monitoring\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"grafana/synthetic-monitoring\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://grafana.com/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:36Z"
        },
        {
            "id": "grapeshot",
            "name": "Grapeshot",
            "slug": "grapeshot",
            "url": "https://botcrawl.com/bots/grapeshot/",
            "status": "active",
            "operator": "Grapeshot",
            "company": "Grapeshot",
            "family": "Grapeshot",
            "category": "scraper",
            "kind": "crawler",
            "purpose": "general-crawl",
            "identity_type": "unknown",
            "user_agents": [
                "GrapeshotCrawler",
                "Mozilla/5.0 (compatible",
                "GrapeshotCrawler/2.0",
                "+http://www.grapeshot.co.uk/crawler.php)",
                "Mozilla/5.0 (iPhone",
                "CPU iPhone OS 8_3 like Mac OS X) AppleWebKit/600.1.4 (KHTML",
                "like Gecko) Version/8.0 Mobile/12F70 Safari/600.1. 4 (compatible"
            ],
            "primary_user_agent": "GrapeshotCrawler",
            "robots_token": "GrapeshotCrawler",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "The Grapeshot bot, part of Oracle Advertising, crawles web pages for content analysis and classification.",
            "short_description": "The Grapeshot bot, part of Oracle Advertising, crawles web pages for content analysis and classification.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: GrapeshotCrawler\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"GrapeshotCrawler\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.oracle.com/corporate/acquisitions/grapeshot/crawler.html"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:36Z"
        },
        {
            "id": "groovinads",
            "name": "Groovinads",
            "slug": "groovinads",
            "url": "https://botcrawl.com/bots/groovinads/",
            "status": "active",
            "operator": "Groovinads",
            "company": "Groovinads",
            "family": "Groovinads",
            "category": "scraper",
            "kind": "crawler",
            "purpose": "general-crawl",
            "identity_type": "unknown",
            "user_agents": [
                "GroovinaAdsbot",
                "Mozilla/5.0 (compatible",
                "GroovinaAdsbot/1.0",
                "+https://www.groovinads.com/en/#bot)"
            ],
            "primary_user_agent": "GroovinaAdsbot",
            "robots_token": "GroovinaAdsbot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "The Groovinads bot gathers data from e-commerce websites to support its ad services.",
            "short_description": "The Groovinads bot gathers data from e-commerce websites to support its ad services.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: GroovinaAdsbot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"GroovinaAdsbot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.groovinads.com/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:36Z"
        },
        {
            "id": "gtmetrix",
            "name": "GTmetrix",
            "slug": "gtmetrix",
            "url": "https://botcrawl.com/bots/gtmetrix/",
            "status": "active",
            "operator": "GTmetrix",
            "company": "GTmetrix",
            "family": "GTmetrix",
            "category": "monitoring",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "GTmetrix",
                "Mozilla/5.0 (X11",
                "Linux x86_64",
                "GTmetrix https://gtmetrix.com/) AppleWebKit/537.36 (KHTML",
                "like Gecko) Chrome/75.0.3770.100 Safari/537.36",
                "GTmetrix analysis bot"
            ],
            "primary_user_agent": "GTmetrix",
            "robots_token": "GTmetrix",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "GTmetrix is a free tool that analyzes a page's speed performance. Using PageSpeed and YSlow, GTmetrix generates scores for pages and offers actionable recommendations on how to fix them.",
            "short_description": "GTmetrix is a free tool that analyzes a page's speed performance.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: GTmetrix\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"GTmetrix\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://gtmetrix.com/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:37Z"
        },
        {
            "id": "guestpostsbot",
            "name": "GuestpostsBot",
            "slug": "guestpostsbot",
            "url": "https://botcrawl.com/bots/guestpostsbot/",
            "status": "active",
            "operator": "Guest Posts",
            "company": "Guest Posts",
            "family": "Guest Posts",
            "category": "monitoring",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "GuestpostsBot",
                "Mozilla/5.0 (compatible",
                "GuestpostsBot/2.0",
                "+https://guestposts.com.br/blog/robot/)",
                "GuestpostsBot/"
            ],
            "primary_user_agent": "GuestpostsBot",
            "robots_token": "GuestpostsBot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "GuestpostsBot is a Web Crawler that has several functions to facilitate the website owner who has registered his site on the guestposts.com.br platform to monitor his site. The bot constantly tracks the sites registered on the platform in order to check if the partnerships made on the guestpost platform are still active, in addition to validating if the site exists to allow registration and also monitoring the status of the site from time to time to warn the website owner in case of any inoperability.",
            "short_description": "GuestpostsBot is a Web Crawler that has several functions to facilitate the website owner who has registered his site on the guestposts.com.br platform to monitor his…",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: GuestpostsBot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"GuestpostsBot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://guestposts.com.br"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:37Z"
        },
        {
            "id": "haiku-searchbot",
            "name": "Haiku-SearchBot",
            "slug": "haiku-searchbot",
            "url": "https://botcrawl.com/bots/haiku-searchbot/",
            "status": "active",
            "operator": "CLERK SAS",
            "company": "CLERK SAS",
            "family": "CLERK",
            "category": "ai",
            "kind": "agent",
            "purpose": "user-triggered",
            "identity_type": "unknown",
            "user_agents": [
                "Haiku-SearchBot",
                "Mozilla/5.0 AppleWebKit/537.36 (KHTML",
                "like Gecko)",
                "compatible",
                "Haiku-SearchBot/1.0",
                "+https://haiku.fr/robots-doc"
            ],
            "primary_user_agent": "Haiku-SearchBot",
            "robots_token": "Haiku-SearchBot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Haiku-SearchBot is for user actions in Haiku. When users ask Haiku a question, it may visit a web page to help answer.",
            "short_description": "Haiku-SearchBot is for user actions in Haiku. When users ask Haiku a question, it may visit a web page to help answer.",
            "verification_method": "Validate the published identifier against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or stronger verification when available.",
            "rules": {
                "robots": "User-agent: Haiku-SearchBot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Haiku-SearchBot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.haiku.fr/robots-doc"
                }
            ],
            "last_verified": "2026-04-19",
            "last_checked": "2026-04-19",
            "updated_at": "2026-04-30T09:15:54Z"
        },
        {
            "id": "hatena",
            "name": "Hatena",
            "slug": "hatena",
            "url": "https://botcrawl.com/bots/hatena/",
            "status": "active",
            "operator": "Hatena",
            "company": "Hatena",
            "family": "Hatena",
            "category": "search",
            "kind": "fetcher",
            "purpose": "indexing",
            "identity_type": "unknown",
            "user_agents": [
                "Hatena",
                "HatenaBlog-bot/0.02",
                "Hatena::RUssia::Crawler/0.01",
                "Hatena-Favicon/2",
                "Hatena::Scissors/0.01",
                "Hatena Antenna/2",
                "HatenaBookmark/4.0",
                "HatenaBookmark/0.03",
                "Hatena::Fetcher/0.01",
                "Hatena Star UserAgent/2"
            ],
            "primary_user_agent": "Hatena",
            "robots_token": "Hatena",
            "verified": "yes",
            "respects_robots": "yes",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Hatena's service automatically visits web pages to get the information it needs to use in the service.",
            "short_description": "Hatena's service automatically visits web pages to get the information it needs to use in the service.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Hatena\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Hatena\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://developer.hatena.ne.jp/ja/documents/other/apis/useragents/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:37Z"
        },
        {
            "id": "hellowork",
            "name": "HelloWork",
            "slug": "hellowork",
            "url": "https://botcrawl.com/bots/hellowork/",
            "status": "active",
            "operator": "HelloWork Group",
            "company": "HelloWork Group",
            "family": "HelloWork Group",
            "category": "unknown",
            "kind": "unknown",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "HelloworkJobPostingBot",
                "HelloworkJobPostingBot/1.0"
            ],
            "primary_user_agent": "HelloworkJobPostingBot",
            "robots_token": "HelloworkJobPostingBot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "HelloWork is a French job board, and its bot aggregates job listings for its platform. It crawls company career pages and other sources to collect this information.",
            "short_description": "HelloWork is a French job board, and its bot aggregates job listings for its platform.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: HelloworkJobPostingBot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"HelloworkJobPostingBot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.hellowork-group.com/en/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:37Z"
        },
        {
            "id": "henry-shopping-agent",
            "name": "Henry Shopping Agent",
            "slug": "henry-shopping-agent",
            "url": "https://botcrawl.com/bots/henry-shopping-agent/",
            "status": "active",
            "category": "unknown",
            "kind": "unknown",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "Henry Shopping Agent"
            ],
            "primary_user_agent": "Henry Shopping Agent",
            "robots_token": "Henry Shopping Agent",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "unknown",
            "source_type": "unknown",
            "short_description": "Executes checkout via browser automation using a user's card and signed mandate.",
            "verification_method": "Verify the user-agent token and validate against the operator documentation or the Cloudflare verified-bot directory when needed.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: Henry Shopping Agent\nDisallow: /"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://docs.henrylabs.ai/"
                }
            ],
            "updated_at": "2026-03-31T14:27:09Z"
        },
        {
            "id": "hetrixtools",
            "name": "HetrixTools",
            "slug": "hetrixtools",
            "url": "https://botcrawl.com/bots/hetrixtools/",
            "status": "active",
            "operator": "HetrixTools Inc",
            "company": "HetrixTools Inc",
            "family": "HetrixTools Inc",
            "category": "monitoring",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "HetrixTools",
                "HetrixTools Uptime Monitoring Bot. https://hetrix.tools/uptime-monitoring-bot.html",
                "HetrixTools uptime monitor"
            ],
            "primary_user_agent": "HetrixTools",
            "robots_token": "HetrixTools",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "HetrixTools offers an Uptime Monitoring service, where our monitoring locations (bots) will check whether our users' websites are online or not. Similar to more known services such as Pingdom or UptimeRobot.",
            "short_description": "HetrixTools offers an Uptime Monitoring service, where our monitoring locations (bots) will check whether our users' websites are online or not.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: HetrixTools\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"HetrixTools\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://hetrixtools.com/uptime-monitor/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:37Z"
        },
        {
            "id": "hey-email-privacy-proxy",
            "name": "HEY Email Privacy Proxy",
            "slug": "hey-email-privacy-proxy",
            "url": "https://botcrawl.com/bots/hey-email-privacy-proxy/",
            "status": "active",
            "operator": "HEY",
            "company": "HEY",
            "family": "HEY Email Privacy Proxy",
            "category": "scraper",
            "kind": "preview",
            "purpose": "general-crawl",
            "identity_type": "unknown",
            "user_agents": [
                "hey.com/imageproxy",
                "hey.com/imageproxy Mozilla/5.0 (X11",
                "Linux x86_64) AppleWebKit/537.36 (KHTML",
                "like Gecko) Chrome/134.0.0.0 Safari/537.36"
            ],
            "primary_user_agent": "hey.com/imageproxy",
            "robots_token": "hey.com/imageproxy",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "HEY email stops spy pixels and prevents user IP tracking by proxying all HTML email images, fonts, and external assets",
            "short_description": "HEY email stops spy pixels and prevents user IP tracking by proxying all HTML email images, fonts, and external assets",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: hey.com/imageproxy\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"hey.com/imageproxy\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.hey.com/spy-trackers/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:37Z"
        },
        {
            "id": "hifibot",
            "name": "HIFIBot",
            "slug": "hifibot",
            "url": "https://botcrawl.com/bots/hifibot/",
            "status": "active",
            "operator": "HIFI",
            "company": "HIFI",
            "family": "HIFI",
            "category": "unknown",
            "kind": "unknown",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "HIFIBot",
                "Mozilla/5.0 (compatible",
                "HIFIBot/1",
                "+https://hi.fi)"
            ],
            "primary_user_agent": "HIFIBot",
            "robots_token": "HIFIBot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Short Description: HIFI is a financial services company for musicians and professional creators. HIFI acts as an agent on behalf of its clients to automate the retrieval and processing of royalty earnings statements. HIFI’s clients provide access credentials for each of their portal accounts and then HIFI automates the otherwise labor intensive process of logging into each portal, downloading and then processing the relevant CSVs. HIFI analyzes and aggregates the underlying data and presents its clients with a business management comprehensive dashboard.",
            "short_description": "Short Description: HIFI is a financial services company for musicians and professional creators.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: HIFIBot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"HIFIBot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://hi.fi/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:37Z"
        },
        {
            "id": "honeybadgerbot",
            "name": "honeybadgerbot",
            "slug": "honeybadgerbot",
            "url": "https://botcrawl.com/bots/honeybadgerbot/",
            "status": "active",
            "operator": "Honeybadger Industries",
            "company": "Honeybadger Industries",
            "family": "Honeybadger Industries",
            "category": "monitoring",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "Honeybadger Uptime Check"
            ],
            "primary_user_agent": "Honeybadger Uptime Check",
            "robots_token": "Honeybadger Uptime Check",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "HoneybadgerBot is the bot used by the Honeybadger error and uptime monitoring service.",
            "short_description": "HoneybadgerBot is the bot used by the Honeybadger error and uptime monitoring service.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Honeybadger Uptime Check\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Honeybadger Uptime Check\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.honeybadger.io"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:37Z"
        },
        {
            "id": "hookdeck",
            "name": "Hookdeck",
            "slug": "hookdeck",
            "url": "https://botcrawl.com/bots/hookdeck/",
            "status": "active",
            "operator": "Hookdeck",
            "company": "Hookdeck",
            "family": "Hookdeck",
            "category": "unknown",
            "kind": "unknown",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "hookdeck"
            ],
            "primary_user_agent": "hookdeck",
            "robots_token": "hookdeck",
            "verified": "yes",
            "respects_robots": "unknown",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "botsfyi-verified-listing",
            "common_use": "Webhook",
            "short_description": "A reliable event gateway for event-driven applications.",
            "verification_method": "Public verified bot directory listing",
            "spoofing_risk": "Medium",
            "rules": {
                "cloudflare": "(http.user_agent contains \"hookdeck\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://bots.fyi/d/hookdeck"
                }
            ],
            "last_verified": "2026-04-19",
            "last_checked": "2026-04-19",
            "updated_at": "2026-04-30T09:15:37Z"
        },
        {
            "id": "hosttracker",
            "name": "HostTracker",
            "slug": "hosttracker",
            "url": "https://botcrawl.com/bots/hosttracker/",
            "status": "active",
            "operator": "HostTracker",
            "company": "HostTracker",
            "family": "HostTracker",
            "category": "monitoring",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "HostTracker",
                "Mozilla/5.0 (compatible",
                "HostTracker/2.0",
                "+http://www.host-tracker.com/)"
            ],
            "primary_user_agent": "HostTracker",
            "robots_token": "HostTracker",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "The HostTracker monitor tracks website availability and performance for their customers.",
            "short_description": "The HostTracker monitor tracks website availability and performance for their customers.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: HostTracker\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"HostTracker\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.host-tracker.com/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:37Z"
        },
        {
            "id": "hotjar",
            "name": "Hotjar",
            "slug": "hotjar",
            "url": "https://botcrawl.com/bots/hotjar/",
            "status": "active",
            "operator": "Hotjar",
            "company": "Hotjar",
            "family": "Hotjar",
            "category": "monitoring",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "Hotjar",
                "Mozilla/5.0 (iPhone",
                "CPU iPhone OS 11_3 like Mac OS X) AppleWebKit/605.1.15 (KHTML",
                "like Gecko) Hotjar Version/11.0 Mobile/15E148 Safari/604.1"
            ],
            "primary_user_agent": "Hotjar",
            "robots_token": "Hotjar",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Hotjar provides user analytics and feedback for website owners.",
            "short_description": "Hotjar provides user analytics and feedback for website owners.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Hotjar\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Hotjar\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.hotjar.com/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:37Z"
        },
        {
            "id": "hsts-preload-bot",
            "name": "HSTS preload bot",
            "slug": "hsts-preload-bot",
            "url": "https://botcrawl.com/bots/hsts-preload-bot/",
            "status": "active",
            "operator": "Chromium",
            "company": "Chromium",
            "family": "Chromium",
            "category": "unknown",
            "kind": "unknown",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "hstspreload-bot"
            ],
            "primary_user_agent": "hstspreload-bot",
            "robots_token": "hstspreload-bot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Checks that links can be preloaded for Chromium",
            "short_description": "Checks that links can be preloaded for Chromium",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: hstspreload-bot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"hstspreload-bot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://hstspreload.org/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:37Z"
        },
        {
            "id": "hubspot-crawler",
            "name": "HubSpot Crawler",
            "slug": "hubspot-crawler",
            "url": "https://botcrawl.com/bots/hubspot-crawler/",
            "status": "active",
            "operator": "Hubspot",
            "company": "Hubspot",
            "family": "Hubspot",
            "category": "unknown",
            "kind": "unknown",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "HubSpot Crawler",
                "HubSpot Crawler 1.0 http://www.hubspot.com/"
            ],
            "primary_user_agent": "HubSpot Crawler",
            "robots_token": "HubSpot Crawler",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "HubSpot offers a full platform of marketing, sales, customer service, and CRM software — plus the methodology, resources, and support — to help businesses grow better. Get started with free tools, and upgrade as you grow.",
            "short_description": "HubSpot offers a full platform of marketing, sales, customer service, and CRM software — plus the methodology, resources, and support — to help businesses grow better.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: HubSpot Crawler\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"HubSpot Crawler\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "http://www.hubspot.com/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:37Z"
        },
        {
            "id": "hubspot-feed-fetcher",
            "name": "HubSpot Feed Fetcher",
            "slug": "hubspot-feed-fetcher",
            "url": "https://botcrawl.com/bots/hubspot-feed-fetcher/",
            "status": "active",
            "operator": "HubSpot",
            "company": "HubSpot",
            "family": "HubSpot",
            "category": "search",
            "kind": "fetcher",
            "purpose": "indexing",
            "identity_type": "unknown",
            "user_agents": [
                "HubSpot-FeedFetcher"
            ],
            "primary_user_agent": "HubSpot-FeedFetcher",
            "robots_token": "HubSpot-FeedFetcher",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "When posting to LinkedIn from Hubspot, images need to be pulled through to LinkedIn when published. The crawler performs this function",
            "short_description": "When posting to LinkedIn from Hubspot, images need to be pulled through to LinkedIn when published.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: HubSpot-FeedFetcher\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"HubSpot-FeedFetcher\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.hubspot.com/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:37Z"
        },
        {
            "id": "hubspot-page-fetcher",
            "name": "HubSpot Page Fetcher",
            "slug": "hubspot-page-fetcher",
            "url": "https://botcrawl.com/bots/hubspot-page-fetcher/",
            "status": "active",
            "operator": "HubSpot",
            "company": "HubSpot",
            "family": "HubSpot",
            "category": "scraper",
            "kind": "preview",
            "purpose": "general-crawl",
            "identity_type": "unknown",
            "user_agents": [
                "HubSpot Page Fetcher",
                "HubSpot Page Fetcher/1.0 http://www.hubspot.com/ web-crawlers@hubspot.com"
            ],
            "primary_user_agent": "HubSpot Page Fetcher",
            "robots_token": "HubSpot Page Fetcher",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "When posting to LinkedIn from Hubspot, images need to be pulled through to LinkedIn when published. The crawler performs this function",
            "short_description": "When posting to LinkedIn from Hubspot, images need to be pulled through to LinkedIn when published.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: HubSpot Page Fetcher\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"HubSpot Page Fetcher\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.hubspot.com/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:37Z"
        },
        {
            "id": "huckabuy-bot",
            "name": "Huckabuy Bot",
            "slug": "huckabuy-bot",
            "url": "https://botcrawl.com/bots/huckabuy-bot/",
            "status": "active",
            "operator": "Huckabuy",
            "company": "Huckabuy",
            "family": "Huckabuy",
            "category": "scraper",
            "kind": "crawler",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "Huckabot",
                "Mozilla/5.0 (Linux",
                "Android 6.0.1",
                "Nexus 5X Build/MMB29P) AppleWebKit/537.36 (KHTML",
                "like Gecko) Chrome/41.0.2272.96 Mobile Safari/537.36 (compatible",
                "Huckabot/0.0",
                "+https://huckabuy.com/)",
                "Huckabot/"
            ],
            "primary_user_agent": "Huckabot",
            "robots_token": "Huckabot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Huckabot is Huckabuy’s main crawler which is utilized by almost all of Huckabuy’s products. The primary purpose of Huckabot is to crawl and index a customer’s website, which is then rendered and optimized with our Dynamic Rendering Product. Several of the Page Speed product boosters, such as Fold Prioritization, also leverage Huckabot in order to optimize and improve a website’s performance.",
            "short_description": "Huckabot is Huckabuy’s main crawler which is utilized by almost all of Huckabuy’s products.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Huckabot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Huckabot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://huckabuy.com/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:37Z"
        },
        {
            "id": "hydrozen",
            "name": "Hydrozen",
            "slug": "hydrozen",
            "url": "https://botcrawl.com/bots/hydrozen/",
            "status": "active",
            "operator": "Hydrozen",
            "company": "Hydrozen",
            "family": "Hydrozen",
            "category": "monitoring",
            "kind": "unknown",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "hydrozen"
            ],
            "primary_user_agent": "hydrozen",
            "robots_token": "hydrozen",
            "verified": "yes",
            "respects_robots": "unknown",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "botsfyi-verified-listing",
            "common_use": "Monitoring",
            "short_description": "Monitors availability of websites, cron jobs, APIs, domains, and SSL endpoints.",
            "verification_method": "Public verified bot directory listing",
            "spoofing_risk": "Medium",
            "rules": {
                "cloudflare": "(http.user_agent contains \"hydrozen\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://bots.fyi/d/hydrozen"
                }
            ],
            "last_verified": "2026-04-19",
            "last_checked": "2026-04-19",
            "updated_at": "2026-04-30T09:15:37Z"
        },
        {
            "id": "hype-machine",
            "name": "Hype Machine",
            "slug": "hype-machine",
            "url": "https://botcrawl.com/bots/hype-machine/",
            "status": "active",
            "operator": "Hype Machine",
            "company": "Hype Machine",
            "family": "Hype Machine",
            "category": "search",
            "kind": "crawler",
            "purpose": "indexing",
            "identity_type": "unknown",
            "user_agents": [
                "Hype Machine",
                "Hype Machine/4.0 hypem.com (anthony@hypem.com)"
            ],
            "primary_user_agent": "Hype Machine",
            "robots_token": "Hype Machine",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Since 2005, Hype Machine monitors music publications/blogs for posts about new artists and builds playlists using this metadata for listeners.",
            "short_description": "Since 2005, Hype Machine monitors music publications/blogs for posts about new artists and builds playlists using this metadata for listeners.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Hype Machine\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Hype Machine\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://hypem.com/latest"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:37Z"
        },
        {
            "id": "ia_archiver",
            "name": "ia_archiver",
            "slug": "ia_archiver",
            "url": "https://botcrawl.com/bots/ia_archiver/",
            "status": "active",
            "operator": "Internet Archive",
            "company": "Internet Archive",
            "family": "Internet Archive",
            "category": "scraper",
            "kind": "unknown",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "ia_archiver",
                "Mozilla/5.0 (compatible",
                "archive.org_bot +http://www.archive.org/details/archive.org_bot)",
                "Internet Archive Archiver"
            ],
            "primary_user_agent": "ia_archiver",
            "robots_token": "ia_archiver",
            "verified": "yes",
            "respects_robots": "yes",
            "risk": "neutral",
            "recommended_action": "allow",
            "confidence": "unknown",
            "source_type": "unknown",
            "short_description": "Internet Archive primary web crawler for the Wayback Machine.",
            "verification_method": "Treat this entry as verified only when the exact user-agent matches the operator documentation at https://archive.org.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: ia_archiver\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"ia_archiver\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://archive.org"
                }
            ],
            "updated_at": "2026-04-01T00:55:21Z"
        },
        {
            "id": "ias-crawler",
            "name": "IAS crawler",
            "slug": "ias-crawler",
            "url": "https://botcrawl.com/bots/ias-crawler/",
            "status": "active",
            "operator": "Integral Ad Science",
            "company": "Integral Ad Science",
            "family": "Integral Ad Science",
            "category": "scraper",
            "kind": "crawler",
            "purpose": "general-crawl",
            "identity_type": "unknown",
            "user_agents": [
                "ias_crawler",
                "IAS crawler (ias_crawler",
                "http://integralads.com/site-indexing-policy/)"
            ],
            "primary_user_agent": "ias_crawler",
            "robots_token": "ias_crawler",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Integral Ad Science (IAS) is the global market leader in digital ad verification, offering technologies that drive high-quality advertising media.",
            "short_description": "Integral Ad Science (IAS) is the global market leader in digital ad verification, offering technologies that drive high-quality advertising media.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: ias_crawler\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"ias_crawler\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "http://integralads.com/site-indexing-policy/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:37Z"
        },
        {
            "id": "iboubot",
            "name": "IbouBot",
            "slug": "iboubot",
            "url": "https://botcrawl.com/bots/iboubot/",
            "status": "active",
            "operator": "Babbar",
            "company": "Babbar",
            "family": "Babbar",
            "category": "search",
            "kind": "crawler",
            "purpose": "indexing",
            "identity_type": "unknown",
            "user_agents": [
                "IbouBot",
                "Mozilla/5.0 (compatible",
                "IbouBot/1.0",
                "+bot@ibou.io",
                "+https://ibou.io/iboubot.html)"
            ],
            "primary_user_agent": "IbouBot",
            "robots_token": "IbouBot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "IbouBot is the crawler of the Ibou Search Engine",
            "short_description": "IbouBot is the crawler of the Ibou Search Engine",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: IbouBot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"IbouBot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://ibou.io/iboubot.html"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:37Z"
        },
        {
            "id": "icc-crawler",
            "name": "ICC Crawler",
            "slug": "icc-crawler",
            "url": "https://botcrawl.com/bots/icc-crawler/",
            "status": "active",
            "operator": "NICT",
            "company": "NICT",
            "family": "NICT",
            "category": "ai",
            "kind": "crawler",
            "purpose": "general-crawl",
            "identity_type": "unknown",
            "user_agents": [
                "ICC-Crawler",
                "ICC-Crawler/3.0 (Mozilla-compatible",
                "https://ucri.nict.go.jp/en/icccrawler.html)",
                "ICC-Crawler/"
            ],
            "primary_user_agent": "ICC-Crawler",
            "robots_token": "ICC-Crawler",
            "verified": "yes",
            "respects_robots": "yes",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "ICC-Crawler automatically crawls the Internet and collects web pages. ICC-Crawler is operated by the Universal Communication Research Institute at the National Institute of Information and Communications Technology (NICT).",
            "short_description": "ICC-Crawler automatically crawls the Internet and collects web pages.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: ICC-Crawler\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"ICC-Crawler\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://ucri.nict.go.jp"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:37Z"
        },
        {
            "id": "iframely",
            "name": "Iframely",
            "slug": "iframely",
            "url": "https://botcrawl.com/bots/iframely/",
            "status": "active",
            "operator": "Iframely",
            "company": "Iframely",
            "family": "Iframely",
            "category": "scraper",
            "kind": "preview",
            "purpose": "general-crawl",
            "identity_type": "unknown",
            "user_agents": [
                "https://iframely.com/docs/about",
                "Iframely/1.3.1 (+https://iframely.com/docs/about)"
            ],
            "primary_user_agent": "https://iframely.com/docs/about",
            "robots_token": "https://iframely.com/docs/about",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Rich media embed solution",
            "short_description": "Rich media embed solution",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: https://iframely.com/docs/about\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"https://iframely.com/docs/about\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://iframely.com"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:38Z"
        },
        {
            "id": "ifttt-rss-feed-service",
            "name": "IFTTT RSS Feed Service",
            "slug": "ifttt-rss-feed-service",
            "url": "https://botcrawl.com/bots/ifttt-rss-feed-service/",
            "status": "active",
            "operator": "IFTTT RSS Feed Service",
            "company": "IFTTT RSS Feed Service",
            "family": "IFTTT RSS Feed Service",
            "category": "search",
            "kind": "fetcher",
            "purpose": "indexing",
            "identity_type": "unknown",
            "user_agents": [
                "IFTTT",
                "IFTTT/1.0 (https://ifttt.com/support)"
            ],
            "primary_user_agent": "IFTTT",
            "robots_token": "IFTTT",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "RSS feed fetcher to power user-configured automations",
            "short_description": "RSS feed fetcher to power user-configured automations",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: IFTTT\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"IFTTT\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://ifttt.com/feed/details"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:38Z"
        },
        {
            "id": "imagesiftbot",
            "name": "ImagesiftBot",
            "slug": "imagesiftbot",
            "url": "https://botcrawl.com/bots/imagesiftbot/",
            "status": "active",
            "operator": "Hive",
            "company": "Hive",
            "family": "Hive",
            "category": "unknown",
            "kind": "unknown",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "imagesiftbot"
            ],
            "primary_user_agent": "imagesiftbot",
            "robots_token": "imagesiftbot",
            "verified": "yes",
            "respects_robots": "unknown",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "botsfyi-verified-listing",
            "common_use": "AI Crawler",
            "short_description": "Scrapes publicly available images to support Hive's web intelligence products.",
            "verification_method": "Public verified bot directory listing",
            "spoofing_risk": "Medium",
            "rules": {
                "cloudflare": "(http.user_agent contains \"imagesiftbot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://bots.fyi/d/imagesiftbot"
                }
            ],
            "last_verified": "2026-04-19",
            "last_checked": "2026-04-19",
            "updated_at": "2026-04-30T09:15:38Z"
        },
        {
            "id": "imagespider",
            "name": "imageSpider",
            "slug": "imagespider",
            "url": "https://botcrawl.com/bots/imagespider/",
            "status": "active",
            "operator": "ByteDance",
            "company": "ByteDance",
            "family": "ByteDance",
            "category": "ai",
            "kind": "unknown",
            "purpose": "training",
            "identity_type": "unknown",
            "user_agents": [
                "imageSpider",
                "Mozilla/5.0 (compatible",
                "+https://bytedance.com)",
                "ByteDance Image Spider"
            ],
            "primary_user_agent": "imageSpider",
            "robots_token": "imageSpider",
            "verified": "unknown",
            "respects_robots": "no",
            "risk": "high",
            "recommended_action": "block",
            "confidence": "unknown",
            "source_type": "unknown",
            "short_description": "ByteDance image spider used to collect image data for AI training and content platforms.",
            "verification_method": "No official IP-range verification is documented here. Match the exact user-agent and request behavior against ByteDance's published crawler documentation.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: imageSpider\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"imageSpider\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://bytedance.com"
                }
            ],
            "updated_at": "2026-04-01T00:55:16Z"
        },
        {
            "id": "indeedjobbot",
            "name": "IndeedJobBot",
            "slug": "indeedjobbot",
            "url": "https://botcrawl.com/bots/indeedjobbot/",
            "status": "active",
            "operator": "Indeed",
            "company": "Indeed",
            "family": "Indeed",
            "category": "scraper",
            "kind": "crawler",
            "purpose": "indexing",
            "identity_type": "unknown",
            "user_agents": [
                "IndeedJobBot"
            ],
            "primary_user_agent": "IndeedJobBot",
            "robots_token": "IndeedJobBot",
            "verified": "yes",
            "respects_robots": "yes",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Indeed's job crawling bot that crawls job and job related information",
            "short_description": "Indeed's job crawling bot that crawls job and job related information",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: IndeedJobBot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"IndeedJobBot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.indeed.com/about/indeed-crawlers"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:38Z"
        },
        {
            "id": "inngest",
            "name": "Inngest",
            "slug": "inngest",
            "url": "https://botcrawl.com/bots/inngest/",
            "status": "active",
            "operator": "Inngest",
            "company": "Inngest",
            "family": "Inngest",
            "category": "unknown",
            "kind": "fetcher",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "inngest"
            ],
            "primary_user_agent": "inngest",
            "robots_token": "inngest",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Delivers webhook-style event notifications for event-driven applications.",
            "short_description": "Inngest event-driven platform webhook traffic.",
            "verification_method": "Verified on bots.fyi. Treat as a webhook/service identifier rather than a browser-style crawl signature.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: inngest\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"inngest\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://bots.fyi/d/inngest"
                },
                {
                    "type": "operator",
                    "url": "https://www.inngest.com/"
                }
            ],
            "last_verified": "2026-04-19",
            "last_checked": "2026-04-19",
            "updated_at": "2026-04-30T09:15:38Z"
        },
        {
            "id": "innguma-fetcher",
            "name": "Innguma Fetcher",
            "slug": "innguma-fetcher",
            "url": "https://botcrawl.com/bots/innguma-fetcher/",
            "status": "active",
            "operator": "Innguma",
            "company": "Innguma",
            "family": "Innguma",
            "category": "search",
            "kind": "fetcher",
            "purpose": "indexing",
            "identity_type": "unknown",
            "user_agents": [
                "Innguma",
                "Innguma/1.0 (+https://factory.innguma.com/fetcher/)"
            ],
            "primary_user_agent": "Innguma",
            "robots_token": "Innguma",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "The Innguma fetcher collects and periodically refreshes user-initiated feeds.",
            "short_description": "The Innguma fetcher collects and periodically refreshes user-initiated feeds.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Innguma\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Innguma\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://factory.innguma.com/fetcher/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:38Z"
        },
        {
            "id": "innologica",
            "name": "Innologica",
            "slug": "innologica",
            "url": "https://botcrawl.com/bots/innologica/",
            "status": "active",
            "operator": "Innologica",
            "company": "Innologica",
            "family": "Innologica",
            "category": "search",
            "kind": "fetcher",
            "purpose": "indexing",
            "identity_type": "unknown",
            "user_agents": [
                "inoreader.com",
                "Mozilla/5.0 (compatible"
            ],
            "primary_user_agent": "inoreader.com",
            "robots_token": "inoreader.com",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Inoreader is one of the most popular RSS feed readers used by more than a million people.",
            "short_description": "Inoreader is one of the most popular RSS feed readers used by more than a million people.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: inoreader.com\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"inoreader.com\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.inoreader.com"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:38Z"
        },
        {
            "id": "inoreader",
            "name": "Inoreader",
            "slug": "inoreader",
            "url": "https://botcrawl.com/bots/inoreader/",
            "status": "active",
            "operator": "Inoreader",
            "company": "Inoreader",
            "family": "Inoreader",
            "category": "scraper",
            "kind": "unknown",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "Inoreader",
                "Mozilla/5.0 (compatible",
                "Inoreader/1.0",
                "+https://www.inoreader.com)",
                "Inoreader Bot"
            ],
            "primary_user_agent": "Inoreader",
            "robots_token": "Inoreader",
            "verified": "unknown",
            "respects_robots": "yes",
            "risk": "neutral",
            "recommended_action": "allow",
            "confidence": "unknown",
            "source_type": "unknown",
            "short_description": "Inoreader RSS/Atom feed aggregator that fetches content for its news reader service.",
            "verification_method": "No official IP-range or reverse-DNS verification method is documented here. Match the exact user-agent and request behavior against the source documentation at https://www.inoreader.com.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: Inoreader\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Inoreader\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.inoreader.com"
                }
            ],
            "updated_at": "2026-04-01T00:55:35Z"
        },
        {
            "id": "instapaper",
            "name": "Instapaper",
            "slug": "instapaper",
            "url": "https://botcrawl.com/bots/instapaper/",
            "status": "active",
            "operator": "Instant Paper",
            "company": "Instant Paper",
            "family": "Instant Paper",
            "category": "unknown",
            "kind": "unknown",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "Instapaper",
                "Instapaper/4.0",
                "Instaparser/1.0"
            ],
            "primary_user_agent": "Instapaper",
            "robots_token": "Instapaper",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Instapaper is an app that lets people save articles to read later.",
            "short_description": "Instapaper is an app that lets people save articles to read later.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Instapaper\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Instapaper\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.instapaper.com/publishers"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:38Z"
        },
        {
            "id": "integromat",
            "name": "Integromat",
            "slug": "integromat",
            "url": "https://botcrawl.com/bots/integromat/",
            "status": "active",
            "operator": "Make",
            "company": "Make",
            "family": "Make",
            "category": "unknown",
            "kind": "fetcher",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "Integromat",
                "Integromat/production"
            ],
            "primary_user_agent": "Integromat",
            "robots_token": "Integromat",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Integromat\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Integromat\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.make.com/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:38Z"
        },
        {
            "id": "internet-archive",
            "name": "Internet Archive",
            "slug": "internet-archive",
            "url": "https://botcrawl.com/bots/internet-archive/",
            "status": "active",
            "operator": "Internet Archive",
            "company": "Internet Archive",
            "family": "Internet Archive",
            "category": "scraper",
            "kind": "crawler",
            "purpose": "indexing",
            "identity_type": "unknown",
            "user_agents": [
                "archive.org_bot",
                "Mozilla/5.0 (compatible",
                "special_archiver/3.1.1 +http://www.archive.org/details/archive.org_bot)",
                "archive.org_bot +http://www.archive.org/details/archive.org_bot)",
                "Internet Archive",
                "Wayback Machine crawler"
            ],
            "primary_user_agent": "archive.org_bot",
            "robots_token": "archive.org_bot",
            "verified": "yes",
            "respects_robots": "yes",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "The Internet Archive bot, also known as archive.org_bot, is the web crawler for the Internet Archive's Wayback Machine. It systematically crawls and preserves publicly accessible web pages for historical record.",
            "short_description": "The Internet Archive bot, also known as archive.org_bot, is the web crawler for the Internet Archive's Wayback Machine.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: archive.org_bot\nDisallow: /"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.archive.org/details/archive.org_bot"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:38Z"
        },
        {
            "id": "archive-org_bot",
            "name": "Internet Archive",
            "slug": "archive-org_bot",
            "url": "https://botcrawl.com/bots/archive-org_bot/",
            "status": "active",
            "operator": "Internet Archive",
            "company": "Internet Archive",
            "family": "Internet Archive",
            "category": "scraper",
            "kind": "crawler",
            "purpose": "indexing",
            "identity_type": "unknown",
            "user_agents": [
                "archive.org_bot",
                "Mozilla/5.0 (compatible",
                "special_archiver/3.1.1 +http://www.archive.org/details/archive.org_bot)",
                "archive.org_bot +http://www.archive.org/details/archive.org_bot)",
                "Internet Archive",
                "Wayback Machine crawler"
            ],
            "primary_user_agent": "archive.org_bot",
            "robots_token": "archive.org_bot",
            "verified": "yes",
            "respects_robots": "yes",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "unknown",
            "common_use": "The Internet Archive bot, also known as archive.org_bot, is the web crawler for the Internet Archive's Wayback Machine. It systematically crawls and preserves publicly accessible web pages for historical record.",
            "short_description": "The Internet Archive bot, also known as archive.org_bot, is the web crawler for the Internet Archive's Wayback Machine.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: archive.org_bot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"archive.org_bot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.archive.org/details/archive.org_bot"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T07:23:14Z"
        },
        {
            "id": "internet-archive-archive-it",
            "name": "Internet Archive – Archive-It",
            "slug": "internet-archive-archive-it",
            "url": "https://botcrawl.com/bots/internet-archive-archive-it/",
            "status": "active",
            "operator": "Archive-It",
            "company": "Archive-It",
            "family": "Archive-It",
            "category": "scraper",
            "kind": "crawler",
            "purpose": "indexing",
            "identity_type": "unknown",
            "user_agents": [
                "Archive-It",
                "Mozilla/5.0 (X11; Linux x86_64; special_archiver; +http://archive-it.org/files/site-owners-special.html) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36",
                "Mozilla/5.0 (X11; Linux x86_64; archive.org_bot; +http://archive-it.org/files/site-owners.html) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36",
                "Mozilla/5.0 (compatible; special_archiver; +@http://archive-it.org/files/site-owners-special.html)",
                "Mozilla/5.0 (compatible; archive.org_bot; +@http://archive-it.org/files/site-owners.html)"
            ],
            "primary_user_agent": "Archive-It",
            "robots_token": "Archive-It",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Internet Archive’s Archive-It service preserves publicly accessible web pages for the historical record.",
            "short_description": "Internet Archive’s Archive-It service preserves publicly accessible web pages for the historical record.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Archive-It\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Archive-It\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://archive-it.org/files/site-owners.html"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:38Z"
        },
        {
            "id": "internetarchivebot",
            "name": "InternetArchiveBot",
            "slug": "internetarchivebot",
            "url": "https://botcrawl.com/bots/internetarchivebot/",
            "status": "active",
            "operator": "Internet Archive",
            "company": "Internet Archive",
            "family": "Internet Archive",
            "category": "monitoring",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "IABot",
                "IABot/2.0 (+https://meta.wikimedia.org/wiki/InternetArchiveBot/FAQ_for_sysadmins) (Checking if link from Wikipedia is broken and needs removal)",
                "IABot/"
            ],
            "primary_user_agent": "IABot",
            "robots_token": "IABot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "InternetArchiveBot looks for URL references on Wikipedia and assesses if the URL is still alive, or delivering 404s.",
            "short_description": "InternetArchiveBot looks for URL references on Wikipedia and assesses if the URL is still alive, or delivering 404s.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: IABot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"IABot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://iabot.toolforge.org"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:38Z"
        },
        {
            "id": "ioncrawl",
            "name": "IonCrawl",
            "slug": "ioncrawl",
            "url": "https://botcrawl.com/bots/ioncrawl/",
            "status": "active",
            "operator": "IONOS",
            "company": "IONOS",
            "family": "IONOS",
            "category": "search",
            "kind": "unknown",
            "purpose": "indexing",
            "identity_type": "unknown",
            "user_agents": [
                "IonCrawl",
                "Mozilla/5.0 (compatible; IonCrawl; +https://www.ionos.com)",
                "IONOS Crawler"
            ],
            "primary_user_agent": "IonCrawl",
            "robots_token": "IonCrawl",
            "verified": "unknown",
            "respects_robots": "unknown",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "unknown",
            "source_type": "unknown",
            "short_description": "IONOS web hosting crawler that gathers intelligence for search functionality and content indexing.",
            "verification_method": "No official IP-range or reverse-DNS verification method is documented here. Match the exact user-agent and request behavior against the source documentation at https://www.ionos.com.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: IonCrawl\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"IonCrawl\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.ionos.com"
                }
            ],
            "updated_at": "2026-04-01T00:55:22Z"
        },
        {
            "id": "isdownbot",
            "name": "IsDownBot",
            "slug": "isdownbot",
            "url": "https://botcrawl.com/bots/isdownbot/",
            "status": "active",
            "operator": "IsDownBot",
            "company": "IsDownBot",
            "family": "IsDownBot",
            "category": "monitoring",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "IsDownBot",
                "Mozilla/5.0 (compatible; IsDownBot/1.0; )"
            ],
            "primary_user_agent": "IsDownBot",
            "robots_token": "IsDownBot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "IsDown monitors endpoints (websites, APIs) to make sure they are up and running.",
            "short_description": "IsDown monitors endpoints (websites, APIs) to make sure they are up and running.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: IsDownBot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"IsDownBot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://help.isdown.app/custom-monitors/isdownbot"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:38Z"
        },
        {
            "id": "jagged-pixel-uptimebot",
            "name": "Jagged Pixel UptimeBot",
            "slug": "jagged-pixel-uptimebot",
            "url": "https://botcrawl.com/bots/jagged-pixel-uptimebot/",
            "status": "active",
            "operator": "Jagged Pixel Inc.",
            "company": "Jagged Pixel Inc.",
            "family": "Jagged Pixel Inc.",
            "category": "monitoring",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "www.getuptime.co",
                "Mozilla/5.0 (compatible; UptimeBot/1.0; +https://www.getuptime.co)"
            ],
            "primary_user_agent": "www.getuptime.co",
            "robots_token": "www.getuptime.co",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Uptime is a synthetic monitoring tool allowing Shopify merchants to validate key customer flows are not broken after making theme changes.",
            "short_description": "Uptime is a synthetic monitoring tool allowing Shopify merchants to validate key customer flows are not broken after making theme changes.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: www.getuptime.co\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"www.getuptime.co\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://jaggedpixel.co"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:38Z"
        },
        {
            "id": "jetpack",
            "name": "Jetpack",
            "slug": "jetpack",
            "url": "https://botcrawl.com/bots/jetpack/",
            "status": "active",
            "operator": "Automattic",
            "company": "Automattic",
            "family": "Automattic",
            "category": "monitoring",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "jetmon",
                "jetmon/1.0 (Jetpack Site Uptime Monitor by WordPress.com)"
            ],
            "primary_user_agent": "jetmon",
            "robots_token": "jetmon",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Uptime monitor for users of WordPress.com/Jetpack — https://jetpack.com/support/monitor/",
            "short_description": "Uptime monitor for users of WordPress.com/Jetpack — https://jetpack.com/support/monitor/",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: jetmon\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"jetmon\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://automattic.com"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:38Z"
        },
        {
            "id": "jikespider",
            "name": "JikeSpider",
            "slug": "jikespider",
            "url": "https://botcrawl.com/bots/jikespider/",
            "status": "active",
            "operator": "Jike",
            "company": "Jike",
            "family": "Jike",
            "category": "search",
            "kind": "unknown",
            "purpose": "general-crawl",
            "identity_type": "unknown",
            "user_agents": [
                "JikeSpider"
            ],
            "primary_user_agent": "JikeSpider",
            "robots_token": "JikeSpider",
            "verified": "unknown",
            "respects_robots": "unknown",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "unknown",
            "source_type": "unknown",
            "short_description": "Jike crawler token surfaced in sogou robots.txt.",
            "verification_method": "This token is surfaced in a Sogou-owned robots.txt file, but the source used for this entry does not publish a dedicated verification method. Confirm behavior conservatively and do not rely on the user-agent string alone.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: JikeSpider\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"JikeSpider\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://weixin.sogou.com/robots.txt"
                }
            ],
            "updated_at": "2026-04-01T01:04:48Z"
        },
        {
            "id": "jobswithgptcom-bot",
            "name": "jobswithgptcom-bot",
            "slug": "jobswithgptcom-bot",
            "url": "https://botcrawl.com/bots/jobswithgptcom-bot/",
            "status": "active",
            "operator": "jobswithgptcom",
            "company": "jobswithgptcom",
            "family": "jobswithgptcom",
            "category": "search",
            "kind": "crawler",
            "purpose": "indexing",
            "identity_type": "unknown",
            "user_agents": [
                "jobswithgptcom",
                "jobswithgptcom-bot"
            ],
            "primary_user_agent": "jobswithgptcom",
            "robots_token": "jobswithgptcom",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Simple crawler focusing only on job postings for a job search site.",
            "short_description": "Simple crawler focusing only on job postings for a job search site.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: jobswithgptcom\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"jobswithgptcom\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://jobswithgpt.com/bot.html"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:38Z"
        },
        {
            "id": "jugendschutzprogramm-crawler",
            "name": "Jugendschutzprogramm-Crawler",
            "slug": "jugendschutzprogramm-crawler",
            "url": "https://botcrawl.com/bots/jugendschutzprogramm-crawler/",
            "status": "active",
            "operator": "JusProg",
            "company": "JusProg",
            "family": "JusProg",
            "category": "security",
            "kind": "unknown",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "Jugendschutzprogramm-Crawler",
                "Mozilla/5.0 (compatible; Jugendschutzprogramm-Crawler; +https://www.jugendschutzprogramm.de)",
                "JusProg Crawler"
            ],
            "primary_user_agent": "Jugendschutzprogramm-Crawler",
            "robots_token": "Jugendschutzprogramm-Crawler",
            "verified": "unknown",
            "respects_robots": "yes",
            "risk": "neutral",
            "recommended_action": "allow",
            "confidence": "unknown",
            "source_type": "unknown",
            "short_description": "German youth protection organization crawler that maintains filter lists for child safety.",
            "verification_method": "No official IP-range or reverse-DNS verification method is documented here. Match the exact user-agent and request behavior against the source documentation at https://www.jugendschutzprogramm.de.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: Jugendschutzprogramm-Crawler\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Jugendschutzprogramm-Crawler\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.jugendschutzprogramm.de"
                }
            ],
            "updated_at": "2026-04-01T00:55:22Z"
        },
        {
            "id": "jumio",
            "name": "Jumio",
            "slug": "jumio",
            "url": "https://botcrawl.com/bots/jumio/",
            "status": "active",
            "operator": "Coinbase",
            "company": "Coinbase",
            "family": "Coinbase",
            "category": "security",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "Jumio"
            ],
            "primary_user_agent": "Jumio",
            "robots_token": "Jumio",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Digital ID Verification and Scanning Tool used by Coinbase team.",
            "short_description": "Digital ID Verification and Scanning Tool used by Coinbase team.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Jumio\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Jumio\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://github.com/Jumio/implementation-guides/blob/master/netverify/callback.md"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:38Z"
        },
        {
            "id": "jyxobot",
            "name": "Jyxobot",
            "slug": "jyxobot",
            "url": "https://botcrawl.com/bots/jyxobot/",
            "status": "active",
            "operator": "Jyxo",
            "company": "Jyxo",
            "family": "Jyxo",
            "category": "search",
            "kind": "unknown",
            "purpose": "indexing",
            "identity_type": "unknown",
            "user_agents": [
                "Jyxobot",
                "Mozilla/5.0 (compatible; Jyxobot/1; +http://www.jyxo.com/bot)",
                "Jyxo Bot"
            ],
            "primary_user_agent": "Jyxobot",
            "robots_token": "Jyxobot",
            "verified": "unknown",
            "respects_robots": "yes",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "unknown",
            "source_type": "unknown",
            "short_description": "Jyxo search engine crawler that indexes web content for the Jyxo search engine.",
            "verification_method": "No official IP-range or reverse-DNS verification method is documented here. Match the exact user-agent and request behavior against the source documentation at https://www.jyxo.com.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: Jyxobot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Jyxobot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.jyxo.com"
                }
            ],
            "updated_at": "2026-04-01T00:55:37Z"
        },
        {
            "id": "kagi-bot",
            "name": "Kagi Bot",
            "slug": "kagi-bot",
            "url": "https://botcrawl.com/bots/kagi-bot/",
            "status": "active",
            "operator": "Kagi",
            "company": "Kagi",
            "family": "Kagi",
            "category": "search",
            "kind": "crawler",
            "purpose": "indexing",
            "identity_type": "unknown",
            "user_agents": [
                "Kagibot",
                "Mozilla/5.0 (compatible; Kagibot/1.0; +https://kagi.com/bot)",
                "Kagibot/"
            ],
            "primary_user_agent": "Kagibot",
            "robots_token": "Kagibot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Kagi Bot is the web crawler for the Kagi search engine. It crawls the web to build its own search index, which supports its ad-free search product.",
            "short_description": "Kagi Bot is the web crawler for the Kagi search engine.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Kagibot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Kagibot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://kagi.com"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:38Z"
        },
        {
            "id": "kagi-fetcher",
            "name": "kagi-fetcher",
            "slug": "kagi-fetcher",
            "url": "https://botcrawl.com/bots/kagi-fetcher/",
            "status": "active",
            "operator": "Kagi",
            "company": "Kagi",
            "family": "Kagi",
            "category": "ai",
            "kind": "unknown",
            "purpose": "user-triggered",
            "identity_type": "unknown",
            "user_agents": [
                "kagi-fetcher",
                "Mozilla/5.0 (compatible; kagi-fetcher/1.0; +https://kagi.com/bot)",
                "Kagi Fetcher",
                "Kagi Bot"
            ],
            "primary_user_agent": "kagi-fetcher",
            "robots_token": "kagi-fetcher",
            "verified": "yes",
            "respects_robots": "unknown",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "unknown",
            "source_type": "unknown",
            "short_description": "Kagi AI assistant bot that fetches web content to answer user queries.",
            "verification_method": "Treat this entry as verified only when the exact user-agent matches the operator documentation at https://kagi.com/bot.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: kagi-fetcher\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"kagi-fetcher\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://kagi.com/bot"
                }
            ],
            "updated_at": "2026-04-01T00:55:20Z"
        },
        {
            "id": "kakaotalk-scrap",
            "name": "KakaoTalk Scrap",
            "slug": "kakaotalk-scrap",
            "url": "https://botcrawl.com/bots/kakaotalk-scrap/",
            "status": "active",
            "operator": "Kakao",
            "company": "Kakao",
            "family": "KakaoTalk Scrap",
            "category": "scraper",
            "kind": "preview",
            "purpose": "general-crawl",
            "identity_type": "unknown",
            "user_agents": [
                "kakaotalk-scrap",
                "kakaotalk-scrap/1.0; +https://devtalk.kakao.com/t/scrap/33984"
            ],
            "primary_user_agent": "kakaotalk-scrap",
            "robots_token": "kakaotalk-scrap",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "The KakaoTalk scrap server collects and processes webpage data to create optimized previews for URLs.",
            "short_description": "The KakaoTalk scrap server collects and processes webpage data to create optimized previews for URLs.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: kakaotalk-scrap\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"kakaotalk-scrap\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://developers.kakao.com/docs/latest/en/message-template/common#scrap"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:38Z"
        },
        {
            "id": "kangaroo-bot",
            "name": "Kangaroo Bot",
            "slug": "kangaroo-bot",
            "url": "https://botcrawl.com/bots/kangaroo-bot/",
            "status": "active",
            "operator": "Kangaroo LLM",
            "company": "Kangaroo LLM",
            "family": "Kangaroo LLM",
            "category": "ai",
            "kind": "unknown",
            "purpose": "training",
            "identity_type": "unknown",
            "user_agents": [
                "Kangaroo Bot",
                "Mozilla/5.0 (compatible; Kangaroo Bot; +https://kangaroo.ai)",
                "KangarooBot"
            ],
            "primary_user_agent": "Kangaroo Bot",
            "robots_token": "Kangaroo Bot",
            "verified": "unknown",
            "respects_robots": "unknown",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "unknown",
            "source_type": "unknown",
            "short_description": "Kangaroo LLM crawler that downloads data to train open source AI models.",
            "verification_method": "No official IP-range or reverse-DNS verification method is documented here. Match the exact user-agent and request behavior against the source documentation at https://kangaroo.ai.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: Kangaroo Bot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Kangaroo Bot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://kangaroo.ai"
                }
            ],
            "updated_at": "2026-04-01T00:55:16Z"
        },
        {
            "id": "kargobot-artemis",
            "name": "KargoBot-Artemis",
            "slug": "kargobot-artemis",
            "url": "https://botcrawl.com/bots/kargobot-artemis/",
            "status": "active",
            "operator": "Kargo",
            "company": "Kargo",
            "family": "Kargo",
            "category": "monitoring",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "KargoBot-Artemis",
                "Mozilla/5.0 (iPhone; CPU iPhone OS 13_2_3 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.0.3 Mobile/15E148 Safari/604.1 (compatible; KargoBot-Artemis-Mobile; +https://www.kargo.com/kargobot-artemis)"
            ],
            "primary_user_agent": "KargoBot-Artemis",
            "robots_token": "KargoBot-Artemis",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "KargoBot-Artemis is Kargo's autonomous content verification bot. It's a simulation of a user on an iOS device. The bot is used to scan sites for content that may be unsuitable for customers on the Kargo ad network.",
            "short_description": "KargoBot-Artemis is Kargo's autonomous content verification bot.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: KargoBot-Artemis\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"KargoBot-Artemis\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "http://kargo.com"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:39Z"
        },
        {
            "id": "kargobot-artemis-mobile",
            "name": "KargoBot-Artemis-Mobile",
            "slug": "kargobot-artemis-mobile",
            "url": "https://botcrawl.com/bots/kargobot-artemis-mobile/",
            "status": "active",
            "operator": "Kargo",
            "company": "Kargo",
            "family": "Kargo",
            "category": "search",
            "kind": "unknown",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "KargoBot-Artemis-Mobile",
                "Mozilla/5.0 (compatible",
                "+https://kargo.com)",
                "Kargo Mobile Bot"
            ],
            "primary_user_agent": "KargoBot-Artemis-Mobile",
            "robots_token": "KargoBot-Artemis-Mobile",
            "verified": "unknown",
            "respects_robots": "unknown",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "unknown",
            "source_type": "unknown",
            "short_description": "Kargo mobile advertising crawler that collects web content information for ad targeting.",
            "verification_method": "No official IP-range or reverse-DNS verification method is documented here. Match the exact user-agent and request behavior against the source documentation at https://kargo.com.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: KargoBot-Artemis-Mobile\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"KargoBot-Artemis-Mobile\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://kargo.com"
                }
            ],
            "updated_at": "2026-04-01T00:55:23Z"
        },
        {
            "id": "kb-dk_bot",
            "name": "kb.dk_bot",
            "slug": "kb-dk_bot",
            "url": "https://botcrawl.com/bots/kb-dk_bot/",
            "status": "active",
            "operator": "Netarkivet",
            "company": "Netarkivet",
            "family": "Netarkivet",
            "category": "scraper",
            "kind": "crawler",
            "purpose": "general-crawl",
            "identity_type": "unknown",
            "user_agents": [
                "netarkivindsamling",
                "Mozilla/5.0 (compatible",
                "heritrix/3.4.0 +https://www.kb.dk/netarkivindsamling/) Firefox/57"
            ],
            "primary_user_agent": "netarkivindsamling",
            "robots_token": "netarkivindsamling",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Royal Danish Library collects the Danish Internet according to the Danish Legal Deposit Act for research purposes.",
            "short_description": "Royal Danish Library collects the Danish Internet according to the Danish Legal Deposit Act for research purposes.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: netarkivindsamling\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"netarkivindsamling\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.kb.dk/en/find-materials/collections/netarkivet"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:39Z"
        },
        {
            "id": "kernel-browsers",
            "name": "Kernel Browsers",
            "slug": "kernel-browsers",
            "url": "https://botcrawl.com/bots/kernel-browsers/",
            "status": "active",
            "operator": "Kernel",
            "company": "Kernel",
            "family": "Kernel",
            "category": "ai",
            "kind": "agent",
            "purpose": "user-triggered",
            "identity_type": "unknown",
            "user_agents": [
                "kernel",
                "Kernel Browsers"
            ],
            "primary_user_agent": "kernel",
            "robots_token": "kernel",
            "verified": "yes",
            "respects_robots": "unknown",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Runs web agents, automations, and scraping tasks for customers.",
            "short_description": "Browser automation platform traffic on behalf of Kernel customers.",
            "verification_method": "Verified on bots.fyi. Exact browser user-agent strings can vary, so validate traffic context before hard allow-listing.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: kernel\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"kernel\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://bots.fyi/d/kernel"
                },
                {
                    "type": "operator",
                    "url": "https://www.kernel.sh/"
                }
            ],
            "last_verified": "2026-04-19",
            "last_checked": "2026-04-19",
            "updated_at": "2026-04-30T09:15:39Z"
        },
        {
            "id": "kinsta",
            "name": "Kinsta",
            "slug": "kinsta",
            "url": "https://botcrawl.com/bots/kinsta/",
            "status": "active",
            "operator": "Kinsta",
            "company": "Kinsta",
            "family": "Kinsta",
            "category": "monitoring",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "kinsta-bot"
            ],
            "primary_user_agent": "kinsta-bot",
            "robots_token": "kinsta-bot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Our bot makes requests towards Kinsta-hosted client sites for uptime monitoring purposes as well as to confirm successful domain pointing.",
            "short_description": "Our bot makes requests towards Kinsta-hosted client sites for uptime monitoring purposes as well as to confirm successful domain pointing.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: kinsta-bot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"kinsta-bot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://kinsta.com/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:39Z"
        },
        {
            "id": "klaviyo",
            "name": "klaviyo",
            "slug": "klaviyo",
            "url": "https://botcrawl.com/bots/klaviyo/",
            "status": "active",
            "operator": "Klaviyo",
            "company": "Klaviyo",
            "family": "Klaviyo",
            "category": "scraper",
            "kind": "crawler",
            "purpose": "general-crawl",
            "identity_type": "unknown",
            "user_agents": [
                "Klaviyo/1.0"
            ],
            "primary_user_agent": "Klaviyo/1.0",
            "robots_token": "Klaviyo/1.0",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "The integration with Klaviyo will automatically capture information about who visits your site and views your products including details on what they viewed so you can send super personal follow up emails",
            "short_description": "The integration with Klaviyo will automatically capture information about who visits your site and views your products including details on what they viewed so you can…",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Klaviyo/1.0\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Klaviyo/1.0\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.klaviyo.com/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:39Z"
        },
        {
            "id": "klaviyoaibot",
            "name": "KlaviyoAIBot",
            "slug": "klaviyoaibot",
            "url": "https://botcrawl.com/bots/klaviyoaibot/",
            "status": "active",
            "operator": "Klaviyo",
            "company": "Klaviyo",
            "family": "KlaviyoAIBot",
            "category": "ai",
            "kind": "crawler",
            "purpose": "search",
            "identity_type": "unknown",
            "user_agents": [
                "KlaviyoAIBot"
            ],
            "primary_user_agent": "KlaviyoAIBot",
            "robots_token": "KlaviyoAIBot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Klaviyo’s web crawler for its Kai Customer Agent feature.",
            "short_description": "Klaviyo’s web crawler for its Kai Customer Agent feature.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: KlaviyoAIBot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"KlaviyoAIBot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://help.klaviyo.com/hc/en-us/articles/40496146232219"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:39Z"
        },
        {
            "id": "known-agent",
            "name": "Known Agent",
            "slug": "known-agent",
            "url": "https://botcrawl.com/bots/known-agent/",
            "status": "active",
            "operator": "Known Agents",
            "company": "Known Agents",
            "family": "Known Agents",
            "category": "ai",
            "kind": "agent",
            "purpose": "user-triggered",
            "identity_type": "unknown",
            "user_agents": [
                "Known Agent",
                "Mozilla/5.0 (compatible",
                "Known Agent/1.0",
                "+https://knownagents.com)"
            ],
            "primary_user_agent": "Known Agent",
            "robots_token": "Known Agent",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Traffic identification and management for AI agent and bot activity.",
            "short_description": "Known Agents helps website owners track, control, and optimize traffic from AI agents and bots.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Known Agent\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Known Agent\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://knownagents.com"
                }
            ],
            "last_verified": "2026-04-19",
            "last_checked": "2026-04-19",
            "updated_at": "2026-04-30T09:15:39Z"
        },
        {
            "id": "laion-huggingface-processor",
            "name": "laion-huggingface-processor",
            "slug": "laion-huggingface-processor",
            "url": "https://botcrawl.com/bots/laion-huggingface-processor/",
            "status": "active",
            "operator": "LAION",
            "company": "LAION",
            "family": "LAION",
            "category": "ai",
            "kind": "unknown",
            "purpose": "training",
            "identity_type": "unknown",
            "user_agents": [
                "laion-huggingface-processor",
                "Mozilla/5.0 (compatible",
                "+https://laion.ai)",
                "LAION Crawler",
                "HuggingFace LAION"
            ],
            "primary_user_agent": "laion-huggingface-processor",
            "robots_token": "laion-huggingface-processor",
            "verified": "unknown",
            "respects_robots": "unknown",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "unknown",
            "source_type": "unknown",
            "short_description": "LAION web crawler operated with Hugging Face that collects data for open AI datasets.",
            "verification_method": "No official IP-range or reverse-DNS verification method is documented here. Match the exact user-agent and request behavior against the source documentation at https://laion.ai.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: laion-huggingface-processor\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"laion-huggingface-processor\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://laion.ai"
                }
            ],
            "updated_at": "2026-04-01T00:55:16Z"
        },
        {
            "id": "lcc",
            "name": "LCC",
            "slug": "lcc",
            "url": "https://botcrawl.com/bots/lcc/",
            "status": "active",
            "operator": "University of Leipzig",
            "company": "University of Leipzig",
            "family": "University of Leipzig",
            "category": "search",
            "kind": "unknown",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "LCC",
                "Mozilla/5.0 (compatible",
                "+https://wortschatz.uni-leipzig.de/en/download/)",
                "Leipzig Corpora Collection"
            ],
            "primary_user_agent": "LCC",
            "robots_token": "LCC",
            "verified": "unknown",
            "respects_robots": "unknown",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "unknown",
            "source_type": "unknown",
            "short_description": "University of Leipzig web crawler that collects text data from websites to build large-scale corpora.",
            "verification_method": "No official IP-range or reverse-DNS verification method is documented here. Match the exact user-agent and request behavior against the source documentation at https://wortschatz.uni-leipzig.de.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: LCC\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"LCC\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://wortschatz.uni-leipzig.de"
                }
            ],
            "updated_at": "2026-04-01T00:55:21Z"
        },
        {
            "id": "legalmonster",
            "name": "LegalMonster",
            "slug": "legalmonster",
            "url": "https://botcrawl.com/bots/legalmonster/",
            "status": "active",
            "operator": "Legal Monster ApS",
            "company": "Legal Monster ApS",
            "family": "Legal Monster ApS",
            "category": "security",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "LegalMonster"
            ],
            "primary_user_agent": "LegalMonster",
            "robots_token": "LegalMonster",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Monitor customers' sites for compliance with cookie- and other privacy-related legislation",
            "short_description": "Monitor customers' sites for compliance with cookie- and other privacy-related legislation",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: LegalMonster\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"LegalMonster\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.legalmonster.com/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:39Z"
        },
        {
            "id": "leikibot",
            "name": "LeikiBot",
            "slug": "leikibot",
            "url": "https://botcrawl.com/bots/leikibot/",
            "status": "active",
            "operator": "DoubleVerify",
            "company": "DoubleVerify",
            "family": "DoubleVerify",
            "category": "scraper",
            "kind": "crawler",
            "purpose": "general-crawl",
            "identity_type": "unknown",
            "user_agents": [
                "Leikibot",
                "Mozilla/5.0 (Windows NT 6.3",
                "compatible",
                "Leikibot/1.0",
                "+http://www.leiki.com)"
            ],
            "primary_user_agent": "Leikibot",
            "robots_token": "Leikibot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "LeikiBot, run by DoubleVerify, crawls webpages content for advertisers.",
            "short_description": "LeikiBot, run by DoubleVerify, crawls webpages content for advertisers.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Leikibot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Leikibot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.doubleverify.com/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:39Z"
        },
        {
            "id": "lets-encrypt",
            "name": "Let’s Encrypt",
            "slug": "lets-encrypt",
            "url": "https://botcrawl.com/bots/lets-encrypt/",
            "status": "active",
            "operator": "Let's Encrypt",
            "company": "Let's Encrypt",
            "family": "Let's Encrypt",
            "category": "security",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "https://www.letsencrypt.org",
                "Mozilla/5.0 (compatible",
                "Let's Encrypt validation server",
                "+https://www.letsencrypt.org)"
            ],
            "primary_user_agent": "https://www.letsencrypt.org",
            "robots_token": "https://www.letsencrypt.org",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Let's Encrypt domain control validation service.",
            "short_description": "Let's Encrypt domain control validation service.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: https://www.letsencrypt.org\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"https://www.letsencrypt.org\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://letsencrypt.org"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:39Z"
        },
        {
            "id": "level9searchbot",
            "name": "Level9SearchBot",
            "slug": "level9searchbot",
            "url": "https://botcrawl.com/bots/level9searchbot/",
            "status": "active",
            "operator": "Level 9",
            "company": "Level 9",
            "family": "Level 9",
            "category": "search",
            "kind": "crawler",
            "purpose": "indexing",
            "identity_type": "unknown",
            "user_agents": [
                "Level9SearchBot",
                "Mozilla/5.0 (compatible",
                "Level9SearchBot/2-1.5)",
                "Level9SearchBot/"
            ],
            "primary_user_agent": "Level9SearchBot",
            "robots_token": "Level9SearchBot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Level9SearchBot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Level9SearchBot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://level9.com/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:39Z"
        },
        {
            "id": "library-of-congress-web-archive",
            "name": "Library of Congress Web Archive",
            "slug": "library-of-congress-web-archive",
            "url": "https://botcrawl.com/bots/library-of-congress-web-archive/",
            "status": "active",
            "operator": "United States Library of Congress",
            "company": "United States Library of Congress",
            "category": "unknown",
            "kind": "unknown",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "Library of Congress Web Archive",
                "Mozilla/5.0 (...) (+https://www.loc.gov/programs/web-archiving/for-site-owners/)",
                "Library of Congress Web Archiving crawler",
                "LOC web archive"
            ],
            "primary_user_agent": "Library of Congress Web Archive",
            "robots_token": "https://www.loc.gov/programs/web-archiving/for-site-owners/",
            "verified": "yes",
            "respects_robots": "yes",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "unknown",
            "source_type": "unknown",
            "short_description": "Library of Congress research archiving crawler.",
            "verification_method": "Validate against Library of Congress web archiving guidance and identify the documented crawler signature.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: https://www.loc.gov/programs/web-archiving/for-site-owners/\nDisallow: /"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://radar.cloudflare.com/bots/directory/library-of-congress-web-archiving"
                }
            ],
            "updated_at": "2026-03-31T12:58:10Z"
        },
        {
            "id": "library-of-congress-web-archiving",
            "name": "Library Of Congress Web Archiving",
            "slug": "library-of-congress-web-archiving",
            "url": "https://botcrawl.com/bots/library-of-congress-web-archiving/",
            "status": "active",
            "operator": "United States Library of Congress",
            "company": "United States Library of Congress",
            "family": "United States Library of Congress",
            "category": "scraper",
            "kind": "crawler",
            "purpose": "general-crawl",
            "identity_type": "unknown",
            "user_agents": [
                "https://www.loc.gov/programs/web-archiving/for-site-owners/",
                "Mozilla/5.0 (Windows NT 10.0",
                "Win64",
                "x64) AppleWebKit/537.36 (KHTML",
                "like Gecko) Chrome/78.0.3904.97 Safari/537.36 (+https://www.loc.gov/programs/web-archiving/for-site-owners/)"
            ],
            "primary_user_agent": "https://www.loc.gov/programs/web-archiving/for-site-owners/",
            "robots_token": "https://www.loc.gov/programs/web-archiving/for-site-owners/",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "The Library of Congress Web Archive manages, preserves, and provides access to archived web content selected by subject experts from across the Library, so that it will be available for researchers today and in the future. More information on the programme here: https://www.loc.gov/programs/web-archiving/about-this-program/ And information about crawling policy here: https://www.loc.gov/programs/web-archiving/for-site-owners/",
            "short_description": "The Library of Congress Web Archive manages, preserves, and provides access to archived web content selected by subject experts from across the Library, so that it will…",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: https://www.loc.gov/programs/web-archiving/for-site-owners/\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"https://www.loc.gov/programs/web-archiving/for-site-owners/\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.loc.gov/programs/web-archiving/for-site-owners"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:39Z"
        },
        {
            "id": "liner-bot",
            "name": "LINER Bot",
            "slug": "liner-bot",
            "url": "https://botcrawl.com/bots/liner-bot/",
            "status": "active",
            "operator": "Liner Bot",
            "company": "Liner Bot",
            "family": "Liner Bot",
            "category": "ai",
            "kind": "crawler",
            "purpose": "general-crawl",
            "identity_type": "unknown",
            "user_agents": [
                "LinerBot",
                "Mozilla/5.0 AppleWebKit/537.36 (KHTML",
                "like Gecko",
                "compatible",
                "LinerBot/1.0",
                "+https://docs.getliner.com/docs/linerbot)"
            ],
            "primary_user_agent": "LinerBot",
            "robots_token": "LinerBot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "The web pages crawled by Linerbot are used to find the right sources that can generate answers to your questions.",
            "short_description": "The web pages crawled by Linerbot are used to find the right sources that can generate answers to your questions.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: LinerBot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"LinerBot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://docs.getliner.com/docs/linerbot"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:39Z"
        },
        {
            "id": "linespider",
            "name": "Linespider",
            "slug": "linespider",
            "url": "https://botcrawl.com/bots/linespider/",
            "status": "active",
            "operator": "LINE Corporation",
            "company": "LINE Corporation",
            "family": "LINE Corporation",
            "category": "search",
            "kind": "crawler",
            "purpose": "indexing",
            "identity_type": "unknown",
            "user_agents": [
                "Linespider"
            ],
            "primary_user_agent": "Linespider",
            "robots_token": "Linespider",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Linespider is a Web crawler that provides a wide range of search results for LINE services while complying with the Robots Exclusion Protocol. https://help2.line.me/linesearchbot/web/?contentId=50006055&lang=en",
            "short_description": "Linespider is a Web crawler that provides a wide range of search results for LINE services while complying with the Robots Exclusion Protocol.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Linespider\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Linespider\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://linecorp.com/en/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:39Z"
        },
        {
            "id": "link-tiger",
            "name": "Link Tiger",
            "slug": "link-tiger",
            "url": "https://botcrawl.com/bots/link-tiger/",
            "status": "active",
            "operator": "LinkTiger, Inc.",
            "company": "LinkTiger, Inc.",
            "family": "LinkTiger, Inc.",
            "category": "monitoring",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "LinkTiger",
                "Mozilla/5.0 (X11",
                "Linux x86_64) AppleWebKit/537.36 (KHTML",
                "like Gecko) Chrome/76.0.3809.87 Safari/537.36 LinkTiger"
            ],
            "primary_user_agent": "LinkTiger",
            "robots_token": "LinkTiger",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "LinkTiger crawls customer sites and reports broken links",
            "short_description": "LinkTiger crawls customer sites and reports broken links",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: LinkTiger\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"LinkTiger\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://linktiger.com"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:39Z"
        },
        {
            "id": "linkcheck",
            "name": "LinkCheck",
            "slug": "linkcheck",
            "url": "https://botcrawl.com/bots/linkcheck/",
            "status": "active",
            "operator": "SiteImprove",
            "company": "SiteImprove",
            "family": "SiteImprove",
            "category": "scraper",
            "kind": "crawler",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "LinkCheck by Siteimprove",
                "Mozilla/5.0 (compatible",
                "MSIE 10.0",
                "Windows NT 6.1",
                "Trident/6.0) LinkCheck by Siteimprove",
                "Siteimprove LinkCheck"
            ],
            "primary_user_agent": "LinkCheck by Siteimprove",
            "robots_token": "LinkCheck by Siteimprove",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "The Siteimprove LinkCheck crawler analyzes and monitors websites for quality assurance, SEO, and accessibility purposes, and keeps website content in line with brand guidelines and organizational policies.",
            "short_description": "The Siteimprove LinkCheck crawler analyzes and monitors websites for quality assurance, SEO, and accessibility purposes, and keeps website content in line with brand…",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: LinkCheck by Siteimprove\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"LinkCheck by Siteimprove\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://support.siteimprove.com/hc/en-gb/articles/115000402352-Siteimprove-s-Crawler-Frequently-Asked-Questions"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:39Z"
        },
        {
            "id": "linkchecker-bot",
            "name": "LinkChecker Bot",
            "slug": "linkchecker-bot",
            "url": "https://botcrawl.com/bots/linkchecker-bot/",
            "status": "active",
            "operator": "LinkChecker",
            "company": "LinkChecker",
            "family": "LinkChecker",
            "category": "scraper",
            "kind": "crawler",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "linkchecker.pro",
                "Mozilla/5.0 (compatible",
                "LinkCheckerBot/2.0",
                "+https://linkchecker.pro/robot/)",
                "LinkCheckerBot",
                "backlink monitor"
            ],
            "primary_user_agent": "linkchecker.pro",
            "robots_token": "linkchecker.pro",
            "verified": "yes",
            "respects_robots": "yes",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Web Crawler that monitors the backlinks profile",
            "short_description": "Web Crawler that monitors the backlinks profile",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: linkchecker.pro\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"linkchecker.pro\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://linkchecker.pro/robot/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:39Z"
        },
        {
            "id": "linkdex",
            "name": "Linkdex",
            "slug": "linkdex",
            "url": "https://botcrawl.com/bots/linkdex/",
            "status": "active",
            "operator": "Linkdex",
            "company": "Linkdex",
            "family": "Linkdex",
            "category": "search",
            "kind": "unknown",
            "purpose": "indexing",
            "identity_type": "unknown",
            "user_agents": [
                "Linkdex",
                "Mozilla/5.0 (compatible",
                "Linkdex Bot/1.0",
                "+http://www.linkdex.com/bots/)",
                "Linkdex Bot"
            ],
            "primary_user_agent": "Linkdex",
            "robots_token": "Linkdex",
            "verified": "unknown",
            "respects_robots": "yes",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "unknown",
            "source_type": "unknown",
            "short_description": "Linkdex enterprise SEO platform crawler for link intelligence and content analysis.",
            "verification_method": "No official IP-range or reverse-DNS verification method is documented here. Match the exact user-agent and request behavior against the source documentation at https://www.linkdex.com.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: Linkdex\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Linkdex\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.linkdex.com"
                }
            ],
            "updated_at": "2026-04-01T00:55:36Z"
        },
        {
            "id": "linkedinbot",
            "name": "LinkedInBot",
            "slug": "linkedinbot",
            "url": "https://botcrawl.com/bots/linkedinbot/",
            "status": "active",
            "operator": "LinkedIn",
            "company": "LinkedIn",
            "family": "LinkedIn",
            "category": "scraper",
            "kind": "preview",
            "purpose": "general-crawl",
            "identity_type": "unknown",
            "user_agents": [
                "LinkedInBot",
                "LinkedInBot/1.0 (compatible",
                "Mozilla/5.0",
                "+http://www.linkedin.com)",
                "Apache-HttpClient +http://www.linkedin.com)",
                "LinkedIn preview bot"
            ],
            "primary_user_agent": "LinkedInBot",
            "robots_token": "LinkedInBot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "LinkedInBot is used by LinkedIn when rendering link preview information.",
            "short_description": "LinkedInBot is used by LinkedIn when rendering link preview information.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: LinkedInBot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"LinkedInBot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.linkedin.com/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:39Z"
        },
        {
            "id": "linksindexerbot",
            "name": "LinksIndexerBot",
            "slug": "linksindexerbot",
            "url": "https://botcrawl.com/bots/linksindexerbot/",
            "status": "active",
            "operator": "Links Indexer",
            "company": "Links Indexer",
            "family": "Links Indexer",
            "category": "scraper",
            "kind": "preview",
            "purpose": "general-crawl",
            "identity_type": "unknown",
            "user_agents": [
                "LinksIndexerBot",
                "Mozilla/5.0 (compatible",
                "LinksIndexerBot/1.0",
                "+http://linksindexer.com/bot)",
                "Links Indexer bot"
            ],
            "primary_user_agent": "LinksIndexerBot",
            "robots_token": "LinksIndexerBot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "LinksIndexerBot is an SEO bot that crawls websites to index backlinks and aggregate website summaries.",
            "short_description": "LinksIndexerBot is an SEO bot that crawls websites to index backlinks and aggregate website summaries.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: LinksIndexerBot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"LinksIndexerBot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://linksindexer.com/bot"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:39Z"
        },
        {
            "id": "linkupbot",
            "name": "LinkupBot",
            "slug": "linkupbot",
            "url": "https://botcrawl.com/bots/linkupbot/",
            "status": "active",
            "operator": "Linkup",
            "company": "Linkup",
            "family": "Linkup",
            "category": "ai",
            "kind": "unknown",
            "purpose": "indexing",
            "identity_type": "unknown",
            "user_agents": [
                "LinkupBot",
                "Mozilla/5.0 (compatible",
                "LinkupBot/1.0",
                "+https://www.linkup.so/bot)",
                "Linkup Bot"
            ],
            "primary_user_agent": "LinkupBot",
            "robots_token": "LinkupBot",
            "verified": "unknown",
            "respects_robots": "unknown",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "unknown",
            "source_type": "unknown",
            "short_description": "Linkup AI search crawler that indexes enterprise web content for its AI search platform.",
            "verification_method": "No official IP-range or reverse-DNS verification method is documented here. Match the exact user-agent and request behavior against the source documentation at https://www.linkup.so.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: LinkupBot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"LinkupBot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.linkup.so"
                }
            ],
            "updated_at": "2026-04-01T00:55:19Z"
        },
        {
            "id": "logicmonitor",
            "name": "logicmonitor",
            "slug": "logicmonitor",
            "url": "https://botcrawl.com/bots/logicmonitor/",
            "status": "active",
            "operator": "LogicMonitor",
            "company": "LogicMonitor",
            "family": "LogicMonitor",
            "category": "monitoring",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "LogicMonitor SiteMonitor/",
                "LogicMonitor SiteMonitor/1.0",
                "LogicMonitor SiteMonitor"
            ],
            "primary_user_agent": "LogicMonitor SiteMonitor/",
            "robots_token": "LogicMonitor SiteMonitor/",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "The logicmonitor bot performs synthetic monitoring checks on websites and online services to test their availability and performance.",
            "short_description": "The logicmonitor bot performs synthetic monitoring checks on websites and online services to test their availability and performance.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: LogicMonitor SiteMonitor/\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"LogicMonitor SiteMonitor/\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.logicmonitor.com/support/about-logicmonitor/overview/logicmonitor-public-ip-addresses-dns-names"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:39Z"
        },
        {
            "id": "loomly-bot",
            "name": "Loomly Bot",
            "slug": "loomly-bot",
            "url": "https://botcrawl.com/bots/loomly-bot/",
            "status": "active",
            "operator": "Loomly",
            "company": "Loomly",
            "family": "Loomly",
            "category": "scraper",
            "kind": "preview",
            "purpose": "general-crawl",
            "identity_type": "unknown",
            "user_agents": [
                "LoomlyBot"
            ],
            "primary_user_agent": "LoomlyBot",
            "robots_token": "LoomlyBot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "LoomlyBot is used to extract metadata from web pages in order to show a social media post preview within Loomly so that clients can see what their social media posts will look like when published.",
            "short_description": "LoomlyBot is used to extract metadata from web pages in order to show a social media post preview within Loomly so that clients can see what their social media posts…",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: LoomlyBot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"LoomlyBot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.loomly.com"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:39Z"
        },
        {
            "id": "macrobondbot",
            "name": "Macrobondbot",
            "slug": "macrobondbot",
            "url": "https://botcrawl.com/bots/macrobondbot/",
            "status": "active",
            "operator": "Macrobondbot",
            "company": "Macrobondbot",
            "family": "Macrobondbot",
            "category": "monitoring",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "Macrobondbot",
                "Mozilla/5.0 (compatible",
                "Macrobondbot +http://redir.macrobond.com/go/bot/)"
            ],
            "primary_user_agent": "Macrobondbot",
            "robots_token": "Macrobondbot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Macrobond implements custom web crawlers to fetch macroeconomic data published by official data sources.",
            "short_description": "Macrobond implements custom web crawlers to fetch macroeconomic data published by official data sources.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Macrobondbot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Macrobondbot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.macrobond.com"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:40Z"
        },
        {
            "id": "magibot",
            "name": "MagiBot",
            "slug": "magibot",
            "url": "https://botcrawl.com/bots/magibot/",
            "status": "active",
            "operator": "Peak Labs",
            "company": "Peak Labs",
            "family": "Peak Labs",
            "category": "search",
            "kind": "crawler",
            "purpose": "indexing",
            "identity_type": "unknown",
            "user_agents": [
                "MagiBot",
                "Mozilla/5.0 (compatible",
                "MagiBot/1.0.0",
                "Matarael",
                "+https://magi.com/bots)"
            ],
            "primary_user_agent": "MagiBot",
            "robots_token": "MagiBot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "MagiBot is owned by Peak Labs which focuses on the research and development of information extraction and retrieval technology to transform knowledge in natural language into immeasurable value.",
            "short_description": "MagiBot is owned by Peak Labs which focuses on the research and development of information extraction and retrieval technology to transform knowledge in natural…",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: MagiBot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"MagiBot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.peak-labs.com/docs/en/magi/about-magibot"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:40Z"
        },
        {
            "id": "magnetmebot",
            "name": "MagnetmeBot",
            "slug": "magnetmebot",
            "url": "https://botcrawl.com/bots/magnetmebot/",
            "status": "active",
            "operator": "Magnet.me",
            "company": "Magnet.me",
            "family": "Magnet.me",
            "category": "scraper",
            "kind": "crawler",
            "purpose": "indexing",
            "identity_type": "unknown",
            "user_agents": [
                "Magnet.me-web/",
                "Magnet.me-web/1.0 (+https://magnet.me/bot.html)"
            ],
            "primary_user_agent": "Magnet.me-web/",
            "robots_token": "Magnet.me-web/",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "MagnetmeBot checks the websites of our paying customers and ensures the job openings are being kept in sync",
            "short_description": "MagnetmeBot checks the websites of our paying customers and ensures the job openings are being kept in sync",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Magnet.me-web/\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Magnet.me-web/\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://magnet.me/bot.html"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:40Z"
        },
        {
            "id": "mailrubot",
            "name": "MailRUBot",
            "slug": "mailrubot",
            "url": "https://botcrawl.com/bots/mailrubot/",
            "status": "active",
            "operator": "Mail Russia",
            "company": "Mail Russia",
            "family": "Mail Russia",
            "category": "scraper",
            "kind": "preview",
            "purpose": "general-crawl",
            "identity_type": "unknown",
            "user_agents": [
                "Mail.RU_Bot",
                "Mozilla/5.0 (compatible",
                "Linux x86_64",
                "Mail.RU_Bot/2.0",
                "+http://go.mail.ru/help/robots)"
            ],
            "primary_user_agent": "Mail.RU_Bot",
            "robots_token": "Mail.RU_Bot",
            "verified": "yes",
            "respects_robots": "yes",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "The mail.ru bot is a mail fetcher on behalf of the Mail.ru email service.",
            "short_description": "The mail.ru bot is a mail fetcher on behalf of the Mail.ru email service.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Mail.RU_Bot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Mail.RU_Bot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://go.mail.ru/help/robots"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:40Z"
        },
        {
            "id": "mainwp",
            "name": "MainWP",
            "slug": "mainwp",
            "url": "https://botcrawl.com/bots/mainwp/",
            "status": "active",
            "operator": "Direct Support / Website Managed",
            "company": "Direct Support / Website Managed",
            "family": "Direct Support / Website Managed",
            "category": "unknown",
            "kind": "fetcher",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "MainWP",
                "Mozilla/5.0 (compatible",
                "MainWP/4.2.7.1",
                "+http://mainwp.com)",
                "MainWP/"
            ],
            "primary_user_agent": "MainWP",
            "robots_token": "MainWP",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Website Managed - MainWP Control Dashboard for accessing MainWP child sites.",
            "short_description": "Website Managed - MainWP Control Dashboard for accessing MainWP child sites.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: MainWP\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"MainWP\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.dshost.com.au/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:40Z"
        },
        {
            "id": "majesticbot",
            "name": "MajesticBot",
            "slug": "majesticbot",
            "url": "https://botcrawl.com/bots/majesticbot/",
            "status": "active",
            "operator": "Majestic",
            "company": "Majestic",
            "family": "Majestic",
            "category": "search",
            "kind": "unknown",
            "purpose": "indexing",
            "identity_type": "unknown",
            "user_agents": [
                "MajesticBot",
                "Mozilla/5.0 (compatible",
                "+https://majestic.com/support/crawler)",
                "Majestic-12 Bot",
                "Majestic SEO Bot"
            ],
            "primary_user_agent": "MajesticBot",
            "robots_token": "MajesticBot",
            "verified": "yes",
            "respects_robots": "yes",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "unknown",
            "source_type": "unknown",
            "short_description": "Majestic SEO web crawler that builds the Majestic backlink index for SEO analysis.",
            "verification_method": "Treat this entry as verified only when the exact user-agent matches the operator documentation at https://majestic.com/support/crawler.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: MajesticBot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"MajesticBot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://majestic.com/support/crawler"
                }
            ],
            "updated_at": "2026-04-01T00:55:29Z"
        },
        {
            "id": "make-platform",
            "name": "Make Platform",
            "slug": "make-platform",
            "url": "https://botcrawl.com/bots/make-platform/",
            "status": "active",
            "operator": "Make",
            "company": "Make",
            "family": "Make",
            "category": "unknown",
            "kind": "fetcher",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "Make/production"
            ],
            "primary_user_agent": "Make/production",
            "robots_token": "Make/production",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Make is a no-code platform to help with task and workflow automation.",
            "short_description": "Make is a no-code platform to help with task and workflow automation.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Make/production\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Make/production\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.make.com/en/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:40Z"
        },
        {
            "id": "managewp",
            "name": "ManageWP",
            "slug": "managewp",
            "url": "https://botcrawl.com/bots/managewp/",
            "status": "active",
            "operator": "ManageWP",
            "company": "ManageWP",
            "family": "ManageWP",
            "category": "unknown",
            "kind": "fetcher",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "ManageWP"
            ],
            "primary_user_agent": "ManageWP",
            "robots_token": "ManageWP",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "The ManageWP webhooks integration to manage mulitple Wordpress websites with a single dashboard.",
            "short_description": "The ManageWP webhooks integration to manage mulitple Wordpress websites with a single dashboard.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: ManageWP\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"ManageWP\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://managewp.com/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:40Z"
        },
        {
            "id": "manus-bot",
            "name": "Manus Bot",
            "slug": "manus-bot",
            "url": "https://botcrawl.com/bots/manus-bot/",
            "status": "active",
            "operator": "Manus",
            "company": "Manus",
            "family": "Manus",
            "category": "ai",
            "kind": "fetcher",
            "purpose": "user-triggered",
            "identity_type": "unknown",
            "user_agents": [
                "Manus Bot"
            ],
            "primary_user_agent": "Manus Bot",
            "robots_token": "Manus Bot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Manus is the action engine that goes beyond answers to execute tasks, automate workflows, and extend your human reach.",
            "short_description": "Manus is the action engine that goes beyond answers to execute tasks, automate workflows, and extend your human reach.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Manus Bot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Manus Bot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://manus.im/help/Manus-user"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:40Z"
        },
        {
            "id": "marfeel-audits-crawler",
            "name": "Marfeel Audits Crawler",
            "slug": "marfeel-audits-crawler",
            "url": "https://botcrawl.com/bots/marfeel-audits-crawler/",
            "status": "active",
            "operator": "Marfeel",
            "company": "Marfeel",
            "family": "Marfeel",
            "category": "monitoring",
            "kind": "unknown",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "marfeel-audits",
                "Marfeel Audits Crawler"
            ],
            "primary_user_agent": "marfeel-audits",
            "robots_token": "marfeel-audits",
            "verified": "yes",
            "respects_robots": "unknown",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "botsfyi-verified-listing",
            "common_use": "Monitoring",
            "short_description": "Marfeel audit crawler that re-crawls traffic URLs to detect structured data, meta tag, and HTML issues.",
            "verification_method": "Public verified bot directory listing",
            "spoofing_risk": "Medium",
            "rules": {
                "cloudflare": "(http.user_agent contains \"marfeel-audits\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://bots.fyi/d/marfeel-audits"
                }
            ],
            "last_verified": "2026-04-19",
            "last_checked": "2026-04-19",
            "updated_at": "2026-04-30T09:15:40Z"
        },
        {
            "id": "marfeel-flowcards-crawler",
            "name": "Marfeel Flowcards Crawler",
            "slug": "marfeel-flowcards-crawler",
            "url": "https://botcrawl.com/bots/marfeel-flowcards-crawler/",
            "status": "active",
            "operator": "Marfeel",
            "company": "Marfeel",
            "family": "Marfeel",
            "category": "unknown",
            "kind": "unknown",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "marfeel-flowcards",
                "Marfeel Flowcards Crawler"
            ],
            "primary_user_agent": "marfeel-flowcards",
            "robots_token": "marfeel-flowcards",
            "verified": "yes",
            "respects_robots": "unknown",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "botsfyi-verified-listing",
            "common_use": "Preview",
            "short_description": "Marfeel crawler that fetches content for Flowcards loaded from specific URLs.",
            "verification_method": "Public verified bot directory listing",
            "spoofing_risk": "Medium",
            "rules": {
                "cloudflare": "(http.user_agent contains \"marfeel-flowcards\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://bots.fyi/d/marfeel-flowcards"
                }
            ],
            "last_verified": "2026-04-19",
            "last_checked": "2026-04-19",
            "updated_at": "2026-04-30T09:15:40Z"
        },
        {
            "id": "marfeel-preview-crawler",
            "name": "Marfeel Preview Crawler",
            "slug": "marfeel-preview-crawler",
            "url": "https://botcrawl.com/bots/marfeel-preview-crawler/",
            "status": "active",
            "operator": "Marfeel",
            "company": "Marfeel",
            "family": "Marfeel",
            "category": "unknown",
            "kind": "unknown",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "marfeel-preview",
                "Marfeel Preview Crawler"
            ],
            "primary_user_agent": "marfeel-preview",
            "robots_token": "marfeel-preview",
            "verified": "yes",
            "respects_robots": "unknown",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "botsfyi-verified-listing",
            "common_use": "Preview",
            "short_description": "Marfeel preview crawler used to render preview experiences for desktop and mobile views.",
            "verification_method": "Public verified bot directory listing",
            "spoofing_risk": "Medium",
            "rules": {
                "cloudflare": "(http.user_agent contains \"marfeel-preview\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://bots.fyi/d/marfeel-preview"
                }
            ],
            "last_verified": "2026-04-19",
            "last_checked": "2026-04-19",
            "updated_at": "2026-04-30T09:15:40Z"
        },
        {
            "id": "marfeel-social-crawler",
            "name": "Marfeel Social Crawler",
            "slug": "marfeel-social-crawler",
            "url": "https://botcrawl.com/bots/marfeel-social-crawler/",
            "status": "active",
            "operator": "Marfeel",
            "company": "Marfeel",
            "family": "Marfeel",
            "category": "unknown",
            "kind": "unknown",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "marfeel-social",
                "Marfeel Social Crawler"
            ],
            "primary_user_agent": "marfeel-social",
            "robots_token": "marfeel-social",
            "verified": "yes",
            "respects_robots": "unknown",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "botsfyi-verified-listing",
            "common_use": "Social Media",
            "short_description": "Marfeel crawler used for social experiences across networks such as Facebook, X, Telegram, Reddit, and LinkedIn.",
            "verification_method": "Public verified bot directory listing",
            "spoofing_risk": "Medium",
            "rules": {
                "cloudflare": "(http.user_agent contains \"marfeel-social\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://bots.fyi/d/marfeel-social"
                }
            ],
            "last_verified": "2026-04-19",
            "last_checked": "2026-04-19",
            "updated_at": "2026-04-30T09:15:40Z"
        },
        {
            "id": "marginalia-search",
            "name": "Marginalia Search",
            "slug": "marginalia-search",
            "url": "https://botcrawl.com/bots/marginalia-search/",
            "status": "active",
            "operator": "Marginalia",
            "company": "Marginalia",
            "family": "Marginalia",
            "category": "search",
            "kind": "crawler",
            "purpose": "indexing",
            "identity_type": "unknown",
            "user_agents": [
                "search.marginalia.nu"
            ],
            "primary_user_agent": "search.marginalia.nu",
            "robots_token": "search.marginalia.nu",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Marginalia Search is a noncommercial niche search engine focusing on old websites, personal websites, and blogs that suffer crippling discoverability problems in today's fiercely SEO-optimized lanscape.",
            "short_description": "Marginalia Search is a noncommercial niche search engine focusing on old websites, personal websites, and blogs that suffer crippling discoverability problems in…",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: search.marginalia.nu\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"search.marginalia.nu\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://search.marginalia.nu/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:40Z"
        },
        {
            "id": "marketgoo",
            "name": "marketgoo",
            "slug": "marketgoo",
            "url": "https://botcrawl.com/bots/marketgoo/",
            "status": "active",
            "operator": "marketgoo",
            "company": "marketgoo",
            "family": "marketgoo",
            "category": "scraper",
            "kind": "crawler",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "MarketGoo",
                "Mozilla/5.0 (Macintosh",
                "Intel Mac OS X 10_9_5) AppleWebKit/600.1.17 (KHTML",
                "like Gecko) Version/7.1 Safari/537.85.10 MarketGoo/2.1",
                "Intel Mac OS X 10.9",
                "rv:29.0) Gecko/20100101 Firefox/29.0 MarketGoo/2.1"
            ],
            "primary_user_agent": "MarketGoo",
            "robots_token": "MarketGoo",
            "verified": "yes",
            "respects_robots": "yes",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "marketgoo provides white label SEO tools",
            "short_description": "marketgoo provides white label SEO tools",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: MarketGoo\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"MarketGoo\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.marketgoo.com/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:40Z"
        },
        {
            "id": "mars-finder",
            "name": "Mars Finder",
            "slug": "mars-finder",
            "url": "https://botcrawl.com/bots/mars-finder/",
            "status": "active",
            "operator": "Mars Flag",
            "company": "Mars Flag",
            "family": "Mars Flag",
            "category": "search",
            "kind": "crawler",
            "purpose": "search",
            "identity_type": "official-documented",
            "user_agents": [
                "(dbot)",
                "Mozilla/5.0 (X11",
                "Linux x86_64) AppleWebKit/537.36 (KHTML",
                "like Gecko) (dbot)"
            ],
            "primary_user_agent": "(dbot)",
            "robots_token": "(dbot)",
            "verified": "yes",
            "respects_robots": "yes",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "high",
            "source_type": "official",
            "short_description": "Mars Finder is a website search service designed to utilize the maximum potential of a website. MARS FINDER has held the top share of website search service...",
            "verification_method": "Validate the user-agent pattern, operator documentation, and any published IP ranges before trusting.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: (dbot)\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"(dbot)\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.marsflag.com/ja/marsfinder/"
                }
            ],
            "last_verified": "2026-04-01",
            "updated_at": "2026-04-30T09:15:40Z"
        },
        {
            "id": "mavifinds-bot",
            "name": "Mavifinds Bot",
            "slug": "mavifinds-bot",
            "url": "https://botcrawl.com/bots/mavifinds-bot/",
            "status": "active",
            "operator": "Mavifinds",
            "company": "Mavifinds",
            "family": "Mavifinds",
            "category": "monitoring",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "Mavifinds | Bot"
            ],
            "primary_user_agent": "Mavifinds | Bot",
            "robots_token": "Mavifinds | Bot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "The Mavifinds Bot is part of a security service that monitors websites. It can automatically activate security measures, such as an under attack mode, in response to threats.",
            "short_description": "The Mavifinds Bot is part of a security service that monitors websites.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Mavifinds | Bot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Mavifinds | Bot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://brandimi.com/mavifinds-bot/"
                }
            ],
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:40Z"
        },
        {
            "id": "mediaboardbot",
            "name": "mediaboardbot",
            "slug": "mediaboardbot",
            "url": "https://botcrawl.com/bots/mediaboardbot/",
            "status": "active",
            "operator": "Mediaboard",
            "company": "Mediaboard",
            "family": "Mediaboard",
            "category": "monitoring",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "mediaboardbot",
                "Mozilla/5.0 (compatible",
                "mediaboardbot/1.0",
                "+https://www.mediaboard.com/)"
            ],
            "primary_user_agent": "mediaboardbot",
            "robots_token": "mediaboardbot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Mediaboard's web crawler (media monitoring) detects client mentions in online news and public content.",
            "short_description": "Mediaboard's web crawler (media monitoring) detects client mentions in online news and public content.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: mediaboardbot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"mediaboardbot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://media.mediaboard.com/docs/mediaboardbot"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:40Z"
        },
        {
            "id": "medialogia-bot",
            "name": "Medialogia Bot",
            "slug": "medialogia-bot",
            "url": "https://botcrawl.com/bots/medialogia-bot/",
            "status": "active",
            "operator": "Medialogia",
            "company": "Medialogia",
            "family": "Medialogia",
            "category": "monitoring",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "MedialogiaBot",
                "Mozilla/5.0 (compatible",
                "+http://home.prod.mlg.ru/bots.txt)"
            ],
            "primary_user_agent": "MedialogiaBot",
            "robots_token": "MedialogiaBot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Medialogia Bot is the web crawler for Medialogia, a Russian media monitoring company. It collects data from online news and social media for analysis.",
            "short_description": "Medialogia Bot is the web crawler for Medialogia, a Russian media monitoring company.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: MedialogiaBot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"MedialogiaBot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.mlg.ru/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:40Z"
        },
        {
            "id": "mediamonitoringbot",
            "name": "MediaMonitoringBot",
            "slug": "mediamonitoringbot",
            "url": "https://botcrawl.com/bots/mediamonitoringbot/",
            "status": "active",
            "operator": "MediaMonitoringBot",
            "company": "MediaMonitoringBot",
            "family": "MediaMonitoringBot",
            "category": "scraper",
            "kind": "crawler",
            "purpose": "indexing",
            "identity_type": "unknown",
            "user_agents": [
                "MediaMonitoringBot",
                "MediaMonitoringBot/1.1 (+https://mediamonitoringbot.com/crawler",
                "crawler@mediamonitoringbot.com)",
                "MediaMonitoringBot/"
            ],
            "primary_user_agent": "MediaMonitoringBot",
            "robots_token": "MediaMonitoringBot",
            "verified": "yes",
            "respects_robots": "yes",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "MediaMonitoringBot crawls and indexes news and media publishers websites for a new materials and try to match it against keywords provided by our customers (subscribers) and send them updates based on that information.",
            "short_description": "MediaMonitoringBot crawls and indexes news and media publishers websites for a new materials and try to match it against keywords provided by our customers…",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: MediaMonitoringBot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"MediaMonitoringBot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://mediamonitoringbot.com/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:40Z"
        },
        {
            "id": "mediatoolkitbot",
            "name": "Mediatoolkitbot",
            "slug": "mediatoolkitbot",
            "url": "https://botcrawl.com/bots/mediatoolkitbot/",
            "status": "active",
            "operator": "Determ",
            "company": "Determ",
            "family": "Determ",
            "category": "scraper",
            "kind": "crawler",
            "purpose": "indexing",
            "identity_type": "unknown",
            "user_agents": [
                "Mediatoolkitbot",
                "Mediatoolkitbot (complaints@mediatoolkit.com)"
            ],
            "primary_user_agent": "Mediatoolkitbot",
            "robots_token": "Mediatoolkitbot",
            "verified": "yes",
            "respects_robots": "yes",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "The Mediatoolkitbot is a media monitoring tool that crawls the open internet looking for phrases Determ users search for, helping marketers find relevant opportunities for advertising.",
            "short_description": "The Mediatoolkitbot is a media monitoring tool that crawls the open internet looking for phrases Determ users search for, helping marketers find relevant opportunities…",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Mediatoolkitbot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Mediatoolkitbot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.determ.com/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:40Z"
        },
        {
            "id": "mediavine-medatada-parser",
            "name": "Mediavine Medatada Parser",
            "slug": "mediavine-medatada-parser",
            "url": "https://botcrawl.com/bots/mediavine-medatada-parser/",
            "status": "active",
            "operator": "Mediavine",
            "company": "Mediavine",
            "family": "Mediavine",
            "category": "scraper",
            "kind": "crawler",
            "purpose": "general-crawl",
            "identity_type": "unknown",
            "user_agents": [
                "MediavineMetadataParser",
                "MediavineMetadataParser/7.7.3",
                "MediavineMetadataParser/"
            ],
            "primary_user_agent": "MediavineMetadataParser",
            "robots_token": "MediavineMetadataParser",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Our App is called Grow and we allow publishers to enable bookmarking, social sharing, and searching on their sites.",
            "short_description": "Our App is called Grow and we allow publishers to enable bookmarking, social sharing, and searching on their sites.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: MediavineMetadataParser\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"MediavineMetadataParser/\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "http://mediavine.com"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:41Z"
        },
        {
            "id": "mediavine-metadata-parser",
            "name": "Mediavine Medatada Parser",
            "slug": "mediavine-metadata-parser",
            "url": "https://botcrawl.com/bots/mediavine-metadata-parser/",
            "status": "active",
            "operator": "Mediavine",
            "company": "Mediavine",
            "family": "Mediavine",
            "category": "scraper",
            "kind": "crawler",
            "purpose": "general-crawl",
            "identity_type": "unknown",
            "user_agents": [
                "MediavineMetadataParser",
                "MediavineMetadataParser/7.7.3",
                "MediavineMetadataParser/"
            ],
            "primary_user_agent": "MediavineMetadataParser",
            "robots_token": "MediavineMetadataParser",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "unknown",
            "common_use": "Our App is called Grow and we allow publishers to enable bookmarking, social sharing, and searching on their sites.",
            "short_description": "Our App is called Grow and we allow publishers to enable bookmarking, social sharing, and searching on their sites.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: MediavineMetadataParser\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"MediavineMetadataParser\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "http://mediavine.com"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T07:23:22Z"
        },
        {
            "id": "meilisearch-scraper",
            "name": "Meilisearch Scraper",
            "slug": "meilisearch-scraper",
            "url": "https://botcrawl.com/bots/meilisearch-scraper/",
            "status": "active",
            "operator": "Meilisearch",
            "company": "Meilisearch",
            "family": "Meilisearch",
            "category": "search",
            "kind": "unknown",
            "purpose": "indexing",
            "identity_type": "unknown",
            "user_agents": [
                "Meilisearch Scraper",
                "Mozilla/5.0 (compatible",
                "+https://www.meilisearch.com)",
                "Meilisearch Bot"
            ],
            "primary_user_agent": "Meilisearch Scraper",
            "robots_token": "Meilisearch Scraper",
            "verified": "unknown",
            "respects_robots": "yes",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "unknown",
            "source_type": "unknown",
            "short_description": "Meilisearch open-source search engine scraper for indexing website content.",
            "verification_method": "No official IP-range or reverse-DNS verification method is documented here. Match the exact user-agent and request behavior against the source documentation at https://www.meilisearch.com.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: Meilisearch Scraper\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Meilisearch Scraper\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.meilisearch.com"
                }
            ],
            "updated_at": "2026-04-01T00:55:38Z"
        },
        {
            "id": "melonmesa-bot",
            "name": "MelonMesa Bot",
            "slug": "melonmesa-bot",
            "url": "https://botcrawl.com/bots/melonmesa-bot/",
            "status": "active",
            "operator": "MelonMesa",
            "company": "MelonMesa",
            "family": "MelonMesa Bot",
            "category": "unknown",
            "kind": "unknown",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "CentComBot",
                "Mozilla/5.0 (compatible",
                "CentComBot/"
            ],
            "primary_user_agent": "CentComBot",
            "robots_token": "CentComBot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "This bot is used to aggregate data about a popular online multiplayer game from consenting hosts who have opted-in to this collection. The data that is aggregated is reflected in a panel where players can freely search through the aggregated data. Any modification or deletion of data from the sources (consenting hosts) is reflected within the application's database within 30 minutes. The application scrapes each host for new data every five minutes, with a more thorough check for modified data every 30 minutes.",
            "short_description": "This bot is used to aggregate data about a popular online multiplayer game from consenting hosts who have opted-in to this collection.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: CentComBot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"CentComBot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://centcom.melonmesa.com/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:41Z"
        },
        {
            "id": "meltwater",
            "name": "Meltwater",
            "slug": "meltwater",
            "url": "https://botcrawl.com/bots/meltwater/",
            "status": "active",
            "operator": "Meltwater",
            "company": "Meltwater",
            "family": "Meltwater",
            "category": "scraper",
            "kind": "crawler",
            "purpose": "general-crawl",
            "identity_type": "unknown",
            "user_agents": [
                "Meltwater",
                "Meltwater crawler"
            ],
            "primary_user_agent": "Meltwater",
            "robots_token": "Meltwater",
            "verified": "yes",
            "respects_robots": "yes",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "short_description": "Meltwater media-monitoring crawler.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or reverse DNS/IP verification when available.",
            "rules": {
                "robots": "User-agent: Meltwater\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Meltwater\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://crawlercheck.com/directory/seo-tools/meltwater"
                }
            ],
            "last_verified": "2026-04-29",
            "last_checked": "2026-04-29",
            "updated_at": "2026-04-30T09:15:55Z"
        },
        {
            "id": "meta-externalads",
            "name": "Meta-ExternalAds",
            "slug": "meta-externalads",
            "url": "https://botcrawl.com/bots/meta-externalads/",
            "status": "active",
            "operator": "Meta",
            "company": "Meta",
            "family": "Meta",
            "category": "scraper",
            "kind": "crawler",
            "purpose": "general-crawl",
            "identity_type": "unknown",
            "user_agents": [
                "meta-externalads/1.1",
                "meta-externalads/1.1 (+https://developers.facebook.com/docs/sharing/webmasters/crawler)",
                "Mozilla/5.0 (iPhone",
                "CPU iPhone OS 13_2_3 like Mac OS X) AppleWebKit/605.1.15 (KHTML",
                "like Gecko) Version/13.0.3 Mobile/15E148 Safari/604.1 (compatible",
                "meta-externalads/1.1 (+https://developers.facebook.com/docs/sharing/webmasters/crawler))"
            ],
            "primary_user_agent": "meta-externalads/1.1",
            "robots_token": "meta-externalads/1.1",
            "verified": "yes",
            "respects_robots": "yes",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "The Meta-ExternalAds crawler crawls the web for use cases such as improving advertising and other business-related products and services.",
            "short_description": "The Meta-ExternalAds crawler crawls the web for use cases such as improving advertising and other business-related products and services.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: meta-externalads/1.1\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"meta-externalads/1.1\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://developers.facebook.com/docs/sharing/webmasters/web-crawlers/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:41Z"
        },
        {
            "id": "meta-externalagent",
            "name": "Meta-ExternalAgent",
            "slug": "meta-externalagent",
            "url": "https://botcrawl.com/bots/meta-externalagent/",
            "status": "active",
            "operator": "Meta",
            "company": "Meta",
            "family": "Meta",
            "category": "ai",
            "kind": "crawler",
            "purpose": "general-crawl",
            "identity_type": "unknown",
            "user_agents": [
                "meta-externalagent",
                "meta-externalagent/1.1",
                "meta-externalagent/1.1 (+https://developers.facebook.com/docs/sharing/webmasters/crawler)",
                "Meta AI crawler"
            ],
            "primary_user_agent": "meta-externalagent",
            "robots_token": "meta-externalagent",
            "verified": "yes",
            "respects_robots": "yes",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Use cases such as training AI models or improving products by indexing content directly.",
            "short_description": "Use cases such as training AI models or improving products by indexing content directly.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: meta-externalagent\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"meta-externalagent\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://developers.facebook.com/docs/sharing/webmasters/crawler"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:41Z"
        },
        {
            "id": "meta-externalfetcher",
            "name": "Meta-ExternalFetcher",
            "slug": "meta-externalfetcher",
            "url": "https://botcrawl.com/bots/meta-externalfetcher/",
            "status": "active",
            "operator": "Meta",
            "company": "Meta",
            "family": "Meta",
            "category": "ai",
            "kind": "fetcher",
            "purpose": "user-triggered",
            "identity_type": "unknown",
            "user_agents": [
                "meta-externalfetcher",
                "meta-externalfetcher/1.1 (+https://developers.facebook.com/docs/sharing/webmasters/crawler)",
                "meta-externalfetcher/"
            ],
            "primary_user_agent": "meta-externalfetcher",
            "robots_token": "meta-externalfetcher",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Crawler receives individual links at the user's initiative to support certain product features.",
            "short_description": "Crawler receives individual links at the user's initiative to support certain product features.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: meta-externalfetcher\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"meta-externalfetcher\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://developers.facebook.com/docs/sharing/webmasters/web-crawlers/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:41Z"
        },
        {
            "id": "meta-webindexer",
            "name": "Meta-WebIndexer",
            "slug": "meta-webindexer",
            "url": "https://botcrawl.com/bots/meta-webindexer/",
            "status": "active",
            "operator": "Meta",
            "company": "Meta",
            "family": "Meta",
            "category": "scraper",
            "kind": "preview",
            "purpose": "general-crawl",
            "identity_type": "unknown",
            "user_agents": [
                "meta-webindexer/1.1",
                "meta-webindexer/1.1 (+https://developers.facebook.com/docs/sharing/webmasters/crawler)"
            ],
            "primary_user_agent": "meta-webindexer/1.1",
            "robots_token": "meta-webindexer/1.1",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: meta-webindexer/1.1\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"meta-webindexer/1.1\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://developers.facebook.com/docs/sharing/webmasters/web-crawlers/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:41Z"
        },
        {
            "id": "metorik",
            "name": "Metorik",
            "slug": "metorik",
            "url": "https://botcrawl.com/bots/metorik/",
            "status": "active",
            "operator": "Metorik",
            "company": "Metorik",
            "family": "Metorik",
            "category": "unknown",
            "kind": "fetcher",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "Metorik",
                "Metorik API Client/2.0.1"
            ],
            "primary_user_agent": "Metorik",
            "robots_token": "Metorik",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Analytics and email automation service used by eCommerce businesses. Metorik syncs data from customer sites by making API requests to their sites.",
            "short_description": "Analytics and email automation service used by eCommerce businesses.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Metorik\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Metorik\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://metorik.com"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:41Z"
        },
        {
            "id": "mgidbot",
            "name": "MgidBot",
            "slug": "mgidbot",
            "url": "https://botcrawl.com/bots/mgidbot/",
            "status": "active",
            "operator": "MGID",
            "company": "MGID",
            "family": "MGID",
            "category": "scraper",
            "kind": "crawler",
            "purpose": "general-crawl",
            "identity_type": "unknown",
            "user_agents": [
                "MgidBot",
                "MgidBot 1.0"
            ],
            "primary_user_agent": "MgidBot",
            "robots_token": "MgidBot",
            "verified": "yes",
            "respects_robots": "yes",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "MgidBot is used for detecting context categories of the content for advertising recommendations.",
            "short_description": "MgidBot is used for detecting context categories of the content for advertising recommendations.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: MgidBot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"MgidBot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.mgid.com/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:41Z"
        },
        {
            "id": "microsoftpreview",
            "name": "Microsoft Preview",
            "slug": "microsoftpreview",
            "url": "https://botcrawl.com/bots/microsoftpreview/",
            "status": "active",
            "operator": "Microsoft",
            "company": "Microsoft",
            "family": "Microsoft",
            "category": "scraper",
            "kind": "preview",
            "purpose": "general-crawl",
            "identity_type": "unknown",
            "user_agents": [
                "MicrosoftPreview",
                "Mozilla/5.0 (compatible",
                "MicrosoftPreview/2.0",
                "+https://aka.ms/MicrosoftPreview)",
                "MicrosoftPreview/"
            ],
            "primary_user_agent": "MicrosoftPreview",
            "robots_token": "MicrosoftPreview",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "unknown",
            "common_use": "MicrosoftPreview generates page snapshots for Microsoft products.",
            "short_description": "MicrosoftPreview generates page snapshots for Microsoft products.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: MicrosoftPreview\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"MicrosoftPreview\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.bing.com/webmasters/help/which-crawlers-does-bing-use-8c184ec0"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T07:23:23Z"
        },
        {
            "id": "microsoft-preview",
            "name": "Microsoft Preview",
            "slug": "microsoft-preview",
            "url": "https://botcrawl.com/bots/microsoft-preview/",
            "status": "active",
            "operator": "Microsoft",
            "company": "Microsoft",
            "family": "Microsoft",
            "category": "scraper",
            "kind": "preview",
            "purpose": "general-crawl",
            "identity_type": "unknown",
            "user_agents": [
                "MicrosoftPreview",
                "Mozilla/5.0 (compatible",
                "MicrosoftPreview/2.0",
                "+https://aka.ms/MicrosoftPreview)",
                "MicrosoftPreview/"
            ],
            "primary_user_agent": "MicrosoftPreview",
            "robots_token": "MicrosoftPreview",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "MicrosoftPreview generates page snapshots for Microsoft products.",
            "short_description": "MicrosoftPreview generates page snapshots for Microsoft products.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: MicrosoftPreview\nDisallow: /"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.bing.com/webmasters/help/which-crawlers-does-bing-use-8c184ec0"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:41Z"
        },
        {
            "id": "mirrorwebcrawler",
            "name": "MirrorWebCrawler",
            "slug": "mirrorwebcrawler",
            "url": "https://botcrawl.com/bots/mirrorwebcrawler/",
            "status": "active",
            "operator": "MirrorWeb Ltd",
            "company": "MirrorWeb Ltd",
            "family": "MirrorWeb Ltd",
            "category": "scraper",
            "kind": "crawler",
            "purpose": "indexing",
            "identity_type": "unknown",
            "user_agents": [
                "mirrorweb",
                "Mozilla/5.0 (Windows NT 10.0",
                "Win64",
                "x64) AppleWebKit/537.36 (KHTML",
                "like Gecko) Chrome/78.0.3904.97 Safari/537.36 +https://www.mirrorweb.com"
            ],
            "primary_user_agent": "mirrorweb",
            "robots_token": "mirrorweb",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "We are a commercial web archiving supplier providing archival solutions for the financial and public sector.",
            "short_description": "We are a commercial web archiving supplier providing archival solutions for the financial and public sector.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: mirrorweb\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"mirrorweb\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.mirrorweb.com/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:41Z"
        },
        {
            "id": "missinglettr-bot",
            "name": "Missinglettr Bot",
            "slug": "missinglettr-bot",
            "url": "https://botcrawl.com/bots/missinglettr-bot/",
            "status": "active",
            "operator": "Missinglettr",
            "company": "Missinglettr",
            "family": "Missinglettr",
            "category": "scraper",
            "kind": "crawler",
            "purpose": "general-crawl",
            "identity_type": "unknown",
            "user_agents": [
                "MissinglettrBot",
                "Mozilla/5.0 (compatible",
                "MissinglettrBot/2.0",
                "+http://missinglettr.com/bot/)"
            ],
            "primary_user_agent": "MissinglettrBot",
            "robots_token": "MissinglettrBot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Missinglettr will crawl specific blog posts on customers' sites to help turn them into social media campaigns.",
            "short_description": "Missinglettr will crawl specific blog posts on customers' sites to help turn them into social media campaigns.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: MissinglettrBot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"MissinglettrBot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://missinglettr.com"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:41Z"
        },
        {
            "id": "mistralai-index",
            "name": "MistralAI-Index",
            "slug": "mistralai-index",
            "url": "https://botcrawl.com/bots/mistralai-index/",
            "status": "active",
            "operator": "Mistral AI",
            "company": "Mistral AI",
            "family": "Mistral AI",
            "category": "search",
            "kind": "crawler",
            "purpose": "indexing",
            "identity_type": "official-documented",
            "user_agents": [
                "MistralAI-Index",
                "Mozilla/5.0 AppleWebKit/537.36 (KHTML",
                "like Gecko",
                "compatible",
                "MistralAI-Index/1.0",
                "+https://docs.mistral.ai/robots)",
                "Mistral search indexer"
            ],
            "primary_user_agent": "MistralAI-Index",
            "robots_token": "MistralAI-Index",
            "verified": "yes",
            "respects_robots": "yes",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "high",
            "source_type": "official",
            "short_description": "Mistral indexing crawler for Le Chat search.",
            "verification_method": "Verify the user-agent together with Mistral's published IP ranges for MistralAI-Index.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "ip_ranges_url": "https://mistral.ai/mistralai-index-ips.json",
            "rules": {
                "robots": "User-agent: MistralAI-Index\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"MistralAI-Index\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://docs.mistral.ai/robots"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:41Z"
        },
        {
            "id": "mistralai-user",
            "name": "MistralAI-User",
            "slug": "mistralai-user",
            "url": "https://botcrawl.com/bots/mistralai-user/",
            "status": "active",
            "operator": "Mistral AI",
            "company": "Mistral AI",
            "family": "Mistral AI",
            "category": "ai",
            "kind": "fetcher",
            "purpose": "user-triggered",
            "identity_type": "official-documented",
            "user_agents": [
                "MistralAI-User",
                "Mozilla/5.0 AppleWebKit/537.36 (KHTML",
                "like Gecko",
                "compatible",
                "MistralAI-User/1.0",
                "+https://docs.mistral.ai/robots)",
                "Mistral user fetcher"
            ],
            "primary_user_agent": "MistralAI-User",
            "robots_token": "MistralAI-User",
            "verified": "yes",
            "respects_robots": "yes",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "high",
            "source_type": "official",
            "common_use": "Bot for user actions in le Chat by Mistral AI, for instance when asked to open a web page.",
            "short_description": "User-triggered Mistral fetcher for Le Chat web access.",
            "verification_method": "Verify the user-agent together with Mistral's published IP ranges for MistralAI-User.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "ip_ranges_url": "https://mistral.ai/mistralai-user-ips.json",
            "rules": {
                "robots": "User-agent: MistralAI-User\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"MistralAI-User\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://docs.mistral.ai/robots"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:41Z"
        },
        {
            "id": "mixrankbot",
            "name": "MixrankBot",
            "slug": "mixrankbot",
            "url": "https://botcrawl.com/bots/mixrankbot/",
            "status": "active",
            "operator": "MixRank",
            "company": "MixRank",
            "family": "MixRank",
            "category": "search",
            "kind": "unknown",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "MixrankBot",
                "Mozilla/5.0 (compatible",
                "+https://mixrank.com/bot)",
                "MixRank Bot"
            ],
            "primary_user_agent": "MixrankBot",
            "robots_token": "MixrankBot",
            "verified": "unknown",
            "respects_robots": "unknown",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "unknown",
            "source_type": "unknown",
            "short_description": "MixRank web crawler that collects data about websites, companies, mobile apps, and technologies.",
            "verification_method": "No official IP-range or reverse-DNS verification method is documented here. Match the exact user-agent and request behavior against the source documentation at https://mixrank.com/bot.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: MixrankBot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"MixrankBot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://mixrank.com/bot"
                }
            ],
            "updated_at": "2026-04-01T00:55:23Z"
        },
        {
            "id": "mj12bot",
            "name": "MJ12Bot",
            "slug": "mj12bot",
            "url": "https://botcrawl.com/bots/mj12bot/",
            "status": "active",
            "operator": "Majestic",
            "company": "Majestic",
            "family": "Majestic",
            "category": "scraper",
            "kind": "crawler",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "MJ12bot",
                "Mozilla/5.0 (compatible",
                "MJ12bot/v1.4.8",
                "http://mj12bot.com/)"
            ],
            "primary_user_agent": "MJ12bot",
            "robots_token": "MJ12bot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "MJ12bot is the web crawler for Majestic. MJ12Bot does not currently cache web content or personal data. Instead it maps the link relationships between websites to build a search engine. This data is available to technologies and the public, either by searching for a keyword or a website at Majestic.",
            "short_description": "MJ12bot is the web crawler for Majestic.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: MJ12bot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"MJ12bot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://majestic.com/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:41Z"
        },
        {
            "id": "modular-ds",
            "name": "Modular DS",
            "slug": "modular-ds",
            "url": "https://botcrawl.com/bots/modular-ds/",
            "status": "active",
            "operator": "Uniqoders Technologies SL",
            "company": "Uniqoders Technologies SL",
            "family": "Uniqoders Technologies SL",
            "category": "unknown",
            "kind": "fetcher",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "ModularConnector",
                "ModularConnector/1.0 (Linux)",
                "ModularConnector/"
            ],
            "primary_user_agent": "ModularConnector",
            "robots_token": "ModularConnector",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Service to manage WordPress websites via the WP JSON API.",
            "short_description": "Service to manage WordPress websites via the WP JSON API.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: ModularConnector\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"ModularConnector\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://uniqoders.com/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:41Z"
        },
        {
            "id": "mojeek",
            "name": "Mojeek",
            "slug": "mojeek",
            "url": "https://botcrawl.com/bots/mojeek/",
            "status": "active",
            "operator": "Mojeek",
            "company": "Mojeek",
            "family": "Mojeek",
            "category": "search",
            "kind": "crawler",
            "purpose": "indexing",
            "identity_type": "unknown",
            "user_agents": [
                "MojeekBot",
                "Mozilla/5.0 (compatible",
                "MojeekBot/0.6",
                "+https://www.mojeek.com/bot.html)"
            ],
            "primary_user_agent": "MojeekBot",
            "robots_token": "MojeekBot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Details and information for webmasters regarding Mojeekbot, the web crawler for the Mojeek search engine.",
            "short_description": "Details and information for webmasters regarding Mojeekbot, the web crawler for the Mojeek search engine.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: MojeekBot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"MojeekBot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.mojeek.com/bot.html"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:41Z"
        },
        {
            "id": "mollie-bot",
            "name": "Mollie Bot",
            "slug": "mollie-bot",
            "url": "https://botcrawl.com/bots/mollie-bot/",
            "status": "active",
            "operator": "Mollie B.V.",
            "company": "Mollie B.V.",
            "family": "Mollie B.V.",
            "category": "unknown",
            "kind": "fetcher",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "Mollie HTTP client/",
                "Mollie HTTP client/1.0"
            ],
            "primary_user_agent": "Mollie HTTP client/",
            "robots_token": "Mollie HTTP client/",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Mollie B.V. is a payment service provider. We use webhooks to notify our merchants about updates to their payments.",
            "short_description": "Mollie B.V.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Mollie HTTP client/\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Mollie HTTP client/\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.mollie.com/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:41Z"
        },
        {
            "id": "momenticbot",
            "name": "MomenticBot",
            "slug": "momenticbot",
            "url": "https://botcrawl.com/bots/momenticbot/",
            "status": "active",
            "operator": "Momentic",
            "company": "Momentic",
            "family": "Momentic",
            "category": "monitoring",
            "kind": "unknown",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "momenticbot"
            ],
            "primary_user_agent": "momenticbot",
            "robots_token": "momenticbot",
            "verified": "yes",
            "respects_robots": "unknown",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "botsfyi-verified-listing",
            "common_use": "Monitoring",
            "short_description": "AI-powered software testing bot that runs end-to-end tests against web applications.",
            "verification_method": "Public verified bot directory listing",
            "spoofing_risk": "Medium",
            "rules": {
                "cloudflare": "(http.user_agent contains \"momenticbot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://bots.fyi/d/momenticbot"
                }
            ],
            "last_verified": "2026-04-19",
            "last_checked": "2026-04-19",
            "updated_at": "2026-04-30T09:15:41Z"
        },
        {
            "id": "monitage",
            "name": "Monitage",
            "slug": "monitage",
            "url": "https://botcrawl.com/bots/monitage/",
            "status": "active",
            "operator": "Monitage",
            "company": "Monitage",
            "family": "Monitage",
            "category": "monitoring",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "Monitage",
                "Mozilla/5.0 AppleWebKit/537.36 (KHTML",
                "like Gecko",
                "compatible",
                "MonitageUptime/1.1",
                "+http://monitage.com/uptime) Chrome/136.0 Safari/537.36"
            ],
            "primary_user_agent": "Monitage",
            "robots_token": "Monitage",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Uptime monitoring bot, part ofclicky.com web analytics",
            "short_description": "Uptime monitoring bot, part ofclicky.com web analytics",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Monitage\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Monitage\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://monitage.com/uptime"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:41Z"
        },
        {
            "id": "monitis",
            "name": "Monitis",
            "slug": "monitis",
            "url": "https://botcrawl.com/bots/monitis/",
            "status": "active",
            "operator": "Monitis",
            "company": "Monitis",
            "family": "Monitis",
            "category": "monitoring",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "monitis",
                "Mozilla/5.0 (compatible",
                "monitis - premium monitoring service",
                "http://www.monitis.com)"
            ],
            "primary_user_agent": "monitis",
            "robots_token": "monitis",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "The Monitis HTTP Monitoring Probe.",
            "short_description": "The Monitis HTTP Monitoring Probe.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: monitis\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"monitis\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.monitis.com/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:41Z"
        },
        {
            "id": "monitorss",
            "name": "MonitoRSS",
            "slug": "monitorss",
            "url": "https://botcrawl.com/bots/monitorss/",
            "status": "active",
            "operator": "MonitoRSS",
            "company": "MonitoRSS",
            "family": "MonitoRSS",
            "category": "search",
            "kind": "fetcher",
            "purpose": "indexing",
            "identity_type": "unknown",
            "user_agents": [
                "MonitoRSS",
                "Mozilla/5.0 (Windows NT 10.0",
                "Win64",
                "x64",
                "rv:89.0) Gecko/20100101 Firefox/89.0 MonitoRSS/1.0",
                "MonitoRSS/"
            ],
            "primary_user_agent": "MonitoRSS",
            "robots_token": "MonitoRSS",
            "verified": "yes",
            "respects_robots": "yes",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: MonitoRSS\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"MonitoRSS\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://monitorss.xyz"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:41Z"
        },
        {
            "id": "monsidobot",
            "name": "Monsidobot",
            "slug": "monsidobot",
            "url": "https://botcrawl.com/bots/monsidobot/",
            "status": "active",
            "operator": "Acquia",
            "company": "Acquia",
            "family": "Acquia",
            "category": "monitoring",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "Monsidobot",
                "Mozilla/5.0 (compatible",
                "Monsidobot/2.2",
                "+http://monsido.com/bot.html",
                "info@monsido.com)"
            ],
            "primary_user_agent": "Monsidobot",
            "robots_token": "Monsidobot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Scanning tool that helps perfect content and improve web accessibility.",
            "short_description": "Scanning tool that helps perfect content and improve web accessibility.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Monsidobot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Monsidobot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://docs.acquia.com/acquia-optimize/getting-started/data-hosting-and-security"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:41Z"
        },
        {
            "id": "monspark",
            "name": "MonSpark",
            "slug": "monspark",
            "url": "https://botcrawl.com/bots/monspark/",
            "status": "active",
            "operator": "MonSpark",
            "company": "MonSpark",
            "family": "MonSpark",
            "category": "monitoring",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "MonSpark",
                "Mozilla/5.0+(compatible",
                "MonSpark/1.0",
                "http://www.monspark.com/)",
                "MonSpark/"
            ],
            "primary_user_agent": "MonSpark",
            "robots_token": "MonSpark",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "MonSpark is a website monitoring service. Its monitoring bot checks website availability, network conditions and TLS certificate validity.",
            "short_description": "MonSpark is a website monitoring service.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: MonSpark\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"MonSpark\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://monspark.com/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:42Z"
        },
        {
            "id": "montasticmonitor",
            "name": "MontasticMonitor",
            "slug": "montasticmonitor",
            "url": "https://botcrawl.com/bots/montasticmonitor/",
            "status": "active",
            "operator": "Montastic",
            "company": "Montastic",
            "family": "Montastic",
            "category": "monitoring",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "montastic-monitor",
                "montastic-monitor http://www.montastic.com"
            ],
            "primary_user_agent": "montastic-monitor",
            "robots_token": "montastic-monitor",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "The MontasticMonitor bot monitors website avaiability.",
            "short_description": "The MontasticMonitor bot monitors website avaiability.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: montastic-monitor\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"montastic-monitor\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.montastic.com/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:42Z"
        },
        {
            "id": "motominerbot",
            "name": "MotoMinerBot",
            "slug": "motominerbot",
            "url": "https://botcrawl.com/bots/motominerbot/",
            "status": "active",
            "operator": "Motominer",
            "company": "Motominer",
            "family": "Motominer",
            "category": "scraper",
            "kind": "crawler",
            "purpose": "indexing",
            "identity_type": "unknown",
            "user_agents": [
                "MotoMinerBot",
                "Mozilla/5.0 (compatible",
                "MotoMinerBot/1.0",
                "+https://motominer.com/Bot)"
            ],
            "primary_user_agent": "MotoMinerBot",
            "robots_token": "MotoMinerBot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "MotoMinerBot is MotoMiner's web crawling bot. All vehicle detail pages we index are searchable via MotoMiner's search engine.",
            "short_description": "MotoMinerBot is MotoMiner's web crawling bot.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: MotoMinerBot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"MotoMinerBot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://motominer.com/Bot"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:42Z"
        },
        {
            "id": "moz",
            "name": "Moz",
            "slug": "moz",
            "url": "https://botcrawl.com/bots/moz/",
            "status": "active",
            "operator": "Moz",
            "company": "Moz",
            "family": "Moz",
            "category": "search",
            "kind": "unknown",
            "purpose": "indexing",
            "identity_type": "unknown",
            "user_agents": [
                "rogerbot",
                "Mozilla/5.0 (compatible",
                "rogerbot/1.2",
                "+https://moz.com/help/guides/moz-procedures/what-is-rogerbot)",
                "Moz Bot",
                "Rogerbot"
            ],
            "primary_user_agent": "rogerbot",
            "robots_token": "rogerbot",
            "verified": "yes",
            "respects_robots": "yes",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "unknown",
            "source_type": "unknown",
            "short_description": "Moz web crawler that powers the Moz SEO toolset and domain authority metrics.",
            "verification_method": "Treat this entry as verified only when the exact user-agent matches the operator documentation at https://moz.com/help/guides/moz-procedures/what-is-rogerbot.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: rogerbot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"rogerbot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://moz.com/help/guides/moz-procedures/what-is-rogerbot"
                }
            ],
            "updated_at": "2026-04-01T00:55:36Z"
        },
        {
            "id": "dotbot",
            "name": "Moz dotbot",
            "slug": "dotbot",
            "url": "https://botcrawl.com/bots/dotbot/",
            "status": "active",
            "operator": "Moz",
            "company": "Moz",
            "family": "Moz",
            "category": "scraper",
            "kind": "crawler",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "DotBot",
                "Mozilla/5.0 (compatible",
                "DotBot/1.1",
                "http://www.opensiteexplorer.org/dotbot",
                "help@moz.com)",
                "Moz dotbot"
            ],
            "primary_user_agent": "DotBot",
            "robots_token": "DotBot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "unknown",
            "common_use": "Dotbot is Moz's web crawler, it gathers web data for the Moz Link Index. This data we collect through Dotbot is available in the Links section of your Moz Pro campaign, Link Explorer, and the Moz Links API.",
            "short_description": "Dotbot is Moz's web crawler, it gathers web data for the Moz Link Index.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: DotBot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"DotBot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://moz.com/link-explorer"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T07:23:25Z"
        },
        {
            "id": "moz-dotbot",
            "name": "Moz dotbot",
            "slug": "moz-dotbot",
            "url": "https://botcrawl.com/bots/moz-dotbot/",
            "status": "active",
            "operator": "Moz",
            "company": "Moz",
            "family": "Moz",
            "category": "scraper",
            "kind": "crawler",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "DotBot",
                "Mozilla/5.0 (compatible",
                "DotBot/1.1",
                "http://www.opensiteexplorer.org/dotbot",
                "help@moz.com)",
                "Moz dotbot"
            ],
            "primary_user_agent": "DotBot",
            "robots_token": "DotBot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Dotbot is Moz's web crawler, it gathers web data for the Moz Link Index. This data we collect through Dotbot is available in the Links section of your Moz Pro campaign, Link Explorer, and the Moz Links API.",
            "short_description": "Dotbot is Moz's web crawler, it gathers web data for the Moz Link Index.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: DotBot\nDisallow: /"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://moz.com/link-explorer"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:42Z"
        },
        {
            "id": "moz-rogerbot",
            "name": "Moz rogerbot",
            "slug": "moz-rogerbot",
            "url": "https://botcrawl.com/bots/moz-rogerbot/",
            "status": "active",
            "operator": "Moz",
            "company": "Moz",
            "family": "Moz",
            "category": "scraper",
            "kind": "crawler",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "rogerbot-crawler",
                "rogerbot/1.2 (https://moz.com/help/guides/moz-procedures/what-is-rogerbot",
                "rogerbot-crawler+aardwolf-production-crawler-01@moz.com)",
                "Moz rogerbot",
                "rogerbot"
            ],
            "primary_user_agent": "rogerbot-crawler",
            "robots_token": "rogerbot-crawler",
            "verified": "yes",
            "respects_robots": "yes",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Rogerbot is Moz's site audit crawler for Moz Pro Campaigns.",
            "short_description": "Rogerbot is Moz's site audit crawler for Moz Pro Campaigns.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: rogerbot-crawler\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"rogerbot-crawler\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://moz.com/help/moz-procedures/crawlers/rogerbot"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:42Z"
        },
        {
            "id": "mrgbot",
            "name": "MRGbot",
            "slug": "mrgbot",
            "url": "https://botcrawl.com/bots/mrgbot/",
            "status": "active",
            "operator": "MRG Web Services srl",
            "company": "MRG Web Services srl",
            "family": "MRG Web Services srl",
            "category": "search",
            "kind": "crawler",
            "purpose": "indexing",
            "identity_type": "unknown",
            "user_agents": [
                "MRGbot",
                "Mozilla/5.0 (compatible",
                "MRGbot/1.0",
                "+https://www.mrg.ro/bot.html)",
                "MRGbot/"
            ],
            "primary_user_agent": "MRGbot",
            "robots_token": "MRGbot",
            "verified": "yes",
            "respects_robots": "yes",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Search engine aimed at generating a corpus of data to be able to aggregate data in various ways.",
            "short_description": "Search engine aimed at generating a corpus of data to be able to aggregate data in various ways.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: MRGbot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"MRGbot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.mrg.ro"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:42Z"
        },
        {
            "id": "msnbot",
            "name": "MSN",
            "slug": "msnbot",
            "url": "https://botcrawl.com/bots/msnbot/",
            "status": "active",
            "operator": "Microsoft",
            "company": "Microsoft",
            "family": "Microsoft",
            "category": "search",
            "kind": "crawler",
            "purpose": "indexing",
            "identity_type": "unknown",
            "user_agents": [
                "msnbot",
                "msnbot/1.1 ( http://search.msn.com/msnbot.htm)",
                "msnbot/2.0b (+http://search.msn.com/msnbot.htm)",
                "msnbot/"
            ],
            "primary_user_agent": "msnbot",
            "robots_token": "msnbot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "unknown",
            "common_use": "MSNBot was the web crawler for Microsoft's MSN Search, which has since been replaced by Bing. Its purpose was to index web pages for inclusion in the MSN search engine.",
            "short_description": "MSNBot was the web crawler for Microsoft's MSN Search, which has since been replaced by Bing.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: msnbot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"msnbot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.bing.com/webmaster/help/which-crawlers-does-bing-use-8c184ec0"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T07:23:26Z"
        },
        {
            "id": "msn",
            "name": "MSN",
            "slug": "msn",
            "url": "https://botcrawl.com/bots/msn/",
            "status": "active",
            "operator": "Microsoft",
            "company": "Microsoft",
            "family": "Microsoft",
            "category": "search",
            "kind": "crawler",
            "purpose": "indexing",
            "identity_type": "unknown",
            "user_agents": [
                "msnbot",
                "msnbot/1.1 ( http://search.msn.com/msnbot.htm)",
                "msnbot/2.0b (+http://search.msn.com/msnbot.htm)",
                "msnbot/"
            ],
            "primary_user_agent": "msnbot",
            "robots_token": "msnbot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "MSNBot was the web crawler for Microsoft's MSN Search, which has since been replaced by Bing. Its purpose was to index web pages for inclusion in the MSN search engine.",
            "short_description": "MSNBot was the web crawler for Microsoft's MSN Search, which has since been replaced by Bing.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: msnbot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"msnbot/\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.bing.com/webmaster/help/which-crawlers-does-bing-use-8c184ec0"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:42Z"
        },
        {
            "id": "muck-rack-scraper",
            "name": "Muck Rack Scraper",
            "slug": "muck-rack-scraper",
            "url": "https://botcrawl.com/bots/muck-rack-scraper/",
            "status": "active",
            "operator": "Muck Rack",
            "company": "Muck Rack",
            "family": "Muck Rack",
            "category": "search",
            "kind": "fetcher",
            "purpose": "indexing",
            "identity_type": "unknown",
            "user_agents": [
                "muckrack.com",
                "Mozilla/5.0 (compatible",
                "MuckRack/1.0",
                "+https://muckrack.com)",
                "MuckRackFeedParser/1.0 +https://muckrack.com"
            ],
            "primary_user_agent": "muckrack.com",
            "robots_token": "muckrack.com",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Muck Rack uses many approaches for source discovery such as RSS feeds, sitemaps, and other structured formats",
            "short_description": "Muck Rack uses many approaches for source discovery such as RSS feeds, sitemaps, and other structured formats",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: muckrack.com\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"muckrack.com\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://muckrack.com/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:42Z"
        },
        {
            "id": "muckrack",
            "name": "MuckRack",
            "slug": "muckrack",
            "url": "https://botcrawl.com/bots/muckrack/",
            "status": "active",
            "operator": "Muck Rack",
            "company": "Muck Rack",
            "family": "Muck Rack",
            "category": "monitoring",
            "kind": "unknown",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "MuckRack",
                "Mozilla/5.0 (compatible",
                "+https://muckrack.com)",
                "Muck Rack Bot"
            ],
            "primary_user_agent": "MuckRack",
            "robots_token": "MuckRack",
            "verified": "unknown",
            "respects_robots": "unknown",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "unknown",
            "source_type": "unknown",
            "short_description": "Muck Rack media monitoring crawler that gathers information about media coverage and journalist profiles.",
            "verification_method": "No official IP-range or reverse-DNS verification method is documented here. Match the exact user-agent and request behavior against the source documentation at https://muckrack.com.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: MuckRack\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"MuckRack\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://muckrack.com"
                }
            ],
            "updated_at": "2026-04-01T00:55:23Z"
        },
        {
            "id": "nanointeractive",
            "name": "NanoInteractive",
            "slug": "nanointeractive",
            "url": "https://botcrawl.com/bots/nanointeractive/",
            "status": "active",
            "operator": "Nano Interactive",
            "company": "Nano Interactive",
            "family": "Nano Interactive",
            "category": "search",
            "kind": "unknown",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "NanoInteractive",
                "Mozilla/5.0 (compatible",
                "+https://www.nanointeractive.com)",
                "Nano Interactive Bot"
            ],
            "primary_user_agent": "NanoInteractive",
            "robots_token": "NanoInteractive",
            "verified": "unknown",
            "respects_robots": "unknown",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "unknown",
            "source_type": "unknown",
            "short_description": "Nano Interactive digital advertising crawler for audience creation and ad targeting.",
            "verification_method": "No official IP-range or reverse-DNS verification method is documented here. Match the exact user-agent and request behavior against the source documentation at https://www.nanointeractive.com.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: NanoInteractive\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"NanoInteractive\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.nanointeractive.com"
                }
            ],
            "updated_at": "2026-04-01T00:55:23Z"
        },
        {
            "id": "nava-labs-asp-dev",
            "name": "Nava Labs ASP (Dev)",
            "slug": "nava-labs-asp-dev",
            "url": "https://botcrawl.com/bots/nava-labs-asp-dev/",
            "status": "active",
            "operator": "Nava Labs",
            "company": "Nava Labs",
            "family": "Nava",
            "category": "ai",
            "kind": "agent",
            "purpose": "user-triggered",
            "identity_type": "unknown",
            "user_agents": [
                "Nava/"
            ],
            "primary_user_agent": "Nava/",
            "robots_token": "Nava/",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Bot to assist social workers in navigating safety net benefit websites.",
            "short_description": "Bot to assist social workers in navigating safety net benefit websites.",
            "verification_method": "Validate the published identifier against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or stronger verification when available.",
            "rules": {
                "robots": "User-agent: Nava/\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Nava/\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://dev.labs-asp.navateam.com/bot-disclosure"
                }
            ],
            "last_verified": "2026-04-19",
            "last_checked": "2026-04-19",
            "updated_at": "2026-04-30T09:15:54Z"
        },
        {
            "id": "naver-blueno",
            "name": "naver-blueno",
            "slug": "naver-blueno",
            "url": "https://botcrawl.com/bots/naver-blueno/",
            "status": "active",
            "operator": "Naver",
            "company": "Naver",
            "family": "Naver",
            "category": "unknown",
            "kind": "unknown",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "naver-blueno"
            ],
            "primary_user_agent": "naver-blueno",
            "robots_token": "naver-blueno",
            "verified": "yes",
            "respects_robots": "unknown",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "botsfyi-verified-listing",
            "common_use": "Preview",
            "short_description": "Naver's preview-snippet crawler that fetches titles, descriptions, and images when users insert links in Naver services.",
            "verification_method": "Public verified bot directory listing",
            "spoofing_risk": "Medium",
            "rules": {
                "cloudflare": "(http.user_agent contains \"naver-blueno\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://bots.fyi/d/naver-blueno"
                }
            ],
            "last_verified": "2026-04-19",
            "last_checked": "2026-04-19",
            "updated_at": "2026-04-30T09:15:42Z"
        },
        {
            "id": "naverbot",
            "name": "naverbot",
            "slug": "naverbot",
            "url": "https://botcrawl.com/bots/naverbot/",
            "status": "active",
            "operator": "Naver",
            "company": "Naver",
            "family": "Naver",
            "category": "unknown",
            "kind": "unknown",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "naverbot"
            ],
            "primary_user_agent": "naverbot",
            "robots_token": "naverbot",
            "verified": "yes",
            "respects_robots": "unknown",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "botsfyi-verified-listing",
            "common_use": "Search Engine Crawler",
            "short_description": "Naver's web crawler, also known as Yeti, used to crawl and index web content.",
            "verification_method": "Public verified bot directory listing",
            "spoofing_risk": "Medium",
            "rules": {
                "cloudflare": "(http.user_agent contains \"naverbot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://bots.fyi/d/naverbot"
                }
            ],
            "last_verified": "2026-04-19",
            "last_checked": "2026-04-19",
            "updated_at": "2026-04-30T09:15:42Z"
        },
        {
            "id": "neevabot",
            "name": "Neevabot",
            "slug": "neevabot",
            "url": "https://botcrawl.com/bots/neevabot/",
            "status": "active",
            "operator": "Neeva",
            "company": "Neeva",
            "family": "Neeva",
            "category": "search",
            "kind": "crawler",
            "purpose": "indexing",
            "identity_type": "unknown",
            "user_agents": [
                "Neevabot",
                "Mozilla/5.0 (compatible",
                "Neevabot/1.0",
                "+https://neeva.com/neevabot)"
            ],
            "primary_user_agent": "Neevabot",
            "robots_token": "Neevabot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Neevabot is the web crawler for the search engine neeva.com.",
            "short_description": "Neevabot is the web crawler for the search engine neeva.com.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Neevabot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Neevabot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://neeva.com/neevabot"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:42Z"
        },
        {
            "id": "nekuda-payment-executor",
            "name": "Nekuda Payment Executor",
            "slug": "nekuda-payment-executor",
            "url": "https://botcrawl.com/bots/nekuda-payment-executor/",
            "status": "active",
            "category": "unknown",
            "kind": "unknown",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "Nekuda Payment Executor"
            ],
            "primary_user_agent": "Nekuda Payment Executor",
            "robots_token": "Nekuda Payment Executor",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "unknown",
            "source_type": "unknown",
            "short_description": "Executes secure checkout via browser agent using user card and signed mandate.",
            "verification_method": "Verify the user-agent token and validate against the operator documentation or the Cloudflare verified-bot directory when needed.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: Nekuda Payment Executor\nDisallow: /"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://docs.nekuda.ai/payment-executor"
                }
            ],
            "updated_at": "2026-03-31T14:31:48Z"
        },
        {
            "id": "netcraft",
            "name": "Netcraft",
            "slug": "netcraft",
            "url": "https://botcrawl.com/bots/netcraft/",
            "status": "active",
            "operator": "Netcraft",
            "company": "Netcraft",
            "family": "Netcraft",
            "category": "security",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "Netcraft",
                "Mozilla/5.0 (compatible",
                "NetcraftSurveyAgent/1.0",
                "+info@netcraft.com)",
                "Mozilla/4.0 (compatible",
                "Netcraft Web Server Survey)"
            ],
            "primary_user_agent": "Netcraft",
            "robots_token": "Netcraft",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "The Netcraft Survey Agent is a bot that analyzes web server technology stacks for their Web Server Survey.",
            "short_description": "The Netcraft Survey Agent is a bot that analyzes web server technology stacks for their Web Server Survey.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Netcraft\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Netcraft\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.netcraft.com/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:42Z"
        },
        {
            "id": "netcraftsurveyagent",
            "name": "NetcraftSurveyAgent",
            "slug": "netcraftsurveyagent",
            "url": "https://botcrawl.com/bots/netcraftsurveyagent/",
            "status": "active",
            "operator": "Netcraft",
            "company": "Netcraft",
            "family": "Netcraft",
            "category": "security",
            "kind": "unknown",
            "purpose": "user-triggered",
            "identity_type": "unknown",
            "user_agents": [
                "NetcraftSurveyAgent",
                "Mozilla/5.0 (compatible",
                "+https://www.netcraft.com/about-netcraft/)",
                "Netcraft Bot"
            ],
            "primary_user_agent": "NetcraftSurveyAgent",
            "robots_token": "NetcraftSurveyAgent",
            "verified": "yes",
            "respects_robots": "yes",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "unknown",
            "source_type": "unknown",
            "short_description": "Netcraft cybersecurity crawler that builds threat intelligence databases to detect phishing sites.",
            "verification_method": "Treat this entry as verified only when the exact user-agent matches the operator documentation at https://www.netcraft.com/about-netcraft/.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: NetcraftSurveyAgent\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"NetcraftSurveyAgent\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.netcraft.com/about-netcraft/"
                }
            ],
            "updated_at": "2026-04-01T00:55:23Z"
        },
        {
            "id": "netestate-imprint-crawler",
            "name": "netEstate Imprint Crawler",
            "slug": "netestate-imprint-crawler",
            "url": "https://botcrawl.com/bots/netestate-imprint-crawler/",
            "status": "active",
            "operator": "netEstate",
            "company": "netEstate",
            "family": "netEstate",
            "category": "ai",
            "kind": "crawler",
            "purpose": "general-crawl",
            "identity_type": "unknown",
            "user_agents": [
                "netEstate NE Crawler",
                "netEstate NE Crawler (+http://www.website-datenbank.de/)"
            ],
            "primary_user_agent": "netEstate NE Crawler",
            "robots_token": "netEstate NE Crawler",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "The NetEstate Imprint crawler crawls websites for public contact information.",
            "short_description": "The NetEstate Imprint crawler crawls websites for public contact information.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: netEstate NE Crawler\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"netEstate NE Crawler\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "http://www.website-datenbank.de/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:42Z"
        },
        {
            "id": "neticle-crawler",
            "name": "Neticle Crawler",
            "slug": "neticle-crawler",
            "url": "https://botcrawl.com/bots/neticle-crawler/",
            "status": "active",
            "operator": "Neticle",
            "company": "Neticle",
            "family": "Neticle",
            "category": "monitoring",
            "kind": "unknown",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "Neticle Crawler",
                "Mozilla/5.0 (compatible; Neticle Crawler; +https://neticle.com)",
                "Neticle Bot"
            ],
            "primary_user_agent": "Neticle Crawler",
            "robots_token": "Neticle Crawler",
            "verified": "unknown",
            "respects_robots": "unknown",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "unknown",
            "source_type": "unknown",
            "short_description": "Neticle media monitoring crawler that collects web content to quantify online opinions and sentiment.",
            "verification_method": "No official IP-range or reverse-DNS verification method is documented here. Match the exact user-agent and request behavior against the source documentation at https://neticle.com.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: Neticle Crawler\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Neticle Crawler\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://neticle.com"
                }
            ],
            "updated_at": "2026-04-01T00:55:23Z"
        },
        {
            "id": "netumo",
            "name": "Netumo",
            "slug": "netumo",
            "url": "https://botcrawl.com/bots/netumo/",
            "status": "active",
            "operator": "Netumo",
            "company": "Netumo",
            "family": "Netumo",
            "category": "monitoring",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "netumo"
            ],
            "primary_user_agent": "netumo",
            "robots_token": "netumo",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Website uptime monitor service",
            "short_description": "Website uptime monitor service",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: netumo\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"netumo\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://docs.netumo.com/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:42Z"
        },
        {
            "id": "netvigie",
            "name": "NETVIGIE",
            "slug": "netvigie",
            "url": "https://botcrawl.com/bots/netvigie/",
            "status": "active",
            "operator": "Netvigie",
            "company": "Netvigie",
            "family": "Netvigie",
            "category": "monitoring",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "NETVIGIE",
                "Mozilla [...] Chrome [...] NETVIGIE"
            ],
            "primary_user_agent": "NETVIGIE",
            "robots_token": "NETVIGIE",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "We check the availability and performance of our clients' websites and mobile apps.",
            "short_description": "We check the availability and performance of our clients' websites and mobile apps.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: NETVIGIE\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"NETVIGIE\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://netvigie.com/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:42Z"
        },
        {
            "id": "new-relic",
            "name": "New Relic",
            "slug": "new-relic",
            "url": "https://botcrawl.com/bots/new-relic/",
            "status": "active",
            "operator": "New Relic",
            "company": "New Relic",
            "family": "New Relic",
            "category": "monitoring",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "NewRelicbot",
                "Mozilla/5.0 (compatible; NewRelicbot/2.1; +http://www.newrelic.com)"
            ],
            "primary_user_agent": "NewRelicbot",
            "robots_token": "NewRelicbot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "The New Relic bot is used by New Relic's Intelligent Observability Platform to monitor customer applications for availability and performance issues",
            "short_description": "The New Relic bot is used by New Relic's Intelligent Observability Platform to monitor customer applications for availability and performance issues",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: NewRelicbot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"NewRelicbot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://newrelic.com/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:42Z"
        },
        {
            "id": "new-york-times-newsgathering",
            "name": "New York Times Newsgathering",
            "slug": "new-york-times-newsgathering",
            "url": "https://botcrawl.com/bots/new-york-times-newsgathering/",
            "status": "active",
            "operator": "The New York Times",
            "company": "The New York Times",
            "family": "The New York Times",
            "category": "unknown",
            "kind": "unknown",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "scraping@nytimes.com",
                "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36 nyt_scraping/scraping@nytimes.com"
            ],
            "primary_user_agent": "scraping@nytimes.com",
            "robots_token": "scraping@nytimes.com",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Coders within NYT's newsroom collect public, non-copyright data, e.g. our U.S. Elections pages and Covid-19 trackers.",
            "short_description": "Coders within NYT's newsroom collect public, non-copyright data, such as U.S. Elections pages and Covid-19 trackers.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: scraping@nytimes.com\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"scraping@nytimes.com\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://int.nyt.com/assets/scraping.json"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:42Z"
        },
        {
            "id": "newsbank",
            "name": "NewsBank",
            "slug": "newsbank",
            "url": "https://botcrawl.com/bots/newsbank/",
            "status": "active",
            "operator": "NewsBank",
            "company": "NewsBank",
            "family": "NewsBank",
            "category": "unknown",
            "kind": "unknown",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "NewsBank",
                "NewsBank.com/1.0",
                "NewsBank.comMobile/1.0"
            ],
            "primary_user_agent": "NewsBank",
            "robots_token": "NewsBank",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "NewsBank aggregates licensed publisher content for schools, libraries, and government research, learning, and archiving.",
            "short_description": "NewsBank aggregates licensed publisher content for schools, libraries, and government research, learning, and archiving.",
            "verification_method": "Validate the published identifier against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or stronger verification when available.",
            "rules": {
                "robots": "User-agent: NewsBank\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"NewsBank\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.newsbank.com/"
                }
            ],
            "last_verified": "2026-04-19",
            "last_checked": "2026-04-19",
            "updated_at": "2026-04-30T09:15:54Z"
        },
        {
            "id": "newsblur",
            "name": "NewsBlur",
            "slug": "newsblur",
            "url": "https://botcrawl.com/bots/newsblur/",
            "status": "active",
            "operator": "NewsBlur",
            "company": "NewsBlur",
            "family": "NewsBlur",
            "category": "search",
            "kind": "fetcher",
            "purpose": "indexing",
            "identity_type": "unknown",
            "user_agents": [
                "NewsBlur",
                "NewsBlur Feed Fetcher"
            ],
            "primary_user_agent": "NewsBlur",
            "robots_token": "NewsBlur",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "RSS News fetcher",
            "short_description": "RSS News fetcher",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: NewsBlur\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"NewsBlur\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://newsblur.com/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:42Z"
        },
        {
            "id": "newsnow",
            "name": "NewsNow",
            "slug": "newsnow",
            "url": "https://botcrawl.com/bots/newsnow/",
            "status": "active",
            "operator": "NewsNowUK",
            "company": "NewsNowUK",
            "family": "NewsNowUK",
            "category": "search",
            "kind": "crawler",
            "purpose": "indexing",
            "identity_type": "unknown",
            "user_agents": [
                "NewsNow",
                "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT) NewsNow/1.0"
            ],
            "primary_user_agent": "NewsNow",
            "robots_token": "NewsNow",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "The NewsNow bot is the web crawler for the news aggregator service NewsNow.",
            "short_description": "The NewsNow bot is the web crawler for the news aggregator service NewsNow.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: NewsNow\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"NewsNow\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.newsnow.co.uk/h/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:42Z"
        },
        {
            "id": "newsroom-bi",
            "name": "NewsRoom.BI",
            "slug": "newsroom-bi",
            "url": "https://botcrawl.com/bots/newsroom-bi/",
            "status": "active",
            "operator": "Marfeel",
            "company": "Marfeel",
            "family": "Marfeel",
            "category": "monitoring",
            "kind": "unknown",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "NewsRoom.BI",
                "Mozilla/5.0 (compatible; NewsRoom.BI; +https://marfeel.com)",
                "Marfeel Bot",
                "NewsRoom BI"
            ],
            "primary_user_agent": "NewsRoom.BI",
            "robots_token": "NewsRoom.BI",
            "verified": "unknown",
            "respects_robots": "unknown",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "unknown",
            "source_type": "unknown",
            "short_description": "Marfeel analytics crawler that collects data to support analytics and personalization for publishers.",
            "verification_method": "No official IP-range or reverse-DNS verification method is documented here. Match the exact user-agent and request behavior against the source documentation at https://marfeel.com.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: NewsRoom.BI\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"NewsRoom.BI\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://marfeel.com"
                }
            ],
            "updated_at": "2026-04-01T00:55:24Z"
        },
        {
            "id": "nicecrawler",
            "name": "Nicecrawler",
            "slug": "nicecrawler",
            "url": "https://botcrawl.com/bots/nicecrawler/",
            "status": "active",
            "operator": "NiceCrawler",
            "company": "NiceCrawler",
            "family": "NiceCrawler",
            "category": "scraper",
            "kind": "unknown",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "Nicecrawler",
                "Mozilla/5.0 (compatible; Nicecrawler; +https://nicecrawler.com)",
                "Nice Crawler"
            ],
            "primary_user_agent": "Nicecrawler",
            "robots_token": "Nicecrawler",
            "verified": "unknown",
            "respects_robots": "unknown",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "unknown",
            "source_type": "unknown",
            "short_description": "NiceCrawler web archiving crawler for website preservation.",
            "verification_method": "No official IP-range or reverse-DNS verification method is documented here. Match the exact user-agent and request behavior against the source documentation at https://nicecrawler.com.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: Nicecrawler\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Nicecrawler\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://nicecrawler.com"
                }
            ],
            "updated_at": "2026-04-01T00:55:22Z"
        },
        {
            "id": "nitrobot",
            "name": "NitroBot",
            "slug": "nitrobot",
            "url": "https://botcrawl.com/bots/nitrobot/",
            "status": "active",
            "operator": "NitroPack Ltd",
            "company": "NitroPack Ltd",
            "family": "NitroPack Ltd",
            "category": "scraper",
            "kind": "crawler",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "Nitro-",
                "Nitro-Warmup-Agent",
                "Nitro-Webhook-Agent",
                "Nitro-Optimizer-Agent"
            ],
            "primary_user_agent": "Nitro-",
            "robots_token": "Nitro-",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "We run a cloud based site speed optimization solution. As such, we need to make requests to our clients' sites in order to fetch the content that needs to be optimized. We have several sub systems that can fire requests and each one can be identified based on the user agent suffix.",
            "short_description": "We run a cloud based site speed optimization solution.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Nitro-\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Nitro-\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://nitropack.io/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:42Z"
        },
        {
            "id": "nixstatsmonitoringbot",
            "name": "NixStatsMonitoringBot",
            "slug": "nixstatsmonitoringbot",
            "url": "https://botcrawl.com/bots/nixstatsmonitoringbot/",
            "status": "active",
            "operator": "NixStats",
            "company": "NixStats",
            "family": "NixStats",
            "category": "monitoring",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "NIXStatsbot/1.1",
                "Mozilla/5.0 (compatible; NIXStatsbot/1.1; +http://www.nixstats.com/bot.html)",
                "NIXStatsbot"
            ],
            "primary_user_agent": "NIXStatsbot/1.1",
            "robots_token": "NIXStatsbot/1.1",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "The NixStatsMonitoringBot is the HTTP monitoring probe for NixStats to monitor website availability and performance.",
            "short_description": "The NixStatsMonitoringBot is the HTTP monitoring probe for NixStats to monitor website availability and performance.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: NIXStatsbot/1.1\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"NIXStatsbot/1.1\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.nixstats.com/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:43Z"
        },
        {
            "id": "nodeping",
            "name": "Nodeping",
            "slug": "nodeping",
            "url": "https://botcrawl.com/bots/nodeping/",
            "status": "active",
            "operator": "Nodeping",
            "company": "Nodeping",
            "family": "Nodeping",
            "category": "monitoring",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "NodePing"
            ],
            "primary_user_agent": "NodePing",
            "robots_token": "NodePing",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "The NodePing HTTP Monitoring probe monitors customer websites for uptime.",
            "short_description": "The NodePing HTTP Monitoring probe monitors customer websites for uptime.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: NodePing\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"NodePing\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://nodeping.com/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:43Z"
        },
        {
            "id": "noibu-js-beautifier",
            "name": "Noibu JS Beautifier",
            "slug": "noibu-js-beautifier",
            "url": "https://botcrawl.com/bots/noibu-js-beautifier/",
            "status": "active",
            "operator": "Noibu",
            "company": "Noibu",
            "family": "Noibu",
            "category": "monitoring",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "Noibu",
                "Noibu JS Beautifier"
            ],
            "primary_user_agent": "Noibu",
            "robots_token": "Noibu",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Our microservice downloads js files from our users servers in order to format them and show them a human readable file. This is done to facilitate solving errors associated with said file",
            "short_description": "Our microservice downloads js files from our users servers in order to format them and show them a human readable file.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Noibu\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Noibu\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://noibu.com/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:43Z"
        },
        {
            "id": "noorobot",
            "name": "Noorobot",
            "slug": "noorobot",
            "url": "https://botcrawl.com/bots/noorobot/",
            "status": "active",
            "operator": "Noor Digital Agency AB",
            "company": "Noor Digital Agency AB",
            "family": "Noor Digital Agency AB",
            "category": "scraper",
            "kind": "crawler",
            "purpose": "general-crawl",
            "identity_type": "unknown",
            "user_agents": [
                "noorobot"
            ],
            "primary_user_agent": "noorobot",
            "robots_token": "noorobot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Noorobot is an SEO tool that periodically crawls customer websites to provide recommendations and identify potential SEO-impacting problems.",
            "short_description": "Noorobot is an SEO tool that periodically crawls customer websites to provide recommendations and identify potential SEO-impacting problems.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: noorobot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"noorobot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://noordigital.com"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:43Z"
        },
        {
            "id": "nooshub",
            "name": "Nooshub",
            "slug": "nooshub",
            "url": "https://botcrawl.com/bots/nooshub/",
            "status": "active",
            "operator": "Nooshub",
            "company": "Nooshub",
            "family": "Nooshub",
            "category": "search",
            "kind": "fetcher",
            "purpose": "indexing",
            "identity_type": "unknown",
            "user_agents": [
                "Nooshub",
                "Mozilla/5.0 (compatible",
                "Nooshub/1.0",
                "+https://www.nooshub.com/statics/bots)"
            ],
            "primary_user_agent": "Nooshub",
            "robots_token": "Nooshub",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "RSS Reader that fetches RSS/Atom feeds",
            "short_description": "RSS Reader that fetches RSS/Atom feeds",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Nooshub\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Nooshub\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.nooshub.com"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:43Z"
        },
        {
            "id": "nostocrawlerbot",
            "name": "NostoCrawlerBot",
            "slug": "nostocrawlerbot",
            "url": "https://botcrawl.com/bots/nostocrawlerbot/",
            "status": "active",
            "operator": "Nosto",
            "company": "Nosto",
            "family": "Nosto",
            "category": "scraper",
            "kind": "crawler",
            "purpose": "general-crawl",
            "identity_type": "unknown",
            "user_agents": [
                "NostoCrawlerBot",
                "Mozilla/5.0 (compatible",
                "NostoCrawlerBot/1.0",
                "+http://my.nosto.com/tagging)"
            ],
            "primary_user_agent": "NostoCrawlerBot",
            "robots_token": "NostoCrawlerBot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Mozilla/5.0 (compatible; NostoCrawlerBot/1.0; +http://my.nosto.com/tagging)",
            "short_description": "Mozilla/5.0 (compatible; NostoCrawlerBot/1.0; +http://my.nosto.com/tagging)",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: NostoCrawlerBot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"NostoCrawlerBot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://help.nosto.com/en/articles/586911-nosto-s-bot-crawler-s-header-agent-details"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:43Z"
        },
        {
            "id": "nostra",
            "name": "Nostra",
            "slug": "nostra",
            "url": "https://botcrawl.com/bots/nostra/",
            "status": "active",
            "operator": "Nostra",
            "company": "Nostra",
            "family": "Nostra",
            "category": "scraper",
            "kind": "crawler",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "Nostra"
            ],
            "primary_user_agent": "Nostra",
            "robots_token": "Nostra",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Nostra accelerates site speed for managed web platforms",
            "short_description": "Nostra accelerates site speed for managed web platforms",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Nostra\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Nostra\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.nostra.ai/edge-optimization"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:43Z"
        },
        {
            "id": "notabot",
            "name": "Notabot",
            "slug": "notabot",
            "url": "https://botcrawl.com/bots/notabot/",
            "status": "active",
            "operator": "Helpfeel",
            "company": "Helpfeel",
            "family": "Helpfeel",
            "category": "search",
            "kind": "crawler",
            "purpose": "indexing",
            "identity_type": "unknown",
            "user_agents": [
                "Notabot"
            ],
            "primary_user_agent": "Notabot",
            "robots_token": "Notabot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Crawler to integrate Helpfeel external search engine.",
            "short_description": "Crawler to integrate Helpfeel external search engine.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Notabot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Notabot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://corp.helpfeel.com/ja/home"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:43Z"
        },
        {
            "id": "novellum-ai-crawl",
            "name": "Novellum AI Crawl",
            "slug": "novellum-ai-crawl",
            "url": "https://botcrawl.com/bots/novellum-ai-crawl/",
            "status": "active",
            "operator": "Novellum",
            "company": "Novellum",
            "family": "Novellum",
            "category": "ai",
            "kind": "crawler",
            "purpose": "general-crawl",
            "identity_type": "unknown",
            "user_agents": [
                "Novellum",
                "Novellum crawler"
            ],
            "primary_user_agent": "Novellum",
            "robots_token": "Novellum",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Novellum.ai is building out tools for building agents. This MCP tool will be used by agents to crawl sites.",
            "short_description": "Novellum.ai is building out tools for building agents.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Novellum\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Novellum\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://crawl.corp.novellum.ai/docs"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:43Z"
        },
        {
            "id": "nutch",
            "name": "Nutch",
            "slug": "nutch",
            "url": "https://botcrawl.com/bots/nutch/",
            "status": "active",
            "operator": "Apache Nutch",
            "company": "Apache Nutch",
            "family": "Apache Nutch",
            "category": "search",
            "kind": "crawler",
            "purpose": "general-crawl",
            "identity_type": "official-documented",
            "user_agents": [
                "Nutch",
                "Nutch-based robot"
            ],
            "primary_user_agent": "Nutch",
            "robots_token": "Nutch",
            "verified": "unknown",
            "respects_robots": "yes",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "official",
            "short_description": "Apache Nutch says Nutch-based robots should respond to the agent name Nutch and obey robots.txt and robots META tags.",
            "verification_method": "Official Apache Nutch sysadmin/webmaster guidance; match the Nutch agent token, but note that Nutch deployments are not centrally operated by Apache.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: Nutch\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Nutch\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://nutch.apache.org/community/bot/"
                }
            ],
            "last_verified": "2026-04-01",
            "updated_at": "2026-04-30T09:15:43Z"
        },
        {
            "id": "nytimes-newsroom-bot",
            "name": "NYTimes-Newsroom-Bot",
            "slug": "nytimes-newsroom-bot",
            "url": "https://botcrawl.com/bots/nytimes-newsroom-bot/",
            "status": "active",
            "operator": "The New York Times",
            "company": "The New York Times",
            "family": "The New York Times",
            "category": "search",
            "kind": "unknown",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "NYTimes-Newsroom-Bot",
                "Mozilla/5.0 (compatible",
                "+https://www.nytimes.com)",
                "NYT Bot",
                "New York Times Bot"
            ],
            "primary_user_agent": "NYTimes-Newsroom-Bot",
            "robots_token": "NYTimes-Newsroom-Bot",
            "verified": "unknown",
            "respects_robots": "unknown",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "unknown",
            "source_type": "unknown",
            "short_description": "New York Times newsroom scraping bot that collects publicly available data for journalistic projects.",
            "verification_method": "No official IP-range or reverse-DNS verification method is documented here. Match the exact user-agent and request behavior against the source documentation at https://www.nytimes.com.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: NYTimes-Newsroom-Bot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"NYTimes-Newsroom-Bot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.nytimes.com"
                }
            ],
            "updated_at": "2026-04-01T00:55:24Z"
        },
        {
            "id": "oai-searchbot",
            "name": "OAI-SearchBot",
            "slug": "oai-searchbot",
            "url": "https://botcrawl.com/bots/oai-searchbot/",
            "status": "active",
            "operator": "OpenAI",
            "company": "OpenAI",
            "family": "OpenAI",
            "category": "search",
            "kind": "crawler",
            "purpose": "search",
            "identity_type": "official-documented",
            "user_agents": [
                "OAI-SearchBot",
                "Mozilla/5.0 (Macintosh",
                "Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML",
                "like Gecko) Chrome/131.0.0.0 Safari/537.36",
                "compatible",
                "OAI-SearchBot/1.3",
                "+https://openai.com/searchbot",
                "OpenAI search bot"
            ],
            "primary_user_agent": "OAI-SearchBot",
            "robots_token": "OAI-SearchBot",
            "verified": "yes",
            "respects_robots": "yes",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "high",
            "source_type": "official",
            "common_use": "OAI-SearchBot is used to link to and surface websites in search results in the SearchGPT prototype",
            "short_description": "OpenAI search crawler for ChatGPT search results.",
            "verification_method": "Verify the user-agent together with OpenAI's published IP ranges for OAI-SearchBot.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "ip_ranges_url": "https://openai.com/searchbot.json",
            "rules": {
                "robots": "User-agent: OAI-SearchBot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"OAI-SearchBot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://developers.openai.com/api/docs/bots/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:43Z"
        },
        {
            "id": "oh-dear",
            "name": "Oh Dear",
            "slug": "oh-dear",
            "url": "https://botcrawl.com/bots/oh-dear/",
            "status": "active",
            "operator": "Oh Dear",
            "company": "Oh Dear",
            "family": "Oh Dear",
            "category": "monitoring",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "ohdear.app",
                "Mozilla/5.0 (compatible",
                "OhDear/1.1",
                "+https://ohdear.app/checker",
                "OhDear.app (+https://ohdear.app/docs/checks/uptime)",
                "OhDear.app"
            ],
            "primary_user_agent": "ohdear.app",
            "robots_token": "ohdear.app",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "The Oh Dear application availability and performance monitoring checker.",
            "short_description": "The Oh Dear application availability and performance monitoring checker.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: ohdear.app\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"ohdear.app\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://ohdear.app/docs/faq/what-is-the-oh-dear-checker"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:43Z"
        },
        {
            "id": "okx-dolphin-crawler",
            "name": "OKX-dolphin-crawler",
            "slug": "okx-dolphin-crawler",
            "url": "https://botcrawl.com/bots/okx-dolphin-crawler/",
            "status": "active",
            "operator": "OKX",
            "company": "OKX",
            "family": "OKX",
            "category": "security",
            "kind": "unknown",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "OKX-dolphin-crawler",
                "Mozilla/5.0 (Windows NT 10.0",
                "Win64",
                "x64) AppleWebKit/537.36 (KHTML",
                "like Gecko) Chrome/110.0.0.0 Safari/537.36 OKX-dolphin-crawler"
            ],
            "primary_user_agent": "OKX-dolphin-crawler",
            "robots_token": "OKX-dolphin-crawler",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "OKX Dolphin Crawler simulates visits to target web pages as part of malicious dApp scanning",
            "short_description": "OKX Dolphin Crawler simulates visits to target web pages as part of malicious dApp scanning",
            "verification_method": "Validate the published identifier against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or stronger verification when available.",
            "rules": {
                "robots": "User-agent: OKX-dolphin-crawler\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"OKX-dolphin-crawler\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://web3.okx.com/security/crawler-allowlist"
                }
            ],
            "last_verified": "2026-04-19",
            "last_checked": "2026-04-19",
            "updated_at": "2026-04-30T09:15:54Z"
        },
        {
            "id": "omgili",
            "name": "omgili",
            "slug": "omgili",
            "url": "https://botcrawl.com/bots/omgili/",
            "status": "active",
            "operator": "Webz.io",
            "company": "Webz.io",
            "family": "Webz.io",
            "category": "ai",
            "kind": "unknown",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "omgili",
                "Mozilla/5.0 (compatible",
                "omgili/0.5 +http://omgili.com)",
                "Webz.io Omgili"
            ],
            "primary_user_agent": "omgili",
            "robots_token": "omgili",
            "verified": "unknown",
            "respects_robots": "unknown",
            "risk": "high",
            "recommended_action": "depends",
            "confidence": "unknown",
            "source_type": "unknown",
            "short_description": "Webz.io web crawler that maintains a repository of web crawl data sold to other companies.",
            "verification_method": "Verify the exact user-agent against Webz.io's published crawler documentation.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: omgili\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"omgili\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://webz.io"
                }
            ],
            "updated_at": "2026-04-01T00:55:17Z"
        },
        {
            "id": "omnisend",
            "name": "Omnisend",
            "slug": "omnisend",
            "url": "https://botcrawl.com/bots/omnisend/",
            "status": "active",
            "operator": "Omnisend",
            "company": "Omnisend",
            "family": "Omnisend",
            "category": "unknown",
            "kind": "fetcher",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "Omnisend",
                "Omnisend/1.0"
            ],
            "primary_user_agent": "Omnisend",
            "robots_token": "Omnisend",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Bot for checking omnisend integration in woocommerce shops and shopify",
            "short_description": "Bot for checking omnisend integration in woocommerce shops and shopify",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Omnisend\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Omnisend\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://bots.omnisend.io/cf.txt"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:43Z"
        },
        {
            "id": "oncrawl",
            "name": "OnCrawl",
            "slug": "oncrawl",
            "url": "https://botcrawl.com/bots/oncrawl/",
            "status": "active",
            "operator": "Oncrawl",
            "company": "Oncrawl",
            "family": "Oncrawl",
            "category": "scraper",
            "kind": "fetcher",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "OnCrawl Mobile/1.0",
                "Mozilla/5.0 (compatible",
                "+http://www.oncrawl.com/)",
                "Oncrawl crawler",
                "www.oncrawl.com"
            ],
            "primary_user_agent": "OnCrawl Mobile/1.0",
            "robots_token": "OnCrawl Mobile/1.0",
            "verified": "yes",
            "respects_robots": "yes",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "high",
            "source_type": "directory",
            "short_description": "OnCrawl crawler used by the OnCrawl SEO platform.",
            "verification_method": "Validate the user-agent pattern, operator documentation, and any published IP ranges before trusting.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: www.oncrawl.com\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"OnCrawl Mobile/1.0\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.oncrawl.com/"
                }
            ],
            "last_verified": "2026-04-01",
            "updated_at": "2026-04-30T09:15:43Z"
        },
        {
            "id": "onetrust-cmp-scanner",
            "name": "Onetrust CMP Scanner",
            "slug": "onetrust-cmp-scanner",
            "url": "https://botcrawl.com/bots/onetrust-cmp-scanner/",
            "status": "active",
            "operator": "OneTrust, LLC.",
            "company": "OneTrust, LLC.",
            "family": "OneTrust, LLC.",
            "category": "monitoring",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "OneTrust",
                "Mozilla/5.0 (X11",
                "Linux x86_64) AppleWebKit/537.36 (KHTML",
                "like Gecko) Chrome/120.0.6099.199 Safari/537.36"
            ],
            "primary_user_agent": "OneTrust",
            "robots_token": "OneTrust",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "It identifies and categorizes cookies and tracking tech on customers site, pages, forms, tags, storage, and cookies",
            "short_description": "It identifies and categorizes cookies and tracking tech on customers site, pages, forms, tags, storage, and cookies",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: OneTrust\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"OneTrust\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.onetrust.com/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:43Z"
        },
        {
            "id": "online-webceo-bot",
            "name": "Online Webceo Bot",
            "slug": "online-webceo-bot",
            "url": "https://botcrawl.com/bots/online-webceo-bot/",
            "status": "active",
            "operator": "Online Webceo",
            "company": "Online Webceo",
            "family": "Online Webceo",
            "category": "scraper",
            "kind": "crawler",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "online-webceo-bot",
                "Mozilla/5.0 (compatible",
                "online-webceo-bot/1.0",
                "+http://online.webceo.com)"
            ],
            "primary_user_agent": "online-webceo-bot",
            "robots_token": "online-webceo-bot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "A bot associated with WebCEO, a company that provides SEO tools and services",
            "short_description": "A bot associated with WebCEO, a company that provides SEO tools and services",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: online-webceo-bot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"online-webceo-bot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.webceo.com/webceo-bots.htm"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:43Z"
        },
        {
            "id": "onlineornot-monitor",
            "name": "OnlineOrNot Monitor",
            "slug": "onlineornot-monitor",
            "url": "https://botcrawl.com/bots/onlineornot-monitor/",
            "status": "active",
            "operator": "OnlineOrNot",
            "company": "OnlineOrNot",
            "family": "OnlineOrNot",
            "category": "monitoring",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "OnlineOrNot",
                "OnlineOrNot.com_bot_1.0_(https://onlineornot.com)"
            ],
            "primary_user_agent": "OnlineOrNot",
            "robots_token": "OnlineOrNot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "OnlineOrNot provides website monitoring in the form of uptime checks and page speed tests.",
            "short_description": "OnlineOrNot provides website monitoring in the form of uptime checks and page speed tests.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: OnlineOrNot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"OnlineOrNot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://onlineornot.com/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:43Z"
        },
        {
            "id": "opengraph-bot",
            "name": "Opengraph Bot",
            "slug": "opengraph-bot",
            "url": "https://botcrawl.com/bots/opengraph-bot/",
            "status": "active",
            "operator": "Opengraph",
            "company": "Opengraph",
            "family": "Opengraph",
            "category": "scraper",
            "kind": "preview",
            "purpose": "general-crawl",
            "identity_type": "unknown",
            "user_agents": [
                "OpenGraph.io/",
                "Mozilla/5.0 (compatible, OpenGraph.io/1.1, +https://opengraph.io/ AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36",
                "OpenGraph.io"
            ],
            "primary_user_agent": "OpenGraph.io/",
            "robots_token": "OpenGraph.io/",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Our API is used by mostly consumer facing products to preview links when sharing them on their platforms. For example, how when a link is shared on Facebook or Slack, those platforms provide a description/title/image to make the content more enticing.",
            "short_description": "Our API is used by mostly consumer facing products to preview links when sharing them on their platforms.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: OpenGraph.io/\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"OpenGraph.io/\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.opengraph.io/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:43Z"
        },
        {
            "id": "openrss",
            "name": "OpenRSS",
            "slug": "openrss",
            "url": "https://botcrawl.com/bots/openrss/",
            "status": "active",
            "operator": "OpenRSS",
            "company": "OpenRSS",
            "family": "OpenRSS",
            "category": "search",
            "kind": "fetcher",
            "purpose": "indexing",
            "identity_type": "unknown",
            "user_agents": [
                "OpenRSS"
            ],
            "primary_user_agent": "OpenRSS",
            "robots_token": "OpenRSS",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "RSS Feed Provider / Feed Fetcher",
            "short_description": "RSS Feed Provider / Feed Fetcher",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: OpenRSS\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"OpenRSS\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://openrss.org"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:43Z"
        },
        {
            "id": "orlo-link-preview",
            "name": "Orlo Link Preview",
            "slug": "orlo-link-preview",
            "url": "https://botcrawl.com/bots/orlo-link-preview/",
            "status": "active",
            "operator": "Orlo",
            "company": "Orlo",
            "family": "Orlo",
            "category": "scraper",
            "kind": "preview",
            "purpose": "general-crawl",
            "identity_type": "unknown",
            "user_agents": [
                "Orlo-LinkPreview",
                "Orlo-LinkPreview/1.0",
                "Orlo-LinkPreview/"
            ],
            "primary_user_agent": "Orlo-LinkPreview",
            "robots_token": "Orlo-LinkPreview",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "The Orlo Link Preview bot is used by the Orlo social media management platform. It fetches previews of links that are scheduled to be published in social media posts.",
            "short_description": "The Orlo Link Preview bot is used by the Orlo social media management platform.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Orlo-LinkPreview\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Orlo-LinkPreview\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://orlo.tech"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:43Z"
        },
        {
            "id": "ourfamilywizard",
            "name": "OurFamilyWizard",
            "slug": "ourfamilywizard",
            "url": "https://botcrawl.com/bots/ourfamilywizard/",
            "status": "active",
            "operator": "OurFamilyWizard",
            "company": "OurFamilyWizard",
            "family": "OurFamilyWizard",
            "category": "search",
            "kind": "fetcher",
            "purpose": "indexing",
            "identity_type": "unknown",
            "user_agents": [
                "Cozi-iCalendar-FeedReader"
            ],
            "primary_user_agent": "Cozi-iCalendar-FeedReader",
            "robots_token": "Cozi-iCalendar-FeedReader",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Users subscribe to external iCalendars and sync them into their Cozi calendars. We periodically fetch these iCals to keep them up to date.",
            "short_description": "Users subscribe to external iCalendars and sync them into their Cozi calendars.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Cozi-iCalendar-FeedReader\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Cozi-iCalendar-FeedReader\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://ourfamilywizard.com"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:43Z"
        },
        {
            "id": "outbrain",
            "name": "Outbrain",
            "slug": "outbrain",
            "url": "https://botcrawl.com/bots/outbrain/",
            "status": "active",
            "operator": "Outbrain",
            "company": "Outbrain",
            "family": "Outbrain",
            "category": "scraper",
            "kind": "crawler",
            "purpose": "general-crawl",
            "identity_type": "unknown",
            "user_agents": [
                "outbrain",
                "Mozilla/5.0 (Java) outbrain"
            ],
            "primary_user_agent": "outbrain",
            "robots_token": "outbrain",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "The Outbrain crawler analyzes content on publisher websites for the purpose of serving ads.",
            "short_description": "The Outbrain crawler analyzes content on publisher websites for the purpose of serving ads.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: outbrain\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"outbrain\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.outbrain.com"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:43Z"
        },
        {
            "id": "outsellurlvalidator",
            "name": "OutsellURLValidator",
            "slug": "outsellurlvalidator",
            "url": "https://botcrawl.com/bots/outsellurlvalidator/",
            "status": "active",
            "operator": "Outsell Corporation",
            "company": "Outsell Corporation",
            "family": "Outsell Corporation",
            "category": "monitoring",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "OutsellURLValidator"
            ],
            "primary_user_agent": "OutsellURLValidator",
            "robots_token": "OutsellURLValidator",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Validating client website URLs to monitor for hosting/provider changes",
            "short_description": "Validating client website URLs to monitor for hosting/provider changes",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: OutsellURLValidator\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"OutsellURLValidator\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.outsell.com/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:43Z"
        },
        {
            "id": "overcast",
            "name": "Overcast",
            "slug": "overcast",
            "url": "https://botcrawl.com/bots/overcast/",
            "status": "active",
            "operator": "Overcast Radio",
            "company": "Overcast Radio",
            "family": "Overcast Radio",
            "category": "search",
            "kind": "fetcher",
            "purpose": "indexing",
            "identity_type": "unknown",
            "user_agents": [
                "Overcast/1.0 Podcast Sync",
                "Overcast/1.0 Podcast Sync (123 subscribers, feed-id=456789, +http://overcast.fm/)"
            ],
            "primary_user_agent": "Overcast/1.0 Podcast Sync",
            "robots_token": "Overcast/1.0 Podcast Sync",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Overcast is a podcast player application, and its bot fetches RSS feeds and audio files from podcast hosting servers. This keeps the podcast directory and episodes updated for its users.",
            "short_description": "Overcast is a podcast player application, and its bot fetches RSS feeds and audio files from podcast hosting servers.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Overcast/1.0 Podcast Sync\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Overcast/1.0 Podcast Sync\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://overcast.fm/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:44Z"
        },
        {
            "id": "ozon-web-grabber",
            "name": "Ozon Web Grabber",
            "slug": "ozon-web-grabber",
            "url": "https://botcrawl.com/bots/ozon-web-grabber/",
            "status": "active",
            "operator": "Ozon",
            "company": "Ozon",
            "family": "Ozon",
            "category": "scraper",
            "kind": "preview",
            "purpose": "general-crawl",
            "identity_type": "unknown",
            "user_agents": [
                "OdklBot",
                "Mozilla/5.0 (compatible, OdklBot/1.0 like Linux, klass@odnoklassniki.ru)",
                "OdklBot/"
            ],
            "primary_user_agent": "OdklBot",
            "robots_token": "OdklBot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "A component that serves to load previews for external and internal links. For external links, whenever possible, information from the open graph tags specified on the page (title, descr, images/video) is used, for references to internal objects, the internal representation is used (in the form of specialized blocks in the topic).",
            "short_description": "A component that serves to load previews for external and internal links.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: OdklBot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"OdklBot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://ok.ru"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:44Z"
        },
        {
            "id": "paddle-webhooks",
            "name": "Paddle Webhooks",
            "slug": "paddle-webhooks",
            "url": "https://botcrawl.com/bots/paddle-webhooks/",
            "status": "active",
            "operator": "Paddle",
            "company": "Paddle",
            "family": "Paddle Webhooks",
            "category": "unknown",
            "kind": "fetcher",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "Paddle"
            ],
            "primary_user_agent": "Paddle",
            "robots_token": "Paddle",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Webhooks let you get notified when events happen in Paddle.",
            "short_description": "Webhooks let you get notified when events happen in Paddle.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Paddle\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Paddle\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://developer.paddle.com/webhooks/overview"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:44Z"
        },
        {
            "id": "paesslercloudbot",
            "name": "PaesslerCloudBot",
            "slug": "paesslercloudbot",
            "url": "https://botcrawl.com/bots/paesslercloudbot/",
            "status": "active",
            "operator": "Paessler",
            "company": "Paessler",
            "family": "Paessler",
            "category": "monitoring",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "PRTGCloudBot",
                "Mozilla/5.0 (compatible, PRTGCloudBot/1.0, +http://www.paessler.com/prtgcloudbot, for_[edf7e62f223b268942b7efa36b6be1e305fcdadb])"
            ],
            "primary_user_agent": "PRTGCloudBot",
            "robots_token": "PRTGCloudBot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "The PaesslerCloudBot is used by Paessler PRTG to monitor websites for availability and performance.",
            "short_description": "The PaesslerCloudBot is used by Paessler PRTG to monitor websites for availability and performance.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: PRTGCloudBot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"PRTGCloudBot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.paessler.com"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:44Z"
        },
        {
            "id": "pandalytics",
            "name": "Pandalytics",
            "slug": "pandalytics",
            "url": "https://botcrawl.com/bots/pandalytics/",
            "status": "active",
            "operator": "Domainsbot",
            "company": "Domainsbot",
            "family": "Domainsbot",
            "category": "search",
            "kind": "unknown",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "Pandalytics",
                "Mozilla/5.0 (compatible, Pandalytics, +https://domainsbot.com)",
                "Domainsbot Pandalytics"
            ],
            "primary_user_agent": "Pandalytics",
            "robots_token": "Pandalytics",
            "verified": "unknown",
            "respects_robots": "unknown",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "unknown",
            "source_type": "unknown",
            "short_description": "Domainsbot intelligence crawler that collects domain and website data for brand monitoring.",
            "verification_method": "No official IP-range or reverse-DNS verification method is documented here. Match the exact user-agent and request behavior against the source documentation at https://domainsbot.com.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: Pandalytics\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Pandalytics\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://domainsbot.com"
                }
            ],
            "updated_at": "2026-04-01T00:55:24Z"
        },
        {
            "id": "pangubot",
            "name": "PanguBot",
            "slug": "pangubot",
            "url": "https://botcrawl.com/bots/pangubot/",
            "status": "active",
            "operator": "Huawei",
            "company": "Huawei",
            "family": "Huawei",
            "category": "ai",
            "kind": "unknown",
            "purpose": "training",
            "identity_type": "unknown",
            "user_agents": [
                "PanguBot",
                "Mozilla/5.0 (compatible, PanguBot, +https://developer.huawei.com/consumer/en/doc/development/hiai-Guides/web-crawler-0000001050213903)",
                "Huawei Pangu Bot"
            ],
            "primary_user_agent": "PanguBot",
            "robots_token": "PanguBot",
            "verified": "unknown",
            "respects_robots": "unknown",
            "risk": "high",
            "recommended_action": "block",
            "confidence": "unknown",
            "source_type": "unknown",
            "short_description": "Huawei web crawler used to download training data for its Pangu AI models.",
            "verification_method": "No official IP-range or reverse-DNS verification method is documented here. Match the exact user-agent and request behavior against the source documentation at https://developer.huawei.com/consumer/en/doc/development/hiai-Guides/web-crawler-0000001050213903.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: PanguBot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"PanguBot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://developer.huawei.com/consumer/en/doc/development/hiai-Guides/web-crawler-0000001050213903"
                }
            ],
            "updated_at": "2026-04-01T00:55:17Z"
        },
        {
            "id": "pangusospider",
            "name": "PangusoSpider",
            "slug": "pangusospider",
            "url": "https://botcrawl.com/bots/pangusospider/",
            "status": "active",
            "operator": "Panguso",
            "company": "Panguso",
            "family": "Panguso",
            "category": "search",
            "kind": "unknown",
            "purpose": "general-crawl",
            "identity_type": "unknown",
            "user_agents": [
                "PangusoSpider"
            ],
            "primary_user_agent": "PangusoSpider",
            "robots_token": "PangusoSpider",
            "verified": "unknown",
            "respects_robots": "unknown",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "unknown",
            "source_type": "unknown",
            "short_description": "Panguso crawler token surfaced in Baidu property robots.txt.",
            "verification_method": "This token is surfaced in a Baidu-owned robots.txt file, but the source used for this entry does not publish a dedicated verification method. Confirm behavior conservatively and do not rely on the user-agent string alone.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: PangusoSpider\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"PangusoSpider\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://map.baidu.com/robots.txt"
                }
            ],
            "updated_at": "2026-04-01T01:04:50Z"
        },
        {
            "id": "panopta",
            "name": "Panopta",
            "slug": "panopta",
            "url": "https://botcrawl.com/bots/panopta/",
            "status": "active",
            "operator": "Panopta",
            "company": "Panopta",
            "family": "Panopta",
            "category": "monitoring",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "Panopta",
                "Panopta v1.1"
            ],
            "primary_user_agent": "Panopta",
            "robots_token": "Panopta",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Panopta is an uptime monitoring service acquired by Fortinet.",
            "short_description": "Panopta is an uptime monitoring service acquired by Fortinet.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Panopta\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Panopta\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.panopta.com"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:44Z"
        },
        {
            "id": "parse-ly",
            "name": "Parse.ly",
            "slug": "parse-ly",
            "url": "https://botcrawl.com/bots/parse-ly/",
            "status": "active",
            "operator": "Parse.ly",
            "company": "Parse.ly",
            "family": "Parse.ly",
            "category": "monitoring",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "parsely.com",
                "Mozilla/5.0 (compatible, parse.ly scraper/0.16, +http://parsely.com)"
            ],
            "primary_user_agent": "parsely.com",
            "robots_token": "parsely.com",
            "verified": "yes",
            "respects_robots": "yes",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Popular content analytics tool used by many major media and content teams.",
            "short_description": "Popular content analytics tool used by many major media and content teams.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: parsely.com\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"parsely.com\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.parse.ly/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:44Z"
        },
        {
            "id": "particlenewsbot",
            "name": "ParticleNewsBot",
            "slug": "particlenewsbot",
            "url": "https://botcrawl.com/bots/particlenewsbot/",
            "status": "active",
            "operator": "Particle",
            "company": "Particle",
            "family": "ParticleNewsBot",
            "category": "scraper",
            "kind": "crawler",
            "purpose": "indexing",
            "identity_type": "unknown",
            "user_agents": [
                "ParticleNewsBot"
            ],
            "primary_user_agent": "ParticleNewsBot",
            "robots_token": "ParticleNewsBot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Particle is an AI-powered aggregator that collects news from many sources.",
            "short_description": "Particle is an AI-powered aggregator that collects news from many sources.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: ParticleNewsBot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"ParticleNewsBot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://particle.news/bot"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:44Z"
        },
        {
            "id": "payhawk-invoice-fetching-agent",
            "name": "Payhawk Invoice Fetching Agent",
            "slug": "payhawk-invoice-fetching-agent",
            "url": "https://botcrawl.com/bots/payhawk-invoice-fetching-agent/",
            "status": "active",
            "operator": "Payhawk",
            "company": "Payhawk",
            "category": "unknown",
            "kind": "unknown",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "Payhawk Invoice Fetching Agent"
            ],
            "primary_user_agent": "Payhawk Invoice Fetching Agent",
            "robots_token": "Payhawk Invoice Fetching Agent",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "unknown",
            "source_type": "unknown",
            "short_description": "Automated browser bot that fetches invoices for users from supplier websites and attaches them to their expense records.",
            "verification_method": "Verify the user-agent token and validate against the operator documentation or the Cloudflare verified-bot directory when needed.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: Payhawk Invoice Fetching Agent\nDisallow: /"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://payhawk.com/help/overview-of-the-financial-controller-agent"
                }
            ],
            "updated_at": "2026-03-31T14:31:50Z"
        },
        {
            "id": "paypal",
            "name": "PayPal",
            "slug": "paypal",
            "url": "https://botcrawl.com/bots/paypal/",
            "status": "active",
            "operator": "PayPal",
            "company": "PayPal",
            "family": "PayPal",
            "category": "unknown",
            "kind": "fetcher",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "PayPal",
                "PayPal/AUHR-214.0-51787073",
                "PayPal IPN ( https://www.paypal.com/ipn )"
            ],
            "primary_user_agent": "PayPal",
            "robots_token": "PayPal",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "The PayPal webhook service is part of PayPal's Instant Payment Notification (IPN) message service, which automatically notifies merchants of events related to PayPal transactions.",
            "short_description": "The PayPal webhook service is part of PayPal's Instant Payment Notification (IPN) message service, which automatically notifies merchants of events related to PayPal transactions.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: PayPal\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"PayPal\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.paypal.com/ipn"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:44Z"
        },
        {
            "id": "payroll-bot",
            "name": "payroll-bot",
            "slug": "payroll-bot",
            "url": "https://botcrawl.com/bots/payroll-bot/",
            "status": "active",
            "operator": "ADP",
            "company": "ADP",
            "family": "payroll-bot",
            "category": "ai",
            "kind": "crawler",
            "purpose": "general-crawl",
            "identity_type": "unknown",
            "user_agents": [
                "AdpResearchBot",
                "AdpResearchBot/1.0",
                "AdpResearchBot/"
            ],
            "primary_user_agent": "AdpResearchBot",
            "robots_token": "AdpResearchBot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "payroll-bot is an AI crawler operated by ADP, Inc. to collect publicly available legal and payroll documentation.",
            "short_description": "payroll-bot is an AI crawler operated by ADP, Inc.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: AdpResearchBot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"AdpResearchBot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://payroll-bot.adp.com/docs"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:44Z"
        },
        {
            "id": "perplexity-user",
            "name": "Perplexity-User",
            "slug": "perplexity-user",
            "url": "https://botcrawl.com/bots/perplexity-user/",
            "status": "active",
            "operator": "Perplexity",
            "company": "Perplexity",
            "family": "Perplexity",
            "category": "ai",
            "kind": "fetcher",
            "purpose": "user-triggered",
            "identity_type": "official-documented",
            "user_agents": [
                "Perplexity-User",
                "Mozilla/5.0 AppleWebKit/537.36 (KHTML",
                "like Gecko",
                "compatible",
                "Perplexity-User/1.0",
                "+https://perplexity.ai/perplexity-user)",
                "Perplexity user fetcher"
            ],
            "primary_user_agent": "Perplexity-User",
            "robots_token": "Perplexity-User",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "high",
            "source_type": "official",
            "short_description": "User-triggered Perplexity fetcher for live answers.",
            "verification_method": "Verify the user-agent together with Perplexity's published IP ranges for Perplexity-User.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "ip_ranges_url": "https://www.perplexity.com/perplexity-user.json",
            "rules": {
                "robots": "User-agent: Perplexity-User\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Perplexity-User\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://docs.perplexity.ai/docs/resources/perplexity-crawlers"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:44Z"
        },
        {
            "id": "perplexitybot",
            "name": "PerplexityBot",
            "slug": "perplexitybot",
            "url": "https://botcrawl.com/bots/perplexitybot/",
            "status": "active",
            "operator": "Perplexity",
            "company": "Perplexity",
            "family": "Perplexity",
            "category": "search",
            "kind": "crawler",
            "purpose": "search",
            "identity_type": "official-documented",
            "user_agents": [
                "PerplexityBot",
                "Mozilla/5.0 AppleWebKit/537.36 (KHTML",
                "like Gecko",
                "compatible",
                "PerplexityBot/1.0",
                "+https://perplexity.ai/perplexitybot)",
                "Perplexity search bot"
            ],
            "primary_user_agent": "PerplexityBot",
            "robots_token": "PerplexityBot",
            "verified": "yes",
            "respects_robots": "yes",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "high",
            "source_type": "official",
            "short_description": "Perplexity crawler used for search indexing, not model training.",
            "verification_method": "Verify the user-agent together with Perplexity's published IP ranges for PerplexityBot.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "ip_ranges_url": "https://www.perplexity.com/perplexitybot.json",
            "rules": {
                "robots": "User-agent: PerplexityBot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"PerplexityBot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://docs.perplexity.ai/docs/resources/perplexity-crawlers"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:44Z"
        },
        {
            "id": "petalbot",
            "name": "PetalBot",
            "slug": "petalbot",
            "url": "https://botcrawl.com/bots/petalbot/",
            "status": "active",
            "operator": "Huawei",
            "company": "Huawei",
            "family": "Huawei",
            "category": "ai",
            "kind": "crawler",
            "purpose": "general-crawl",
            "identity_type": "unknown",
            "user_agents": [
                "PetalBot",
                "Mozilla/5.0 (Linux",
                "Android 7.0",
                ") AppleWebKit/537.36 (KHTML",
                "like Gecko) Mobile Safari/537.36 (compatible",
                "+https://webmaster.petalsearch.com/site/petalbot)",
                "Mozilla/5.0 (compatible",
                "Huawei Petal Search crawler"
            ],
            "primary_user_agent": "PetalBot",
            "robots_token": "PetalBot",
            "verified": "yes",
            "respects_robots": "yes",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "PetalBot accesses both PC and mobile websites to establish an index database that enables users to search your site's content in the Petal Search engine and to receive content recommendations in the Huawei Assistant and AI Search services; both services are powered by the Petal Search engine.",
            "short_description": "PetalBot accesses both PC and mobile websites to establish an index database that enables users to search your site's content in the Petal Search engine and…",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: PetalBot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"PetalBot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://aspiegel.com/petalbot"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:44Z"
        },
        {
            "id": "phindbot",
            "name": "PhindBot",
            "slug": "phindbot",
            "url": "https://botcrawl.com/bots/phindbot/",
            "status": "active",
            "operator": "Phind",
            "company": "Phind",
            "family": "Phind",
            "category": "ai",
            "kind": "unknown",
            "purpose": "search",
            "identity_type": "unknown",
            "user_agents": [
                "PhindBot",
                "Mozilla/5.0 (compatible",
                "PhindBot/1.0",
                "+https://phind.com/bot)",
                "Phind Bot"
            ],
            "primary_user_agent": "PhindBot",
            "robots_token": "PhindBot",
            "verified": "unknown",
            "respects_robots": "unknown",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "unknown",
            "source_type": "unknown",
            "short_description": "Phind AI-powered developer answer engine crawler that indexes technical content.",
            "verification_method": "No official IP-range or reverse-DNS verification method is documented here. Match the exact user-agent and request behavior against the source documentation at https://phind.com.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: PhindBot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"PhindBot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://phind.com"
                }
            ],
            "updated_at": "2026-04-01T00:55:20Z"
        },
        {
            "id": "pingdom",
            "name": "Pingdom",
            "slug": "pingdom",
            "url": "https://botcrawl.com/bots/pingdom/",
            "status": "active",
            "operator": "Pingdom",
            "company": "Pingdom",
            "family": "Pingdom",
            "category": "monitoring",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "Pingdom.com_bot_version_1.4",
                "Pingdom.com_bot_version_1.4_(http://www.pingdom.com/)",
                "http://www.pingdom.com/",
                "Pingdom monitoring bot"
            ],
            "primary_user_agent": "Pingdom.com_bot_version_1.4",
            "robots_token": "Pingdom.com_bot_version_1.4",
            "verified": "yes",
            "respects_robots": "yes",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "high",
            "source_type": "directory",
            "short_description": "Pingdom bot for uptime and performance monitoring.",
            "verification_method": "Validate the user-agent pattern, operator documentation, and any published IP ranges before trusting.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: Pingdom.com_bot_version_1.4\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Pingdom.com_bot_version_1.4\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.pingdom.com/"
                }
            ],
            "last_verified": "2026-04-01",
            "updated_at": "2026-04-30T09:15:44Z"
        },
        {
            "id": "pingping",
            "name": "PingPing",
            "slug": "pingping",
            "url": "https://botcrawl.com/bots/pingping/",
            "status": "active",
            "operator": "PingPing",
            "company": "PingPing",
            "family": "PingPing",
            "category": "monitoring",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "pingping.io/",
                "pingping.io/1.0",
                "PingPing Bot"
            ],
            "primary_user_agent": "pingping.io/",
            "robots_token": "pingping.io/",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "PingPing is a website monitoring service whose bots check website uptime and TLS certificate validity.",
            "short_description": "PingPing is a website monitoring service whose bots check website uptime and TLS certificate validity.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: pingping.io/\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"pingping.io/\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://pingping.io"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:44Z"
        },
        {
            "id": "pinterestbot",
            "name": "PinterestBot",
            "slug": "pinterestbot",
            "url": "https://botcrawl.com/bots/pinterestbot/",
            "status": "active",
            "operator": "Pinterest",
            "company": "Pinterest",
            "family": "Pinterest",
            "category": "scraper",
            "kind": "preview",
            "purpose": "general-crawl",
            "identity_type": "official-documented",
            "user_agents": [
                "Pinterestbot",
                "Mozilla/5.0 (compatible",
                "Pinterestbot/1.0",
                "+https://www.pinterest.com/bot.html)",
                "pinterest.com/",
                "Pinterest crawler"
            ],
            "primary_user_agent": "Pinterestbot",
            "robots_token": "Pinterestbot",
            "verified": "yes",
            "respects_robots": "yes",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "high",
            "source_type": "official",
            "short_description": "Pinterest crawler for indexing content and refreshing Pin metadata.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: Pinterestbot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Pinterestbot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://help.pinterest.com/en/business/article/pinterestbot"
                }
            ],
            "last_verified": "2026-04-01",
            "updated_at": "2026-04-30T09:15:44Z"
        },
        {
            "id": "pocket-casts-feed-parser",
            "name": "Pocket Casts Feed Parser",
            "slug": "pocket-casts-feed-parser",
            "url": "https://botcrawl.com/bots/pocket-casts-feed-parser/",
            "status": "active",
            "operator": "Pocket Casts",
            "company": "Pocket Casts",
            "family": "Pocket Casts",
            "category": "search",
            "kind": "fetcher",
            "purpose": "indexing",
            "identity_type": "unknown",
            "user_agents": [
                "Pocket Casts Feed Parser",
                "PocketCasts/1.0 (Pocket Casts Feed Parser",
                "+http://pocketcasts.com/)"
            ],
            "primary_user_agent": "Pocket Casts Feed Parser",
            "robots_token": "Pocket Casts Feed Parser",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Fetches podcast feeds for playback in the Pocket Casts apps.",
            "short_description": "Fetches podcast feeds for playback in the Pocket Casts apps.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Pocket Casts Feed Parser\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Pocket Casts Feed Parser\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://support.pocketcasts.com/knowledge-base/pocket-casts-feed-parser/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:44Z"
        },
        {
            "id": "poggio-citations",
            "name": "Poggio-Citations",
            "slug": "poggio-citations",
            "url": "https://botcrawl.com/bots/poggio-citations/",
            "status": "active",
            "operator": "Poggio",
            "company": "Poggio",
            "family": "Poggio",
            "category": "ai",
            "kind": "unknown",
            "purpose": "indexing",
            "identity_type": "unknown",
            "user_agents": [
                "Poggio-Citations",
                "Mozilla/5.0 (compatible",
                "+https://poggio.ai)",
                "Poggio Bot"
            ],
            "primary_user_agent": "Poggio-Citations",
            "robots_token": "Poggio-Citations",
            "verified": "unknown",
            "respects_robots": "unknown",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "unknown",
            "source_type": "unknown",
            "short_description": "Poggio AI sales enablement crawler that indexes web content for citation purposes.",
            "verification_method": "No official IP-range or reverse-DNS verification method is documented here. Match the exact user-agent and request behavior against the source documentation at https://poggio.ai.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: Poggio-Citations\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Poggio-Citations\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://poggio.ai"
                }
            ],
            "updated_at": "2026-04-01T00:55:27Z"
        },
        {
            "id": "polar",
            "name": "Polar",
            "slug": "polar",
            "url": "https://botcrawl.com/bots/polar/",
            "status": "active",
            "operator": "Polar",
            "company": "Polar",
            "family": "Polar",
            "category": "unknown",
            "kind": "fetcher",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "polar.sh webhooks"
            ],
            "primary_user_agent": "polar.sh webhooks",
            "robots_token": "polar.sh webhooks",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "The Polar webhooks integration sends HTTP requests to inform web servers about billing events.",
            "short_description": "The Polar webhooks integration sends HTTP requests to inform web servers about billing events.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: polar.sh webhooks\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"polar.sh webhooks\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://docs.polar.sh/integrate/webhooks/endpoints"
                }
            ],
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:44Z"
        },
        {
            "id": "potions",
            "name": "Potions",
            "slug": "potions",
            "url": "https://botcrawl.com/bots/potions/",
            "status": "active",
            "operator": "Potions",
            "company": "Potions",
            "family": "Potions",
            "category": "search",
            "kind": "crawler",
            "purpose": "indexing",
            "identity_type": "unknown",
            "user_agents": [
                "Potions",
                "Potions/1.0.0",
                "Potions/"
            ],
            "primary_user_agent": "Potions",
            "robots_token": "Potions",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "The Potions bot fetches product feeds and crawls data from its customers' websites, used for e-commerce related services.",
            "short_description": "The Potions bot fetches product feeds and crawls data from its customers' websites, used for e-commerce related services.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Potions\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Potions\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://get-potions.com"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:44Z"
        },
        {
            "id": "prerender",
            "name": "prerender",
            "slug": "prerender",
            "url": "https://botcrawl.com/bots/prerender/",
            "status": "active",
            "operator": "Prerender, LLC",
            "company": "Prerender, LLC",
            "family": "Prerender, LLC",
            "category": "scraper",
            "kind": "crawler",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "Prerender"
            ],
            "primary_user_agent": "Prerender",
            "robots_token": "Prerender",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "An HTML pre-rendering service for SEO of SPA (Single Page Application) websites.",
            "short_description": "An HTML pre-rendering service for SEO of SPA (Single Page Application) websites.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Prerender\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Prerender\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://docs.prerender.io/article/33-overview-of-prerender-crawlers"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:44Z"
        },
        {
            "id": "pressengine-bot",
            "name": "PressEngine Bot",
            "slug": "pressengine-bot",
            "url": "https://botcrawl.com/bots/pressengine-bot/",
            "status": "active",
            "operator": "PressEngine",
            "company": "PressEngine",
            "family": "PressEngine",
            "category": "scraper",
            "kind": "preview",
            "purpose": "general-crawl",
            "identity_type": "unknown",
            "user_agents": [
                "PressEngineBot",
                "PressEngineBot (+http://pressengine.net/crawl-policy)"
            ],
            "primary_user_agent": "PressEngineBot",
            "robots_token": "PressEngineBot",
            "verified": "yes",
            "respects_robots": "yes",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "The PressEngine Bot verifies coverage created by video games press as genuine and their own creation. When a member of the video games press is granted a review key for a video game they will create an article, known in the industry as \"coverage\". When they submit a URL to us as \"coverage\" we automatically verify this URL exists and is viewable. This automated code announces itself as the PressEngine Bot.",
            "short_description": "The PressEngine Bot verifies coverage created by video games press as genuine and their own creation.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: PressEngineBot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"PressEngineBot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.pressengine.net"
                }
            ],
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:44Z"
        },
        {
            "id": "printfriendly",
            "name": "PrintFriendly",
            "slug": "printfriendly",
            "url": "https://botcrawl.com/bots/printfriendly/",
            "status": "active",
            "operator": "PrintFriendly.com",
            "company": "PrintFriendly.com",
            "family": "PrintFriendly.com",
            "category": "unknown",
            "kind": "fetcher",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "PrintFriendly.com"
            ],
            "primary_user_agent": "PrintFriendly.com",
            "robots_token": "PrintFriendly.com",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Extract Content to Show Print Friendly version. Publishers typically embed our button - https://www.printfriendly.com/button - so that their visitors can view a Print Friendly Page and/or create a PDF",
            "short_description": "Extract Content to Show Print Friendly version.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: PrintFriendly.com\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"PrintFriendly.com\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.printfriendly.com/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:44Z"
        },
        {
            "id": "pro-sitemaps",
            "name": "Pro Sitemaps",
            "slug": "pro-sitemaps",
            "url": "https://botcrawl.com/bots/pro-sitemaps/",
            "status": "active",
            "operator": "PRO Sitemaps",
            "company": "PRO Sitemaps",
            "family": "PRO Sitemaps",
            "category": "monitoring",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "Pro-Sitemaps",
                "Mozilla/5.0 (compatible; Pro Sitemaps Generator; pro-sitemaps.com) Gecko Pro-Sitemaps/1.0",
                "Pro-Sitemaps/"
            ],
            "primary_user_agent": "Pro-Sitemaps",
            "robots_token": "Pro-Sitemaps",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Online sitemap generator service.",
            "short_description": "Online sitemap generator service.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Pro-Sitemaps\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Pro-Sitemaps\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://pro-sitemaps.com"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:45Z"
        },
        {
            "id": "productsup-crawler",
            "name": "Productsup Crawler",
            "slug": "productsup-crawler",
            "url": "https://botcrawl.com/bots/productsup-crawler/",
            "status": "active",
            "operator": "Productsup",
            "company": "Productsup",
            "family": "Productsup Crawler",
            "category": "scraper",
            "kind": "crawler",
            "purpose": "general-crawl",
            "identity_type": "unknown",
            "user_agents": [
                "productsup.io/crawler"
            ],
            "primary_user_agent": "productsup.io/crawler",
            "robots_token": "productsup.io/crawler",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Productsup crawls websites to import additional product data.",
            "short_description": "Productsup crawls websites to import additional product data.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: productsup.io/crawler\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"productsup.io/crawler\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://help.productsup.com/en/29437-29446-import-data-by-crawling-your-website.html"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:45Z"
        },
        {
            "id": "project-honeypot",
            "name": "Project Honeypot",
            "slug": "project-honeypot",
            "url": "https://botcrawl.com/bots/project-honeypot/",
            "status": "active",
            "operator": "Unspam Technologies, Inc",
            "company": "Unspam Technologies, Inc",
            "family": "Unspam Technologies, Inc",
            "category": "security",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "Verispider",
                "PHPot Verispider v0.1 - http://www.projecthoneypot.org/"
            ],
            "primary_user_agent": "Verispider",
            "robots_token": "Verispider",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "We use Project Honeypot for IP info.",
            "short_description": "We use Project Honeypot for IP info.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Verispider\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Verispider\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "http://www.projecthoneypot.org/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:45Z"
        },
        {
            "id": "projectshield-url-check",
            "name": "ProjectShield Url Check",
            "slug": "projectshield-url-check",
            "url": "https://botcrawl.com/bots/projectshield-url-check/",
            "status": "active",
            "operator": "Google",
            "company": "Google",
            "family": "Google",
            "category": "security",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "ProjectShield-UrlCheck",
                "Mozilla/5.0 (compatible; ProjectShield-UrlCheck; +http://g.co/projectshield)"
            ],
            "primary_user_agent": "ProjectShield-UrlCheck",
            "robots_token": "ProjectShield-UrlCheck",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Project Shield, created by Google Cloud and Jigsaw and powered by Google Cloud Armor, provides free unlimited protection against DDoS attacks, a type of digital attack used to censor information by taking websites offline.",
            "short_description": "Project Shield, created by Google Cloud and Jigsaw and powered by Google Cloud Armor, provides free unlimited protection against DDoS attacks, a type of digital attack…",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: ProjectShield-UrlCheck\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"ProjectShield-UrlCheck\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://projectshield.withgoogle.com/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:45Z"
        },
        {
            "id": "protopage",
            "name": "Protopage",
            "slug": "protopage",
            "url": "https://botcrawl.com/bots/protopage/",
            "status": "active",
            "operator": "Protopage Ltd",
            "company": "Protopage Ltd",
            "family": "Protopage Ltd",
            "category": "search",
            "kind": "fetcher",
            "purpose": "indexing",
            "identity_type": "unknown",
            "user_agents": [
                "Protopage",
                "Protopage/3.0 (http://www.protopage.com)"
            ],
            "primary_user_agent": "Protopage",
            "robots_token": "Protopage",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Protopage.com indexes RSS news headlines, mostly from news sites.",
            "short_description": "Protopage.com indexes RSS news headlines, mostly from news sites.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Protopage\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Protopage\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.protopage.com/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:45Z"
        },
        {
            "id": "proximic",
            "name": "Proximic",
            "slug": "proximic",
            "url": "https://botcrawl.com/bots/proximic/",
            "status": "active",
            "operator": "ComScore",
            "company": "ComScore",
            "family": "ComScore",
            "category": "scraper",
            "kind": "crawler",
            "purpose": "general-crawl",
            "identity_type": "unknown",
            "user_agents": [
                "compatible; proximic",
                "Mozilla/5.0 (compatible; proximic; +https://www.comscore.com/Web-Crawler)",
                "Mozilla/5.0 (compatible; proximic; +http://www.proximic.com/info/spider.php)"
            ],
            "primary_user_agent": "compatible; proximic",
            "robots_token": "compatible; proximic",
            "verified": "yes",
            "respects_robots": "yes",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "The Proximic crawler visits websites serving ads on behalf of them or their partners to determine which ads best fit a given website's content.",
            "short_description": "The Proximic crawler visits websites serving ads on behalf of them or their partners to determine which ads best fit a given website's content.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: compatible; proximic\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"compatible; proximic\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.comscore.com/Web-Crawler"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:45Z"
        },
        {
            "id": "pulsetic",
            "name": "Pulsetic",
            "slug": "pulsetic",
            "url": "https://botcrawl.com/bots/pulsetic/",
            "status": "active",
            "operator": "Pulsetic",
            "company": "Pulsetic",
            "family": "Pulsetic",
            "category": "monitoring",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "pulsetic.com",
                "pulsetic.com (+https://pulsetic.com)"
            ],
            "primary_user_agent": "pulsetic.com",
            "robots_token": "pulsetic.com",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Website monitoring service. Uptime monitoring and down alerts.",
            "short_description": "Website monitoring service.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: pulsetic.com\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"pulsetic.com\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://help.pulsetic.com/article/198-pulsetic-user-agent"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:45Z"
        },
        {
            "id": "pwabuilder",
            "name": "PWABuilder",
            "slug": "pwabuilder",
            "url": "https://botcrawl.com/bots/pwabuilder/",
            "status": "active",
            "operator": "Microsoft",
            "company": "Microsoft",
            "family": "Microsoft",
            "category": "unknown",
            "kind": "unknown",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "PWABuilderHttpAgent",
                "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36 Edg/96.0.1054.57 PWABuilderHttpAgent"
            ],
            "primary_user_agent": "PWABuilderHttpAgent",
            "robots_token": "PWABuilderHttpAgent",
            "verified": "yes",
            "respects_robots": "yes",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "PWABuilder (pwabuilder.com) is a free, open source developer tool from Microsoft that helps developers build progressive web apps and publish them in app stores. PWABuilder tool analyzes their website for Progressive Web App capabilities, such as a web manifest or service worker",
            "short_description": "PWABuilder (pwabuilder.com) is a free, open source developer tool from Microsoft that helps developers build progressive web apps and publish them in app stores.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: PWABuilderHttpAgent\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"PWABuilderHttpAgent\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.pwabuilder.com"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:45Z"
        },
        {
            "id": "qa-tech",
            "name": "QA.tech",
            "slug": "qa-tech",
            "url": "https://botcrawl.com/bots/qa-tech/",
            "status": "active",
            "operator": "QA.tech",
            "company": "QA.tech",
            "family": "QA.tech",
            "category": "monitoring",
            "kind": "unknown",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "qatech",
                "QA.tech"
            ],
            "primary_user_agent": "qatech",
            "robots_token": "qatech",
            "verified": "yes",
            "respects_robots": "unknown",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "botsfyi-verified-listing",
            "common_use": "Monitoring",
            "short_description": "Web agent that browses websites, identifies potential test cases, and executes tests against web applications.",
            "verification_method": "Public verified bot directory listing",
            "spoofing_risk": "Medium",
            "rules": {
                "cloudflare": "(http.user_agent contains \"qatech\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://bots.fyi/d/qatech"
                }
            ],
            "last_verified": "2026-04-19",
            "last_checked": "2026-04-19",
            "updated_at": "2026-04-30T09:15:45Z"
        },
        {
            "id": "qstash",
            "name": "QStash",
            "slug": "qstash",
            "url": "https://botcrawl.com/bots/qstash/",
            "status": "active",
            "operator": "Upstash",
            "company": "Upstash",
            "family": "Upstash",
            "category": "unknown",
            "kind": "unknown",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "qstash"
            ],
            "primary_user_agent": "qstash",
            "robots_token": "qstash",
            "verified": "yes",
            "respects_robots": "unknown",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "botsfyi-verified-listing",
            "common_use": "Webhook",
            "short_description": "QStash is a platform for building event-driven applications.",
            "verification_method": "Public verified bot directory listing",
            "spoofing_risk": "Medium",
            "rules": {
                "cloudflare": "(http.user_agent contains \"qstash\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://bots.fyi/d/qstash"
                }
            ],
            "last_verified": "2026-04-19",
            "last_checked": "2026-04-19",
            "updated_at": "2026-04-30T09:15:45Z"
        },
        {
            "id": "qualifiedbot",
            "name": "QualifiedBot",
            "slug": "qualifiedbot",
            "url": "https://botcrawl.com/bots/qualifiedbot/",
            "status": "active",
            "operator": "Qualified.com, Inc.",
            "company": "Qualified.com, Inc.",
            "family": "Qualified.com, Inc.",
            "category": "ai",
            "kind": "crawler",
            "purpose": "general-crawl",
            "identity_type": "unknown",
            "user_agents": [
                "QualifiedBot",
                "QualifiedBot/1.0",
                "Mozilla/5.0 (compatible; QualifiedBot/1.0; +https://www.qualified.com/legal/qualified-crawler-user-agent)",
                "Qualified crawler"
            ],
            "primary_user_agent": "QualifiedBot",
            "robots_token": "QualifiedBot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Bot crawls customer websites to provide information to customer hosted chatbots.",
            "short_description": "Bot crawls customer websites to provide information to customer hosted chatbots.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: QualifiedBot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"QualifiedBot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.qualified.com/legal/qualified-crawler-user-agent"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:45Z"
        },
        {
            "id": "qualys",
            "name": "Qualys",
            "slug": "qualys",
            "url": "https://botcrawl.com/bots/qualys/",
            "status": "active",
            "operator": "Qualys",
            "company": "Qualys",
            "family": "Qualys",
            "category": "security",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "cf-qualys-scanner"
            ],
            "primary_user_agent": "cf-qualys-scanner",
            "robots_token": "cf-qualys-scanner",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Qualys Web Application Scanner is a cloud-based service that provides automated crawling and testing of custom web applications to identify vulnerabilities including cross-site scripting (XSS) and SQL injection.",
            "short_description": "Qualys Web Application Scanner is a cloud-based service that provides automated crawling and testing of custom web applications to identify vulnerabilities including…",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: cf-qualys-scanner\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"cf-qualys-scanner\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.qualys.com/apps/pci-compliance/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:45Z"
        },
        {
            "id": "qualys-ssl-scanner",
            "name": "Qualys SSL Scanner",
            "slug": "qualys-ssl-scanner",
            "url": "https://botcrawl.com/bots/qualys-ssl-scanner/",
            "status": "active",
            "operator": "Qualys",
            "company": "Qualys",
            "family": "Qualys",
            "category": "monitoring",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "SSL Labs"
            ],
            "primary_user_agent": "SSL Labs",
            "robots_token": "SSL Labs",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "SSL Labs / Qualys is used to test and monitor the SSL rating of a site.",
            "short_description": "SSL Labs / Qualys is used to test and monitor the SSL rating of a site.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: SSL Labs\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"SSL Labs\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.qualys.com/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:45Z"
        },
        {
            "id": "quantcastbot",
            "name": "Quantcastbot",
            "slug": "quantcastbot",
            "url": "https://botcrawl.com/bots/quantcastbot/",
            "status": "active",
            "operator": "Quantcast",
            "company": "Quantcast",
            "family": "Quantcast",
            "category": "scraper",
            "kind": "crawler",
            "purpose": "general-crawl",
            "identity_type": "unknown",
            "user_agents": [
                "Quantcastbot",
                "Quantcastbot/1.0 (+http://www.quantcast.com/bot)"
            ],
            "primary_user_agent": "Quantcastbot",
            "robots_token": "Quantcastbot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Quantcast Bot is the name of a web crawler used by Quantcast for advertisement quality assurance and to understand page content for Interest-Based Audiences.",
            "short_description": "Quantcast Bot is the name of a web crawler used by Quantcast for advertisement quality assurance and to understand page content for Interest-Based Audiences.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Quantcastbot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Quantcastbot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "http://www.quantcast.com/bot"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:45Z"
        },
        {
            "id": "quartr-crawler",
            "name": "Quartr Crawler",
            "slug": "quartr-crawler",
            "url": "https://botcrawl.com/bots/quartr-crawler/",
            "status": "active",
            "operator": "Quartr",
            "company": "Quartr",
            "family": "Quartr Crawler",
            "category": "scraper",
            "kind": "crawler",
            "purpose": "indexing",
            "identity_type": "unknown",
            "user_agents": [
                "Quartr Crawler"
            ],
            "primary_user_agent": "Quartr Crawler",
            "robots_token": "Quartr Crawler",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Quartr uses a crawler to obtain and deliver investor relations material",
            "short_description": "Quartr uses a crawler to obtain and deliver investor relations material",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Quartr Crawler\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Quartr Crawler\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://quartr.com"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:45Z"
        },
        {
            "id": "qwantbot",
            "name": "Qwantbot",
            "slug": "qwantbot",
            "url": "https://botcrawl.com/bots/qwantbot/",
            "status": "active",
            "operator": "Qwant",
            "company": "Qwant",
            "family": "Qwant",
            "category": "search",
            "kind": "crawler",
            "purpose": "indexing",
            "identity_type": "unknown",
            "user_agents": [
                "Qwantbot",
                "Mozilla/5.0 (compatible",
                "Qwantbot/2.4w",
                "+https://www.qwant.com/)",
                "Qwantbot-junior/1.0",
                "Qwantbot-news/2.0",
                "Qwantbot-official/1.0",
                "Qwantbot-wikidata/1.0",
                "Qwantbot-opt/1.0",
                "+Qwantbot@qwant.com)"
            ],
            "primary_user_agent": "Qwantbot",
            "robots_token": "Qwantbot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Based and designed in Europe, Qwant is the first search engine which protects privacy.",
            "short_description": "Based and designed in Europe, Qwant is the first search engine which protects privacy.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Qwantbot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Qwantbot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://help.qwant.com/bot/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:45Z"
        },
        {
            "id": "qwantbot-news",
            "name": "Qwantbot-news",
            "slug": "qwantbot-news",
            "url": "https://botcrawl.com/bots/qwantbot-news/",
            "status": "active",
            "operator": "Qwant",
            "company": "Qwant",
            "family": "Qwant",
            "category": "search",
            "kind": "crawler",
            "purpose": "unknown",
            "identity_type": "official-documented",
            "user_agents": [
                "Qwantbot-news",
                "Mozilla/5.0 (compatible; Qwantbot-news/2.0; +https://help.qwant.com/bot/)"
            ],
            "primary_user_agent": "Qwantbot-news",
            "robots_token": "Qwantbot",
            "verified": "yes",
            "respects_robots": "yes",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "high",
            "source_type": "official",
            "short_description": "Qwant says it may use the Qwantbot-news user-agent variant for crawling and indexing news content.",
            "verification_method": "Official Qwant crawler documentation; reverse and forward DNS should resolve within qwant.com, or match the published qwantbot.json IP list.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "ip_ranges_url": "https://help.qwant.com/bot/",
            "rules": {
                "robots": "User-agent: Qwantbot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Qwantbot-news\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://help.qwant.com/bot/"
                }
            ],
            "last_verified": "2026-04-01",
            "updated_at": "2026-04-30T09:15:45Z"
        },
        {
            "id": "qwantify",
            "name": "Qwantify",
            "slug": "qwantify",
            "url": "https://botcrawl.com/bots/qwantify/",
            "status": "active",
            "operator": "Qwant",
            "company": "Qwant",
            "family": "Qwant",
            "category": "search",
            "kind": "unknown",
            "purpose": "indexing",
            "identity_type": "unknown",
            "user_agents": [
                "Qwantify",
                "Mozilla/5.0 (compatible; Qwantify/2.4w; +https://www.qwant.com/)",
                "Qwant Bot"
            ],
            "primary_user_agent": "Qwantify",
            "robots_token": "Qwantify",
            "verified": "yes",
            "respects_robots": "yes",
            "risk": "neutral",
            "recommended_action": "allow",
            "confidence": "unknown",
            "source_type": "unknown",
            "short_description": "Qwant French privacy-focused search engine web crawler.",
            "verification_method": "Treat this entry as verified only when the exact user-agent matches the operator documentation at https://www.qwant.com/.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: Qwantify\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Qwantify\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.qwant.com/"
                }
            ],
            "updated_at": "2026-04-01T00:55:30Z"
        },
        {
            "id": "rackspace",
            "name": "Rackspace",
            "slug": "rackspace",
            "url": "https://botcrawl.com/bots/rackspace/",
            "status": "active",
            "operator": "Rackspace",
            "company": "Rackspace",
            "family": "Rackspace",
            "category": "monitoring",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "Rackspace Monitoring",
                "Rackspace Monitoring/1.1 (https://monitoring.api.rackspacecloud.com)"
            ],
            "primary_user_agent": "Rackspace Monitoring",
            "robots_token": "Rackspace Monitoring",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "The Rackspace HTTP Monitor monitors customer websites for uptime and other issues.",
            "short_description": "The Rackspace HTTP Monitor monitors customer websites for uptime and other issues.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Rackspace Monitoring\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Rackspace Monitoring\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://support.rackspace.com/how-to/about-the-rackspace-monitoring-agent/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:45Z"
        },
        {
            "id": "rakuten-image-extraction-bot",
            "name": "Rakuten Image extraction bot",
            "slug": "rakuten-image-extraction-bot",
            "url": "https://botcrawl.com/bots/rakuten-image-extraction-bot/",
            "status": "active",
            "operator": "Rakuten",
            "company": "Rakuten",
            "family": "Rakuten",
            "category": "unknown",
            "kind": "unknown",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "rakutenusabot-image",
                "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko, rakutenusabot-image/1.0) Chrome/114.0.0.0 Safari/537.36"
            ],
            "primary_user_agent": "rakutenusabot-image",
            "robots_token": "rakutenusabot-image",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Rakuten uses this bot to crawl product images so that we can display cashback deals for our merchants.",
            "short_description": "Rakuten uses this bot to crawl product images so that we can display cashback deals for our merchants.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: rakutenusabot-image\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"rakutenusabot-image\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://product-image.ebates.com/item-gsp/rakutenusabot.html"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:45Z"
        },
        {
            "id": "razorpay-webhook",
            "name": "Razorpay Webhook",
            "slug": "razorpay-webhook",
            "url": "https://botcrawl.com/bots/razorpay-webhook/",
            "status": "active",
            "operator": "Razorpay",
            "company": "Razorpay",
            "family": "Razorpay",
            "category": "unknown",
            "kind": "fetcher",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "Razorpay"
            ],
            "primary_user_agent": "Razorpay",
            "robots_token": "Razorpay",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Razorpay sends webhooks back to ecommerce sites.",
            "short_description": "Razorpay sends webhooks back to ecommerce sites.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Razorpay\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Razorpay\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://razorpay.com/docs/security/whitelists/#webhook-ips"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:45Z"
        },
        {
            "id": "readable",
            "name": "Readable",
            "slug": "readable",
            "url": "https://botcrawl.com/bots/readable/",
            "status": "active",
            "operator": "Added Bytes Ltd",
            "company": "Added Bytes Ltd",
            "family": "Added Bytes Ltd",
            "category": "monitoring",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "Readable",
                "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36 Readable/1.1.4",
                "Readable/"
            ],
            "primary_user_agent": "Readable",
            "robots_token": "Readable",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Readable is a collection of text analysis tools, primarily focused on clarity and plain language. We spider customers' websites, find the content of each page, analyse it, and present that to the customer.",
            "short_description": "Readable is a collection of text analysis tools, primarily focused on clarity and plain language.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Readable\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Readable\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://readable.com"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:45Z"
        },
        {
            "id": "recurly-webhooks",
            "name": "Recurly Webhooks",
            "slug": "recurly-webhooks",
            "url": "https://botcrawl.com/bots/recurly-webhooks/",
            "status": "active",
            "operator": "Recurly",
            "company": "Recurly",
            "family": "Recurly",
            "category": "monitoring",
            "kind": "agent",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "Recurly Webhook",
                "Recurly Webhooks/2.0 (+https://docs.recurly.com/push-notifications)",
                "ecurly Webhook"
            ],
            "primary_user_agent": "Recurly Webhook",
            "robots_token": "Recurly Webhook",
            "verified": "yes",
            "respects_robots": "yes",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "high",
            "source_type": "directory",
            "short_description": "Recurly webhook sender used to notify customer sites about platform events.",
            "verification_method": "Follow the operator's own documentation when additional verification details are available.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: Recurly Webhook\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Recurly Webhook\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://recurly.com/"
                }
            ],
            "last_verified": "2026-04-01",
            "updated_at": "2026-04-30T09:15:46Z"
        },
        {
            "id": "redactus-web-classifier",
            "name": "Redactus Web Classifier",
            "slug": "redactus-web-classifier",
            "url": "https://botcrawl.com/bots/redactus-web-classifier/",
            "status": "active",
            "operator": "Redactus",
            "company": "Redactus",
            "family": "Redactus",
            "category": "security",
            "kind": "unknown",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "RedactusWebClassifier",
                "RedactusWebClassifier/1.0"
            ],
            "primary_user_agent": "RedactusWebClassifier",
            "robots_token": "RedactusWebClassifier",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Web classifier: fetches a site’s homepage to assign a category and rechecks periodically.",
            "short_description": "Web classifier: fetches a site’s homepage to assign a category and rechecks periodically.",
            "verification_method": "Validate the published identifier against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or stronger verification when available.",
            "rules": {
                "robots": "User-agent: RedactusWebClassifier\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"RedactusWebClassifier\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://bot.redactus.co.uk/docs"
                }
            ],
            "last_verified": "2026-04-19",
            "last_checked": "2026-04-19",
            "updated_at": "2026-04-30T09:15:54Z"
        },
        {
            "id": "redirect-pizza-destination-monitor",
            "name": "Redirect Pizza Destination Monitor",
            "slug": "redirect-pizza-destination-monitor",
            "url": "https://botcrawl.com/bots/redirect-pizza-destination-monitor/",
            "status": "active",
            "operator": "redirect.pizza",
            "company": "redirect.pizza",
            "family": "redirect.pizza",
            "category": "monitoring",
            "kind": "unknown",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "redirect-pizza",
                "Redirect Pizza Destination Monitor"
            ],
            "primary_user_agent": "redirect-pizza",
            "robots_token": "redirect-pizza",
            "verified": "yes",
            "respects_robots": "unknown",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "botsfyi-verified-listing",
            "common_use": "Monitoring",
            "short_description": "redirect.pizza destination monitor checks whether redirect destination URLs are reachable.",
            "verification_method": "Public verified bot directory listing",
            "spoofing_risk": "Medium",
            "rules": {
                "cloudflare": "(http.user_agent contains \"redirect-pizza\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://bots.fyi/d/redirect-pizza"
                }
            ],
            "last_verified": "2026-04-19",
            "last_checked": "2026-04-19",
            "updated_at": "2026-04-30T09:15:46Z"
        },
        {
            "id": "reelevant",
            "name": "Reelevant",
            "slug": "reelevant",
            "url": "https://botcrawl.com/bots/reelevant/",
            "status": "active",
            "operator": "Reelevant",
            "company": "Reelevant",
            "family": "Reelevant",
            "category": "scraper",
            "kind": "crawler",
            "purpose": "general-crawl",
            "identity_type": "unknown",
            "user_agents": [
                "Reelevant",
                "Reelevant/1.0",
                "Reelevant/"
            ],
            "primary_user_agent": "Reelevant",
            "robots_token": "Reelevant",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Reelevant allows customers to dynamically update content inside their emails. Needs to fetch images of their product at runtime and send them back to the users.",
            "short_description": "Reelevant allows customers to dynamically update content inside their emails.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Reelevant\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Reelevant\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://reelevant.com"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:46Z"
        },
        {
            "id": "retool",
            "name": "Retool",
            "slug": "retool",
            "url": "https://botcrawl.com/bots/retool/",
            "status": "active",
            "operator": "Retool",
            "company": "Retool",
            "family": "Retool",
            "category": "unknown",
            "kind": "unknown",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "Retool",
                "Retool/2.0 (+https://docs.tryretool.com/docs/apis)",
                "Retool/"
            ],
            "primary_user_agent": "Retool",
            "robots_token": "Retool",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Retool platform user agent",
            "short_description": "Retool platform user agent",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Retool\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Retool\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://docs.tryretool.com/docs/apis"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:46Z"
        },
        {
            "id": "retrolistecom",
            "name": "RetroListeCOM",
            "slug": "retrolistecom",
            "url": "https://botcrawl.com/bots/retrolistecom/",
            "status": "active",
            "operator": "Niclas Papst",
            "company": "Niclas Papst",
            "family": "Niclas Papst",
            "category": "monitoring",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "RetroListeCOM",
                "Mozilla/5.0 (compatible; RetroListeCOM/1.0)",
                "RetroListeCOM/"
            ],
            "primary_user_agent": "RetroListeCOM",
            "robots_token": "RetroListeCOM",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "RetroListeCOM is a service that tracks user counts on gaming-related websites, and its bot visits those sites to collect this data.",
            "short_description": "RetroListeCOM is a service that tracks user counts on gaming-related websites, and its bot visits those sites to collect this data.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: RetroListeCOM\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"RetroListeCOM\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://retroliste.com"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:46Z"
        },
        {
            "id": "revvim",
            "name": "Revvim",
            "slug": "revvim",
            "url": "https://botcrawl.com/bots/revvim/",
            "status": "active",
            "operator": "Revvim",
            "company": "Revvim",
            "family": "Revvim",
            "category": "scraper",
            "kind": "crawler",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "RevvimGort",
                "Chrome/54.0.2840.71 (compatible; RevvimGort/5.0; +http://www.revvim.com; webmaster@revvim.com)"
            ],
            "primary_user_agent": "RevvimGort",
            "robots_token": "RevvimGort",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Our bot crawls our customers' websites to identify SEO opportunities",
            "short_description": "Our bot crawls our customers' websites to identify SEO opportunities",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: RevvimGort\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"RevvimGort\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "http://revvim.com"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:46Z"
        },
        {
            "id": "reward-gateway",
            "name": "Reward Gateway",
            "slug": "reward-gateway",
            "url": "https://botcrawl.com/bots/reward-gateway/",
            "status": "active",
            "operator": "Reward Gateway",
            "company": "Reward Gateway",
            "family": "Reward Gateway",
            "category": "security",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "reward-gateway"
            ],
            "primary_user_agent": "reward-gateway",
            "robots_token": "reward-gateway",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "With the iboss Cloud Platform, each customer gets dedicated source cloud IP Addresses which are associated with the organization. Because of this, any data traversing the global cloud containerized gateways in the Platform will have a uniquely associated IP Address that can be mapped to the organization. This means that users always appear to be accessing the Internet from within the organization regardless of whether they’re in the office or on the road. This preserves the critical connectivity requirements that IT departments need when migrating to a cloud gateway platform.",
            "short_description": "With the iboss Cloud Platform, each customer gets dedicated source cloud IP Addresses which are associated with the organization.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: reward-gateway\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"reward-gateway\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "http://www.rewardgateway.com"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:46Z"
        },
        {
            "id": "robinbot",
            "name": "Robinbot",
            "slug": "robinbot",
            "url": "https://botcrawl.com/bots/robinbot/",
            "status": "active",
            "operator": "Robin Education",
            "company": "Robin Education",
            "family": "Robin",
            "category": "monitoring",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "Robinbot",
                "Robinbot/1.0",
                "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; +https://www.robin.education) Chrome/131.0.0.0 Safari/537.36"
            ],
            "primary_user_agent": "Robinbot",
            "robots_token": "Robinbot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Robin automatically scans school websites in the UK, providing compliance checks against statutory requirements.",
            "short_description": "Robin automatically scans school websites in the UK, providing compliance checks against statutory requirements.",
            "verification_method": "Validate the published identifier against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or stronger verification when available.",
            "rules": {
                "robots": "User-agent: Robinbot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Robinbot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.robin.education"
                }
            ],
            "last_verified": "2026-04-19",
            "last_checked": "2026-04-19",
            "updated_at": "2026-04-30T09:15:54Z"
        },
        {
            "id": "rss-api",
            "name": "RSS API",
            "slug": "rss-api",
            "url": "https://botcrawl.com/bots/rss-api/",
            "status": "active",
            "operator": "RSS API (by Tibush GmbH)",
            "company": "RSS API (by Tibush GmbH)",
            "family": "RSS API (by Tibush GmbH)",
            "category": "search",
            "kind": "fetcher",
            "purpose": "indexing",
            "identity_type": "unknown",
            "user_agents": [
                "RSSAPI",
                "RSSAPI/2.0 (+https://rssapi.net/)"
            ],
            "primary_user_agent": "RSSAPI",
            "robots_token": "RSSAPI",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "RSS API periodically requesting and parsing RSS Feeds for our customers to monitor them for any changes.",
            "short_description": "RSS API periodically requesting and parsing RSS Feeds for our customers to monitor them for any changes.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: RSSAPI\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"RSSAPI\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://rssapi.net/"
                }
            ],
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:46Z"
        },
        {
            "id": "rss2tg_bot",
            "name": "rss2tg_bot",
            "slug": "rss2tg_bot",
            "url": "https://botcrawl.com/bots/rss2tg_bot/",
            "status": "active",
            "operator": "Yellow Rubber Duck Consulting",
            "company": "Yellow Rubber Duck Consulting",
            "family": "Yellow Rubber Duck Consulting",
            "category": "search",
            "kind": "fetcher",
            "purpose": "indexing",
            "identity_type": "unknown",
            "user_agents": [
                "rss2tg bot",
                "Mozilla/5.0 (compatible; +http://komar.in/en/rss2tg_crawler)",
                "rss2tg crawler"
            ],
            "primary_user_agent": "rss2tg bot",
            "robots_token": "rss2tg bot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "A notification RSS bot for Telegram instant messenger",
            "short_description": "A notification RSS bot for Telegram instant messenger",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: rss2tg bot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"rss2tg bot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://rss2tg.duck.consulting/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:46Z"
        },
        {
            "id": "ryebot",
            "name": "RyeBot",
            "slug": "ryebot",
            "url": "https://botcrawl.com/bots/ryebot/",
            "status": "active",
            "operator": "Rye",
            "company": "Rye",
            "family": "Rye",
            "category": "unknown",
            "kind": "unknown",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "ryebot"
            ],
            "primary_user_agent": "ryebot",
            "robots_token": "ryebot",
            "verified": "yes",
            "respects_robots": "unknown",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "botsfyi-verified-listing",
            "common_use": "E-commerce",
            "short_description": "Powers automated checkout on behalf of shoppers with explicit consent.",
            "verification_method": "Public verified bot directory listing",
            "spoofing_risk": "Medium",
            "rules": {
                "cloudflare": "(http.user_agent contains \"ryebot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://bots.fyi/d/ryebot"
                }
            ],
            "last_verified": "2026-04-19",
            "last_checked": "2026-04-19",
            "updated_at": "2026-04-30T09:15:46Z"
        },
        {
            "id": "rytebot",
            "name": "RyteBot",
            "slug": "rytebot",
            "url": "https://botcrawl.com/bots/rytebot/",
            "status": "active",
            "operator": "Semrush",
            "company": "Semrush",
            "family": "Semrush",
            "category": "scraper",
            "kind": "fetcher",
            "purpose": "site-owner-fetch",
            "identity_type": "official-documented",
            "user_agents": [
                "RyteBot",
                "Ryte crawler"
            ],
            "primary_user_agent": "RyteBot",
            "robots_token": "RyteBot",
            "verified": "yes",
            "respects_robots": "yes",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "high",
            "source_type": "official",
            "short_description": "Crawler for Ryte-powered tools documented by Semrush.",
            "verification_method": "Identify the documented Semrush token and compare behavior against Semrush's official bot documentation.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "ip_ranges_url": "https://www.semrush.com/bot/",
            "rules": {
                "robots": "User-agent: RyteBot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"RyteBot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.semrush.com/bot/"
                }
            ],
            "last_verified": "2026-04-01",
            "updated_at": "2026-04-30T09:15:46Z"
        },
        {
            "id": "salesforce",
            "name": "Salesforce",
            "slug": "salesforce",
            "url": "https://botcrawl.com/bots/salesforce/",
            "status": "active",
            "operator": "Salesforce",
            "company": "Salesforce",
            "family": "Salesforce",
            "category": "scraper",
            "kind": "crawler",
            "purpose": "general-crawl",
            "identity_type": "unknown",
            "user_agents": [
                "SFDC-Callout",
                "SFDC-Callout/49.0"
            ],
            "primary_user_agent": "SFDC-Callout",
            "robots_token": "SFDC-Callout",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "CRM + Marketing",
            "short_description": "CRM + Marketing",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: SFDC-Callout\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"SFDC-Callout\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://help.salesforce.com/articleView?id=000321501&type=1&mode=1"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:46Z"
        },
        {
            "id": "salesviewerbot",
            "name": "SalesViewerBot",
            "slug": "salesviewerbot",
            "url": "https://botcrawl.com/bots/salesviewerbot/",
            "status": "active",
            "operator": "SalesViewer GmbH",
            "company": "SalesViewer GmbH",
            "family": "SalesViewer GmbH",
            "category": "scraper",
            "kind": "crawler",
            "purpose": "general-crawl",
            "identity_type": "unknown",
            "user_agents": [
                "page-preview-tool",
                "page-preview-tool Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.132 Safari/537.36"
            ],
            "primary_user_agent": "page-preview-tool",
            "robots_token": "page-preview-tool",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Since we offer sales and marketing information we need to enrich the company information. To provide crucial company information inside our service we need to provide a preview of visitor websites. Therefore we need to visit the websites.",
            "short_description": "Since we offer sales and marketing information we need to enrich the company information.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: page-preview-tool\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"page-preview-tool\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.salesviewer.com/en/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:46Z"
        },
        {
            "id": "sanity-webhooks",
            "name": "Sanity Webhooks",
            "slug": "sanity-webhooks",
            "url": "https://botcrawl.com/bots/sanity-webhooks/",
            "status": "active",
            "operator": "Sanity",
            "company": "Sanity",
            "family": "Sanity",
            "category": "unknown",
            "kind": "unknown",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "sanity-webhooks",
                "Sanity Webhooks"
            ],
            "primary_user_agent": "sanity-webhooks",
            "robots_token": "sanity-webhooks",
            "verified": "yes",
            "respects_robots": "unknown",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "botsfyi-verified-listing",
            "common_use": "Webhook",
            "short_description": "Sanity's webhook service that delivers real-time event notifications for content changes and other events.",
            "verification_method": "Public verified bot directory listing",
            "spoofing_risk": "Medium",
            "rules": {
                "cloudflare": "(http.user_agent contains \"sanity-webhooks\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://bots.fyi/d/sanity-webhooks"
                }
            ],
            "last_verified": "2026-04-19",
            "last_checked": "2026-04-19",
            "updated_at": "2026-04-30T09:15:46Z"
        },
        {
            "id": "sansec-security-monitor",
            "name": "Sansec Security Monitor",
            "slug": "sansec-security-monitor",
            "url": "https://botcrawl.com/bots/sansec-security-monitor/",
            "status": "active",
            "operator": "Sansec Security Monitor",
            "company": "Sansec Security Monitor",
            "family": "Sansec Security Monitor",
            "category": "security",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "Sansec Security Monitor",
                "Mozilla/5.0 (compatible; Sansec Security Monitor/1.0; +https://sansec.io/monitor)"
            ],
            "primary_user_agent": "Sansec Security Monitor",
            "robots_token": "Sansec Security Monitor",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Enhances e-commerce security by monitoring stores, crucial for preventing data breaches & fighting digital skimming.",
            "short_description": "Enhances e-commerce security by monitoring stores, crucial for preventing data breaches & fighting digital skimming.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Sansec Security Monitor\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Sansec Security Monitor\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://sansec.io/monitor"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:46Z"
        },
        {
            "id": "sbintuitionsbot",
            "name": "SBIntuitionsBot",
            "slug": "sbintuitionsbot",
            "url": "https://botcrawl.com/bots/sbintuitionsbot/",
            "status": "active",
            "operator": "SB Intuitions",
            "company": "SB Intuitions",
            "family": "SB Intuitions",
            "category": "ai",
            "kind": "unknown",
            "purpose": "training",
            "identity_type": "unknown",
            "user_agents": [
                "SBIntuitionsBot",
                "Mozilla/5.0 (compatible; +https://sbintuitions.co.jp/)",
                "SB Intuitions Crawler"
            ],
            "primary_user_agent": "SBIntuitionsBot",
            "robots_token": "SBIntuitionsBot",
            "verified": "unknown",
            "respects_robots": "unknown",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "unknown",
            "source_type": "unknown",
            "short_description": "SB Intuitions Japanese AI company crawler used for training generative AI models.",
            "verification_method": "No official IP-range or reverse-DNS verification method is documented here. Match the exact user-agent and request behavior against the source documentation at https://sbintuitions.co.jp.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: SBIntuitionsBot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"SBIntuitionsBot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://sbintuitions.co.jp"
                }
            ],
            "updated_at": "2026-04-01T00:55:17Z"
        },
        {
            "id": "scavobot",
            "name": "ScavoBot",
            "slug": "scavobot",
            "url": "https://botcrawl.com/bots/scavobot/",
            "status": "active",
            "operator": "Scavo",
            "company": "Scavo",
            "family": "Scavo",
            "category": "ai",
            "kind": "agent",
            "purpose": "user-triggered",
            "identity_type": "unknown",
            "user_agents": [
                "ScavoBot"
            ],
            "primary_user_agent": "ScavoBot",
            "robots_token": "ScavoBot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "User-authorized website monitoring, uptime checks, diagnostics, and compliance scanning for managed sites.",
            "short_description": "User-authorized website monitoring, uptime checks, diagnostics, and compliance scanning for managed sites.",
            "verification_method": "Validate the published identifier against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or stronger verification when available.",
            "rules": {
                "robots": "User-agent: ScavoBot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"ScavoBot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://scavo.ai"
                }
            ],
            "last_verified": "2026-04-19",
            "last_checked": "2026-04-19",
            "updated_at": "2026-04-30T09:15:54Z"
        },
        {
            "id": "scour-rss-bot",
            "name": "Scour RSS Bot",
            "slug": "scour-rss-bot",
            "url": "https://botcrawl.com/bots/scour-rss-bot/",
            "status": "active",
            "operator": "Scour",
            "company": "Scour",
            "family": "Scour",
            "category": "search",
            "kind": "fetcher",
            "purpose": "indexing",
            "identity_type": "unknown",
            "user_agents": [
                "scour.ing",
                "ScourRSSBot/1.0 (+https://scour.ing/bot)",
                "Scour/1.0 (+https://scour.ing/bot)",
                "ScourBot/1.0 (+https://scour.ing/bot)"
            ],
            "primary_user_agent": "scour.ing",
            "robots_token": "scour.ing",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "A personalized RSS feed reader that uses AI/ML to surface content that matches user interests.",
            "short_description": "A personalized RSS feed reader that uses AI/ML to surface content that matches user interests.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: scour.ing\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"scour.ing\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://scour.ing/bot"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:46Z"
        },
        {
            "id": "se-ranking-bot",
            "name": "SE Ranking Bot",
            "slug": "se-ranking-bot",
            "url": "https://botcrawl.com/bots/se-ranking-bot/",
            "status": "active",
            "operator": "SE Ranking",
            "company": "SE Ranking",
            "family": "SE Ranking",
            "category": "monitoring",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "SE Ranking Bot"
            ],
            "primary_user_agent": "SE Ranking Bot",
            "robots_token": "SE Ranking Bot",
            "verified": "yes",
            "respects_robots": "yes",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "high",
            "source_type": "directory",
            "short_description": "Crawler for analyzing SE Ranking client websites for potential issues.",
            "verification_method": "Follow the operator's own documentation when additional verification details are available.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: SE Ranking Bot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"SE Ranking Bot\")"
            },
            "last_verified": "2026-04-01",
            "updated_at": "2026-04-30T09:15:46Z"
        },
        {
            "id": "searchatlas-bot",
            "name": "SearchAtlas Bot",
            "slug": "searchatlas-bot",
            "url": "https://botcrawl.com/bots/searchatlas-bot/",
            "status": "active",
            "operator": "Search Atlas",
            "company": "Search Atlas",
            "family": "Search Atlas",
            "category": "scraper",
            "kind": "crawler",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "SearchAtlas Bot"
            ],
            "primary_user_agent": "SearchAtlas Bot",
            "robots_token": "SearchAtlas Bot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Bot used to evaluate customer's websites and provide SEO optimization strategy",
            "short_description": "Bot used to evaluate customer's websites and provide SEO optimization strategy",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: SearchAtlas Bot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"SearchAtlas Bot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://help.searchatlas.com/en/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:46Z"
        },
        {
            "id": "sebot-wa",
            "name": "SEBot-WA",
            "slug": "sebot-wa",
            "url": "https://botcrawl.com/bots/sebot-wa/",
            "status": "active",
            "operator": "SE Ranking",
            "company": "SE Ranking",
            "family": "SE Ranking",
            "category": "monitoring",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "SEBot-WA"
            ],
            "primary_user_agent": "SEBot-WA",
            "robots_token": "SEBot-WA",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Crawler for analyzing SE Ranking clients websites for potential issues.",
            "short_description": "Crawler for analyzing SE Ranking clients websites for potential issues.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: SEBot-WA\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"SEBot-WA\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://help.seranking.com/en/project-tools/website-audit/overview"
                }
            ],
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:46Z"
        },
        {
            "id": "securityheaders",
            "name": "SecurityHeaders",
            "slug": "securityheaders",
            "url": "https://botcrawl.com/bots/securityheaders/",
            "status": "active",
            "operator": "Security Headers",
            "company": "Security Headers",
            "family": "Security Headers",
            "category": "security",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "SecurityHeaders",
                "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36 SecurityHeaders"
            ],
            "primary_user_agent": "SecurityHeaders",
            "robots_token": "SecurityHeaders",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Provides a free security scanning service at https://securityheaders.com",
            "short_description": "Provides a free security scanning service at https://securityheaders.com",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: SecurityHeaders\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"SecurityHeaders\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://securityheaders.com"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:46Z"
        },
        {
            "id": "seekport",
            "name": "Seekport",
            "slug": "seekport",
            "url": "https://botcrawl.com/bots/seekport/",
            "status": "active",
            "operator": "SISTRIX",
            "company": "SISTRIX",
            "family": "SISTRIX",
            "category": "search",
            "kind": "crawler",
            "purpose": "indexing",
            "identity_type": "unknown",
            "user_agents": [
                "SeekportBot",
                "Mozilla/5.0 (compatible; SeekportBot; +https://bot.seekport.com)"
            ],
            "primary_user_agent": "SeekportBot",
            "robots_token": "SeekportBot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Seekport is an internet search engine. Originally founded in 2003, the search engine has been operated by SISTRIX, a platform intelligence provider from Bonn (Germany), since December 2014. The search engine is a public, free and independent alternative to Google. Seekport does not store user data and does not profile users. Seekport is also operated without advertising and has no conflicts of interest in the display of search results.",
            "short_description": "Seekport is an internet search engine.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: SeekportBot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"SeekportBot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.seekport.com/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:47Z"
        },
        {
            "id": "seekr",
            "name": "Seekr",
            "slug": "seekr",
            "url": "https://botcrawl.com/bots/seekr/",
            "status": "active",
            "operator": "Seekr",
            "company": "Seekr",
            "family": "Seekr",
            "category": "scraper",
            "kind": "crawler",
            "purpose": "general-crawl",
            "identity_type": "unknown",
            "user_agents": [
                "Seekr",
                "Seekr crawler"
            ],
            "primary_user_agent": "Seekr",
            "robots_token": "Seekr",
            "verified": "yes",
            "respects_robots": "yes",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "short_description": "Seekr content reliability/scoring crawler.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or reverse DNS/IP verification when available.",
            "rules": {
                "robots": "User-agent: Seekr\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Seekr\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://crawlercheck.com/directory/scrapers/seekr"
                }
            ],
            "last_verified": "2026-04-29",
            "last_checked": "2026-04-29",
            "updated_at": "2026-04-30T09:15:55Z"
        },
        {
            "id": "seekrbot",
            "name": "SeekrBot",
            "slug": "seekrbot",
            "url": "https://botcrawl.com/bots/seekrbot/",
            "status": "active",
            "operator": "Seekr",
            "company": "Seekr",
            "family": "Seekr",
            "category": "scraper",
            "kind": "crawler",
            "purpose": "general-crawl",
            "identity_type": "unknown",
            "user_agents": [
                "SeekrBot",
                "Seekr bot"
            ],
            "primary_user_agent": "SeekrBot",
            "robots_token": "SeekrBot",
            "verified": "yes",
            "respects_robots": "yes",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "short_description": "Seekr crawler for content reliability/scoring services.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or reverse DNS/IP verification when available.",
            "rules": {
                "robots": "User-agent: SeekrBot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"SeekrBot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://crawlercheck.com/directory/scrapers/seekrbot"
                }
            ],
            "last_verified": "2026-04-29",
            "last_checked": "2026-04-29",
            "updated_at": "2026-04-30T09:15:55Z"
        },
        {
            "id": "semrushbot",
            "name": "Semrushbot",
            "slug": "semrushbot",
            "url": "https://botcrawl.com/bots/semrushbot/",
            "status": "active",
            "operator": "Semrush",
            "company": "Semrush",
            "family": "Semrush",
            "category": "scraper",
            "kind": "crawler",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "SemrushBot-SA",
                "Mozilla/5.0 (compatible; SemrushBot-SA/0.97; +http://www.semrush.com/bot.html)"
            ],
            "primary_user_agent": "SemrushBot-SA",
            "robots_token": "SemrushBot-SA",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Semrushbot crawls your website to analyze it for different SEO and technical issues.",
            "short_description": "Semrushbot crawls your website to analyze it for different SEO and technical issues.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: SemrushBot-SA\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"SemrushBot-SA\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.semrush.com/bot/"
                }
            ],
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:47Z"
        },
        {
            "id": "semrushbot-core",
            "name": "SemrushBot Core",
            "slug": "semrushbot-core",
            "url": "https://botcrawl.com/bots/semrushbot-core/",
            "status": "active",
            "operator": "Semrush",
            "company": "Semrush",
            "family": "Semrush",
            "category": "scraper",
            "kind": "crawler",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "SemrushBot",
                "Mozilla/5.0 (compatible; SemrushBot/3~bl; +http://www.semrush.com/bot.html)",
                "SemrushBot/",
                "SemrushBotBacklinks",
                "SemrushBotLinkBuilding"
            ],
            "primary_user_agent": "SemrushBot",
            "robots_token": "SemrushBot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "SemrushBot is used by Semrush to gather website data for backlink analytics and link building features.",
            "short_description": "SemrushBot crawls websites for Semrush backlink analytics and link building tools.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "ip_ranges_url": "https://www.semrush.com/bot/",
            "rules": {
                "robots": "User-agent: SemrushBot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"SemrushBot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.semrush.com/bot/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:47Z"
        },
        {
            "id": "semrushbot-bm",
            "name": "SemrushBot-BM",
            "slug": "semrushbot-bm",
            "url": "https://botcrawl.com/bots/semrushbot-bm/",
            "status": "active",
            "operator": "Semrush",
            "company": "Semrush",
            "family": "Semrush",
            "category": "search",
            "kind": "unknown",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "SemrushBot-BM",
                "Mozilla/5.0 (compatible; SemrushBot-BM/1.0; +https://www.semrush.com/bot.html)",
                "Semrush Brand Bot"
            ],
            "primary_user_agent": "SemrushBot-BM",
            "robots_token": "SemrushBot-BM",
            "verified": "yes",
            "respects_robots": "yes",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "unknown",
            "source_type": "unknown",
            "short_description": "Semrush Brand Monitoring bot that crawls websites for brand mention tracking.",
            "verification_method": "Verify the exact user-agent and confirm Semrush ownership using Semrush's published bot documentation.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: SemrushBot-BM\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"SemrushBot-BM\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.semrush.com/bot.html"
                }
            ],
            "updated_at": "2026-04-01T00:55:28Z"
        },
        {
            "id": "semrushbot-ct",
            "name": "SemrushBot-CT",
            "slug": "semrushbot-ct",
            "url": "https://botcrawl.com/bots/semrushbot-ct/",
            "status": "active",
            "operator": "Semrush",
            "company": "Semrush",
            "family": "Semrush",
            "category": "search",
            "kind": "unknown",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "SemrushBot-CT",
                "Mozilla/5.0 (compatible; SemrushBot-CT/1.0; +https://www.semrush.com/bot.html)",
                "Semrush Content Bot"
            ],
            "primary_user_agent": "SemrushBot-CT",
            "robots_token": "SemrushBot-CT",
            "verified": "yes",
            "respects_robots": "yes",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "unknown",
            "source_type": "unknown",
            "short_description": "Semrush Content Marketing bot that crawls websites for content analysis.",
            "verification_method": "Verify the exact user-agent and confirm Semrush ownership using Semrush's published bot documentation.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: SemrushBot-CT\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"SemrushBot-CT\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.semrush.com/bot.html"
                }
            ],
            "updated_at": "2026-04-01T00:55:28Z"
        },
        {
            "id": "semrushbot-ocob",
            "name": "SemrushBot-OCOB",
            "slug": "semrushbot-ocob",
            "url": "https://botcrawl.com/bots/semrushbot-ocob/",
            "status": "active",
            "operator": "Semrush",
            "company": "Semrush",
            "family": "Semrush",
            "category": "ai",
            "kind": "crawler",
            "purpose": "general-crawl",
            "identity_type": "unknown",
            "user_agents": [
                "SemrushBot-OCOB",
                "Mozilla/5.0 (compatible; SemrushBot-OCOB/1; +https://www.semrush.com/bot/)"
            ],
            "primary_user_agent": "SemrushBot-OCOB",
            "robots_token": "SemrushBot-OCOB",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Semrush is an all-in-one tool suite for improving online visibility and discovering marketing insights.",
            "short_description": "Semrush is an all-in-one tool suite for improving online visibility and discovering marketing insights.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: SemrushBot-OCOB\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"SemrushBot-OCOB\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.semrush.com/bot/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:47Z"
        },
        {
            "id": "semrushbotbacklinkaudit",
            "name": "SemrushBotBacklinkAudit",
            "slug": "semrushbotbacklinkaudit",
            "url": "https://botcrawl.com/bots/semrushbotbacklinkaudit/",
            "status": "active",
            "operator": "SEMrush",
            "company": "SEMrush",
            "family": "SEMrush",
            "category": "scraper",
            "kind": "crawler",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "SemrushBot-BA",
                "Mozilla/5.0 (compatible; SemrushBot-BA; +http://www.semrush.com/bot.html)",
                "Semrush backlink audit crawler"
            ],
            "primary_user_agent": "SemrushBot-BA",
            "robots_token": "SemrushBot-BA",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Data collected by SEMrushBot is used for the Backlink Audit tool to check website backlinks",
            "short_description": "Data collected by SEMrushBot is used for the Backlink Audit tool to check website backlinks",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "ip_ranges_url": "https://www.semrush.com/bot/",
            "rules": {
                "robots": "User-agent: SemrushBot-BA\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"SemrushBot-BA\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.semrush.com/backlink_audit/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:47Z"
        },
        {
            "id": "semrushbotlinkbuilding",
            "name": "SemrushBotLinkBuilding",
            "slug": "semrushbotlinkbuilding",
            "url": "https://botcrawl.com/bots/semrushbotlinkbuilding/",
            "status": "active",
            "operator": "SEMrush",
            "company": "SEMrush",
            "family": "SEMrush",
            "category": "scraper",
            "kind": "crawler",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "SemrushBot;",
                "Mozilla/5.0 (compatible; SemrushBot; +http://www.semrush.com/bot.html)"
            ],
            "primary_user_agent": "SemrushBot;",
            "robots_token": "SemrushBot;",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "unknown",
            "common_use": "Data collected by SEMrushBot is used for the Link Building tool to check website backlinks",
            "short_description": "Data collected by SEMrushBot is used for the Link Building tool to check website backlinks",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: SemrushBot;\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"SemrushBot;\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.semrush.com/link_building/"
                }
            ],
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-20T03:46:42Z"
        },
        {
            "id": "semrushbotsi",
            "name": "SemrushBotSI",
            "slug": "semrushbotsi",
            "url": "https://botcrawl.com/bots/semrushbotsi/",
            "status": "active",
            "operator": "Semrush",
            "company": "Semrush",
            "family": "Semrush",
            "category": "scraper",
            "kind": "crawler",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "SemrushBot-SI",
                "Mozilla/5.0 (compatible; SemrushBot-SI/0.97; +http://www.semrush.com/bot.html)",
                "Semrush on-page SEO crawler"
            ],
            "primary_user_agent": "SemrushBot-SI",
            "robots_token": "SemrushBot-SI",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Data collected by SEMrushBot is used for the On Page SEO Checker and SEO Content template tools reports. Data collected by SEMrushBot is used for the Topic Research tool reports.",
            "short_description": "Data collected by SEMrushBot is used for the On Page SEO Checker and SEO Content template tools reports Data collected by SEMrushBot is used for the Topic Research tool…",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "ip_ranges_url": "https://www.semrush.com/bot/",
            "rules": {
                "robots": "User-agent: SemrushBot-SI\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"SemrushBot-SI\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.semrush.com/on-page-seo-checker/"
                },
                {
                    "type": "source",
                    "url": "https://www.semrush.com/seo-content-template/"
                },
                {
                    "type": "source",
                    "url": "https://www.semrush.com/topic-research/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:47Z"
        },
        {
            "id": "semrushbotswa",
            "name": "SemrushBotSwa",
            "slug": "semrushbotswa",
            "url": "https://botcrawl.com/bots/semrushbotswa/",
            "status": "active",
            "operator": "SEMrush",
            "company": "SEMrush",
            "family": "SEMrush",
            "category": "ai",
            "kind": "crawler",
            "purpose": "general-crawl",
            "identity_type": "unknown",
            "user_agents": [
                "SemrushBot-SWA",
                "Mozilla/5.0 (compatible; SemrushBot-SWA/0.1; +http://www.semrush.com/bot.html)",
                "SemrushBot-SWA/"
            ],
            "primary_user_agent": "SemrushBot-SWA",
            "robots_token": "SemrushBot-SWA",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Data collected by SEMrushBot is used for the SEO Writing Assistant tool to check if URL is accessible",
            "short_description": "Data collected by SEMrushBot is used for the SEO Writing Assistant tool to check if URL is accessible",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: SemrushBot-SWA\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"SemrushBot-SWA\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.semrush.com/swa/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:47Z"
        },
        {
            "id": "sendgrid",
            "name": "SendGrid",
            "slug": "sendgrid",
            "url": "https://botcrawl.com/bots/sendgrid/",
            "status": "active",
            "operator": "SendGrid",
            "company": "SendGrid",
            "family": "SendGrid",
            "category": "unknown",
            "kind": "fetcher",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "SendGrid Event API"
            ],
            "primary_user_agent": "SendGrid Event API",
            "robots_token": "SendGrid Event API",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "The SendGrid Event Webhook sends email event data to customer APIs as SendGrid processes it.",
            "short_description": "The SendGrid Event Webhook sends email event data to customer APIs as SendGrid processes it.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: SendGrid Event API\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"SendGrid Event API\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://sendgrid.com/docs/for-developers/tracking-events/event/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:47Z"
        },
        {
            "id": "sentry",
            "name": "Sentry",
            "slug": "sentry",
            "url": "https://botcrawl.com/bots/sentry/",
            "status": "active",
            "operator": "Sentry",
            "company": "Sentry",
            "family": "Sentry",
            "category": "security",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "https://sentry.io",
                "sentry/10.0.0.dev0 (https://sentry.io)"
            ],
            "primary_user_agent": "https://sentry.io",
            "robots_token": "https://sentry.io",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Sentry monitors webpages for availability and performance issues.",
            "short_description": "Sentry monitors webpages for availability and performance issues.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: https://sentry.io\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"https://sentry.io\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://sentry.io"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:47Z"
        },
        {
            "id": "sentry-uptime-monitoring",
            "name": "Sentry Uptime Monitoring",
            "slug": "sentry-uptime-monitoring",
            "url": "https://botcrawl.com/bots/sentry-uptime-monitoring/",
            "status": "active",
            "operator": "Sentry Uptime Bot",
            "company": "Sentry Uptime Bot",
            "family": "Sentry Uptime Bot",
            "category": "monitoring",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "SentryUptimeBot",
                "SentryUptimeBot/1.0 (+http://docs.sentry.io/product/alerts/uptime-monitoring/)"
            ],
            "primary_user_agent": "SentryUptimeBot",
            "robots_token": "SentryUptimeBot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Sentry Uptime Monitoring is a feature of the Sentry platform that checks customer websites and APIs for availability.",
            "short_description": "Sentry Uptime Monitoring is a feature of the Sentry platform that checks customer websites and APIs for availability.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: SentryUptimeBot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"SentryUptimeBot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://docs.sentry.io/product/alerts/uptime-monitoring/troubleshooting/#verify-firewall-configuration"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:47Z"
        },
        {
            "id": "seo-audit-check-bot",
            "name": "SEO Audit Check Bot",
            "slug": "seo-audit-check-bot",
            "url": "https://botcrawl.com/bots/seo-audit-check-bot/",
            "status": "active",
            "operator": "SEO Audit Check",
            "company": "SEO Audit Check",
            "family": "SEO Audit Check",
            "category": "scraper",
            "kind": "crawler",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "seo-audit-check-bot",
                "Mozilla/5.0 (compatible; seo-audit-check-bot/1.0)"
            ],
            "primary_user_agent": "seo-audit-check-bot",
            "robots_token": "seo-audit-check-bot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "SEO audit check bot is likely an automated tool within the WebCEO platform that performs comprehensive SEO audits on websites",
            "short_description": "SEO audit check bot is likely an automated tool within the WebCEO platform that performs comprehensive SEO audits on websites",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: seo-audit-check-bot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"seo-audit-check-bot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.webceo.com/webceo-bots.htm"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:47Z"
        },
        {
            "id": "seo4ajax",
            "name": "seo4ajax",
            "slug": "seo4ajax",
            "url": "https://botcrawl.com/bots/seo4ajax/",
            "status": "active",
            "operator": "Capsule Code",
            "company": "Capsule Code",
            "family": "Capsule Code",
            "category": "scraper",
            "kind": "crawler",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "seo4ajax.com",
                "Mozilla/5.0 AppleWebKit (compatible",
                "s4a/1.0",
                "+https://www.seo4ajax.com/webscraper)",
                "s4a",
                "seo4ajax bot"
            ],
            "primary_user_agent": "seo4ajax.com",
            "robots_token": "seo4ajax.com",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "The seo4ajax bot is used by a service that helps make single-page applications (SPAs) crawlable by search engines. It pre-renders JavaScript-heavy pages into static HTML so they can be indexed.",
            "short_description": "The seo4ajax bot is used by a service that helps make single-page applications (SPAs) crawlable by search engines.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: seo4ajax.com\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"seo4ajax.com\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.seo4ajax.com"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:47Z"
        },
        {
            "id": "seobilitybot",
            "name": "SeobilityBot",
            "slug": "seobilitybot",
            "url": "https://botcrawl.com/bots/seobilitybot/",
            "status": "active",
            "operator": "Seobility",
            "company": "Seobility",
            "family": "Seobility",
            "category": "scraper",
            "kind": "crawler",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "SeobilityBot",
                "SeobilityBot (SEO Tool",
                "https://www.seobility.net/sites/bot.html)",
                "Seobility crawler"
            ],
            "primary_user_agent": "SeobilityBot",
            "robots_token": "SeobilityBot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "The Seobility Bot crawls websites to gather SEO Information and provide SEO analysis to its customers.",
            "short_description": "The Seobility Bot crawls websites to gather SEO Information and provide SEO analysis to its customers.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: SeobilityBot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"SeobilityBot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.seobility.net/sites/bot.html"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:47Z"
        },
        {
            "id": "seokicks-robot",
            "name": "Seokicks-Robot",
            "slug": "seokicks-robot",
            "url": "https://botcrawl.com/bots/seokicks-robot/",
            "status": "active",
            "operator": "Seokicks",
            "company": "Seokicks",
            "family": "Seokicks",
            "category": "search",
            "kind": "unknown",
            "purpose": "indexing",
            "identity_type": "unknown",
            "user_agents": [
                "Seokicks-Robot",
                "Mozilla/5.0 (compatible",
                "+http://www.seokicks.de/robot.html)",
                "Seokicks Bot"
            ],
            "primary_user_agent": "Seokicks-Robot",
            "robots_token": "Seokicks-Robot",
            "verified": "unknown",
            "respects_robots": "yes",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "unknown",
            "source_type": "unknown",
            "short_description": "Seokicks SEO crawler that powers the Seokicks backlink analysis tool.",
            "verification_method": "No official IP-range or reverse-DNS verification method is documented here. Match the exact user-agent and request behavior against the source documentation at https://www.seokicks.de/robot.html.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: Seokicks-Robot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Seokicks-Robot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.seokicks.de/robot.html"
                }
            ],
            "updated_at": "2026-04-01T00:55:37Z"
        },
        {
            "id": "sequelwp",
            "name": "SequelWP",
            "slug": "sequelwp",
            "url": "https://botcrawl.com/bots/sequelwp/",
            "status": "active",
            "operator": "SequelWP",
            "company": "SequelWP",
            "family": "SequelWP",
            "category": "monitoring",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "SequelWP",
                "SequelWP Uptime Monitoring Bot. https://sequelwp.com.au"
            ],
            "primary_user_agent": "SequelWP",
            "robots_token": "SequelWP",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Our monitoring agent checks website uptime on a 5 minute interval. It only checks verified customers & when the x-sequelwp header is valid.",
            "short_description": "Our monitoring agent checks website uptime on a 5 minute interval.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: SequelWP\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"SequelWP\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://sequelwp.com.au"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:47Z"
        },
        {
            "id": "serankingbacklinksbot",
            "name": "SERankingBacklinksBot",
            "slug": "serankingbacklinksbot",
            "url": "https://botcrawl.com/bots/serankingbacklinksbot/",
            "status": "active",
            "operator": "SE Ranking",
            "company": "SE Ranking",
            "family": "SE Ranking",
            "category": "scraper",
            "kind": "crawler",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "SERankingBacklinksBot",
                "Mozilla/5.0 (compatible",
                "SERankingBacklinksBot/1.0",
                "+https://seranking.com/backlinks-crawler)"
            ],
            "primary_user_agent": "SERankingBacklinksBot",
            "robots_token": "SERankingBacklinksBot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Crawls the Internet to assist in getting information on the link structure of sites on the web to assist SEO specialists",
            "short_description": "Crawls the Internet to assist in getting information on the link structure of sites on the web to assist SEO specialists",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: SERankingBacklinksBot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"SERankingBacklinksBot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://seranking.com/backlinks-crawler"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:47Z"
        },
        {
            "id": "serpstatbot",
            "name": "SerpstatBot",
            "slug": "serpstatbot",
            "url": "https://botcrawl.com/bots/serpstatbot/",
            "status": "active",
            "operator": "Serpstat",
            "company": "Serpstat",
            "family": "Serpstat",
            "category": "scraper",
            "kind": "crawler",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "serpstatbot",
                "serpstatbot/2.1 (advanced backlink tracking bot",
                "https://serpstatbot.com/",
                "abuse@serpstatbot.com)",
                "serpstatbot/2.0 beta (advanced backlink tracking bot",
                "http://serpstatbot.com/",
                "serpstatbot/1.0 (advanced backlink tracking bot",
                "curl/7.58.0",
                "serpstatbot/2.0 alpha (advanced backlink tracking bot"
            ],
            "primary_user_agent": "serpstatbot",
            "robots_token": "serpstatbot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "SerpstatBot is the Serpstat bot collects data for Serpstat's Backlink Analysis tool",
            "short_description": "SerpstatBot is the Serpstat bot collects data for Serpstat's Backlink Analysis tool",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: serpstatbot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"serpstatbot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://serpstatbot.com/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:47Z"
        },
        {
            "id": "server-density",
            "name": "Server Density",
            "slug": "server-density",
            "url": "https://botcrawl.com/bots/server-density/",
            "status": "active",
            "operator": "Stackpath",
            "company": "Stackpath",
            "family": "Stackpath",
            "category": "monitoring",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "Server Density",
                "Server Density Service Monitoring v2",
                "Server Density Agent"
            ],
            "primary_user_agent": "Server Density",
            "robots_token": "Server Density",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Proactive infrastructure monitoring for cloud, servers, containers & websites.",
            "short_description": "Proactive infrastructure monitoring for cloud, servers, containers & websites.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Server Density\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Server Density\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.stackpath.com/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:47Z"
        },
        {
            "id": "serverhunterspider",
            "name": "ServerHunterSpider",
            "slug": "serverhunterspider",
            "url": "https://botcrawl.com/bots/serverhunterspider/",
            "status": "active",
            "operator": "Server Hunter",
            "company": "Server Hunter",
            "family": "Server Hunter",
            "category": "unknown",
            "kind": "unknown",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "ServerHunterSpider",
                "Mozilla/5.0 (compatible",
                "ServerHunterSpider/1.1",
                "+https://www.serverhunter.com/spider/)"
            ],
            "primary_user_agent": "ServerHunterSpider",
            "robots_token": "ServerHunterSpider",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Our spider indexes the price, specifications and stock of hosting plans. We fully respect robots.txt and we have more information on https://www.serverhunter.com/spider/.",
            "short_description": "Our spider indexes the price, specifications and stock of hosting plans.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: ServerHunterSpider\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"ServerHunterSpider\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.serverhunter.com/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:47Z"
        },
        {
            "id": "seznam",
            "name": "Seznam",
            "slug": "seznam",
            "url": "https://botcrawl.com/bots/seznam/",
            "status": "active",
            "operator": "Seznam",
            "company": "Seznam",
            "family": "Seznam",
            "category": "search",
            "kind": "crawler",
            "purpose": "indexing",
            "identity_type": "unknown",
            "user_agents": [
                "SeznamBot",
                "Mozilla/5.0 (compatible",
                "SeznamBot/3.2",
                "+http://fulltext.sblog.cz/)"
            ],
            "primary_user_agent": "SeznamBot",
            "robots_token": "SeznamBot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "SeznamBot is the search engine crawler for Seznam search.",
            "short_description": "SeznamBot is the search engine crawler for Seznam search.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: SeznamBot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"SeznamBot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://napoveda.seznam.cz/en/seznambot-crawler/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:47Z"
        },
        {
            "id": "shapbot",
            "name": "ShapBot",
            "slug": "shapbot",
            "url": "https://botcrawl.com/bots/shapbot/",
            "status": "active",
            "operator": "Parallel",
            "company": "Parallel",
            "family": "Parallel",
            "category": "ai",
            "kind": "crawler",
            "purpose": "general-crawl",
            "identity_type": "unknown",
            "user_agents": [
                "ShapBot",
                "Mozilla/5.0 AppleWebKit/537.36 (KHTML",
                "like Gecko)",
                "compatible",
                "ShapBot/0.1.0",
                "ShapBot/"
            ],
            "primary_user_agent": "ShapBot",
            "robots_token": "ShapBot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "ShapBot helps discover and index websites for Parallel's web APIs.",
            "short_description": "ShapBot helps discover and index websites for Parallel's web APIs.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: ShapBot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"ShapBot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://docs.parallel.ai/features/crawler"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:47Z"
        },
        {
            "id": "shopify-captain-hook",
            "name": "Shopify-Captain-Hook",
            "slug": "shopify-captain-hook",
            "url": "https://botcrawl.com/bots/shopify-captain-hook/",
            "status": "active",
            "operator": "Shopify",
            "company": "Shopify",
            "family": "Shopify",
            "category": "unknown",
            "kind": "fetcher",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "Shopify-Captain-Hook"
            ],
            "primary_user_agent": "Shopify-Captain-Hook",
            "robots_token": "Shopify-Captain-Hook",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Shopify-Captain-Hook is a system used by Shopify to deliver webhooks. It sends automated messages to a user's server when specific events occur within their Shopify store.",
            "short_description": "Shopify-Captain-Hook is a system used by Shopify to deliver webhooks.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Shopify-Captain-Hook\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Shopify-Captain-Hook\")"
            },
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:47Z"
        },
        {
            "id": "shortwave-image-fetcher",
            "name": "Shortwave Image Fetcher",
            "slug": "shortwave-image-fetcher",
            "url": "https://botcrawl.com/bots/shortwave-image-fetcher/",
            "status": "active",
            "operator": "Shortwave Communications Inc.",
            "company": "Shortwave Communications Inc.",
            "family": "Shortwave Communications Inc.",
            "category": "scraper",
            "kind": "preview",
            "purpose": "general-crawl",
            "identity_type": "unknown",
            "user_agents": [
                "Shortwave Image Fetcher"
            ],
            "primary_user_agent": "Shortwave Image Fetcher",
            "robots_token": "Shortwave Image Fetcher",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "An email client that proxies all images found in HTML emails from to protect end customer's IP address and connection private",
            "short_description": "An email client that proxies all images found in HTML emails from to protect end customer's IP address and connection private",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Shortwave Image Fetcher\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Shortwave Image Fetcher\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.shortwave.com/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:48Z"
        },
        {
            "id": "silktidebot",
            "name": "SilktideBot",
            "slug": "silktidebot",
            "url": "https://botcrawl.com/bots/silktidebot/",
            "status": "active",
            "operator": "Silktide",
            "company": "Silktide",
            "family": "Silktide",
            "category": "monitoring",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "Silktide",
                "Mozilla/5.0 (compatible",
                "SilktideBot/1.1",
                "+https://help.silktide.com/en/articles/4606146-silktidebot-our-site-scanning-bot)"
            ],
            "primary_user_agent": "Silktide",
            "robots_token": "Silktide",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Website accessibility, SEO, and content quality scanner",
            "short_description": "Website accessibility, SEO, and content quality scanner",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Silktide\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Silktide\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://help.silktide.com/en/articles/4606146-silktidebot-our-site-scanning-bot"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:48Z"
        },
        {
            "id": "sistrix-crawler",
            "name": "Sistrix Crawler",
            "slug": "sistrix-crawler",
            "url": "https://botcrawl.com/bots/sistrix-crawler/",
            "status": "active",
            "operator": "Sistrix",
            "company": "Sistrix",
            "family": "Sistrix",
            "category": "search",
            "kind": "unknown",
            "purpose": "indexing",
            "identity_type": "unknown",
            "user_agents": [
                "SISTRIX Crawler",
                "Mozilla/5.0 (compatible",
                "SISTRIX Crawler/2.0",
                "+https://crawler.sistrix.net/)",
                "Sistrix Bot"
            ],
            "primary_user_agent": "SISTRIX Crawler",
            "robots_token": "SISTRIX Crawler",
            "verified": "unknown",
            "respects_robots": "yes",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "unknown",
            "source_type": "unknown",
            "short_description": "Sistrix SEO crawler that powers the Sistrix SEO toolbox and visibility index.",
            "verification_method": "No official IP-range or reverse-DNS verification method is documented here. Match the exact user-agent and request behavior against the source documentation at https://crawler.sistrix.net/.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: SISTRIX Crawler\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"SISTRIX Crawler\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://crawler.sistrix.net/"
                }
            ],
            "updated_at": "2026-04-01T00:55:36Z"
        },
        {
            "id": "site24x7",
            "name": "Site24x7",
            "slug": "site24x7",
            "url": "https://botcrawl.com/bots/site24x7/",
            "status": "active",
            "operator": "Site24x7",
            "company": "Site24x7",
            "family": "Site24x7",
            "category": "monitoring",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "site24x7",
                "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML",
                "like Gecko) Chrome/34.0.1847.137 Safari/537.36 (site24x7)",
                "site24x7.com",
                "Site24x7 monitor"
            ],
            "primary_user_agent": "site24x7",
            "robots_token": "site24x7",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Site24x7's global website monitoring probe.",
            "short_description": "Site24x7's global website monitoring probe.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: site24x7\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"site24x7\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.site24x7.com"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:48Z"
        },
        {
            "id": "siteauditbot",
            "name": "SiteAuditBot",
            "slug": "siteauditbot",
            "url": "https://botcrawl.com/bots/siteauditbot/",
            "status": "active",
            "operator": "Semrush",
            "company": "Semrush",
            "family": "Semrush",
            "category": "scraper",
            "kind": "crawler",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "SiteAuditBot",
                "Mozilla/5.0 (iPhone",
                "CPU iPhone OS 6_0 like Mac OS X) AppleWebKit/536.26 (KHTML",
                "like Gecko) Version/6.0 Mobile/10A5376e Safari/8536.25 (compatible",
                "SiteAuditBot/0.97",
                "+http://www.semrush.com/bot.html)",
                "Mozilla/5.0 (compatible",
                "Semrush site audit crawler"
            ],
            "primary_user_agent": "SiteAuditBot",
            "robots_token": "SiteAuditBot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Check for over 130 common website issues and get special reports about your site’s crawlability, use of markups, internal linking, speed/performance, HTTPS, and international SEO.",
            "short_description": "Check for over 130 common website issues and get special reports about your site’s crawlability, use of markups, internal linking, speed/performance, HTTPS, and…",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "ip_ranges_url": "https://www.semrush.com/bot/",
            "rules": {
                "robots": "User-agent: SiteAuditBot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"SiteAuditBot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.semrush.com/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:48Z"
        },
        {
            "id": "siteimprove-crawl",
            "name": "Siteimprove Crawl",
            "slug": "siteimprove-crawl",
            "url": "https://botcrawl.com/bots/siteimprove-crawl/",
            "status": "active",
            "operator": "Siteimprove",
            "company": "Siteimprove",
            "family": "Siteimprove",
            "category": "scraper",
            "kind": "crawler",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "SiteCheck-sitecrawl",
                "Mozilla/5.0 (compatible",
                "MSIE 10.0",
                "Windows NT 6.1",
                "Trident/6.0) SiteCheck-sitecrawl"
            ],
            "primary_user_agent": "SiteCheck-sitecrawl",
            "robots_token": "SiteCheck-sitecrawl",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Siteimprove content suite (i.e. Quality Assurance, Accessibility, Policy, and SEO). Crawls run on ports are 80 for HTTP and 443 for HTTPS.",
            "short_description": "Siteimprove content suite (i.e.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: SiteCheck-sitecrawl\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"SiteCheck-sitecrawl\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://support.siteimprove.com/hc/en-gb/articles/206345523-What-IP-addresses-and-user-agents-are-used-by-Siteimprove-"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:48Z"
        },
        {
            "id": "sitelock",
            "name": "SiteLock",
            "slug": "sitelock",
            "url": "https://botcrawl.com/bots/sitelock/",
            "status": "active",
            "operator": "SiteLock",
            "company": "SiteLock",
            "family": "SiteLock",
            "category": "security",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "SiteLockSpider"
            ],
            "primary_user_agent": "SiteLockSpider",
            "robots_token": "SiteLockSpider",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "The SiteLock Spider is a web scanning service scans websites for malware and malicious code.",
            "short_description": "The SiteLock Spider is a web scanning service scans websites for malware and malicious code.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: SiteLockSpider\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"SiteLockSpider\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "http://sitelock.com"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:48Z"
        },
        {
            "id": "sitesearch360",
            "name": "SiteSearch360",
            "slug": "sitesearch360",
            "url": "https://botcrawl.com/bots/sitesearch360/",
            "status": "active",
            "operator": "SEMKNOX GmbH",
            "company": "SEMKNOX GmbH",
            "family": "SEMKNOX GmbH",
            "category": "search",
            "kind": "crawler",
            "purpose": "indexing",
            "identity_type": "unknown",
            "user_agents": [
                "SiteSearch360",
                "Mozilla/5.0 (compatible",
                "SiteSearch360/1.0",
                "+https://sitesearch360.com/)"
            ],
            "primary_user_agent": "SiteSearch360",
            "robots_token": "SiteSearch360",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Site Search 360 is a popular Google Site Search replacement. Our crawler indexes content on our customers' sites for search.",
            "short_description": "Site Search 360 is a popular Google Site Search replacement.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: SiteSearch360\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"SiteSearch360\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.sitesearch360.com/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:48Z"
        },
        {
            "id": "siteuptimebot",
            "name": "SiteUpTimeBot",
            "slug": "siteuptimebot",
            "url": "https://botcrawl.com/bots/siteuptimebot/",
            "status": "active",
            "operator": "SiteUpTime",
            "company": "SiteUpTime",
            "family": "SiteUpTime",
            "category": "monitoring",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "SiteUptime",
                "SiteUptime.com"
            ],
            "primary_user_agent": "SiteUptime",
            "robots_token": "SiteUptime",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "SiteUpTimeBot is the HTTP monitoring probe for SiteUpTime.com.",
            "short_description": "SiteUpTimeBot is the HTTP monitoring probe for SiteUpTime.com.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: SiteUptime\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"SiteUptime\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "http://SiteUptime.com"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:48Z"
        },
        {
            "id": "skroutz-imagebot",
            "name": "Skroutz ImageBot",
            "slug": "skroutz-imagebot",
            "url": "https://botcrawl.com/bots/skroutz-imagebot/",
            "status": "active",
            "operator": "Skroutz S.A.",
            "company": "Skroutz S.A.",
            "family": "Skroutz S.A.",
            "category": "unknown",
            "kind": "unknown",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "Skroutz ImageBot",
                "Skroutz ImageBot v1"
            ],
            "primary_user_agent": "Skroutz ImageBot",
            "robots_token": "Skroutz ImageBot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Skroutz ImageBot to fetch the individual product images.",
            "short_description": "Skroutz ImageBot to fetch the individual product images.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Skroutz ImageBot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Skroutz ImageBot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.skroutz.gr/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:48Z"
        },
        {
            "id": "skroutzbot",
            "name": "SkroutzBot",
            "slug": "skroutzbot",
            "url": "https://botcrawl.com/bots/skroutzbot/",
            "status": "active",
            "operator": "Skroutz S.A.",
            "company": "Skroutz S.A.",
            "family": "Skroutz S.A.",
            "category": "search",
            "kind": "fetcher",
            "purpose": "indexing",
            "identity_type": "unknown",
            "user_agents": [
                "SkroutzBot",
                "SkroutzBot v1.0"
            ],
            "primary_user_agent": "SkroutzBot",
            "robots_token": "SkroutzBot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Skroutz uses SkroutzBot web crawler to download XML feeds.",
            "short_description": "Skroutz uses SkroutzBot web crawler to download XML feeds.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: SkroutzBot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"SkroutzBot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.skroutz.gr/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:48Z"
        },
        {
            "id": "skype",
            "name": "Skype",
            "slug": "skype",
            "url": "https://botcrawl.com/bots/skype/",
            "status": "active",
            "operator": "Skype",
            "company": "Skype",
            "family": "Skype",
            "category": "scraper",
            "kind": "preview",
            "purpose": "general-crawl",
            "identity_type": "unknown",
            "user_agents": [
                "SkypeUriPreview",
                "Mozilla/5.0 (Windows NT 6.1",
                "WOW64) SkypeUriPreview Preview/0.5",
                "Skype preview bot"
            ],
            "primary_user_agent": "SkypeUriPreview",
            "robots_token": "SkypeUriPreview",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Skype's URI Preview services fetches a page preview when someone posts a URL in a Skype message.",
            "short_description": "Skype's URI Preview services fetches a page preview when someone posts a URL in a Skype message.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: SkypeUriPreview\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"SkypeUriPreview\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.skype.com/en/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:48Z"
        },
        {
            "id": "slack-image-proxy",
            "name": "Slack Image Proxy",
            "slug": "slack-image-proxy",
            "url": "https://botcrawl.com/bots/slack-image-proxy/",
            "status": "active",
            "operator": "Slack",
            "company": "Slack",
            "family": "Slack",
            "category": "scraper",
            "kind": "preview",
            "purpose": "general-crawl",
            "identity_type": "unknown",
            "user_agents": [
                "Slack-ImgProxy",
                "Slack-ImgProxy (+https://api.slack.com/robots)",
                "Slack image proxy"
            ],
            "primary_user_agent": "Slack-ImgProxy",
            "robots_token": "Slack-ImgProxy",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "This robot is used to fetch and cache images posted into Slack channels.",
            "short_description": "This robot is used to fetch and cache images posted into Slack channels.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Slack-ImgProxy\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Slack-ImgProxy\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://api.slack.com/robots"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:48Z"
        },
        {
            "id": "slackbot",
            "name": "Slackbot",
            "slug": "slackbot",
            "url": "https://botcrawl.com/bots/slackbot/",
            "status": "active",
            "operator": "Slack",
            "company": "Slack",
            "family": "Slack",
            "category": "scraper",
            "kind": "preview",
            "purpose": "general-crawl",
            "identity_type": "official-documented",
            "user_agents": [
                "Slackbot",
                "Slackbot 1.0 (+https://api.slack.com/robots)",
                "Slack service bot",
                "Slackbot 1.0"
            ],
            "primary_user_agent": "Slackbot",
            "robots_token": "Slackbot",
            "verified": "yes",
            "respects_robots": "yes",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "high",
            "source_type": "official",
            "short_description": "Slack bot for integrations, webhooks, and Slack service fetches.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: Slackbot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Slackbot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://api.slack.com/robots"
                }
            ],
            "last_verified": "2026-04-01",
            "updated_at": "2026-04-30T09:15:48Z"
        },
        {
            "id": "slackbot-linkexpanding",
            "name": "Slackbot-LinkExpanding",
            "slug": "slackbot-linkexpanding",
            "url": "https://botcrawl.com/bots/slackbot-linkexpanding/",
            "status": "active",
            "operator": "Slack",
            "company": "Slack",
            "family": "Slack",
            "category": "scraper",
            "kind": "preview",
            "purpose": "general-crawl",
            "identity_type": "official-documented",
            "user_agents": [
                "Slackbot-LinkExpanding",
                "Slackbot-LinkExpanding 1.0 (+https://api.slack.com/robots)",
                "Slack link preview bot"
            ],
            "primary_user_agent": "Slackbot-LinkExpanding",
            "robots_token": "Slackbot-LinkExpanding",
            "verified": "yes",
            "respects_robots": "yes",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "high",
            "source_type": "official",
            "short_description": "Slack crawler for link unfurling and preview metadata.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: Slackbot-LinkExpanding\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Slackbot-LinkExpanding\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://api.slack.com/robots"
                }
            ],
            "last_verified": "2026-04-01",
            "updated_at": "2026-04-30T09:15:48Z"
        },
        {
            "id": "slickstream",
            "name": "Slickstream",
            "slug": "slickstream",
            "url": "https://botcrawl.com/bots/slickstream/",
            "status": "active",
            "operator": "Slickstream",
            "company": "Slickstream",
            "family": "Slickstream",
            "category": "scraper",
            "kind": "crawler",
            "purpose": "general-crawl",
            "identity_type": "unknown",
            "user_agents": [
                "SlickBot",
                "Mozilla/5.0 (compatible",
                "SlickBot/1.0",
                "+http://slickstream.com)"
            ],
            "primary_user_agent": "SlickBot",
            "robots_token": "SlickBot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Slickstream is a SaaS that indexes our customer's websites (with their approval) in order to provide engagement features for their site visitors, including site search, content recommendations, etc.",
            "short_description": "Slickstream is a SaaS that indexes our customer's websites (with their approval) in order to provide engagement features for their site visitors, including site search,…",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: SlickBot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"SlickBot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.slickstream.com/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:48Z"
        },
        {
            "id": "slopsearch",
            "name": "slopsearch",
            "slug": "slopsearch",
            "url": "https://botcrawl.com/bots/slopsearch/",
            "status": "active",
            "operator": "Christopher Albert",
            "company": "Christopher Albert",
            "family": "Christopher",
            "category": "ai",
            "kind": "agent",
            "purpose": "user-triggered",
            "identity_type": "unknown",
            "user_agents": [
                "slopsearch",
                "slopsearch/1.0 (+https://search.sloppy.at/about)"
            ],
            "primary_user_agent": "slopsearch",
            "robots_token": "slopsearch",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Research web crawler that indexes publicly available documents. Respects robots.txt and rate limits.",
            "short_description": "Research web crawler that indexes publicly available documents. Respects robots.txt and rate limits.",
            "verification_method": "Validate the published identifier against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or stronger verification when available.",
            "rules": {
                "robots": "User-agent: slopsearch\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"slopsearch\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://sloppy.at"
                }
            ],
            "last_verified": "2026-04-19",
            "last_checked": "2026-04-19",
            "updated_at": "2026-04-30T09:15:54Z"
        },
        {
            "id": "slurp",
            "name": "Slurp",
            "slug": "slurp",
            "url": "https://botcrawl.com/bots/slurp/",
            "status": "active",
            "operator": "Yahoo",
            "company": "Yahoo",
            "family": "Yahoo",
            "category": "search",
            "kind": "unknown",
            "purpose": "indexing",
            "identity_type": "unknown",
            "user_agents": [
                "Slurp",
                "Mozilla/5.0 (compatible",
                "Yahoo! Slurp",
                "http://help.yahoo.com/help/us/ysearch/slurp)",
                "Yahoo Slurp",
                "Yahoo Bot"
            ],
            "primary_user_agent": "Slurp",
            "robots_token": "Slurp",
            "verified": "yes",
            "respects_robots": "yes",
            "risk": "neutral",
            "recommended_action": "allow",
            "confidence": "unknown",
            "source_type": "unknown",
            "short_description": "Yahoo Search web crawler (powered by Bing) for indexing web content.",
            "verification_method": "Treat this entry as verified only when the exact user-agent matches the operator documentation at https://help.yahoo.com/help/us/ysearch/slurp.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: Slurp\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Slurp\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://help.yahoo.com/help/us/ysearch/slurp"
                }
            ],
            "updated_at": "2026-04-01T00:55:30Z"
        },
        {
            "id": "smarshbot",
            "name": "SmarshBot",
            "slug": "smarshbot",
            "url": "https://botcrawl.com/bots/smarshbot/",
            "status": "active",
            "operator": "https://central.smarsh.com/s/",
            "company": "https://central.smarsh.com/s/",
            "family": "https://central.smarsh.com/s/",
            "category": "security",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "SmarshBot",
                "SmarshBot/1.0"
            ],
            "primary_user_agent": "SmarshBot",
            "robots_token": "SmarshBot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Providing archiving solutions to clients for compliance purposes",
            "short_description": "Providing archiving solutions to clients for compliance purposes",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: SmarshBot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"SmarshBot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.smarsh.com/platform/compliance-management/web-archive"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:48Z"
        },
        {
            "id": "smartologybot",
            "name": "SmartologyBot",
            "slug": "smartologybot",
            "url": "https://botcrawl.com/bots/smartologybot/",
            "status": "active",
            "operator": "Smartology",
            "company": "Smartology",
            "family": "Smartology",
            "category": "scraper",
            "kind": "crawler",
            "purpose": "general-crawl",
            "identity_type": "unknown",
            "user_agents": [
                "SmartologyBot",
                "SmartologyBot/1.0 (+http://www.smartology.net/smartologybot)"
            ],
            "primary_user_agent": "SmartologyBot",
            "robots_token": "SmartologyBot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "The Smartology crawler generates semantic vectors from domain pages in order to serve semantically-relevant ads on those pages",
            "short_description": "The Smartology crawler generates semantic vectors from domain pages in order to serve semantically-relevant ads on those pages",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: SmartologyBot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"SmartologyBot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://smartology.net/smartologybot/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:48Z"
        },
        {
            "id": "smtnet-pm-bot",
            "name": "SMTnet PM Bot",
            "slug": "smtnet-pm-bot",
            "url": "https://botcrawl.com/bots/smtnet-pm-bot/",
            "status": "active",
            "operator": "SMTnet",
            "company": "SMTnet",
            "family": "SMTnet",
            "category": "search",
            "kind": "crawler",
            "purpose": "indexing",
            "identity_type": "unknown",
            "user_agents": [
                "SMTnetPMBot",
                "SMTnetPMBot/Nutch-1.16",
                "SMTnetPMBot/"
            ],
            "primary_user_agent": "SMTnetPMBot",
            "robots_token": "SMTnetPMBot",
            "verified": "yes",
            "respects_robots": "yes",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Crawls partner companies' websites to include them in our on-site search engine.",
            "short_description": "Crawls partner companies' websites to include them in our on-site search engine.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: SMTnetPMBot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"SMTnetPMBot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://smtnet.com/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:48Z"
        },
        {
            "id": "snipcart",
            "name": "Snipcart",
            "slug": "snipcart",
            "url": "https://botcrawl.com/bots/snipcart/",
            "status": "active",
            "operator": "Snipcart",
            "company": "Snipcart",
            "family": "Snipcart",
            "category": "unknown",
            "kind": "fetcher",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "Snipcart/1.0"
            ],
            "primary_user_agent": "Snipcart/1.0",
            "robots_token": "Snipcart/1.0",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Snipcart is an e-commerce solution for developers.",
            "short_description": "Snipcart is an e-commerce solution for developers.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Snipcart/1.0\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Snipcart/1.0\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://snipcart.com"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:48Z"
        },
        {
            "id": "sogou-blog",
            "name": "Sogou blog",
            "slug": "sogou-blog",
            "url": "https://botcrawl.com/bots/sogou-blog/",
            "status": "active",
            "operator": "Sogou",
            "company": "Sogou",
            "family": "Sogou",
            "category": "search",
            "kind": "unknown",
            "purpose": "general-crawl",
            "identity_type": "unknown",
            "user_agents": [
                "Sogou blog"
            ],
            "primary_user_agent": "Sogou blog",
            "robots_token": "Sogou blog",
            "verified": "unknown",
            "respects_robots": "unknown",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "unknown",
            "source_type": "unknown",
            "short_description": "Sogou blog crawler.",
            "verification_method": "This token is surfaced in a Sogou-owned robots.txt file, but the source used for this entry does not publish a dedicated verification method. Confirm behavior conservatively and do not rely on the user-agent string alone.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: Sogou blog\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Sogou blog\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://weixin.sogou.com/robots.txt"
                }
            ],
            "updated_at": "2026-04-01T01:04:47Z"
        },
        {
            "id": "sogou-inst-spider",
            "name": "Sogou inst spider",
            "slug": "sogou-inst-spider",
            "url": "https://botcrawl.com/bots/sogou-inst-spider/",
            "status": "active",
            "operator": "Sogou",
            "company": "Sogou",
            "family": "Sogou",
            "category": "search",
            "kind": "unknown",
            "purpose": "general-crawl",
            "identity_type": "unknown",
            "user_agents": [
                "Sogou inst spider"
            ],
            "primary_user_agent": "Sogou inst spider",
            "robots_token": "Sogou inst spider",
            "verified": "unknown",
            "respects_robots": "unknown",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "unknown",
            "source_type": "unknown",
            "short_description": "Sogou indexed-content spider.",
            "verification_method": "This token is surfaced in a Sogou-owned robots.txt file, but the source used for this entry does not publish a dedicated verification method. Confirm behavior conservatively and do not rely on the user-agent string alone.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: Sogou inst spider\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Sogou inst spider\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://weixin.sogou.com/robots.txt"
                }
            ],
            "updated_at": "2026-04-01T01:04:47Z"
        },
        {
            "id": "sogou-news-spider",
            "name": "Sogou News Spider",
            "slug": "sogou-news-spider",
            "url": "https://botcrawl.com/bots/sogou-news-spider/",
            "status": "active",
            "operator": "Sogou",
            "company": "Sogou",
            "family": "Sogou",
            "category": "search",
            "kind": "unknown",
            "purpose": "general-crawl",
            "identity_type": "unknown",
            "user_agents": [
                "Sogou News Spider"
            ],
            "primary_user_agent": "Sogou News Spider",
            "robots_token": "Sogou News Spider",
            "verified": "unknown",
            "respects_robots": "unknown",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "unknown",
            "source_type": "unknown",
            "short_description": "Sogou news crawler.",
            "verification_method": "This token is surfaced in a Sogou-owned robots.txt file, but the source used for this entry does not publish a dedicated verification method. Confirm behavior conservatively and do not rely on the user-agent string alone.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: Sogou News Spider\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Sogou News Spider\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://weixin.sogou.com/robots.txt"
                }
            ],
            "updated_at": "2026-04-01T01:04:47Z"
        },
        {
            "id": "sogou-orion-spider",
            "name": "Sogou Orion spider",
            "slug": "sogou-orion-spider",
            "url": "https://botcrawl.com/bots/sogou-orion-spider/",
            "status": "active",
            "operator": "Sogou",
            "company": "Sogou",
            "family": "Sogou",
            "category": "search",
            "kind": "unknown",
            "purpose": "general-crawl",
            "identity_type": "unknown",
            "user_agents": [
                "Sogou Orion spider"
            ],
            "primary_user_agent": "Sogou Orion spider",
            "robots_token": "Sogou Orion spider",
            "verified": "unknown",
            "respects_robots": "unknown",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "unknown",
            "source_type": "unknown",
            "short_description": "Sogou orion crawler.",
            "verification_method": "This token is surfaced in a Sogou-owned robots.txt file, but the source used for this entry does not publish a dedicated verification method. Confirm behavior conservatively and do not rely on the user-agent string alone.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: Sogou Orion spider\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Sogou Orion spider\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://weixin.sogou.com/robots.txt"
                }
            ],
            "updated_at": "2026-04-01T01:04:48Z"
        },
        {
            "id": "sogou-spider2",
            "name": "Sogou spider2",
            "slug": "sogou-spider2",
            "url": "https://botcrawl.com/bots/sogou-spider2/",
            "status": "active",
            "operator": "Sogou",
            "company": "Sogou",
            "family": "Sogou",
            "category": "search",
            "kind": "unknown",
            "purpose": "general-crawl",
            "identity_type": "unknown",
            "user_agents": [
                "Sogou spider2"
            ],
            "primary_user_agent": "Sogou spider2",
            "robots_token": "Sogou spider2",
            "verified": "unknown",
            "respects_robots": "unknown",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "unknown",
            "source_type": "unknown",
            "short_description": "Sogou secondary crawler token.",
            "verification_method": "This token is surfaced in a Sogou-owned robots.txt file, but the source used for this entry does not publish a dedicated verification method. Confirm behavior conservatively and do not rely on the user-agent string alone.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: Sogou spider2\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Sogou spider2\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://weixin.sogou.com/robots.txt"
                }
            ],
            "updated_at": "2026-04-01T01:04:47Z"
        },
        {
            "id": "sogou-web-spider",
            "name": "Sogou web spider",
            "slug": "sogou-web-spider",
            "url": "https://botcrawl.com/bots/sogou-web-spider/",
            "status": "active",
            "operator": "Sogou",
            "company": "Sogou",
            "family": "Sogou",
            "category": "search",
            "kind": "unknown",
            "purpose": "indexing",
            "identity_type": "unknown",
            "user_agents": [
                "Sogou web spider",
                "Mozilla/5.0 (compatible",
                "Sogou web spider/4.0",
                "+http://www.sogou.com/docs/help/webmasters.htm)",
                "Sogou Spider",
                "Sogou Bot"
            ],
            "primary_user_agent": "Sogou web spider",
            "robots_token": "Sogou web spider",
            "verified": "unknown",
            "respects_robots": "yes",
            "risk": "high",
            "recommended_action": "depends",
            "confidence": "unknown",
            "source_type": "unknown",
            "short_description": "Sogou Chinese search engine web crawler for indexing content.",
            "verification_method": "Verify the exact user-agent and confirm ownership against Sogou's published crawler documentation where available.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: Sogou web spider\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Sogou web spider\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.sogou.com/docs/help/webmasters.htm"
                }
            ],
            "updated_at": "2026-04-01T00:55:29Z"
        },
        {
            "id": "sogoubot",
            "name": "Sogoubot",
            "slug": "sogoubot",
            "url": "https://botcrawl.com/bots/sogoubot/",
            "status": "active",
            "operator": "Sogou",
            "company": "Sogou",
            "family": "Sogou",
            "category": "search",
            "kind": "unknown",
            "purpose": "indexing",
            "identity_type": "unknown",
            "user_agents": [
                "Sogoubot",
                "Mozilla/5.0 (compatible",
                "+http://www.sogou.com/docs/help/webmasters.htm)",
                "Sogou Mobile Bot"
            ],
            "primary_user_agent": "Sogoubot",
            "robots_token": "Sogoubot",
            "verified": "unknown",
            "respects_robots": "yes",
            "risk": "high",
            "recommended_action": "depends",
            "confidence": "unknown",
            "source_type": "unknown",
            "short_description": "Sogou mobile web crawler for indexing mobile web content.",
            "verification_method": "Verify the exact user-agent and confirm ownership against Sogou's published crawler documentation where available.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: Sogoubot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Sogoubot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.sogou.com/docs/help/webmasters.htm"
                }
            ],
            "updated_at": "2026-04-01T00:55:29Z"
        },
        {
            "id": "solarwinds-observability",
            "name": "SolarWinds Observability",
            "slug": "solarwinds-observability",
            "url": "https://botcrawl.com/bots/solarwinds-observability/",
            "status": "active",
            "operator": "SolarWinds",
            "company": "SolarWinds",
            "family": "SolarWinds",
            "category": "monitoring",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "solarwinds",
                "solarwinds/1.0 ( www.solarwinds.com/solarwinds-observability)"
            ],
            "primary_user_agent": "solarwinds",
            "robots_token": "solarwinds",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Default header that identifies SolarWinds Observability robots that are used for our synthetic monitoring.",
            "short_description": "Default header that identifies SolarWinds Observability robots that are used for our synthetic monitoring.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: solarwinds\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"solarwinds\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://documentation.solarwinds.com/en/success_center/observability/content/get-started/dem_getting_started_guide.htm"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:48Z"
        },
        {
            "id": "sora-caisse-pos",
            "name": "Sora Caisse POS",
            "slug": "sora-caisse-pos",
            "url": "https://botcrawl.com/bots/sora-caisse-pos/",
            "status": "active",
            "operator": "Sora Caisse",
            "company": "Sora Caisse",
            "family": "Sora Caisse POS",
            "category": "unknown",
            "kind": "unknown",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "Sora POS",
                "Sora POS/1.0 (Sora Websoft)"
            ],
            "primary_user_agent": "Sora POS",
            "robots_token": "Sora POS",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "POS software connected to a PrestaShop e-commerce website",
            "short_description": "POS software connected to a PrestaShop e-commerce website",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Sora POS\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Sora POS\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.sora-caisse.com/sora.pdf"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:48Z"
        },
        {
            "id": "sosospider",
            "name": "Sosospider",
            "slug": "sosospider",
            "url": "https://botcrawl.com/bots/sosospider/",
            "status": "active",
            "operator": "Soso",
            "company": "Soso",
            "family": "Soso",
            "category": "search",
            "kind": "unknown",
            "purpose": "general-crawl",
            "identity_type": "unknown",
            "user_agents": [
                "Sosospider"
            ],
            "primary_user_agent": "Sosospider",
            "robots_token": "Sosospider",
            "verified": "unknown",
            "respects_robots": "unknown",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "unknown",
            "source_type": "unknown",
            "short_description": "Soso crawler token surfaced in Sogou robots.txt.",
            "verification_method": "This token is surfaced in a Sogou-owned robots.txt file, but the source used for this entry does not publish a dedicated verification method. Confirm behavior conservatively and do not rely on the user-agent string alone.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: Sosospider\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Sosospider\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://weixin.sogou.com/robots.txt"
                }
            ],
            "updated_at": "2026-04-01T01:04:48Z"
        },
        {
            "id": "spark-shipping",
            "name": "Spark Shipping",
            "slug": "spark-shipping",
            "url": "https://botcrawl.com/bots/spark-shipping/",
            "status": "active",
            "operator": "Spark Shipping",
            "company": "Spark Shipping",
            "family": "Spark Shipping",
            "category": "unknown",
            "kind": "fetcher",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "SparkShipping"
            ],
            "primary_user_agent": "SparkShipping",
            "robots_token": "SparkShipping",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Spark Shipping is eCommerce automation software for retailers running WooCommerce",
            "short_description": "Spark Shipping is eCommerce automation software for retailers running WooCommerce",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: SparkShipping\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"SparkShipping\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.sparkshipping.com"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:48Z"
        },
        {
            "id": "sparkpostbot",
            "name": "SparkpostBot",
            "slug": "sparkpostbot",
            "url": "https://botcrawl.com/bots/sparkpostbot/",
            "status": "active",
            "operator": "Bird",
            "company": "Bird",
            "family": "Bird",
            "category": "unknown",
            "kind": "fetcher",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "SparkPost"
            ],
            "primary_user_agent": "SparkPost",
            "robots_token": "SparkPost",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "SparkPost webhook integration is used for automating email transactions on web server events.",
            "short_description": "SparkPost webhook integration is used for automating email transactions on web server events.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: SparkPost\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"SparkPost\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.sparkpost.com/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:48Z"
        },
        {
            "id": "spectate",
            "name": "Spectate",
            "slug": "spectate",
            "url": "https://botcrawl.com/bots/spectate/",
            "status": "active",
            "operator": "Spectate",
            "company": "Spectate",
            "family": "Spectate",
            "category": "monitoring",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "Spectate",
                "Spectate/1.0 (+https://docs.spectate.net/faq/uptime-monitor-bot)"
            ],
            "primary_user_agent": "Spectate",
            "robots_token": "Spectate",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Uptime monitoring bot for healthchecks",
            "short_description": "Uptime monitoring bot for healthchecks",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Spectate\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Spectate\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://docs.spectate.net/faq/uptime-monitor-bot"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:49Z"
        },
        {
            "id": "speedy-spider",
            "name": "Speedy Spider",
            "slug": "speedy-spider",
            "url": "https://botcrawl.com/bots/speedy-spider/",
            "status": "active",
            "operator": "Entireweb",
            "company": "Entireweb",
            "family": "Entireweb",
            "category": "search",
            "kind": "unknown",
            "purpose": "indexing",
            "identity_type": "unknown",
            "user_agents": [
                "Speedy Spider",
                "Mozilla/5.0 (compatible",
                "+http://www.entireweb.com/about/search_tech/speedy_spider/)",
                "Entireweb Spider"
            ],
            "primary_user_agent": "Speedy Spider",
            "robots_token": "Speedy Spider",
            "verified": "unknown",
            "respects_robots": "yes",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "unknown",
            "source_type": "unknown",
            "short_description": "Entireweb Speedy Spider web crawler for indexing web content for Entireweb search.",
            "verification_method": "No official IP-range or reverse-DNS verification method is documented here. Match the exact user-agent and request behavior against the source documentation at https://www.entireweb.com/about/search_tech/speedy_spider/.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: Speedy Spider\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Speedy Spider\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.entireweb.com/about/search_tech/speedy_spider/"
                }
            ],
            "updated_at": "2026-04-01T00:55:29Z"
        },
        {
            "id": "spider",
            "name": "Spider",
            "slug": "spider",
            "url": "https://botcrawl.com/bots/spider/",
            "status": "active",
            "operator": "Spider",
            "company": "Spider",
            "family": "Spider",
            "category": "ai",
            "kind": "unknown",
            "purpose": "user-triggered",
            "identity_type": "unknown",
            "user_agents": [
                "Spider",
                "Mozilla/5.0 (compatible",
                "Spider/1.0",
                "+https://spider.cloud)",
                "Spider Bot"
            ],
            "primary_user_agent": "Spider",
            "robots_token": "Spider",
            "verified": "unknown",
            "respects_robots": "unknown",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "unknown",
            "source_type": "unknown",
            "short_description": "AI-focused web crawler designed for AI agents, LLMs, RAG systems, and data analysis.",
            "verification_method": "No official IP-range or reverse-DNS verification method is documented here. Match the exact user-agent and request behavior against the source documentation at https://spider.cloud.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: Spider\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Spider\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://spider.cloud"
                }
            ],
            "updated_at": "2026-04-01T00:55:18Z"
        },
        {
            "id": "spinn3r",
            "name": "Spinn3r",
            "slug": "spinn3r",
            "url": "https://botcrawl.com/bots/spinn3r/",
            "status": "active",
            "operator": "Spinn3r",
            "company": "Spinn3r",
            "family": "Spinn3r",
            "category": "search",
            "kind": "unknown",
            "purpose": "indexing",
            "identity_type": "unknown",
            "user_agents": [
                "Spinn3r",
                "Mozilla/5.0 (compatible; Spinn3r/3.1; +http://spinn3r.com/robot)",
                "Spinn3r Bot"
            ],
            "primary_user_agent": "Spinn3r",
            "robots_token": "Spinn3r",
            "verified": "unknown",
            "respects_robots": "unknown",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "unknown",
            "source_type": "unknown",
            "short_description": "Spinn3r content aggregation crawler that indexes blog posts and news articles.",
            "verification_method": "No official IP-range or reverse-DNS verification method is documented here. Match the exact user-agent and request behavior against the source documentation at https://spinn3r.com.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: Spinn3r\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Spinn3r\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://spinn3r.com"
                }
            ],
            "updated_at": "2026-04-01T00:55:37Z"
        },
        {
            "id": "splitsignalbot",
            "name": "SplitSignalBot",
            "slug": "splitsignalbot",
            "url": "https://botcrawl.com/bots/splitsignalbot/",
            "status": "active",
            "operator": "Semrush",
            "company": "Semrush",
            "family": "Semrush",
            "category": "scraper",
            "kind": "fetcher",
            "purpose": "site-owner-fetch",
            "identity_type": "official-documented",
            "user_agents": [
                "SplitSignalBot",
                "SplitSignal test crawler"
            ],
            "primary_user_agent": "SplitSignalBot",
            "robots_token": "SplitSignalBot",
            "verified": "yes",
            "respects_robots": "yes",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "high",
            "source_type": "official",
            "short_description": "Crawler for SplitSignal SEO testing workflows.",
            "verification_method": "Identify the documented Semrush token and compare behavior against Semrush's official bot documentation.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "ip_ranges_url": "https://www.semrush.com/bot/",
            "rules": {
                "robots": "User-agent: SplitSignalBot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"SplitSignalBot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.semrush.com/bot/"
                }
            ],
            "last_verified": "2026-04-01",
            "updated_at": "2026-04-30T09:15:49Z"
        },
        {
            "id": "splunk",
            "name": "Splunk",
            "slug": "splunk",
            "url": "https://botcrawl.com/bots/splunk/",
            "status": "active",
            "operator": "Splunk",
            "company": "Splunk",
            "family": "Splunk",
            "category": "monitoring",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "Rigor",
                "Mozilla/5.0 (X11; Linux x86_64; Rigor) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.75 Safari/537.36"
            ],
            "primary_user_agent": "Rigor",
            "robots_token": "Rigor",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Synthetics monitoring platform used by Enterprise organizations.",
            "short_description": "Synthetics monitoring platform used by Enterprise organizations.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Rigor\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Rigor\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.splunk.com/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:49Z"
        },
        {
            "id": "splunk-attack-analyzer",
            "name": "Splunk Attack Analyzer",
            "slug": "splunk-attack-analyzer",
            "url": "https://botcrawl.com/bots/splunk-attack-analyzer/",
            "status": "active",
            "operator": "Splunk",
            "company": "Splunk",
            "family": "Splunk",
            "category": "security",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "TwinWaveScanner"
            ],
            "primary_user_agent": "TwinWaveScanner",
            "robots_token": "TwinWaveScanner",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Splunk Attack Analyzer (formerly known as TwinWave), visits URLs submitted by customers using a headless Chrome browser. DOM (Document Object Model), HAR (HTTP Archive), and other relevant data from these visits are analyzed to determine if the page is hosting malicious content.",
            "short_description": "Splunk Attack Analyzer (formerly known as TwinWave), visits URLs submitted by customers using a headless Chrome browser.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: TwinWaveScanner\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"TwinWaveScanner\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.splunk.com/en_us/products/attack-analyzer.html"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:49Z"
        },
        {
            "id": "splunk-synthetics",
            "name": "Splunk Synthetics",
            "slug": "splunk-synthetics",
            "url": "https://botcrawl.com/bots/splunk-synthetics/",
            "status": "active",
            "operator": "Splunk",
            "company": "Splunk",
            "family": "Splunk",
            "category": "monitoring",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "Splunk Synthetics",
                "Mozilla/5.0 (X11; Linux x86_64; Splunk Synthetics) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.6533.88 Safari/537.36"
            ],
            "primary_user_agent": "Splunk Synthetics",
            "robots_token": "Splunk Synthetics",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Synthetic monitoring tool with global agents running in AWS",
            "short_description": "Synthetic monitoring tool with global agents running in AWS",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Splunk Synthetics\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Splunk Synthetics\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://docs.splunk.com/observability/en/synthetics/intro-synthetics.html"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:49Z"
        },
        {
            "id": "spyglasses-bot",
            "name": "Spyglasses Bot",
            "slug": "spyglasses-bot",
            "url": "https://botcrawl.com/bots/spyglasses-bot/",
            "status": "active",
            "operator": "Spyglasses",
            "company": "Spyglasses",
            "family": "Spyglasses",
            "category": "ai",
            "kind": "agent",
            "purpose": "user-triggered",
            "identity_type": "unknown",
            "user_agents": [
                "SpyglassesBot",
                "Mozilla/5.0 (compatible; SpyglassesBot/1.0; +https://spyglasses.io/docs/help/spyglasses-bot) Brand Consistency Checker",
                "Mozilla/5.0 (compatible; SpyglassesBot/1.0; +https://spyglasses.io/docs/help/spyglasses-bot) AI Readiness Audit",
                "Mozilla/5.0 (compatible; SpyglassesBot/1.0; +https://spyglasses.io/docs/help/spyglasses-bot) FAQ Generation",
                "Mozilla/5.0 (compatible; SpyglassesBot/1.0; +https://spyglasses.io/docs/help/spyglasses-bot) AI Visibility Report",
                "Mozilla/5.0 (compatible; SpyglassesBot/1.0; +https://spyglasses.io/docs/help/spyglasses-bot) Page Title Fetcher",
                "Mozilla/5.0 (compatible; SpyglassesBot/1.0; +https://spyglasses.io/docs/help/spyglasses-bot) Citation Resolver",
                "Mozilla/5.0 (compatible; SpyglassesBot/1.0; +https://spyglasses.io/docs/help/spyglasses-bot) AI Access Checker"
            ],
            "primary_user_agent": "SpyglassesBot",
            "robots_token": "SpyglassesBot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Spyglasses Bot evaluates website content to identify issues that would affect visibility by AI assistants.",
            "short_description": "Spyglasses Bot evaluates website content to identify issues that would affect visibility by AI assistants.",
            "verification_method": "Validate the published identifier against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or stronger verification when available.",
            "rules": {
                "robots": "User-agent: SpyglassesBot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"SpyglassesBot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.spyglasses.io/docs/help/spyglasses-bot"
                }
            ],
            "last_verified": "2026-04-19",
            "last_checked": "2026-04-19",
            "updated_at": "2026-04-30T09:15:54Z"
        },
        {
            "id": "stably",
            "name": "Stably",
            "slug": "stably",
            "url": "https://botcrawl.com/bots/stably/",
            "status": "active",
            "operator": "Stably",
            "company": "Stably",
            "family": "Stably",
            "category": "monitoring",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "stably"
            ],
            "primary_user_agent": "stably",
            "robots_token": "stably",
            "verified": "yes",
            "respects_robots": "unknown",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Runs functionality tests and user-flow regression checks on websites.",
            "short_description": "QA testing bot for end-to-end website checks.",
            "verification_method": "Verified on bots.fyi. Validate identifiers before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: stably\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"stably\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://bots.fyi/d/stably"
                },
                {
                    "type": "operator",
                    "url": "https://docs.stably.ai/"
                }
            ],
            "last_verified": "2026-04-19",
            "last_checked": "2026-04-19",
            "updated_at": "2026-04-30T09:15:49Z"
        },
        {
            "id": "stape-scanner",
            "name": "Stape Scanner",
            "slug": "stape-scanner",
            "url": "https://botcrawl.com/bots/stape-scanner/",
            "status": "active",
            "operator": "Stape Inc",
            "company": "Stape Inc",
            "family": "Stape Inc",
            "category": "monitoring",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "Stape",
                "Stape/1.0.0"
            ],
            "primary_user_agent": "Stape",
            "robots_token": "Stape",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Stape Scanner monitoring configuration of tracking tags.",
            "short_description": "Stape Scanner monitoring configuration of tracking tags.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Stape\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Stape\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://stape.io/helpdesk/documentation/stape-scanner"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:49Z"
        },
        {
            "id": "statabot",
            "name": "Statabot",
            "slug": "statabot",
            "url": "https://botcrawl.com/bots/statabot/",
            "status": "active",
            "operator": "StataCorp LLC",
            "company": "StataCorp LLC",
            "family": "StataCorp LLC",
            "category": "scraper",
            "kind": "crawler",
            "purpose": "indexing",
            "identity_type": "unknown",
            "user_agents": [
                "Statabot",
                "Statabot/1.0 https://www.stata.com/support/statabot"
            ],
            "primary_user_agent": "Statabot",
            "robots_token": "Statabot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Statabot searches for stata.toc files and indexes their contents",
            "short_description": "Statabot searches for stata.toc files and indexes their contents",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Statabot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Statabot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.stata.com/support/statabot"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:49Z"
        },
        {
            "id": "statistikaustria",
            "name": "StatistikAustria",
            "slug": "statistikaustria",
            "url": "https://botcrawl.com/bots/statistikaustria/",
            "status": "active",
            "operator": "Statistik Bot",
            "company": "Statistik Bot",
            "family": "Statistik Bot",
            "category": "unknown",
            "kind": "unknown",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "StatistikAustria",
                "Mozilla/5.0 (compatible; StatistikAustria/1.0; +http://www.statistik.gv.at)"
            ],
            "primary_user_agent": "StatistikAustria",
            "robots_token": "StatistikAustria",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Bot to collect product prices for the official consumer price index of Austria",
            "short_description": "Bot to collect product prices for the official consumer price index of Austria",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: StatistikAustria\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"StatistikAustria\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.statistik.at/ueber-uns/innovationen-und-experimentelle-statistik/einsatz-von-kassenscannerdaten-und-webscraping-in-der-preisstatistik"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:49Z"
        },
        {
            "id": "statsdronebot",
            "name": "StatsDroneBot",
            "slug": "statsdronebot",
            "url": "https://botcrawl.com/bots/statsdronebot/",
            "status": "active",
            "operator": "StatsDrone",
            "company": "StatsDrone",
            "family": "StatsDrone",
            "category": "scraper",
            "kind": "crawler",
            "purpose": "indexing",
            "identity_type": "unknown",
            "user_agents": [
                "StatsDroneBot",
                "StatsDroneBot (https://statsdrone.com/statsdrone-bot-documentation/) Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36"
            ],
            "primary_user_agent": "StatsDroneBot",
            "robots_token": "StatsDroneBot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "The StatsDrone affiliate marketing statistics scraping and aggregating tool",
            "short_description": "The StatsDrone affiliate marketing statistics scraping and aggregating tool",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: StatsDroneBot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"StatsDroneBot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://statsdrone.com/statsdrone-bot-documentation/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:49Z"
        },
        {
            "id": "statuscake",
            "name": "StatusCake",
            "slug": "statuscake",
            "url": "https://botcrawl.com/bots/statuscake/",
            "status": "active",
            "operator": "StatusCake",
            "company": "StatusCake",
            "family": "StatusCake",
            "category": "monitoring",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "statuscake",
                "statuscake uptime robot",
                "StatusCake uptime robot"
            ],
            "primary_user_agent": "statuscake",
            "robots_token": "statuscake",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "The StatusCake uptime monitor is used to monitor webpage availability and performance.",
            "short_description": "The StatusCake uptime monitor is used to monitor webpage availability and performance.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: statuscake\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"statuscake\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.statuscake.com/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:49Z"
        },
        {
            "id": "steam-chat",
            "name": "Steam Chat",
            "slug": "steam-chat",
            "url": "https://botcrawl.com/bots/steam-chat/",
            "status": "active",
            "operator": "Valve Software",
            "company": "Valve Software",
            "family": "Valve Software",
            "category": "scraper",
            "kind": "preview",
            "purpose": "general-crawl",
            "identity_type": "unknown",
            "user_agents": [
                "SteamChat",
                "Valve/Steam HTTP Client 1.0 (SteamChatURLLookup)",
                "SteamChatImageProxy"
            ],
            "primary_user_agent": "SteamChat",
            "robots_token": "SteamChat",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "The Steam Chat bot fetches previews of URLs shared within the Steam client's chat feature.",
            "short_description": "The Steam Chat bot fetches previews of URLs shared within the Steam client's chat feature.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: SteamChat\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"SteamChat\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://store.steampowered.com"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:49Z"
        },
        {
            "id": "stripe",
            "name": "Stripe",
            "slug": "stripe",
            "url": "https://botcrawl.com/bots/stripe/",
            "status": "active",
            "operator": "Stripe",
            "company": "Stripe",
            "family": "Stripe",
            "category": "unknown",
            "kind": "fetcher",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "Stripe/1.0",
                "Stripe/1.0 (+https://stripe.com/docs/webhooks)"
            ],
            "primary_user_agent": "Stripe/1.0",
            "robots_token": "Stripe/1.0",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "The Stripe Webhooks service allows Stripe to push real-time event data to customers' application webhook endpoint when events happen in their Stripe account.",
            "short_description": "The Stripe Webhooks service allows Stripe to push real-time event data to customers' application webhook endpoint when events happen in their Stripe account.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Stripe/1.0\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Stripe/1.0\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://stripe.com/docs/webhooks"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:49Z"
        },
        {
            "id": "stripebot",
            "name": "Stripebot",
            "slug": "stripebot",
            "url": "https://botcrawl.com/bots/stripebot/",
            "status": "active",
            "operator": "Stripe",
            "company": "Stripe",
            "family": "Stripe",
            "category": "unknown",
            "kind": "fetcher",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "Stripebot",
                "Mozilla/5.0 (X11; Linux {version}) AppleWebKit/{version} (KHTML, like Gecko) Chrome/{version} Safari/{version} (Stripebot/{version}; +https://docs.stripe.com/stripebot-crawler)"
            ],
            "primary_user_agent": "Stripebot",
            "robots_token": "Stripebot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Stripebot is the Stripe automated web crawler that collects data from their users' websites. They use the collected data to provide services to their users and to comply with financial regulations.",
            "short_description": "Stripebot is the Stripe automated web crawler that collects data from their users' websites.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Stripebot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Stripebot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://docs.stripe.com/stripebot-crawler"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:49Z"
        },
        {
            "id": "sucuri",
            "name": "Sucuri",
            "slug": "sucuri",
            "url": "https://botcrawl.com/bots/sucuri/",
            "status": "active",
            "operator": "Sucuri",
            "company": "Sucuri",
            "family": "Sucuri",
            "category": "security",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "Sucuri",
                "Mozilla/5.0 (Windows",
                "U",
                "Windows NT 6.1",
                "en-US",
                "rv:1.9.2) Gecko/20100115 Firefox/3.6 MSIE 7.0",
                "Sucuri Integrity Monitor/2.4"
            ],
            "primary_user_agent": "Sucuri",
            "robots_token": "Sucuri",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "The Sucuri bot is part of the Sucuri website security platform. It crawls websites to scan for malware, security risks, and blacklisting status.",
            "short_description": "The Sucuri bot is part of the Sucuri website security platform.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Sucuri\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Sucuri\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://blog.sucuri.net/2012/10/ask-sucuri-how-does-sitecheck-work.html"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:49Z"
        },
        {
            "id": "svix-webhooks",
            "name": "Svix Webhooks",
            "slug": "svix-webhooks",
            "url": "https://botcrawl.com/bots/svix-webhooks/",
            "status": "active",
            "operator": "Svix Inc.",
            "company": "Svix Inc.",
            "family": "Svix Inc.",
            "category": "unknown",
            "kind": "fetcher",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "sender-9YMgn",
                "Svix-Webhooks/1.65.0 (sender-9YMgn",
                "+https://www.svix.com/http-sender/)",
                "Webhooks/1.65.0 (sender-9YMgn)"
            ],
            "primary_user_agent": "sender-9YMgn",
            "robots_token": "sender-9YMgn",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Scalable webhook platform featuring automatic retries, signature verification, deep observability, and a static-IP delivery bot—deploy hosted or self-hosted.",
            "short_description": "Scalable webhook platform featuring automatic retries, signature verification, deep observability, and a static-IP delivery bot—deploy hosted or self-hosted.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: sender-9YMgn\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"sender-9YMgn\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://docs.svix.com/receiving/source-ips"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:49Z"
        },
        {
            "id": "swiftbot",
            "name": "Swiftbot",
            "slug": "swiftbot",
            "url": "https://botcrawl.com/bots/swiftbot/",
            "status": "active",
            "operator": "Swiftype",
            "company": "Swiftype",
            "family": "Swiftype",
            "category": "search",
            "kind": "unknown",
            "purpose": "indexing",
            "identity_type": "unknown",
            "user_agents": [
                "Swiftbot",
                "Mozilla/5.0 (compatible",
                "Swiftbot/1.0",
                "+https://swiftype.com/swiftbot)",
                "Swiftype Bot"
            ],
            "primary_user_agent": "Swiftbot",
            "robots_token": "Swiftbot",
            "verified": "unknown",
            "respects_robots": "yes",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "unknown",
            "source_type": "unknown",
            "short_description": "Swiftype enterprise search crawler that indexes website content for Swiftype search.",
            "verification_method": "No official IP-range or reverse-DNS verification method is documented here. Match the exact user-agent and request behavior against the source documentation at https://swiftype.com/swiftbot.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: Swiftbot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Swiftbot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://swiftype.com/swiftbot"
                }
            ],
            "updated_at": "2026-04-01T00:55:37Z"
        },
        {
            "id": "swifteq-link-checker",
            "name": "Swifteq Link Checker",
            "slug": "swifteq-link-checker",
            "url": "https://botcrawl.com/bots/swifteq-link-checker/",
            "status": "active",
            "operator": "Swfiteq Ltd",
            "company": "Swfiteq Ltd",
            "family": "Swfiteq Ltd",
            "category": "monitoring",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "SwifteqLinkChecker"
            ],
            "primary_user_agent": "SwifteqLinkChecker",
            "robots_token": "SwifteqLinkChecker",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Help Center Export is a Zendesk-approved app that integrates with any Zendesk help center and helps the customers with these tasks: Export all your articles and any meta-data: title, section, link, labels, updated time. Export all references to internal and external docs. Detect and export broken links and images for each article. In order to check for broken links the app is using a bot that attempts to access each link present in help center articles and check the response for errors.",
            "short_description": "Help Center Export is a Zendesk-approved app that integrates with any Zendesk help center and helps the customers with these tasks: Export all your articles and any…",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: SwifteqLinkChecker\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"SwifteqLinkChecker\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.swifteq.com"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:49Z"
        },
        {
            "id": "synthetic-bot",
            "name": "Synthetic Bot",
            "slug": "synthetic-bot",
            "url": "https://botcrawl.com/bots/synthetic-bot/",
            "status": "active",
            "operator": "DataDog",
            "company": "DataDog",
            "family": "DataDog",
            "category": "monitoring",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "Datadog",
                "Datadog Synthetic"
            ],
            "primary_user_agent": "Datadog",
            "robots_token": "Datadog",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Datadog Synthetics gives you a new layer of visibility on the Datadog platform. By monitoring your applications and API endpoints via simulated user requests and browser rendering.",
            "short_description": "Datadog Synthetics gives you a new layer of visibility on the Datadog platform.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Datadog\nDisallow: /"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://docs.datadoghq.com/synthetics/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:49Z"
        },
        {
            "id": "datadog-synthetic",
            "name": "Synthetic Bot",
            "slug": "datadog-synthetic",
            "url": "https://botcrawl.com/bots/datadog-synthetic/",
            "status": "active",
            "operator": "DataDog",
            "company": "DataDog",
            "family": "DataDog",
            "category": "monitoring",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "Datadog",
                "Datadog Synthetic"
            ],
            "primary_user_agent": "Datadog",
            "robots_token": "Datadog",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "unknown",
            "common_use": "Datadog Synthetics gives you a new layer of visibility on the Datadog platform. By monitoring your applications and API endpoints via simulated user requests and browser rendering.",
            "short_description": "Datadog Synthetics gives you a new layer of visibility on the Datadog platform.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Datadog\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Datadog\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://docs.datadoghq.com/synthetics/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T07:23:57Z"
        },
        {
            "id": "taboola",
            "name": "Taboola",
            "slug": "taboola",
            "url": "https://botcrawl.com/bots/taboola/",
            "status": "active",
            "operator": "Taboola",
            "company": "Taboola",
            "family": "Taboola",
            "category": "scraper",
            "kind": "crawler",
            "purpose": "general-crawl",
            "identity_type": "unknown",
            "user_agents": [
                "Taboolabot",
                "Mozilla/5.0 (compatible",
                "Taboolabot/3.7",
                "+http://www.taboola.com)"
            ],
            "primary_user_agent": "Taboolabot",
            "robots_token": "Taboolabot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "The Taboola crawler visits websites advertiser campaign websites to audit the content of the page and gather site metadata and summary data.",
            "short_description": "The Taboola crawler visits websites advertiser campaign websites to audit the content of the page and gather site metadata and summary data.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Taboolabot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Taboolabot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://help.taboola.com/hc/en-us/articles/115002347594-The-Taboola-Crawler"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:49Z"
        },
        {
            "id": "telegram-bot",
            "name": "Telegram Bot",
            "slug": "telegram-bot",
            "url": "https://botcrawl.com/bots/telegram-bot/",
            "status": "active",
            "operator": "Telegram",
            "company": "Telegram",
            "family": "Telegram",
            "category": "scraper",
            "kind": "preview",
            "purpose": "general-crawl",
            "identity_type": "unknown",
            "user_agents": [
                "TelegramBot",
                "TelegramBot (like TwitterBot)",
                "Telegram link preview bot"
            ],
            "primary_user_agent": "TelegramBot",
            "robots_token": "TelegramBot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "TelegramBot crawls websites to render a link preview when people send a message containing a URL in the Telegram messaging service.",
            "short_description": "TelegramBot crawls websites to render a link preview when people send a message containing a URL in the Telegram messaging service.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: TelegramBot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"TelegramBot\")"
            },
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:49Z"
        },
        {
            "id": "termly",
            "name": "Termly",
            "slug": "termly",
            "url": "https://botcrawl.com/bots/termly/",
            "status": "active",
            "operator": "Termly",
            "company": "Termly",
            "family": "Termly",
            "category": "unknown",
            "kind": "unknown",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "TermlyBot",
                "Mozilla/5.0 (X11",
                "Linux x86_64) AppleWebKit/537.36 (KHTML",
                "like Gecko) Chrome/W.X.Y.Z Safari/537.36 (compatible",
                "TermlyBot/W.X",
                "+http://www.termly.io/bot.html)"
            ],
            "primary_user_agent": "TermlyBot",
            "robots_token": "TermlyBot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Termly bot scanners for site compliance",
            "short_description": "Termly bot scanners for site compliance",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: TermlyBot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"TermlyBot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://support.termly.io/hc/en-us/sections/30575355455633-Scanner"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:49Z"
        },
        {
            "id": "terracotta",
            "name": "Terracotta",
            "slug": "terracotta",
            "url": "https://botcrawl.com/bots/terracotta/",
            "status": "active",
            "operator": "Ceramic",
            "company": "Ceramic",
            "family": "Terracotta",
            "category": "search",
            "kind": "crawler",
            "purpose": "indexing",
            "identity_type": "unknown",
            "user_agents": [
                "Terracotta",
                "Terracotta-News"
            ],
            "primary_user_agent": "Terracotta",
            "robots_token": "Terracotta",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "The Terracotta bot scrapes websites for use in generating indices for serving searches using Ceramic's search product.",
            "short_description": "The Terracotta bot scrapes websites for use in generating indices for serving searches using Ceramic's search product.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Terracotta\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Terracotta\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://github.com/CeramicTeam/CeramicTerracotta"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:50Z"
        },
        {
            "id": "the-old-reader",
            "name": "The Old Reader",
            "slug": "the-old-reader",
            "url": "https://botcrawl.com/bots/the-old-reader/",
            "status": "active",
            "operator": "The Old Reader",
            "company": "The Old Reader",
            "family": "The Old Reader",
            "category": "scraper",
            "kind": "unknown",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "The Old Reader",
                "Mozilla/5.0 (compatible",
                "theoldreader.com",
                "+https://theoldreader.com)",
                "Old Reader Bot"
            ],
            "primary_user_agent": "The Old Reader",
            "robots_token": "The Old Reader",
            "verified": "unknown",
            "respects_robots": "yes",
            "risk": "neutral",
            "recommended_action": "allow",
            "confidence": "unknown",
            "source_type": "unknown",
            "short_description": "The Old Reader RSS/Atom feed aggregator that fetches content for its news reader.",
            "verification_method": "No official IP-range or reverse-DNS verification method is documented here. Match the exact user-agent and request behavior against the source documentation at https://theoldreader.com.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: The Old Reader\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"The Old Reader\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://theoldreader.com"
                }
            ],
            "updated_at": "2026-04-01T00:55:35Z"
        },
        {
            "id": "thousand-eyes-cloud-agent",
            "name": "Thousand Eyes Cloud Agent",
            "slug": "thousand-eyes-cloud-agent",
            "url": "https://botcrawl.com/bots/thousand-eyes-cloud-agent/",
            "status": "active",
            "operator": "Thousand Eyes",
            "company": "Thousand Eyes",
            "family": "Thousand Eyes",
            "category": "monitoring",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "curl/7.58.0-DEV"
            ],
            "primary_user_agent": "curl/7.58.0-DEV",
            "robots_token": "curl/7.58.0-DEV",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "ThousandEyes monitors network infrastructure, troubleshoots application delivery and maps Internet performance, all from a SaaS-based platform.",
            "short_description": "ThousandEyes monitors network infrastructure, troubleshoots application delivery and maps Internet performance, all from a SaaS-based platform.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: curl/7.58.0-DEV\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"curl/7.58.0-DEV\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.thousandeyes.com/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:50Z"
        },
        {
            "id": "tiktok",
            "name": "TikTok",
            "slug": "tiktok",
            "url": "https://botcrawl.com/bots/tiktok/",
            "status": "active",
            "operator": "ByteDance",
            "company": "ByteDance",
            "family": "ByteDance",
            "category": "scraper",
            "kind": "preview",
            "purpose": "user-triggered",
            "identity_type": "unknown",
            "user_agents": [
                "TikTok",
                "tiktok"
            ],
            "primary_user_agent": "TikTok",
            "robots_token": "TikTok",
            "verified": "yes",
            "respects_robots": "yes",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "short_description": "TikTok link preview fetcher.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or reverse DNS/IP verification when available.",
            "rules": {
                "robots": "User-agent: TikTok\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"TikTok\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.switchtheweb.com/agents/tiktok"
                }
            ],
            "last_verified": "2026-04-29",
            "last_checked": "2026-04-29",
            "updated_at": "2026-04-30T09:15:55Z"
        },
        {
            "id": "tiktokspider",
            "name": "TikTokSpider",
            "slug": "tiktokspider",
            "url": "https://botcrawl.com/bots/tiktokspider/",
            "status": "active",
            "operator": "ByteDance",
            "company": "ByteDance",
            "family": "ByteDance",
            "category": "scraper",
            "kind": "fetcher",
            "purpose": "user-triggered",
            "identity_type": "unknown",
            "user_agents": [
                "TikTokSpider",
                "TikTok spider"
            ],
            "primary_user_agent": "TikTokSpider",
            "robots_token": "TikTokSpider",
            "verified": "yes",
            "respects_robots": "yes",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "short_description": "TikTok metadata fetcher for shared links.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or reverse DNS/IP verification when available.",
            "rules": {
                "robots": "User-agent: TikTokSpider\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"TikTokSpider\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://darkvisitors.com/agents/tiktokspider"
                }
            ],
            "last_verified": "2026-04-29",
            "last_checked": "2026-04-29",
            "updated_at": "2026-04-30T09:15:55Z"
        },
        {
            "id": "timpibot",
            "name": "Timpibot",
            "slug": "timpibot",
            "url": "https://botcrawl.com/bots/timpibot/",
            "status": "active",
            "operator": "Timpi",
            "company": "Timpi",
            "family": "Timpi",
            "category": "search",
            "kind": "unknown",
            "purpose": "indexing",
            "identity_type": "unknown",
            "user_agents": [
                "Timpibot",
                "Mozilla/5.0 (compatible",
                "+https://timpi.io)",
                "Timpi Bot"
            ],
            "primary_user_agent": "Timpibot",
            "robots_token": "Timpibot",
            "verified": "unknown",
            "respects_robots": "unknown",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "unknown",
            "source_type": "unknown",
            "short_description": "Timpi decentralized search network crawler that builds a community-powered search index.",
            "verification_method": "No official IP-range or reverse-DNS verification method is documented here. Match the exact user-agent and request behavior against the source documentation at https://timpi.io.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: Timpibot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Timpibot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://timpi.io"
                }
            ],
            "updated_at": "2026-04-01T00:55:27Z"
        },
        {
            "id": "toutiao",
            "name": "Toutiao",
            "slug": "toutiao",
            "url": "https://botcrawl.com/bots/toutiao/",
            "status": "active",
            "operator": "ByteDance",
            "company": "ByteDance",
            "family": "ByteDance",
            "category": "search",
            "kind": "crawler",
            "purpose": "search",
            "identity_type": "unknown",
            "user_agents": [
                "Bytespider",
                "Mozilla/5.0 (compatible",
                "https://zhanzhang.toutiao.com/) AppleWebKit/537.36 (KHTML",
                "like Gecko) Chrome/70.0.0.0 Safari/537.36",
                "Toutiao crawler",
                "ByteDance crawler",
                "zhanzhang.toutiao.com"
            ],
            "primary_user_agent": "Bytespider",
            "robots_token": "Bytespider",
            "verified": "yes",
            "respects_robots": "yes",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "high",
            "source_type": "directory",
            "short_description": "ByteDance crawler used by Toutiao for content aggregation and indexing.",
            "verification_method": "Validate the user-agent pattern, operator documentation, and any published IP ranges before trusting.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: zhanzhang.toutiao.com\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"zhanzhang.toutiao.com\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://zhanzhang.toutiao.com/docs/intro/26899"
                }
            ],
            "last_verified": "2026-04-01",
            "updated_at": "2026-04-30T09:15:50Z"
        },
        {
            "id": "bytespider",
            "name": "Toutiao",
            "slug": "bytespider",
            "url": "https://botcrawl.com/bots/bytespider/",
            "status": "active",
            "operator": "ByteDance",
            "company": "ByteDance",
            "family": "ByteDance",
            "category": "search",
            "kind": "crawler",
            "purpose": "search",
            "identity_type": "unknown",
            "user_agents": [
                "Bytespider",
                "Mozilla/5.0 (compatible",
                "https://zhanzhang.toutiao.com/) AppleWebKit/537.36 (KHTML",
                "like Gecko) Chrome/70.0.0.0 Safari/537.36",
                "Toutiao crawler",
                "ByteDance crawler",
                "zhanzhang.toutiao.com"
            ],
            "primary_user_agent": "Bytespider",
            "robots_token": "Bytespider",
            "verified": "yes",
            "respects_robots": "yes",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "high",
            "source_type": "unknown",
            "short_description": "ByteDance crawler used by Toutiao for content aggregation and indexing.",
            "verification_method": "Validate the user-agent pattern, operator documentation, and any published IP ranges before trusting.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: zhanzhang.toutiao.com\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Bytespider\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://zhanzhang.toutiao.com/docs/intro/26899"
                }
            ],
            "last_verified": "2026-04-01",
            "updated_at": "2026-04-30T07:23:58Z"
        },
        {
            "id": "trellis-services",
            "name": "Trellis-Services",
            "slug": "trellis-services",
            "url": "https://botcrawl.com/bots/trellis-services/",
            "status": "active",
            "operator": "Mediavine",
            "company": "Mediavine",
            "family": "Mediavine",
            "category": "scraper",
            "kind": "crawler",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "Trellis-Services"
            ],
            "primary_user_agent": "Trellis-Services",
            "robots_token": "Trellis-Services",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Critical CSS Generator to Optimize Websites",
            "short_description": "Critical CSS Generator to Optimize Websites",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Trellis-Services\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Trellis-Services\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.mediavine.com"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:50Z"
        },
        {
            "id": "trendiction-bot",
            "name": "Trendiction Bot",
            "slug": "trendiction-bot",
            "url": "https://botcrawl.com/bots/trendiction-bot/",
            "status": "active",
            "operator": "Trendiction S.A.",
            "company": "Trendiction S.A.",
            "family": "Trendiction S.A.",
            "category": "monitoring",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "trendictionbot",
                "Mozilla/5.0 (Windows NT 10.0",
                "Win64",
                "x64",
                "trendictionbot0.5.0",
                "trendiction search",
                "http://www.trendiction.de/bot",
                "please let us know of any problems",
                "web at trendiction.com) Gecko/20170101 Firefox/67.0"
            ],
            "primary_user_agent": "trendictionbot",
            "robots_token": "trendictionbot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Talkwalker delivers the consumer insights that help brands drive business impact. In a world full of conversations, the most successful global brands have switched to Talkwalker because we provide them with a powerful software platform to uncover, understand and derive the most valuable insights from internal and external data. Our listening and analytics platform enables more than 2,500 companies worldwide to protect their brands, measure their impact and gain the key consumer insights that drive purchase decisions.",
            "short_description": "Talkwalker delivers the consumer insights that help brands drive business impact.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: trendictionbot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"trendictionbot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.talkwalker.com/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:50Z"
        },
        {
            "id": "trustly",
            "name": "Trustly",
            "slug": "trustly",
            "url": "https://botcrawl.com/bots/trustly/",
            "status": "active",
            "operator": "Trustly Group AB",
            "company": "Trustly Group AB",
            "family": "Trustly Group AB",
            "category": "unknown",
            "kind": "fetcher",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "Trustly",
                "Trustly/1",
                "Trustly/"
            ],
            "primary_user_agent": "Trustly",
            "robots_token": "Trustly",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Trustly will send notifications / callbacks to merchant's system to provide updates on payment statuses.",
            "short_description": "Trustly will send notifications / callbacks to merchant's system to provide updates on payment statuses.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Trustly\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Trustly\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.trustly.net/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:50Z"
        },
        {
            "id": "ttd-content",
            "name": "TTD Content",
            "slug": "ttd-content",
            "url": "https://botcrawl.com/bots/ttd-content/",
            "status": "active",
            "operator": "The Trade Desk",
            "company": "The Trade Desk",
            "family": "The Trade Desk",
            "category": "scraper",
            "kind": "crawler",
            "purpose": "general-crawl",
            "identity_type": "unknown",
            "user_agents": [
                "TTD-Content",
                "Mozilla/5.0 (compatible",
                "+https://www.thetradedesk.com/general/ttd-content)"
            ],
            "primary_user_agent": "TTD-Content",
            "robots_token": "TTD-Content",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "The Trade Desk crawler classifies webpage content to allow advertisers to choose where they show ads.",
            "short_description": "The Trade Desk crawler classifies webpage content to allow advertisers to choose where they show ads.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: TTD-Content\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"TTD-Content\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.thetradedesk.com/us/ttd-content"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:50Z"
        },
        {
            "id": "tumblr",
            "name": "Tumblr",
            "slug": "tumblr",
            "url": "https://botcrawl.com/bots/tumblr/",
            "status": "active",
            "operator": "Automattic",
            "company": "Automattic",
            "family": "Automattic",
            "category": "scraper",
            "kind": "preview",
            "purpose": "general-crawl",
            "identity_type": "unknown",
            "user_agents": [
                "Tumblr",
                "Tumblr/14.0.835.186",
                "Tumblr/"
            ],
            "primary_user_agent": "Tumblr",
            "robots_token": "Tumblr",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "On Tumblr, post authors can paste a URL in their post, and we'll \"unfurl\" that URL into a pretty Link \"Block\" for their post by making a request to the URL and parsing the response.",
            "short_description": "On Tumblr, post authors can paste a URL in their post, and we'll \"unfurl\" that URL into a pretty Link \"Block\" for their post by making a request to the URL and parsing…",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Tumblr\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Tumblr\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://automattic.com/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:50Z"
        },
        {
            "id": "turnitinbot",
            "name": "TurnitinBot",
            "slug": "turnitinbot",
            "url": "https://botcrawl.com/bots/turnitinbot/",
            "status": "active",
            "operator": "Turnitin",
            "company": "Turnitin",
            "family": "Turnitin",
            "category": "scraper",
            "kind": "crawler",
            "purpose": "general-crawl",
            "identity_type": "unknown",
            "user_agents": [
                "Turnitin",
                "TurnitinBot/ContentIngest (http://www.turnitin.com/robot/crawlerinfo.html)",
                "Turnitin (https://bit.ly/2UvnfoQ)"
            ],
            "primary_user_agent": "Turnitin",
            "robots_token": "Turnitin",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Turnitin.com offers various services to the educational community. Most prominently, we provide a widely used and effective plagiarism detection service. Part of the plagiarism prevention service relies on comparing student papers to content found on the Internet. Since we do not know ahead of time which pages on the Internet a student will use we need to gather them all for comparison. However, we do have automated ways of throwing away content and links that would be irrelevant to our service.",
            "short_description": "Turnitin.com offers various services to the educational community.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Turnitin\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Turnitin\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://turnitin.com/robot/crawlerinfo.html"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:50Z"
        },
        {
            "id": "turnitinbot-contentingest",
            "name": "TurnitinBot ContentIngest",
            "slug": "turnitinbot-contentingest",
            "url": "https://botcrawl.com/bots/turnitinbot-contentingest/",
            "status": "active",
            "operator": "Turnitin",
            "company": "Turnitin",
            "family": "Turnitin",
            "category": "search",
            "kind": "crawler",
            "purpose": "general-crawl",
            "identity_type": "official-documented",
            "user_agents": [
                "TurnitinBot/ContentIngest",
                "TurnitinBot/ContentIngest (http://www.turnitin.com/robot/crawlerinfo.html)",
                "Turnitin (https://bit.ly/2UvnfoQ)"
            ],
            "primary_user_agent": "TurnitinBot/ContentIngest",
            "robots_token": "Turnitin",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "high",
            "source_type": "official",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: Turnitin\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"TurnitinBot/ContentIngest\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://turnitin.com/robot/crawlerinfo.html"
                }
            ],
            "last_verified": "2026-04-01",
            "updated_at": "2026-04-30T09:15:50Z"
        },
        {
            "id": "twilio-proxy",
            "name": "Twilio Proxy",
            "slug": "twilio-proxy",
            "url": "https://botcrawl.com/bots/twilio-proxy/",
            "status": "active",
            "operator": "Twilio, Inc.",
            "company": "Twilio, Inc.",
            "family": "Twilio, Inc.",
            "category": "unknown",
            "kind": "fetcher",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "TwilioProxy",
                "TwilioProxy/1.1",
                "TwilioProxy/"
            ],
            "primary_user_agent": "TwilioProxy",
            "robots_token": "TwilioProxy",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Twilio webhook requests triggered by Twilio when there are incoming SMSes, calls, etc.",
            "short_description": "Twilio webhook requests triggered by Twilio when there are incoming SMSes, calls, etc.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: TwilioProxy\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"TwilioProxy\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.twilio.com/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:50Z"
        },
        {
            "id": "twinagent",
            "name": "TwinAgent",
            "slug": "twinagent",
            "url": "https://botcrawl.com/bots/twinagent/",
            "status": "active",
            "operator": "Twin",
            "company": "Twin",
            "family": "Twin",
            "category": "ai",
            "kind": "fetcher",
            "purpose": "user-triggered",
            "identity_type": "unknown",
            "user_agents": [
                "TwinAgent",
                "TwinAgent/1.0"
            ],
            "primary_user_agent": "TwinAgent",
            "robots_token": "TwinAgent",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Automate complex operations end-to-end.",
            "short_description": "Automate complex operations end-to-end.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: TwinAgent\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"TwinAgent\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://twin.so"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:50Z"
        },
        {
            "id": "twingly-recon-sjostrom",
            "name": "Twingly Recon-Sjostrom",
            "slug": "twingly-recon-sjostrom",
            "url": "https://botcrawl.com/bots/twingly-recon-sjostrom/",
            "status": "active",
            "operator": "Twingly",
            "company": "Twingly",
            "family": "Twingly",
            "category": "search",
            "kind": "unknown",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "Twingly Recon-Sjostrom",
                "Mozilla/5.0 (compatible",
                "+https://www.twingly.com/docs/blog-search/reference/twingly-web-crawler.html)",
                "Twingly Bot"
            ],
            "primary_user_agent": "Twingly Recon-Sjostrom",
            "robots_token": "Twingly Recon-Sjostrom",
            "verified": "unknown",
            "respects_robots": "unknown",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "unknown",
            "source_type": "unknown",
            "short_description": "Twingly social data crawler that supplies news articles and blog data from millions of sources.",
            "verification_method": "No official IP-range or reverse-DNS verification method is documented here. Match the exact user-agent and request behavior against the source documentation at https://www.twingly.com.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: Twingly Recon-Sjostrom\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Twingly Recon-Sjostrom\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.twingly.com"
                }
            ],
            "updated_at": "2026-04-01T00:55:24Z"
        },
        {
            "id": "twitterbot",
            "name": "twitterbot",
            "slug": "twitterbot",
            "url": "https://botcrawl.com/bots/twitterbot/",
            "status": "active",
            "operator": "Twitter",
            "company": "Twitter",
            "family": "Twitter",
            "category": "scraper",
            "kind": "preview",
            "purpose": "general-crawl",
            "identity_type": "unknown",
            "user_agents": [
                "Twitterbot",
                "Twitterbot/1.0",
                "Twitterbot/"
            ],
            "primary_user_agent": "Twitterbot",
            "robots_token": "Twitterbot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "A Twitter bot is a type of bot software that controls a Twitter account via the Twitter API. The bot software may autonomously perform actions such as tweeting, re-tweeting, liking, following, unfollowing, or direct messaging other accounts.",
            "short_description": "A Twitter bot is a type of bot software that controls a Twitter account via the Twitter API.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Twitterbot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Twitterbot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://twitter.com"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:50Z"
        },
        {
            "id": "typesense-scraper",
            "name": "Typesense Scraper",
            "slug": "typesense-scraper",
            "url": "https://botcrawl.com/bots/typesense-scraper/",
            "status": "active",
            "operator": "Typesense",
            "company": "Typesense",
            "family": "Typesense",
            "category": "search",
            "kind": "unknown",
            "purpose": "indexing",
            "identity_type": "unknown",
            "user_agents": [
                "Typesense Scraper",
                "Mozilla/5.0 (compatible",
                "+https://typesense.org)",
                "Typesense Bot"
            ],
            "primary_user_agent": "Typesense Scraper",
            "robots_token": "Typesense Scraper",
            "verified": "unknown",
            "respects_robots": "yes",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "unknown",
            "source_type": "unknown",
            "short_description": "Typesense open-source search engine scraper for indexing website content.",
            "verification_method": "No official IP-range or reverse-DNS verification method is documented here. Match the exact user-agent and request behavior against the source documentation at https://typesense.org.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: Typesense Scraper\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Typesense Scraper\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://typesense.org"
                }
            ],
            "updated_at": "2026-04-01T00:55:38Z"
        },
        {
            "id": "um-ic",
            "name": "um-IC",
            "slug": "um-ic",
            "url": "https://botcrawl.com/bots/um-ic/",
            "status": "active",
            "operator": "Ubermetrics Technologies",
            "company": "Ubermetrics Technologies",
            "family": "Ubermetrics Technologies",
            "category": "monitoring",
            "kind": "unknown",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "um-IC",
                "Mozilla/5.0 (compatible",
                "+https://ubermetrics-technologies.com)",
                "Ubermetrics IC Bot"
            ],
            "primary_user_agent": "um-IC",
            "robots_token": "um-IC",
            "verified": "unknown",
            "respects_robots": "unknown",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "unknown",
            "source_type": "unknown",
            "short_description": "Ubermetrics Technologies web crawler that collects content for AI-based communication intelligence.",
            "verification_method": "No official IP-range or reverse-DNS verification method is documented here. Match the exact user-agent and request behavior against the source documentation at https://ubermetrics-technologies.com.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: um-IC\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"um-IC\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://ubermetrics-technologies.com"
                }
            ],
            "updated_at": "2026-04-01T00:55:24Z"
        },
        {
            "id": "um-ln",
            "name": "um-LN",
            "slug": "um-ln",
            "url": "https://botcrawl.com/bots/um-ln/",
            "status": "active",
            "operator": "Ubermetrics Technologies",
            "company": "Ubermetrics Technologies",
            "family": "Ubermetrics Technologies",
            "category": "monitoring",
            "kind": "unknown",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "um-LN",
                "Mozilla/5.0 (compatible",
                "+https://ubermetrics-technologies.com)",
                "Ubermetrics LN Bot"
            ],
            "primary_user_agent": "um-LN",
            "robots_token": "um-LN",
            "verified": "unknown",
            "respects_robots": "unknown",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "unknown",
            "source_type": "unknown",
            "short_description": "Ubermetrics Technologies LN crawler that collects web content for strategic business intelligence.",
            "verification_method": "No official IP-range or reverse-DNS verification method is documented here. Match the exact user-agent and request behavior against the source documentation at https://ubermetrics-technologies.com.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: um-LN\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"um-LN\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://ubermetrics-technologies.com"
                }
            ],
            "updated_at": "2026-04-01T00:55:25Z"
        },
        {
            "id": "upday",
            "name": "upday",
            "slug": "upday",
            "url": "https://botcrawl.com/bots/upday/",
            "status": "active",
            "operator": "upday GmbH & Co. KG",
            "company": "upday GmbH & Co. KG",
            "family": "upday GmbH & Co. KG",
            "category": "scraper",
            "kind": "crawler",
            "purpose": "indexing",
            "identity_type": "unknown",
            "user_agents": [
                "upday",
                "Mozilla/5.0 (compatible",
                "upday/1.0",
                "+upday)",
                "upday/"
            ],
            "primary_user_agent": "upday",
            "robots_token": "upday",
            "verified": "yes",
            "respects_robots": "yes",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "upday is a news aggregator app, and its bot crawls news sources. It collects and indexes articles to be recommended to users on its platform.",
            "short_description": "upday is a news aggregator app, and its bot crawls news sources.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: upday\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"upday\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.upday.com/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:50Z"
        },
        {
            "id": "updownbot",
            "name": "UpDownBot",
            "slug": "updownbot",
            "url": "https://botcrawl.com/bots/updownbot/",
            "status": "active",
            "operator": "Updown",
            "company": "Updown",
            "family": "Updown",
            "category": "monitoring",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "updown.io",
                "updown.io daemon 2.2"
            ],
            "primary_user_agent": "updown.io",
            "robots_token": "updown.io",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "UpDownBot is the HTTP monitoring probe for updown.io.",
            "short_description": "UpDownBot is the HTTP monitoring probe for updown.io.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: updown.io\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"updown.io\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://updown.io/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:50Z"
        },
        {
            "id": "uptime-llc",
            "name": "Uptime LLC",
            "slug": "uptime-llc",
            "url": "https://botcrawl.com/bots/uptime-llc/",
            "status": "active",
            "operator": "Uptime",
            "company": "Uptime",
            "family": "Uptime",
            "category": "monitoring",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "Uptime",
                "Mozilla/5.0 (compatible",
                "Uptime/1.0",
                "http://uptime.com)",
                "Uptime/"
            ],
            "primary_user_agent": "Uptime",
            "robots_token": "Uptime",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Uptime.com HTTP probe for website availability and performance monitoring.",
            "short_description": "Uptime.com HTTP probe for website availability and performance monitoring.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Uptime\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Uptime\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://uptime.com"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:50Z"
        },
        {
            "id": "uptime-monitoring",
            "name": "Uptime Monitoring",
            "slug": "uptime-monitoring",
            "url": "https://botcrawl.com/bots/uptime-monitoring/",
            "status": "active",
            "operator": "GoDaddy",
            "company": "GoDaddy",
            "family": "GoDaddy",
            "category": "monitoring",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "UptimeMonitoring"
            ],
            "primary_user_agent": "UptimeMonitoring",
            "robots_token": "UptimeMonitoring",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Uptime monitoring is a service that checks if a website is online. It will send you an alert if your website is “down”.",
            "short_description": "Uptime monitoring is a service that checks if a website is online.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: UptimeMonitoring\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"UptimeMonitoring\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://docs.sucuri.net/website-monitoring/understanding-alerts/uptime-monitor-alerts/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:50Z"
        },
        {
            "id": "uptimebot",
            "name": "UptimeBot",
            "slug": "uptimebot",
            "url": "https://botcrawl.com/bots/uptimebot/",
            "status": "active",
            "operator": "UptimeBot",
            "company": "UptimeBot",
            "family": "UptimeBot",
            "category": "monitoring",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "UptimeBot/1",
                "UptimeBot/1.0 EU-1 (+https://uptime.bot/hello)"
            ],
            "primary_user_agent": "UptimeBot/1",
            "robots_token": "UptimeBot/1",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "UptimeBot is an actionable website monitoring tool that works great with Slack.",
            "short_description": "UptimeBot is an actionable website monitoring tool that works great with Slack.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: UptimeBot/1\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"UptimeBot/1\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://uptime.bot/hello"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:50Z"
        },
        {
            "id": "uptimerobot",
            "name": "UptimeRobot",
            "slug": "uptimerobot",
            "url": "https://botcrawl.com/bots/uptimerobot/",
            "status": "active",
            "operator": "Uptimerobot",
            "company": "Uptimerobot",
            "family": "Uptimerobot",
            "category": "monitoring",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "UptimeRobot",
                "Mozilla/5.0+(compatible",
                "UptimeRobot/2.0",
                "http://www.uptimerobot.com/)",
                "UptimeRobot monitor"
            ],
            "primary_user_agent": "UptimeRobot",
            "robots_token": "UptimeRobot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Free Website Uptime Monitoring",
            "short_description": "Free Website Uptime Monitoring",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: UptimeRobot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"UptimeRobot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "http://www.uptimerobot.com/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:50Z"
        },
        {
            "id": "uptimia",
            "name": "Uptimia",
            "slug": "uptimia",
            "url": "https://botcrawl.com/bots/uptimia/",
            "status": "active",
            "operator": "Uptimia",
            "company": "Uptimia",
            "family": "Uptimia",
            "category": "monitoring",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "www.uptimia.com",
                "Mozilla/5.0 (compatible",
                "Uptimia",
                "www.uptimia.com)"
            ],
            "primary_user_agent": "www.uptimia.com",
            "robots_token": "www.uptimia.com",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Uptimia is a website monitoring service, monitoring website performance and availability.",
            "short_description": "Uptimia is a website monitoring service, monitoring website performance and availability.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: www.uptimia.com\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"www.uptimia.com\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.uptimia.com/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:51Z"
        },
        {
            "id": "uptrendsbot",
            "name": "UptrendsBot",
            "slug": "uptrendsbot",
            "url": "https://botcrawl.com/bots/uptrendsbot/",
            "status": "active",
            "operator": "Uptrends GmbH",
            "company": "Uptrends GmbH",
            "family": "Uptrends GmbH",
            "category": "monitoring",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "uptrends",
                "Uptrends monitoring bot"
            ],
            "primary_user_agent": "uptrends",
            "robots_token": "uptrends",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Global load tests and synthetic monitoring",
            "short_description": "Global load tests and synthetic monitoring",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: uptrends\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"uptrends\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.uptrends.com"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:51Z"
        },
        {
            "id": "v0bot",
            "name": "v0bot",
            "slug": "v0bot",
            "url": "https://botcrawl.com/bots/v0bot/",
            "status": "active",
            "operator": "Vercel",
            "company": "Vercel",
            "family": "Vercel",
            "category": "unknown",
            "kind": "unknown",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "v0bot"
            ],
            "primary_user_agent": "v0bot",
            "robots_token": "v0bot",
            "verified": "yes",
            "respects_robots": "unknown",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "botsfyi-verified-listing",
            "common_use": "AI Crawler",
            "short_description": "Bot for v0 services.",
            "verification_method": "Public verified bot directory listing",
            "spoofing_risk": "Medium",
            "rules": {
                "cloudflare": "(http.user_agent contains \"v0bot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://bots.fyi/d/v0bot"
                }
            ],
            "last_verified": "2026-04-19",
            "last_checked": "2026-04-19",
            "updated_at": "2026-04-30T09:15:51Z"
        },
        {
            "id": "vaultpress",
            "name": "VaultPress",
            "slug": "vaultpress",
            "url": "https://botcrawl.com/bots/vaultpress/",
            "status": "active",
            "operator": "Automattic",
            "company": "Automattic",
            "family": "Automattic",
            "category": "unknown",
            "kind": "fetcher",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "VaultPress"
            ],
            "primary_user_agent": "VaultPress",
            "robots_token": "VaultPress",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "VaultPress is a subscription service developed by Automattic, the company behind WordPress, that offers automated daily and real-time backups of WordPress websites onto WordPress.com's cloud servers. It is known for its ease of use, secure backups, and proactive security scanning.",
            "short_description": "VaultPress is a subscription service developed by Automattic, the company behind WordPress, that offers automated daily and real-time backups of WordPress websites onto…",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: VaultPress\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"VaultPress\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://vaultpress.com/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:51Z"
        },
        {
            "id": "velenpublicwebcrawler",
            "name": "VelenPublicWebCrawler",
            "slug": "velenpublicwebcrawler",
            "url": "https://botcrawl.com/bots/velenpublicwebcrawler/",
            "status": "active",
            "operator": "Velen / Hunter",
            "company": "Velen / Hunter",
            "family": "Velen / Hunter",
            "category": "search",
            "kind": "unknown",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "VelenPublicWebCrawler",
                "Mozilla/5.0 (compatible",
                "+https://velen.io)",
                "Velen Crawler",
                "Hunter Bot"
            ],
            "primary_user_agent": "VelenPublicWebCrawler",
            "robots_token": "VelenPublicWebCrawler",
            "verified": "unknown",
            "respects_robots": "unknown",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "unknown",
            "source_type": "unknown",
            "short_description": "Velen web crawler for Hunter that analyzes millions of public websites for contact data.",
            "verification_method": "No official IP-range or reverse-DNS verification method is documented here. Match the exact user-agent and request behavior against the source documentation at https://velen.io.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: VelenPublicWebCrawler\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"VelenPublicWebCrawler\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://velen.io"
                }
            ],
            "updated_at": "2026-04-01T00:55:27Z"
        },
        {
            "id": "vemetric-favicon-bot",
            "name": "Vemetric Favicon Bot",
            "slug": "vemetric-favicon-bot",
            "url": "https://botcrawl.com/bots/vemetric-favicon-bot/",
            "status": "active",
            "operator": "Vemetric",
            "company": "Vemetric",
            "family": "Vemetric",
            "category": "unknown",
            "kind": "unknown",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "vemetric-favicon-bot",
                "Vemetric Favicon Bot"
            ],
            "primary_user_agent": "vemetric-favicon-bot",
            "robots_token": "vemetric-favicon-bot",
            "verified": "yes",
            "respects_robots": "unknown",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "botsfyi-verified-listing",
            "common_use": "Preview",
            "short_description": "Fetches favicons from websites in the highest quality available.",
            "verification_method": "Public verified bot directory listing",
            "spoofing_risk": "Medium",
            "rules": {
                "cloudflare": "(http.user_agent contains \"vemetric-favicon-bot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://bots.fyi/d/vemetric-favicon-bot"
                }
            ],
            "last_verified": "2026-04-19",
            "last_checked": "2026-04-19",
            "updated_at": "2026-04-30T09:15:51Z"
        },
        {
            "id": "vercel-build-container",
            "name": "Vercel build container",
            "slug": "vercel-build-container",
            "url": "https://botcrawl.com/bots/vercel-build-container/",
            "status": "active",
            "operator": "Vercel",
            "company": "Vercel",
            "family": "Vercel",
            "category": "unknown",
            "kind": "preview",
            "purpose": "user-triggered",
            "identity_type": "unknown",
            "user_agents": [
                "vercel-build-container"
            ],
            "primary_user_agent": "vercel-build-container",
            "robots_token": "vercel-build-container",
            "verified": "yes",
            "respects_robots": "unknown",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Fetches content during Vercel build processes.",
            "short_description": "System-initiated requests made from Vercel's build container during a build.",
            "verification_method": "Verified on bots.fyi. Treat as a platform/service identifier and validate before allow-listing.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: vercel-build-container\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"vercel-build-container\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://bots.fyi/d/vercel-build-container"
                },
                {
                    "type": "operator",
                    "url": "https://vercel.com/docs/bot-management"
                }
            ],
            "last_verified": "2026-04-19",
            "last_checked": "2026-04-19",
            "updated_at": "2026-04-30T09:15:51Z"
        },
        {
            "id": "vercel-favicon-bot",
            "name": "Vercel Favicon Bot",
            "slug": "vercel-favicon-bot",
            "url": "https://botcrawl.com/bots/vercel-favicon-bot/",
            "status": "active",
            "operator": "Vercel",
            "company": "Vercel",
            "family": "Vercel",
            "category": "unknown",
            "kind": "unknown",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "vercel-favicon-bot",
                "Vercel Favicon Bot"
            ],
            "primary_user_agent": "vercel-favicon-bot",
            "robots_token": "vercel-favicon-bot",
            "verified": "yes",
            "respects_robots": "unknown",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "botsfyi-verified-listing",
            "common_use": "Preview",
            "short_description": "Vercel favicon fetcher bot.",
            "verification_method": "Public verified bot directory listing",
            "spoofing_risk": "Medium",
            "rules": {
                "cloudflare": "(http.user_agent contains \"vercel-favicon-bot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://bots.fyi/d/vercel-favicon-bot"
                }
            ],
            "last_verified": "2026-04-19",
            "last_checked": "2026-04-19",
            "updated_at": "2026-04-30T09:15:51Z"
        },
        {
            "id": "vercel-screenshot-bot",
            "name": "Vercel Screenshot Bot",
            "slug": "vercel-screenshot-bot",
            "url": "https://botcrawl.com/bots/vercel-screenshot-bot/",
            "status": "active",
            "operator": "Vercel",
            "company": "Vercel",
            "family": "Vercel",
            "category": "unknown",
            "kind": "unknown",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "vercel-screenshot-bot",
                "Vercel Screenshot Bot"
            ],
            "primary_user_agent": "vercel-screenshot-bot",
            "robots_token": "vercel-screenshot-bot",
            "verified": "yes",
            "respects_robots": "unknown",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "botsfyi-verified-listing",
            "common_use": "Preview",
            "short_description": "Vercel screenshot preview bot.",
            "verification_method": "Public verified bot directory listing",
            "spoofing_risk": "Medium",
            "rules": {
                "cloudflare": "(http.user_agent contains \"vercel-screenshot-bot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://bots.fyi/d/vercel-screenshot-bot"
                }
            ],
            "last_verified": "2026-04-19",
            "last_checked": "2026-04-19",
            "updated_at": "2026-04-30T09:15:51Z"
        },
        {
            "id": "vercelflags",
            "name": "vercelflags",
            "slug": "vercelflags",
            "url": "https://botcrawl.com/bots/vercelflags/",
            "status": "active",
            "operator": "Vercel",
            "company": "Vercel",
            "family": "Vercel",
            "category": "monitoring",
            "kind": "agent",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "vercelflags"
            ],
            "primary_user_agent": "vercelflags",
            "robots_token": "vercelflags",
            "verified": "yes",
            "respects_robots": "unknown",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Used by Vercel feature flags infrastructure and related checks.",
            "short_description": "Vercel flags service traffic.",
            "verification_method": "Verified on bots.fyi. Treat as a platform/service identifier and validate before allow-listing.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: vercelflags\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"vercelflags\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://bots.fyi/d/vercelflags"
                },
                {
                    "type": "operator",
                    "url": "https://vercel.com/docs/bot-management"
                }
            ],
            "last_verified": "2026-04-19",
            "last_checked": "2026-04-19",
            "updated_at": "2026-04-30T09:15:51Z"
        },
        {
            "id": "verceltracing",
            "name": "verceltracing",
            "slug": "verceltracing",
            "url": "https://botcrawl.com/bots/verceltracing/",
            "status": "active",
            "operator": "Vercel",
            "company": "Vercel",
            "family": "Vercel",
            "category": "monitoring",
            "kind": "agent",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "verceltracing"
            ],
            "primary_user_agent": "verceltracing",
            "robots_token": "verceltracing",
            "verified": "yes",
            "respects_robots": "unknown",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Used by Vercel tracing and observability-related infrastructure.",
            "short_description": "Vercel tracing service traffic.",
            "verification_method": "Verified on bots.fyi. Treat as a platform/service identifier and validate before allow-listing.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: verceltracing\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"verceltracing\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://bots.fyi/d/verceltracing"
                },
                {
                    "type": "operator",
                    "url": "https://vercel.com/docs/bot-management"
                }
            ],
            "last_verified": "2026-04-19",
            "last_checked": "2026-04-19",
            "updated_at": "2026-04-30T09:15:51Z"
        },
        {
            "id": "verity",
            "name": "Verity",
            "slug": "verity",
            "url": "https://botcrawl.com/bots/verity/",
            "status": "active",
            "operator": "GumGum",
            "company": "GumGum",
            "family": "GumGum",
            "category": "search",
            "kind": "unknown",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "Verity",
                "Mozilla/5.0 (compatible",
                "+https://gumgum.com/verity)",
                "GumGum Verity"
            ],
            "primary_user_agent": "Verity",
            "robots_token": "Verity",
            "verified": "unknown",
            "respects_robots": "unknown",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "unknown",
            "source_type": "unknown",
            "short_description": "GumGum contextual advertising crawler that analyzes web page content for ad targeting and brand safety.",
            "verification_method": "No official IP-range or reverse-DNS verification method is documented here. Match the exact user-agent and request behavior against the source documentation at https://gumgum.com/verity.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: Verity\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Verity\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://gumgum.com/verity"
                }
            ],
            "updated_at": "2026-04-01T00:55:25Z"
        },
        {
            "id": "videootv-bot",
            "name": "videootv Bot",
            "slug": "videootv-bot",
            "url": "https://botcrawl.com/bots/videootv-bot/",
            "status": "active",
            "operator": "Digital Green",
            "company": "Digital Green",
            "family": "Digital Green",
            "category": "scraper",
            "kind": "crawler",
            "purpose": "indexing",
            "identity_type": "unknown",
            "user_agents": [
                "videootvBot",
                "Mozilla/5.0 (compatible",
                "+https://www.videoo.tv)"
            ],
            "primary_user_agent": "videootvBot",
            "robots_token": "videootvBot",
            "verified": "yes",
            "respects_robots": "yes",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Crawler to extract the newest articles in the publisher's website (via feed or parsing html) to make a carousel with images, links and text for our native ads module in order to improve recirculation in the publisher's web. Only crawls our publisher's webpages.",
            "short_description": "Crawler to extract the newest articles in the publisher's website (via feed or parsing html) to make a carousel with images, links and text for our native ads module…",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: videootvBot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"videootvBot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://videoo.tv/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:51Z"
        },
        {
            "id": "visually-io-shopify-editor",
            "name": "Visually.io Shopify Editor",
            "slug": "visually-io-shopify-editor",
            "url": "https://botcrawl.com/bots/visually-io-shopify-editor/",
            "status": "active",
            "operator": "Unknown operator",
            "company": "Unknown operator",
            "category": "unknown",
            "kind": "unknown",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "Visually.io Shopify Editor",
                "AI Assistant"
            ],
            "primary_user_agent": "Visually.io Shopify Editor",
            "robots_token": "Visually.io Shopify Editor",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "unknown",
            "source_type": "unknown",
            "short_description": "Shopify theme editor alternative for live, real-time store editing via a secure iframe and controlled proxy.",
            "verification_method": "Cloudflare verified bot directory mirror; verify against the operator's documentation or published infrastructure details where needed.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: Visually.io Shopify Editor\nDisallow: /"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://help.visually.io/visually.io-editor-cloudflare-signed-agent"
                }
            ],
            "updated_at": "2026-03-31T20:49:11Z"
        },
        {
            "id": "w3-validator-services",
            "name": "W3 Validator Services",
            "slug": "w3-validator-services",
            "url": "https://botcrawl.com/bots/w3-validator-services/",
            "status": "active",
            "operator": "World Wide Web Consortium (W3C)",
            "company": "World Wide Web Consortium (W3C)",
            "family": "World Wide Web Consortium (W3C)",
            "category": "scraper",
            "kind": "crawler",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "W3C-checklink"
            ],
            "primary_user_agent": "W3C-checklink",
            "robots_token": "W3C-checklink",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "W3C provides various free validation services that help check the conformance of Web sites against open standards.",
            "short_description": "W3C provides various free validation services that help check the conformance of Web sites against open standards.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: W3C-checklink\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"W3C-checklink\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://validator.w3.org/services"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:51Z"
        },
        {
            "id": "wardbot",
            "name": "WARDBot",
            "slug": "wardbot",
            "url": "https://botcrawl.com/bots/wardbot/",
            "status": "active",
            "operator": "WEBSPARK",
            "company": "WEBSPARK",
            "family": "WEBSPARK",
            "category": "ai",
            "kind": "crawler",
            "purpose": "general-crawl",
            "identity_type": "unknown",
            "user_agents": [
                "WARDBot",
                "Mozilla/5.0 (compatible",
                "WARDBot/1.0",
                "http://ward.ai/robot)"
            ],
            "primary_user_agent": "WARDBot",
            "robots_token": "WARDBot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "WARDBot tracks URL status codes, helping users monitor the availability of web pages they have added to the monitoring list.",
            "short_description": "WARDBot tracks URL status codes, helping users monitor the availability of web pages they have added to the monitoring list.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: WARDBot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"WARDBot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://ward.ai/robot"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:51Z"
        },
        {
            "id": "watchbot",
            "name": "Watchbot",
            "slug": "watchbot",
            "url": "https://botcrawl.com/bots/watchbot/",
            "status": "active",
            "operator": "WatchBot",
            "company": "WatchBot",
            "family": "WatchBot",
            "category": "monitoring",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "Watchbot",
                "Watchbot monitoring robot (https://watchbot.fflow.net)"
            ],
            "primary_user_agent": "Watchbot",
            "robots_token": "Watchbot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Website availability & SSL certificate expiration monitoring",
            "short_description": "Website availability & SSL certificate expiration monitoring",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Watchbot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Watchbot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://watchbot.fflow.net/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:51Z"
        },
        {
            "id": "watchful",
            "name": "Watchful",
            "slug": "watchful",
            "url": "https://botcrawl.com/bots/watchful/",
            "status": "active",
            "operator": "Watchful LLC",
            "company": "Watchful LLC",
            "family": "Watchful LLC",
            "category": "monitoring",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "WebsiteOps",
                "WebsiteOps (hello@websiteops.io)"
            ],
            "primary_user_agent": "WebsiteOps",
            "robots_token": "WebsiteOps",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Monitoring system to check uptime on client websites.",
            "short_description": "Monitoring system to check uptime on client websites.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: WebsiteOps\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"WebsiteOps\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://watchful.net/faqs/technical-support/how-do-i-whitelist-the-watchful-ip-address"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:51Z"
        },
        {
            "id": "weborama-fetcher",
            "name": "weborama-fetcher",
            "slug": "weborama-fetcher",
            "url": "https://botcrawl.com/bots/weborama-fetcher/",
            "status": "active",
            "operator": "Weborama",
            "company": "Weborama",
            "family": "Weborama",
            "category": "search",
            "kind": "unknown",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "weborama-fetcher",
                "Mozilla/5.0 (compatible",
                "+https://weborama.com)",
                "Weborama Bot"
            ],
            "primary_user_agent": "weborama-fetcher",
            "robots_token": "weborama-fetcher",
            "verified": "unknown",
            "respects_robots": "unknown",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "unknown",
            "source_type": "unknown",
            "short_description": "Weborama AdTech crawler that collects website data for audience insights and contextual targeting.",
            "verification_method": "No official IP-range or reverse-DNS verification method is documented here. Match the exact user-agent and request behavior against the source documentation at https://weborama.com.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: weborama-fetcher\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"weborama-fetcher\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://weborama.com"
                }
            ],
            "updated_at": "2026-04-01T00:55:25Z"
        },
        {
            "id": "webpagetest",
            "name": "webpagetest",
            "slug": "webpagetest",
            "url": "https://botcrawl.com/bots/webpagetest/",
            "status": "active",
            "operator": "WebPageTest",
            "company": "WebPageTest",
            "family": "WebPageTest",
            "category": "monitoring",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "monitor/webpagetest",
                "Mozilla/5.0 (Windows NT 10.0",
                "Win64",
                "x64",
                "rv:69.0) Gecko/20100101 Firefox/69.0 monitor/webpagetest PTST/190827.180809",
                "x64) AppleWebKit/537.36 (KHTML",
                "like Gecko) Chrome/76.0.3809.132 Safari/537.36 monitor/webpagetest PTST/190827.180809"
            ],
            "primary_user_agent": "monitor/webpagetest",
            "robots_token": "monitor/webpagetest",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "WebPageTest is one of the most popular and free tools for measuring webpage performance and enables you to run web performance tests on your site from a number of different locations across the world in a number of different browsers.",
            "short_description": "WebPageTest is one of the most popular and free tools for measuring webpage performance and enables you to run web performance tests on your site from a number of…",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: monitor/webpagetest\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"monitor/webpagetest\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.webpagetest.org/about"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:51Z"
        },
        {
            "id": "websitepulsebot",
            "name": "WebsitePulseBot",
            "slug": "websitepulsebot",
            "url": "https://botcrawl.com/bots/websitepulsebot/",
            "status": "active",
            "operator": "WebsitePulse",
            "company": "WebsitePulse",
            "family": "WebsitePulse",
            "category": "monitoring",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "websitepulse checker",
                "websitepulse checker/3.0 (compatible",
                "MSIE 5.5",
                "Netscape 4.75",
                "Linux)"
            ],
            "primary_user_agent": "websitepulse checker",
            "robots_token": "websitepulse checker",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "WebsitePulseBot is the HTTP monitoring probe for WebsitePulse's monitoring service.",
            "short_description": "WebsitePulseBot is the HTTP monitoring probe for WebsitePulse's monitoring service.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: websitepulse checker\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"websitepulse checker\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.websitepulse.com/kb/websitepulse"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:51Z"
        },
        {
            "id": "webspidermount",
            "name": "WebSpiderMount",
            "slug": "webspidermount",
            "url": "https://botcrawl.com/bots/webspidermount/",
            "status": "active",
            "operator": "AspenTechLabs Inc",
            "company": "AspenTechLabs Inc",
            "family": "AspenTechLabs Inc",
            "category": "scraper",
            "kind": "crawler",
            "purpose": "indexing",
            "identity_type": "unknown",
            "user_agents": [
                "webspidermount",
                "Mozilla/5.0 (compatible",
                "WSM/2.0",
                "+https://webspidermount.com/)"
            ],
            "primary_user_agent": "webspidermount",
            "robots_token": "webspidermount",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Job wrapping data processor handling jobs distribution from employer websites to multiple endpoints, like job boards, advertisement platforms, job alerts etc.",
            "short_description": "Job wrapping data processor handling jobs distribution from employer websites to multiple endpoints, like job boards, advertisement platforms, job alerts etc.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: webspidermount\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"webspidermount\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://webspidermount.com"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:51Z"
        },
        {
            "id": "webstatus247",
            "name": "Webstatus247",
            "slug": "webstatus247",
            "url": "https://botcrawl.com/bots/webstatus247/",
            "status": "active",
            "operator": "Webstatus247",
            "company": "Webstatus247",
            "family": "Webstatus247",
            "category": "monitoring",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "Webstatus247",
                "Mozilla/5.0 (Windows NT 10.0",
                "Win64",
                "x64) AppleWebKit/537.36 (KHTML",
                "like Gecko) Chrome/127.0.0.0 Safari/537.36 Webstatus247/1.0"
            ],
            "primary_user_agent": "Webstatus247",
            "robots_token": "Webstatus247",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "WebStatus247 is an intelligent website monitoring bot that continuously checks the availability and uptime",
            "short_description": "WebStatus247 is an intelligent website monitoring bot that continuously checks the availability and uptime",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Webstatus247\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Webstatus247\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.webstatus247.com/bot-information"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:51Z"
        },
        {
            "id": "webtotembot",
            "name": "WebTotemBot",
            "slug": "webtotembot",
            "url": "https://botcrawl.com/bots/webtotembot/",
            "status": "active",
            "operator": "WebTotem",
            "company": "WebTotem",
            "family": "WebTotem",
            "category": "security",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "WebTotemBot",
                "WTotem"
            ],
            "primary_user_agent": "WebTotemBot",
            "robots_token": "WebTotemBot",
            "verified": "yes",
            "respects_robots": "yes",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "high",
            "source_type": "directory",
            "short_description": "WebTotem monitors and defends web applications and their data.",
            "verification_method": "Follow the operator's own documentation when additional verification details are available.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: WebTotemBot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"WebTotemBot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://wtotem.com/"
                }
            ],
            "last_verified": "2026-04-01",
            "updated_at": "2026-04-30T09:15:51Z"
        },
        {
            "id": "webzio",
            "name": "webzio",
            "slug": "webzio",
            "url": "https://botcrawl.com/bots/webzio/",
            "status": "active",
            "operator": "Webz.io",
            "company": "Webz.io",
            "family": "Webz.io",
            "category": "search",
            "kind": "unknown",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "webzio",
                "Mozilla/5.0 (compatible",
                "webzio/1.0",
                "+https://webz.io/bot.html)",
                "Webz.io Bot"
            ],
            "primary_user_agent": "webzio",
            "robots_token": "webzio",
            "verified": "unknown",
            "respects_robots": "unknown",
            "risk": "high",
            "recommended_action": "depends",
            "confidence": "unknown",
            "source_type": "unknown",
            "short_description": "Webz.io open web crawler that collects data from news, blogs, forums, and social media.",
            "verification_method": "Verify the exact user-agent against Webz.io's published crawler documentation.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: webzio\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"webzio\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://webz.io/bot.html"
                }
            ],
            "updated_at": "2026-04-01T00:55:25Z"
        },
        {
            "id": "webzio-extended",
            "name": "webzio-extended",
            "slug": "webzio-extended",
            "url": "https://botcrawl.com/bots/webzio-extended/",
            "status": "active",
            "operator": "Webz.io",
            "company": "Webz.io",
            "family": "Webz.io",
            "category": "ai",
            "kind": "unknown",
            "purpose": "control-token",
            "identity_type": "unknown",
            "user_agents": [
                "webzio-extended",
                "Mozilla/5.0 (compatible",
                "webzio-extended/1.0",
                "+https://webz.io/bot.html)",
                "Webz.io Extended"
            ],
            "primary_user_agent": "webzio-extended",
            "robots_token": "webzio-extended",
            "verified": "unknown",
            "respects_robots": "unknown",
            "risk": "high",
            "recommended_action": "depends",
            "confidence": "unknown",
            "source_type": "unknown",
            "short_description": "Webz.io extended web crawler that maintains a repository of web crawl data.",
            "verification_method": "Verify the exact user-agent against Webz.io's published crawler documentation.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: webzio-extended\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"webzio-extended\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://webz.io/bot.html"
                }
            ],
            "updated_at": "2026-04-01T00:55:18Z"
        },
        {
            "id": "whatsapp",
            "name": "WhatsApp",
            "slug": "whatsapp",
            "url": "https://botcrawl.com/bots/whatsapp/",
            "status": "active",
            "operator": "Meta",
            "company": "Meta",
            "family": "Meta",
            "category": "scraper",
            "kind": "unknown",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "WhatsApp",
                "WhatsApp/2.0 (+https://www.whatsapp.com/legal/crawler)",
                "WhatsApp Bot",
                "WhatsApp Preview"
            ],
            "primary_user_agent": "WhatsApp",
            "robots_token": "WhatsApp",
            "verified": "unknown",
            "respects_robots": "yes",
            "risk": "neutral",
            "recommended_action": "allow",
            "confidence": "unknown",
            "source_type": "unknown",
            "short_description": "WhatsApp link preview crawler that fetches URLs shared in WhatsApp messages.",
            "verification_method": "Verify the exact user-agent against Meta's crawler documentation and confirm Meta ownership where possible.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: WhatsApp\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"WhatsApp\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.whatsapp.com"
                }
            ],
            "updated_at": "2026-04-01T00:55:30Z"
        },
        {
            "id": "wkndhealthcheckbot",
            "name": "WkndHealthCheckBot",
            "slug": "wkndhealthcheckbot",
            "url": "https://botcrawl.com/bots/wkndhealthcheckbot/",
            "status": "active",
            "operator": "Wunderkind",
            "company": "Wunderkind",
            "family": "Wunderkind",
            "category": "monitoring",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "wknd-bot"
            ],
            "primary_user_agent": "wknd-bot",
            "robots_token": "wknd-bot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Used by Wunderkind to perform health checks on clients' domains.",
            "short_description": "Used by Wunderkind to perform health checks on clients' domains.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: wknd-bot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"wknd-bot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://developer.wunderkind.co/docs/server-side-tracking-implementation"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:52Z"
        },
        {
            "id": "wmf-citoid",
            "name": "WMF Citoid",
            "slug": "wmf-citoid",
            "url": "https://botcrawl.com/bots/wmf-citoid/",
            "status": "active",
            "operator": "Wikimedia Foundation",
            "company": "Wikimedia Foundation",
            "family": "Wikimedia Foundation",
            "category": "unknown",
            "kind": "unknown",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "Citoid/WMF",
                "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36 Citoid/WMF (mailto:noc@wikimedia.org)",
                "Wikimedia Citoid"
            ],
            "primary_user_agent": "Citoid/WMF",
            "robots_token": "Citoid/WMF",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Citoid is a Wikimedia service in VisualEditor that generates citations from URLs, DOIs, and ISBNs, relying on the Zotero Translation Server (see wikimedia-zotero) for accurate metadata, processed on demand from website visitors.",
            "short_description": "Citoid is a Wikimedia service in VisualEditor that generates citations from URLs, DOIs, and ISBNs, relying on the Zotero Translation Server (see wikimedia-zotero) for…",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Citoid/WMF\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Citoid/WMF\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://wikitech.wikimedia.org/wiki/Citoid"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:52Z"
        },
        {
            "id": "wmf-zotero-translation-server",
            "name": "WMF Zotero Translation Server",
            "slug": "wmf-zotero-translation-server",
            "url": "https://botcrawl.com/bots/wmf-zotero-translation-server/",
            "status": "active",
            "operator": "Wikimedia Foundation",
            "company": "Wikimedia Foundation",
            "family": "Wikimedia Foundation",
            "category": "unknown",
            "kind": "unknown",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "ZoteroTranslationServer/WMF",
                "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36 ZoteroTranslationServer/WMF (mailto:noc@wikimedia.org)",
                "Wikimedia Zotero"
            ],
            "primary_user_agent": "ZoteroTranslationServer/WMF",
            "robots_token": "ZoteroTranslationServer/WMF",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "The Wikimedia Foundation's Zotero Translation Server is a customized metadata extraction tool that powers Citoid (see wikimedia-citoid), retrieving citation data from URLs, DOIs, and ISBNs using Zotero translators, on demand from website visitor requests.",
            "short_description": "The Wikimedia Foundation's Zotero Translation Server is a customized metadata extraction tool that powers Citoid (see wikimedia-citoid), retrieving citation data from…",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: ZoteroTranslationServer/WMF\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"ZoteroTranslationServer/WMF\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://wikitech.wikimedia.org/wiki/Zotero"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:52Z"
        },
        {
            "id": "wordcountbot",
            "name": "WordCountBot",
            "slug": "wordcountbot",
            "url": "https://botcrawl.com/bots/wordcountbot/",
            "status": "active",
            "operator": "Weglot",
            "company": "Weglot",
            "family": "Weglot",
            "category": "scraper",
            "kind": "preview",
            "purpose": "general-crawl",
            "identity_type": "unknown",
            "user_agents": [
                "WordCountBot",
                "Mozilla/5.0 (compatible; WordCountBot/0.1; )"
            ],
            "primary_user_agent": "WordCountBot",
            "robots_token": "WordCountBot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "WordCountBot analyzes website word count based on public pages. All words belonging to public pages and included in the HTML source code are counted.",
            "short_description": "WordCountBot analyzes website word count based on public pages.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: WordCountBot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"WordCountBot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://weglot.com"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:52Z"
        },
        {
            "id": "wordpress-namecheap",
            "name": "WordPress Namecheap",
            "slug": "wordpress-namecheap",
            "url": "https://botcrawl.com/bots/wordpress-namecheap/",
            "status": "active",
            "operator": "namecheap",
            "company": "namecheap",
            "family": "namecheap",
            "category": "unknown",
            "kind": "unknown",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "WordPress",
                "WordPress/5.5.1; https://cresiap.org.mx",
                "WordPress/"
            ],
            "primary_user_agent": "WordPress",
            "robots_token": "WordPress",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "This is the major egress IP for our containerised WordPress platform so it is likely to be many flavours of WordPress and the potential to be any domain.",
            "short_description": "This is the major egress IP for our containerised WordPress platform so it is likely to be many flavours of WordPress and the potential to be any domain.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: WordPress\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"WordPress\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "http://namecheap.com"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:52Z"
        },
        {
            "id": "worldline-bot",
            "name": "Worldline Bot",
            "slug": "worldline-bot",
            "url": "https://botcrawl.com/bots/worldline-bot/",
            "status": "active",
            "operator": "Worldline",
            "company": "Worldline",
            "family": "Worldline",
            "category": "unknown",
            "kind": "fetcher",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "Indy Library",
                "Mozilla/3.0 (compatible; Indy Library)"
            ],
            "primary_user_agent": "Indy Library",
            "robots_token": "Indy Library",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "The Worldline Bot is associated with Worldline, a payment and transactional services company. It handles notifications and callbacks related to payment processing.",
            "short_description": "The Worldline Bot is associated with Worldline, a payment and transactional services company.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Indy Library\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Indy Library\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://secure.ogone.com/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:52Z"
        },
        {
            "id": "worldpay",
            "name": "WorldPay",
            "slug": "worldpay",
            "url": "https://botcrawl.com/bots/worldpay/",
            "status": "active",
            "operator": "WorldPay",
            "company": "WorldPay",
            "family": "WorldPay",
            "category": "unknown",
            "kind": "fetcher",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "WJHRO/1.0",
                "WJHRO/1.0 (WorldPay Java HTTP Request Object)"
            ],
            "primary_user_agent": "WJHRO/1.0",
            "robots_token": "WJHRO/1.0",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Payment confirmation callbacks to ecommerce backends",
            "short_description": "Payment confirmation callbacks to ecommerce backends",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: WJHRO/1.0\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"WJHRO/1.0\")"
            },
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:52Z"
        },
        {
            "id": "wormlybot",
            "name": "WormlyBot",
            "slug": "wormlybot",
            "url": "https://botcrawl.com/bots/wormlybot/",
            "status": "active",
            "operator": "Wormly",
            "company": "Wormly",
            "family": "Wormly",
            "category": "monitoring",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "WormlyBot",
                "Mozilla/5.0 (compatible; WormlyBot; +http://wormly.com)"
            ],
            "primary_user_agent": "WormlyBot",
            "robots_token": "WormlyBot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "WormlyBot is an HTTP monitoring probe for Wormly's uptime monitoring service.",
            "short_description": "WormlyBot is an HTTP monitoring probe for Wormly's uptime monitoring service.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: WormlyBot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"WormlyBot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.wormly.com/help/server-monitoring/website"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:52Z"
        },
        {
            "id": "wovn-crawler",
            "name": "WOVN Crawler",
            "slug": "wovn-crawler",
            "url": "https://botcrawl.com/bots/wovn-crawler/",
            "status": "active",
            "operator": "Wovn Technologies, Inc.",
            "company": "Wovn Technologies, Inc.",
            "family": "Wovn Technologies, Inc.",
            "category": "monitoring",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "WovnCrawler",
                "WovnCrawler/1.0"
            ],
            "primary_user_agent": "WovnCrawler",
            "robots_token": "WovnCrawler",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "We offer WOVN.io, a service for localizing websites. We run a crawler to get the source language of our clients' websites.",
            "short_description": "We offer WOVN.io, a service for localizing websites.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: WovnCrawler\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"WovnCrawler\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://support.wovn.io/hc/ja/articles/360043165091"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:52Z"
        },
        {
            "id": "wp-time-capsule",
            "name": "WP Time Capsule",
            "slug": "wp-time-capsule",
            "url": "https://botcrawl.com/bots/wp-time-capsule/",
            "status": "active",
            "operator": "WP Time Capsule",
            "company": "WP Time Capsule",
            "family": "WP Time Capsule",
            "category": "unknown",
            "kind": "fetcher",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "WP Time Capsule API/",
                "WP Time Capsule API/1.0 ( https://cron.wptimecapsule.com/, https://service.wptimecapsule.com/ )"
            ],
            "primary_user_agent": "WP Time Capsule API/",
            "robots_token": "WP Time Capsule API/",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Our plugin WPTimeCapsule is installed on more than 30,000 WordPress sites. When our backup servers send requests to trigger the backup on the WordPress sites, they can be blocked.",
            "short_description": "Our plugin WPTimeCapsule is installed on more than 30,000 WordPress sites.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: WP Time Capsule API/\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"WP Time Capsule API/\")"
            },
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:52Z"
        },
        {
            "id": "wp-umbrella",
            "name": "WP Umbrella",
            "slug": "wp-umbrella",
            "url": "https://botcrawl.com/bots/wp-umbrella/",
            "status": "active",
            "operator": "WP Umbrella",
            "company": "WP Umbrella",
            "family": "WP Umbrella",
            "category": "monitoring",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "WPUmbrella"
            ],
            "primary_user_agent": "WPUmbrella",
            "robots_token": "WPUmbrella",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Fetches data from WordPress-enabled sites for Umbrella plugin users.",
            "short_description": "Fetches data from WordPress-enabled sites for Umbrella plugin users.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: WPUmbrella\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"WPUmbrella\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://wp-umbrella.com"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:52Z"
        },
        {
            "id": "wp-umbrella-bot",
            "name": "WP Umbrella Bot",
            "slug": "wp-umbrella-bot",
            "url": "https://botcrawl.com/bots/wp-umbrella-bot/",
            "status": "active",
            "operator": "WP Umbrella",
            "company": "WP Umbrella",
            "family": "WP Umbrella",
            "category": "unknown",
            "kind": "fetcher",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "umbrella bot"
            ],
            "primary_user_agent": "umbrella bot",
            "robots_token": "umbrella bot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "WP Umbrella is the ultimate all-in-one solution to manage, maintain and monitor one, or multiple WordPress websites.",
            "short_description": "WP Umbrella is the ultimate all-in-one solution to manage, maintain and monitor one, or multiple WordPress websites.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: umbrella bot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"umbrella bot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://wp-umbrella.com"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:52Z"
        },
        {
            "id": "wpmu-dev-broken-link-checker",
            "name": "WPMU DEV Broken Link Checker",
            "slug": "wpmu-dev-broken-link-checker",
            "url": "https://botcrawl.com/bots/wpmu-dev-broken-link-checker/",
            "status": "active",
            "operator": "WPMUDEV",
            "company": "WPMUDEV",
            "family": "WPMUDEV",
            "category": "monitoring",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "WPMU DEV Broken Link Checker",
                "WPMU DEV Broken Link Checker Spider"
            ],
            "primary_user_agent": "WPMU DEV Broken Link Checker",
            "robots_token": "WPMU DEV Broken Link Checker",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Runs a full scan of a site to find any broken links",
            "short_description": "Runs a full scan of a site to find any broken links",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: WPMU DEV Broken Link Checker\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"WPMU DEV Broken Link Checker\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://wpmudev.com/docs/hub-2-0/broken-link-checker-2/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:52Z"
        },
        {
            "id": "wpmudev-uptime-monitor-5-0",
            "name": "WPMUDEV Uptime Monitor 5.0",
            "slug": "wpmudev-uptime-monitor-5-0",
            "url": "https://botcrawl.com/bots/wpmudev-uptime-monitor-5-0/",
            "status": "active",
            "operator": "WPMUDEV",
            "company": "WPMUDEV",
            "family": "WPMUDEV",
            "category": "monitoring",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "WPMUDEV",
                "WPMUDEV Uptime Monitor 5.0 (https://wpmudev.com)"
            ],
            "primary_user_agent": "WPMUDEV",
            "robots_token": "WPMUDEV",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "WPMUDEV Uptime Monitor 5.0 (https://wpmudev.com)",
            "short_description": "WPMUDEV Uptime Monitor 5.0 (https://wpmudev.com)",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: WPMUDEV\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"WPMUDEV\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://wpmudev.com/monitor/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:52Z"
        },
        {
            "id": "xy-archive-compliance-bot",
            "name": "XY Archive Compliance Bot",
            "slug": "xy-archive-compliance-bot",
            "url": "https://botcrawl.com/bots/xy-archive-compliance-bot/",
            "status": "active",
            "operator": "XY Archive Compliance",
            "company": "XY Archive Compliance",
            "family": "XY Archive Compliance",
            "category": "unknown",
            "kind": "unknown",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "XY-Archive-Compliance",
                "Mozilla/5.0 (compatible; XY-Archive-Compliance-Crawler; +https://archive.xyplanningnetwork.com/)",
                "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36 (compatible; XY-Archive-Compliance-Archiver; +https://archive.xyplanningnetwork.com/)"
            ],
            "primary_user_agent": "XY-Archive-Compliance",
            "robots_token": "XY-Archive-Compliance",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Website archiver for our customers who have archive compliance requirements to fulfill them.",
            "short_description": "Website archiver for our customers who have archive compliance requirements to fulfill them.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: XY-Archive-Compliance\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"XY-Archive-Compliance\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://xy-archive.helpscoutdocs.com/article/61-does-xy-archive-have-a-dedicated-ip-address"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:52Z"
        },
        {
            "id": "yacybot",
            "name": "yacybot",
            "slug": "yacybot",
            "url": "https://botcrawl.com/bots/yacybot/",
            "status": "active",
            "operator": "YaCy",
            "company": "YaCy",
            "family": "YaCy",
            "category": "search",
            "kind": "crawler",
            "purpose": "search",
            "identity_type": "official-documented",
            "user_agents": [
                "yacybot",
                "YaCy-Bot"
            ],
            "primary_user_agent": "yacybot",
            "robots_token": "yacybot",
            "verified": "unknown",
            "respects_robots": "yes",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "official",
            "short_description": "The YaCy project documents yacybot as the user-agent used by YaCy web crawlers.",
            "verification_method": "Official YaCy bot documentation; verify by user-agent and behavior, but note that crawlers are run by YaCy users rather than a single central service.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: yacybot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"yacybot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://yacy.net/bot.html"
                }
            ],
            "last_verified": "2026-04-01",
            "updated_at": "2026-04-30T09:15:52Z"
        },
        {
            "id": "yadirectfetcher",
            "name": "YaDirectFetcher",
            "slug": "yadirectfetcher",
            "url": "https://botcrawl.com/bots/yadirectfetcher/",
            "status": "active",
            "operator": "Yandex",
            "company": "Yandex",
            "family": "Yandex",
            "category": "search",
            "kind": "fetcher",
            "purpose": "site-owner-fetch",
            "identity_type": "official-documented",
            "user_agents": [
                "YaDirectFetcher",
                "Mozilla/5.0 (compatible",
                "YaDirectFetcher/1.0",
                "Dyatel",
                "+http://yandex.com/bots)",
                "YaDirectFetcher Yandex robot"
            ],
            "primary_user_agent": "YaDirectFetcher",
            "robots_token": "YaDirectFetcher",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "high",
            "source_type": "official",
            "short_description": "Yandex advertising fetcher that downloads target pages of ads to check availability and topic.",
            "verification_method": "Verify with reverse DNS and forward DNS checks; valid Yandex hosts end in yandex.ru, yandex.net, or yandex.com.",
            "spoofing_risk": "Yandex user-agent strings are often spoofed. Verify the source host with reverse DNS and forward DNS before allow-listing.",
            "rules": {
                "robots": "User-agent: YaDirectFetcher\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"YaDirectFetcher\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://yandex.com/support/webmaster/en/robot-workings/check-yandex-robots"
                }
            ],
            "last_verified": "2026-04-29",
            "last_checked": "2026-04-29",
            "updated_at": "2026-04-30T09:15:55Z"
        },
        {
            "id": "yahoo-ad-monitoring",
            "name": "Yahoo Ad Monitoring",
            "slug": "yahoo-ad-monitoring",
            "url": "https://botcrawl.com/bots/yahoo-ad-monitoring/",
            "status": "active",
            "operator": "Yahoo",
            "company": "Yahoo",
            "family": "Yahoo",
            "category": "scraper",
            "kind": "crawler",
            "purpose": "general-crawl",
            "identity_type": "unknown",
            "user_agents": [
                "Yahoo Ad monitoring",
                "Mozilla/5.0 (compatible",
                "https://help.yahoo.com/kb/yahoo-ad-monitoring-SLN24857.html) yahoo.adquality.lwd.desktop/1565637561-0"
            ],
            "primary_user_agent": "Yahoo Ad monitoring",
            "robots_token": "Yahoo Ad monitoring",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Yahoo Ad Monitor monitors the contents of webpages where Yahoo! ads are served.",
            "short_description": "Yahoo Ad Monitor monitors the contents of webpages where Yahoo! ads are served.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Yahoo Ad monitoring\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Yahoo Ad monitoring\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.yahoo.com/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:52Z"
        },
        {
            "id": "yahoo-japan-seo-crawler",
            "name": "Yahoo Japan SEO Crawler",
            "slug": "yahoo-japan-seo-crawler",
            "url": "https://botcrawl.com/bots/yahoo-japan-seo-crawler/",
            "status": "active",
            "operator": "Yahoo",
            "company": "Yahoo",
            "family": "Yahoo",
            "category": "scraper",
            "kind": "crawler",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "J-BRW",
                "Y!J-BRW/1.0 (https://support.yahoo-net.jp/PccSearch/s/article/H000007955)",
                "J-BRW/"
            ],
            "primary_user_agent": "J-BRW",
            "robots_token": "J-BRW",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Yahoo Japan search engine crawler for SEO analysis",
            "short_description": "Yahoo Japan search engine crawler for SEO analysis",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: J-BRW\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"J-BRW\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.yahoo-help.jp"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:52Z"
        },
        {
            "id": "yahoo-japan-yj-asr",
            "name": "Yahoo Japan Y!J-ASR",
            "slug": "yahoo-japan-yj-asr",
            "url": "https://botcrawl.com/bots/yahoo-japan-yj-asr/",
            "status": "active",
            "operator": "Yahoo Japan Corporation",
            "company": "Yahoo Japan Corporation",
            "family": "Yahoo Japan Corporation",
            "category": "scraper",
            "kind": "crawler",
            "purpose": "general-crawl",
            "identity_type": "unknown",
            "user_agents": [
                "Y!J-ASR",
                "Y!J-BRU/VSIDX",
                "Y!J-BRW",
                "Y!J-MMP/dscv",
                "Y!J-WSC",
                "Y!J-BRJ/YATS crawler",
                "Y!J-BRY/YATSH crawler",
                "Y!J-BRZ/YATSHA crawler",
                "YJ-SAD",
                "Y!J-B|YJ-SAD"
            ],
            "primary_user_agent": "Y!J-ASR",
            "robots_token": "Y!J-ASR",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Yahoo Japan Advertising Bot",
            "short_description": "Yahoo Japan Advertising Bot",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Y!J-ASR\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Y!J-ASR\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://yahoo.jp/3BSZgF"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:52Z"
        },
        {
            "id": "yahoo-link-preview",
            "name": "Yahoo Link Preview",
            "slug": "yahoo-link-preview",
            "url": "https://botcrawl.com/bots/yahoo-link-preview/",
            "status": "active",
            "operator": "Yahoo",
            "company": "Yahoo",
            "family": "Yahoo",
            "category": "scraper",
            "kind": "preview",
            "purpose": "general-crawl",
            "identity_type": "unknown",
            "user_agents": [
                "Yahoo Link Preview",
                "Mozilla/5.0 (compatible",
                "https://help.yahoo.com/kb/mail/yahoo-link-preview-SLN23615.html)",
                "Yahoo preview bot"
            ],
            "primary_user_agent": "Yahoo Link Preview",
            "robots_token": "Yahoo Link Preview",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Yahoo Link Preview's bot fetches data from URLs shared on Yahoo platforms.",
            "short_description": "Yahoo Link Preview's bot fetches data from URLs shared on Yahoo platforms.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Yahoo Link Preview\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Yahoo Link Preview\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://help.yahoo.com/kb/mail/yahoo-link-preview-SLN23615.html"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:52Z"
        },
        {
            "id": "yahoo-slurp",
            "name": "Yahoo Slurp",
            "slug": "yahoo-slurp",
            "url": "https://botcrawl.com/bots/yahoo-slurp/",
            "status": "active",
            "operator": "Yahoo",
            "company": "Yahoo",
            "family": "Yahoo",
            "category": "search",
            "kind": "crawler",
            "purpose": "indexing",
            "identity_type": "unknown",
            "user_agents": [
                "Yahoo! Slurp",
                "Mozilla/5.0 (compatible",
                "http://help.yahoo.com/help/us/ysearch/slurp) sieve-gq1/1560642271-0"
            ],
            "primary_user_agent": "Yahoo! Slurp",
            "robots_token": "Yahoo! Slurp",
            "verified": "yes",
            "respects_robots": "yes",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Yahoo! Slurp was the search engine crawler for Yahoo's search engine.",
            "short_description": "Yahoo! Slurp was the search engine crawler for Yahoo's search engine.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Yahoo! Slurp\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Yahoo! Slurp\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "http://help.yahoo.com/help/us/ysearch/slurp"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:52Z"
        },
        {
            "id": "yahoo-japan-j-dlc",
            "name": "Yahoo! JAPAN J-DLC",
            "slug": "yahoo-japan-j-dlc",
            "url": "https://botcrawl.com/bots/yahoo-japan-j-dlc/",
            "status": "active",
            "operator": "Yahoo! JAPAN",
            "company": "Yahoo! JAPAN",
            "family": "Yahoo! JAPAN",
            "category": "search",
            "kind": "crawler",
            "purpose": "indexing",
            "identity_type": "unknown",
            "user_agents": [
                "J-DLC",
                "Y!J-DLC/1.0 (https://support.yahoo-net.jp/PccSearch/s/article/H000007955)",
                "J-DLC/"
            ],
            "primary_user_agent": "J-DLC",
            "robots_token": "J-DLC",
            "verified": "yes",
            "respects_robots": "yes",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Yahoo! JAPAN manages and operates a system that accesses web pages published on the Internet for the purpose of providing services, research, development, maintenance, etc.",
            "short_description": "Yahoo! JAPAN crawler that accesses published web pages for providing services, research, development, and maintenance.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: J-DLC\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"J-DLC\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.yahoo.co.jp/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:53Z"
        },
        {
            "id": "yahoocachesystem",
            "name": "YahooCacheSystem",
            "slug": "yahoocachesystem",
            "url": "https://botcrawl.com/bots/yahoocachesystem/",
            "status": "active",
            "operator": "Yahoo",
            "company": "Yahoo",
            "family": "Yahoo",
            "category": "scraper",
            "kind": "preview",
            "purpose": "general-crawl",
            "identity_type": "unknown",
            "user_agents": [
                "YahooCacheSystem",
                "YahooWebServiceClient"
            ],
            "primary_user_agent": "YahooCacheSystem",
            "robots_token": "YahooCacheSystem",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "YahooCacheSystem caches website contents as part of the Yahoo! Search Service.",
            "short_description": "YahooCacheSystem caches website contents as part of the Yahoo! Search Service.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: YahooCacheSystem\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"YahooCacheSystem\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.yahoo.com/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:53Z"
        },
        {
            "id": "yahoomailproxy",
            "name": "YahooMailProxy",
            "slug": "yahoomailproxy",
            "url": "https://botcrawl.com/bots/yahoomailproxy/",
            "status": "active",
            "operator": "Yahoo",
            "company": "Yahoo",
            "family": "Yahoo",
            "category": "scraper",
            "kind": "preview",
            "purpose": "general-crawl",
            "identity_type": "unknown",
            "user_agents": [
                "YahooMailProxy",
                "https://help.yahoo.com/kb/yahoo-mail-proxy-SLN28749.html",
                "Yahoo Mail"
            ],
            "primary_user_agent": "YahooMailProxy",
            "robots_token": "YahooMailProxy",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Yahoo Mail Proxy is a content fetch proxy that retrieves the page content of URLs that are embedded within emails sent to Yahoo Mail users. Having the content displayed through the proxy improves the security for email users while reducing overall network usage.",
            "short_description": "Yahoo Mail Proxy is a content fetch proxy that retrieves the page content of URLs that are embedded within emails sent to Yahoo Mail users.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: YahooMailProxy\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"YahooMailProxy\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://help.yahoo.com/kb/yahoo-mail-proxy-SLN28749.html"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:53Z"
        },
        {
            "id": "yak",
            "name": "YaK",
            "slug": "yak",
            "url": "https://botcrawl.com/bots/yak/",
            "status": "active",
            "operator": "Linkfluence",
            "company": "Linkfluence",
            "family": "Linkfluence",
            "category": "monitoring",
            "kind": "unknown",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "YaK",
                "Mozilla/5.0 (compatible",
                "+https://linkfluence.com)",
                "Linkfluence YaK"
            ],
            "primary_user_agent": "YaK",
            "robots_token": "YaK",
            "verified": "unknown",
            "respects_robots": "unknown",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "unknown",
            "source_type": "unknown",
            "short_description": "Linkfluence social media intelligence crawler for market research and brand monitoring.",
            "verification_method": "No official IP-range or reverse-DNS verification method is documented here. Match the exact user-agent and request behavior against the source documentation at https://linkfluence.com.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: YaK\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"YaK\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://linkfluence.com"
                }
            ],
            "updated_at": "2026-04-01T00:55:26Z"
        },
        {
            "id": "yandexmetrika-yabs01",
            "name": "Yandex.Metrika",
            "slug": "yandexmetrika-yabs01",
            "url": "https://botcrawl.com/bots/yandexmetrika-yabs01/",
            "status": "active",
            "operator": "Yandex",
            "company": "Yandex",
            "family": "Yandex",
            "category": "monitoring",
            "kind": "unknown",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "YandexMetrika/2.0",
                "Mozilla/5.0 (compatible",
                "+http://yandex.com/bots)",
                "Yandex Metrica Bot"
            ],
            "primary_user_agent": "YandexMetrika/2.0",
            "robots_token": "YandexMetrika/2.0",
            "verified": "yes",
            "respects_robots": "yes",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "unknown",
            "source_type": "unknown",
            "short_description": "Yandex Metrica analytics bot for analytics-related crawling tasks.",
            "verification_method": "Verify the exact user-agent and confirm reverse DNS resolves under yandex.ru, yandex.net, or yandex.com.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: YandexMetrika/2.0\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"YandexMetrika/2.0\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://yandex.com/support/webmaster/en/robot-workings/check-yandex-robots"
                }
            ],
            "updated_at": "2026-04-01T00:55:32Z"
        },
        {
            "id": "yandexaccessibilitybot",
            "name": "YandexAccessibilityBot",
            "slug": "yandexaccessibilitybot",
            "url": "https://botcrawl.com/bots/yandexaccessibilitybot/",
            "status": "active",
            "operator": "Yandex",
            "company": "Yandex",
            "family": "Yandex",
            "category": "search",
            "kind": "crawler",
            "purpose": "general-crawl",
            "identity_type": "official-documented",
            "user_agents": [
                "YandexAccessibilityBot",
                "Mozilla/5.0 (compatible",
                "YandexAccessibilityBot/3.0",
                "+http://yandex.com/bots)",
                "Yandex accessibility checker"
            ],
            "primary_user_agent": "YandexAccessibilityBot",
            "robots_token": "YandexAccessibilityBot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "high",
            "source_type": "official",
            "short_description": "Yandex crawler for accessibility checks.",
            "verification_method": "Verify with reverse DNS and forward DNS checks; valid Yandex hosts end in yandex.ru, yandex.net, or yandex.com.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: YandexAccessibilityBot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"YandexAccessibilityBot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://yandex.com/support/webmaster/en/robot-workings/check-yandex-robots"
                }
            ],
            "last_verified": "2026-04-01",
            "updated_at": "2026-04-30T09:15:53Z"
        },
        {
            "id": "yandexadditional",
            "name": "YandexAdditional",
            "slug": "yandexadditional",
            "url": "https://botcrawl.com/bots/yandexadditional/",
            "status": "active",
            "operator": "Yandex",
            "company": "Yandex",
            "family": "Yandex",
            "category": "ai",
            "kind": "control-token",
            "purpose": "control-token",
            "identity_type": "official-documented",
            "user_agents": [
                "YandexAdditional",
                "Mozilla/5.0 (compatible",
                "YandexAdditional/1.0",
                "+http://yandex.com/bots)",
                "YandexAdditional Yandex robot"
            ],
            "primary_user_agent": "YandexAdditional",
            "robots_token": "YandexAdditional",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "high",
            "source_type": "official",
            "short_description": "Yandex robot token that helps prevent indexed page content from appearing in Yandex AI responses.",
            "verification_method": "Verify with reverse DNS and forward DNS checks; valid Yandex hosts end in yandex.ru, yandex.net, or yandex.com.",
            "spoofing_risk": "Yandex user-agent strings are often spoofed. Verify the source host with reverse DNS and forward DNS before allow-listing.",
            "rules": {
                "robots": "User-agent: YandexAdditional\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"YandexAdditional\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://yandex.com/support/webmaster/en/robot-workings/check-yandex-robots"
                }
            ],
            "last_verified": "2026-04-29",
            "last_checked": "2026-04-29",
            "updated_at": "2026-04-30T09:15:55Z"
        },
        {
            "id": "yandexadditionalbot",
            "name": "YandexAdditionalBot",
            "slug": "yandexadditionalbot",
            "url": "https://botcrawl.com/bots/yandexadditionalbot/",
            "status": "active",
            "operator": "Yandex",
            "company": "Yandex",
            "family": "Yandex",
            "category": "ai",
            "kind": "control-token",
            "purpose": "control-token",
            "identity_type": "official-documented",
            "user_agents": [
                "YandexAdditionalBot",
                "Mozilla/5.0 (compatible",
                "YandexAdditionalBot/1.0",
                "+http://yandex.com/bots)",
                "YandexAdditionalBot Yandex robot"
            ],
            "primary_user_agent": "YandexAdditionalBot",
            "robots_token": "YandexAdditionalBot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "high",
            "source_type": "official",
            "short_description": "Yandex robot that helps process robots.txt so indexed pages can be excluded from Yandex AI responses.",
            "verification_method": "Verify with reverse DNS and forward DNS checks; valid Yandex hosts end in yandex.ru, yandex.net, or yandex.com.",
            "spoofing_risk": "Yandex user-agent strings are often spoofed. Verify the source host with reverse DNS and forward DNS before allow-listing.",
            "rules": {
                "robots": "User-agent: YandexAdditionalBot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"YandexAdditionalBot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://yandex.com/support/webmaster/en/robot-workings/check-yandex-robots"
                }
            ],
            "last_verified": "2026-04-29",
            "last_checked": "2026-04-29",
            "updated_at": "2026-04-30T09:15:55Z"
        },
        {
            "id": "yandexadnet",
            "name": "YandexAdNet",
            "slug": "yandexadnet",
            "url": "https://botcrawl.com/bots/yandexadnet/",
            "status": "active",
            "operator": "Yandex",
            "company": "Yandex",
            "family": "Yandex",
            "category": "search",
            "kind": "crawler",
            "purpose": "general-crawl",
            "identity_type": "official-documented",
            "user_agents": [
                "YandexAdNet",
                "Mozilla/5.0 (compatible",
                "YandexAdNet/1.0",
                "+http://yandex.com/bots)",
                "YandexAdNet Yandex robot"
            ],
            "primary_user_agent": "YandexAdNet",
            "robots_token": "YandexAdNet",
            "verified": "yes",
            "respects_robots": "yes",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "high",
            "source_type": "official",
            "short_description": "Yandex Advertising Network robot.",
            "verification_method": "Verify with reverse DNS and forward DNS checks; valid Yandex hosts end in yandex.ru, yandex.net, or yandex.com.",
            "spoofing_risk": "Yandex user-agent strings are often spoofed. Verify the source host with reverse DNS and forward DNS before allow-listing.",
            "rules": {
                "robots": "User-agent: YandexAdNet\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"YandexAdNet\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://yandex.com/support/webmaster/en/robot-workings/check-yandex-robots"
                }
            ],
            "last_verified": "2026-04-29",
            "last_checked": "2026-04-29",
            "updated_at": "2026-04-30T09:15:55Z"
        },
        {
            "id": "yandexblogs",
            "name": "YandexBlogs",
            "slug": "yandexblogs",
            "url": "https://botcrawl.com/bots/yandexblogs/",
            "status": "active",
            "operator": "Yandex",
            "company": "Yandex",
            "family": "Yandex",
            "category": "search",
            "kind": "crawler",
            "purpose": "indexing",
            "identity_type": "official-documented",
            "user_agents": [
                "YandexBlogs",
                "Mozilla/5.0 (compatible",
                "YandexBlogs/1.0",
                "+http://yandex.com/bots)",
                "YandexBlogs Yandex robot"
            ],
            "primary_user_agent": "YandexBlogs",
            "robots_token": "YandexBlogs",
            "verified": "yes",
            "respects_robots": "yes",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "high",
            "source_type": "official",
            "short_description": "Yandex blog-search robot that indexes post comments.",
            "verification_method": "Verify with reverse DNS and forward DNS checks; valid Yandex hosts end in yandex.ru, yandex.net, or yandex.com.",
            "spoofing_risk": "Yandex user-agent strings are often spoofed. Verify the source host with reverse DNS and forward DNS before allow-listing.",
            "rules": {
                "robots": "User-agent: YandexBlogs\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"YandexBlogs\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://yandex.com/support/webmaster/en/robot-workings/check-yandex-robots"
                }
            ],
            "last_verified": "2026-04-29",
            "last_checked": "2026-04-29",
            "updated_at": "2026-04-30T09:15:55Z"
        },
        {
            "id": "yandexbot",
            "name": "YandexBot",
            "slug": "yandexbot",
            "url": "https://botcrawl.com/bots/yandexbot/",
            "status": "active",
            "operator": "Yandex",
            "company": "Yandex",
            "family": "Yandex",
            "category": "search",
            "kind": "crawler",
            "purpose": "indexing",
            "identity_type": "official-documented",
            "user_agents": [
                "YandexBot",
                "Mozilla/5.0 (compatible",
                "YandexBot/3.0",
                "+http://yandex.com/bots)",
                "http://yandex.com/bots",
                "Yandex main crawler"
            ],
            "primary_user_agent": "YandexBot",
            "robots_token": "YandexBot",
            "verified": "yes",
            "respects_robots": "yes",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "high",
            "source_type": "official",
            "short_description": "Yandex's primary search indexing robot.",
            "verification_method": "Verify with reverse DNS and forward DNS checks; valid Yandex hosts end in yandex.ru, yandex.net, or yandex.com.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: YandexBot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"YandexBot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://yandex.com/support/webmaster/en/robot-workings/check-yandex-robots"
                }
            ],
            "last_verified": "2026-04-01",
            "updated_at": "2026-04-30T09:15:53Z"
        },
        {
            "id": "yandexbot-mirrordetector",
            "name": "YandexBot MirrorDetector",
            "slug": "yandexbot-mirrordetector",
            "url": "https://botcrawl.com/bots/yandexbot-mirrordetector/",
            "status": "active",
            "operator": "Yandex",
            "company": "Yandex",
            "category": "unknown",
            "kind": "unknown",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "YandexBot/3.0; MirrorDetector",
                "Mozilla/5.0 (compatible",
                "YandexBot/3.0",
                "MirrorDetector",
                "+http://yandex.com/bots)",
                "Yandex",
                "Official"
            ],
            "primary_user_agent": "YandexBot/3.0; MirrorDetector",
            "robots_token": "YandexBot/3.0",
            "verified": "yes",
            "respects_robots": "yes",
            "risk": "neutral",
            "recommended_action": "allow",
            "confidence": "unknown",
            "source_type": "unknown",
            "short_description": "Yandex mirror-detection crawler used to find alternate site addresses.",
            "verification_method": "Official Yandex documentation says to verify authenticity with reverse DNS and confirm the hostname resolves under yandex.ru, yandex.net, or yandex.com.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: YandexBot/3.0\nDisallow: /"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://yandex.com/support/webmaster/en/robot-workings/check-yandex-robots"
                }
            ],
            "updated_at": "2026-03-31T21:13:31Z"
        },
        {
            "id": "yandexcalendar",
            "name": "YandexCalendar",
            "slug": "yandexcalendar",
            "url": "https://botcrawl.com/bots/yandexcalendar/",
            "status": "active",
            "operator": "Yandex",
            "company": "Yandex",
            "family": "Yandex",
            "category": "search",
            "kind": "fetcher",
            "purpose": "user-triggered",
            "identity_type": "official-documented",
            "user_agents": [
                "YandexCalendar",
                "Mozilla/5.0 (compatible",
                "YandexCalendar/1.0",
                "+http://yandex.com/bots)",
                "Yandex calendar fetcher"
            ],
            "primary_user_agent": "YandexCalendar",
            "robots_token": "YandexCalendar",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "high",
            "source_type": "official",
            "short_description": "Yandex Calendar fetcher for user-requested calendar files.",
            "verification_method": "Verify with reverse DNS and forward DNS checks; valid Yandex hosts end in yandex.ru, yandex.net, or yandex.com.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: YandexCalendar\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"YandexCalendar\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://yandex.com/support/webmaster/en/robot-workings/check-yandex-robots"
                }
            ],
            "last_verified": "2026-04-01",
            "updated_at": "2026-04-30T09:15:53Z"
        },
        {
            "id": "yandexcombot",
            "name": "YandexComBot",
            "slug": "yandexcombot",
            "url": "https://botcrawl.com/bots/yandexcombot/",
            "status": "active",
            "operator": "Yandex",
            "company": "Yandex",
            "family": "Yandex",
            "category": "search",
            "kind": "crawler",
            "purpose": "indexing",
            "identity_type": "official-documented",
            "user_agents": [
                "YandexComBot",
                "Mozilla/5.0 (compatible",
                "YandexComBot/1.0",
                "+http://yandex.com/bots)",
                "YandexComBot Yandex robot"
            ],
            "primary_user_agent": "YandexComBot",
            "robots_token": "YandexComBot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "high",
            "source_type": "official",
            "short_description": "Yandex robot that indexes content for search in languages other than Russian.",
            "verification_method": "Verify with reverse DNS and forward DNS checks; valid Yandex hosts end in yandex.ru, yandex.net, or yandex.com.",
            "spoofing_risk": "Yandex user-agent strings are often spoofed. Verify the source host with reverse DNS and forward DNS before allow-listing.",
            "rules": {
                "robots": "User-agent: YandexComBot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"YandexComBot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://yandex.com/support/webmaster/en/robot-workings/check-yandex-robots"
                }
            ],
            "last_verified": "2026-04-29",
            "last_checked": "2026-04-29",
            "updated_at": "2026-04-30T09:15:55Z"
        },
        {
            "id": "yandexdirect",
            "name": "YandexDirect",
            "slug": "yandexdirect",
            "url": "https://botcrawl.com/bots/yandexdirect/",
            "status": "active",
            "operator": "Yandex",
            "company": "Yandex",
            "family": "Yandex",
            "category": "search",
            "kind": "crawler",
            "purpose": "site-owner-fetch",
            "identity_type": "official-documented",
            "user_agents": [
                "YandexDirect",
                "Mozilla/5.0 (compatible",
                "YandexDirect/1.0",
                "+http://yandex.com/bots)",
                "YandexDirect Yandex robot"
            ],
            "primary_user_agent": "YandexDirect",
            "robots_token": "YandexDirect",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "high",
            "source_type": "official",
            "short_description": "Yandex Direct robot that downloads site content for advertising topic matching.",
            "verification_method": "Verify with reverse DNS and forward DNS checks; valid Yandex hosts end in yandex.ru, yandex.net, or yandex.com.",
            "spoofing_risk": "Yandex user-agent strings are often spoofed. Verify the source host with reverse DNS and forward DNS before allow-listing.",
            "rules": {
                "robots": "User-agent: YandexDirect\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"YandexDirect\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://yandex.com/support/webmaster/en/robot-workings/check-yandex-robots"
                }
            ],
            "last_verified": "2026-04-29",
            "last_checked": "2026-04-29",
            "updated_at": "2026-04-30T09:15:55Z"
        },
        {
            "id": "yandexdirectdyn",
            "name": "YandexDirectDyn",
            "slug": "yandexdirectdyn",
            "url": "https://botcrawl.com/bots/yandexdirectdyn/",
            "status": "active",
            "operator": "Yandex",
            "company": "Yandex",
            "family": "Yandex",
            "category": "search",
            "kind": "fetcher",
            "purpose": "site-owner-fetch",
            "identity_type": "official-documented",
            "user_agents": [
                "YandexDirectDyn",
                "Mozilla/5.0 (compatible",
                "YandexDirectDyn/1.0",
                "+http://yandex.com/bots)",
                "YandexDirectDyn Yandex robot"
            ],
            "primary_user_agent": "YandexDirectDyn",
            "robots_token": "YandexDirectDyn",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "high",
            "source_type": "official",
            "short_description": "Yandex robot that generates dynamic banners.",
            "verification_method": "Verify with reverse DNS and forward DNS checks; valid Yandex hosts end in yandex.ru, yandex.net, or yandex.com.",
            "spoofing_risk": "Yandex user-agent strings are often spoofed. Verify the source host with reverse DNS and forward DNS before allow-listing.",
            "rules": {
                "robots": "User-agent: YandexDirectDyn\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"YandexDirectDyn\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://yandex.com/support/webmaster/en/robot-workings/check-yandex-robots"
                }
            ],
            "last_verified": "2026-04-29",
            "last_checked": "2026-04-29",
            "updated_at": "2026-04-30T09:15:55Z"
        },
        {
            "id": "yandexfavicons",
            "name": "YandexFavicons",
            "slug": "yandexfavicons",
            "url": "https://botcrawl.com/bots/yandexfavicons/",
            "status": "active",
            "operator": "Yandex",
            "company": "Yandex",
            "family": "Yandex",
            "category": "search",
            "kind": "fetcher",
            "purpose": "site-owner-fetch",
            "identity_type": "official-documented",
            "user_agents": [
                "YandexFavicons",
                "Mozilla/5.0 (compatible",
                "YandexFavicons/1.0",
                "+http://yandex.com/bots)",
                "YandexFavicons Yandex robot"
            ],
            "primary_user_agent": "YandexFavicons",
            "robots_token": "YandexFavicons",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "high",
            "source_type": "official",
            "short_description": "Yandex robot that downloads favicon files for search results.",
            "verification_method": "Verify with reverse DNS and forward DNS checks; valid Yandex hosts end in yandex.ru, yandex.net, or yandex.com.",
            "spoofing_risk": "Yandex user-agent strings are often spoofed. Verify the source host with reverse DNS and forward DNS before allow-listing.",
            "rules": {
                "robots": "User-agent: YandexFavicons\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"YandexFavicons\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://yandex.com/support/webmaster/en/robot-workings/check-yandex-robots"
                }
            ],
            "last_verified": "2026-04-29",
            "last_checked": "2026-04-29",
            "updated_at": "2026-04-30T09:15:55Z"
        },
        {
            "id": "yandeximageresizer",
            "name": "YandexImageResizer",
            "slug": "yandeximageresizer",
            "url": "https://botcrawl.com/bots/yandeximageresizer/",
            "status": "active",
            "operator": "Yandex",
            "company": "Yandex",
            "family": "Yandex",
            "category": "search",
            "kind": "fetcher",
            "purpose": "site-owner-fetch",
            "identity_type": "official-documented",
            "user_agents": [
                "YandexImageResizer",
                "Mozilla/5.0 (compatible",
                "YandexImageResizer/1.0",
                "+http://yandex.com/bots)",
                "YandexImageResizer Yandex robot"
            ],
            "primary_user_agent": "YandexImageResizer",
            "robots_token": "YandexImageResizer",
            "verified": "yes",
            "respects_robots": "yes",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "high",
            "source_type": "official",
            "short_description": "Yandex mobile-device robot used for image resizing workflows.",
            "verification_method": "Verify with reverse DNS and forward DNS checks; valid Yandex hosts end in yandex.ru, yandex.net, or yandex.com.",
            "spoofing_risk": "Yandex user-agent strings are often spoofed. Verify the source host with reverse DNS and forward DNS before allow-listing.",
            "rules": {
                "robots": "User-agent: YandexImageResizer\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"YandexImageResizer\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://yandex.com/support/webmaster/en/robot-workings/check-yandex-robots"
                }
            ],
            "last_verified": "2026-04-29",
            "last_checked": "2026-04-29",
            "updated_at": "2026-04-30T09:15:55Z"
        },
        {
            "id": "yandeximages",
            "name": "YandexImages",
            "slug": "yandeximages",
            "url": "https://botcrawl.com/bots/yandeximages/",
            "status": "active",
            "operator": "Yandex",
            "company": "Yandex",
            "family": "Yandex",
            "category": "search",
            "kind": "crawler",
            "purpose": "indexing",
            "identity_type": "official-documented",
            "user_agents": [
                "YandexImages",
                "Mozilla/5.0 (compatible",
                "YandexImages/3.0",
                "+http://yandex.com/bots)",
                "Yandex image crawler"
            ],
            "primary_user_agent": "YandexImages",
            "robots_token": "YandexImages",
            "verified": "yes",
            "respects_robots": "yes",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "high",
            "source_type": "official",
            "short_description": "Yandex crawler for image indexing.",
            "verification_method": "Verify with reverse DNS and forward DNS checks; valid Yandex hosts end in yandex.ru, yandex.net, or yandex.com.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: YandexImages\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"YandexImages\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://yandex.com/support/webmaster/en/robot-workings/check-yandex-robots"
                }
            ],
            "last_verified": "2026-04-01",
            "updated_at": "2026-04-30T09:15:53Z"
        },
        {
            "id": "yandexmarket",
            "name": "YandexMarket",
            "slug": "yandexmarket",
            "url": "https://botcrawl.com/bots/yandexmarket/",
            "status": "active",
            "operator": "Yandex",
            "company": "Yandex",
            "family": "Yandex",
            "category": "search",
            "kind": "crawler",
            "purpose": "indexing",
            "identity_type": "official-documented",
            "user_agents": [
                "YandexMarket",
                "Mozilla/5.0 (compatible",
                "YandexMarket/1.0",
                "+http://yandex.com/bots)",
                "YandexMarket Yandex robot"
            ],
            "primary_user_agent": "YandexMarket",
            "robots_token": "YandexMarket",
            "verified": "yes",
            "respects_robots": "yes",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "high",
            "source_type": "official",
            "short_description": "Yandex Market robot used for Yandex Market product and catalog workflows.",
            "verification_method": "Verify with reverse DNS and forward DNS checks; valid Yandex hosts end in yandex.ru, yandex.net, or yandex.com.",
            "spoofing_risk": "Yandex user-agent strings are often spoofed. Verify the source host with reverse DNS and forward DNS before allow-listing.",
            "rules": {
                "robots": "User-agent: YandexMarket\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"YandexMarket\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://yandex.com/support/webmaster/en/robot-workings/check-yandex-robots"
                }
            ],
            "last_verified": "2026-04-29",
            "last_checked": "2026-04-29",
            "updated_at": "2026-04-30T09:15:55Z"
        },
        {
            "id": "yandexmarket-2-0",
            "name": "YandexMarket 2.0",
            "slug": "yandexmarket-2-0",
            "url": "https://botcrawl.com/bots/yandexmarket-2-0/",
            "status": "active",
            "operator": "Yandex",
            "company": "Yandex",
            "category": "unknown",
            "kind": "unknown",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "YandexMarket/2.0",
                "Mozilla/5.0 (compatible",
                "+http://yandex.com/bots)",
                "Yandex",
                "Official"
            ],
            "primary_user_agent": "YandexMarket/2.0",
            "robots_token": "YandexMarket/2.0",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "allow",
            "confidence": "unknown",
            "source_type": "unknown",
            "short_description": "Yandex Market 2.0 crawler variant used for marketplace operations.",
            "verification_method": "Official Yandex documentation says to verify authenticity with reverse DNS and confirm the hostname resolves under yandex.ru, yandex.net, or yandex.com.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: YandexMarket/2.0\nDisallow: /"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://yandex.com/support/webmaster/en/robot-workings/check-yandex-robots"
                }
            ],
            "updated_at": "2026-03-31T21:13:32Z"
        },
        {
            "id": "yandexmedia",
            "name": "YandexMedia",
            "slug": "yandexmedia",
            "url": "https://botcrawl.com/bots/yandexmedia/",
            "status": "active",
            "operator": "Yandex",
            "company": "Yandex",
            "family": "Yandex",
            "category": "search",
            "kind": "crawler",
            "purpose": "indexing",
            "identity_type": "official-documented",
            "user_agents": [
                "YandexMedia",
                "Mozilla/5.0 (compatible",
                "YandexMedia/1.0",
                "+http://yandex.com/bots)",
                "YandexMedia Yandex robot"
            ],
            "primary_user_agent": "YandexMedia",
            "robots_token": "YandexMedia",
            "verified": "yes",
            "respects_robots": "yes",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "high",
            "source_type": "official",
            "short_description": "Yandex robot that indexes multimedia data.",
            "verification_method": "Verify with reverse DNS and forward DNS checks; valid Yandex hosts end in yandex.ru, yandex.net, or yandex.com.",
            "spoofing_risk": "Yandex user-agent strings are often spoofed. Verify the source host with reverse DNS and forward DNS before allow-listing.",
            "rules": {
                "robots": "User-agent: YandexMedia\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"YandexMedia\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://yandex.com/support/webmaster/en/robot-workings/check-yandex-robots"
                }
            ],
            "last_verified": "2026-04-29",
            "last_checked": "2026-04-29",
            "updated_at": "2026-04-30T09:15:55Z"
        },
        {
            "id": "yandexmetrika",
            "name": "YandexMetrika",
            "slug": "yandexmetrika",
            "url": "https://botcrawl.com/bots/yandexmetrika/",
            "status": "active",
            "operator": "Yandex",
            "company": "Yandex",
            "family": "Yandex",
            "category": "monitoring",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "official-documented",
            "user_agents": [
                "YandexMetrika",
                "Mozilla/5.0 (compatible",
                "YandexMetrika/1.0",
                "+http://yandex.com/bots)",
                "YandexMetrika Yandex robot"
            ],
            "primary_user_agent": "YandexMetrika",
            "robots_token": "YandexMetrika",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "high",
            "source_type": "official",
            "short_description": "Yandex Metrica robot used for availability checks and Webvisor rendering workflows.",
            "verification_method": "Verify with reverse DNS and forward DNS checks; valid Yandex hosts end in yandex.ru, yandex.net, or yandex.com.",
            "spoofing_risk": "Yandex user-agent strings are often spoofed. Verify the source host with reverse DNS and forward DNS before allow-listing.",
            "rules": {
                "robots": "User-agent: YandexMetrika\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"YandexMetrika\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://yandex.com/support/webmaster/en/robot-workings/check-yandex-robots"
                }
            ],
            "last_verified": "2026-04-29",
            "last_checked": "2026-04-29",
            "updated_at": "2026-04-30T09:15:55Z"
        },
        {
            "id": "yandexmetrika-3-0",
            "name": "YandexMetrika 3.0",
            "slug": "yandexmetrika-3-0",
            "url": "https://botcrawl.com/bots/yandexmetrika-3-0/",
            "status": "active",
            "operator": "Yandex",
            "company": "Yandex",
            "category": "unknown",
            "kind": "unknown",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "Mozilla/5.0 (compatible; YandexMetrika/3.0; +http://yandex.com/bots)",
                "Mozilla/5.0 (compatible",
                "YandexMetrika/3.0",
                "+http://yandex.com/bots)",
                "Yandex Metrica robot",
                "Official"
            ],
            "primary_user_agent": "Mozilla/5.0 (compatible; YandexMetrika/3.0; +http://yandex.com/bots)",
            "robots_token": "YandexMetrika/3.0",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "unknown",
            "source_type": "unknown",
            "short_description": "Yandex Metrica crawler variant for analytics-related fetching.",
            "verification_method": "Official Yandex documentation says to verify authenticity with reverse DNS and confirm the hostname resolves under yandex.ru, yandex.net, or yandex.com.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: YandexMetrika/3.0\nDisallow: /"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://yandex.com/support/webmaster/en/robot-workings/check-yandex-robots"
                }
            ],
            "updated_at": "2026-03-31T21:27:34Z"
        },
        {
            "id": "yandexmetrika-webvisor",
            "name": "YandexMetrika Webvisor",
            "slug": "yandexmetrika-webvisor",
            "url": "https://botcrawl.com/bots/yandexmetrika-webvisor/",
            "status": "active",
            "operator": "Yandex",
            "company": "Yandex",
            "category": "unknown",
            "kind": "unknown",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "YandexMetrika/4.0",
                "Mozilla/5.0 (compatible",
                "+http://yandex.com/bots)",
                "Yandex",
                "Official"
            ],
            "primary_user_agent": "YandexMetrika/4.0",
            "robots_token": "YandexMetrika/4.0",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "unknown",
            "source_type": "unknown",
            "short_description": "Yandex Metrica Webvisor bot that caches CSS for page rendering playback.",
            "verification_method": "Official Yandex documentation says to verify authenticity with reverse DNS and confirm the hostname resolves under yandex.ru, yandex.net, or yandex.com.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: YandexMetrika/4.0\nDisallow: /"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://yandex.com/support/webmaster/en/robot-workings/check-yandex-robots"
                }
            ],
            "updated_at": "2026-03-31T21:13:33Z"
        },
        {
            "id": "yandexmobilebot",
            "name": "YandexMobileBot",
            "slug": "yandexmobilebot",
            "url": "https://botcrawl.com/bots/yandexmobilebot/",
            "status": "active",
            "operator": "Yandex",
            "company": "Yandex",
            "family": "Yandex",
            "category": "search",
            "kind": "crawler",
            "purpose": "general-crawl",
            "identity_type": "official-documented",
            "user_agents": [
                "YandexMobileBot",
                "Mozilla/5.0 (iPhone",
                "CPU iPhone OS 8_1 like Mac OS X) AppleWebKit/600.1.4 (KHTML",
                "like Gecko) Version/8.0 Mobile/12B411 Safari/600.1.4 (compatible",
                "YandexMobileBot/3.0",
                "+http://yandex.com/bots)",
                "Yandex mobile crawler"
            ],
            "primary_user_agent": "YandexMobileBot",
            "robots_token": "YandexMobileBot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "high",
            "source_type": "official",
            "short_description": "Yandex crawler for mobile-layout checks.",
            "verification_method": "Verify with reverse DNS and forward DNS checks; valid Yandex hosts end in yandex.ru, yandex.net, or yandex.com.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: YandexMobileBot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"YandexMobileBot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://yandex.com/support/webmaster/en/robot-workings/check-yandex-robots"
                }
            ],
            "last_verified": "2026-04-01",
            "updated_at": "2026-04-30T09:15:53Z"
        },
        {
            "id": "yandexmobilescreenshotbot",
            "name": "YandexMobileScreenShotBot",
            "slug": "yandexmobilescreenshotbot",
            "url": "https://botcrawl.com/bots/yandexmobilescreenshotbot/",
            "status": "active",
            "operator": "Yandex",
            "company": "Yandex",
            "family": "Yandex",
            "category": "search",
            "kind": "fetcher",
            "purpose": "site-owner-fetch",
            "identity_type": "official-documented",
            "user_agents": [
                "YandexMobileScreenShotBot",
                "Mozilla/5.0 (compatible; YandexMobileScreenShotBot/1.0; +http://yandex.com/bots)",
                "YandexMobileScreenShotBot Yandex robot"
            ],
            "primary_user_agent": "YandexMobileScreenShotBot",
            "robots_token": "YandexMobileScreenShotBot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "high",
            "source_type": "official",
            "short_description": "Yandex robot that takes mobile page screenshots.",
            "verification_method": "Verify with reverse DNS and forward DNS checks; valid Yandex hosts end in yandex.ru, yandex.net, or yandex.com.",
            "spoofing_risk": "Yandex user-agent strings are often spoofed. Verify the source host with reverse DNS and forward DNS before allow-listing.",
            "rules": {
                "robots": "User-agent: YandexMobileScreenShotBot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"YandexMobileScreenShotBot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://yandex.com/support/webmaster/en/robot-workings/check-yandex-robots"
                }
            ],
            "last_verified": "2026-04-29",
            "last_checked": "2026-04-29",
            "updated_at": "2026-04-30T09:15:55Z"
        },
        {
            "id": "yandexnews",
            "name": "YandexNews",
            "slug": "yandexnews",
            "url": "https://botcrawl.com/bots/yandexnews/",
            "status": "active",
            "operator": "Yandex",
            "company": "Yandex",
            "family": "Yandex",
            "category": "search",
            "kind": "unknown",
            "purpose": "indexing",
            "identity_type": "unknown",
            "user_agents": [
                "YandexNews/3.0",
                "Mozilla/5.0 (compatible; YandexNews/3.0; +http://yandex.com/bots)",
                "Yandex News Bot"
            ],
            "primary_user_agent": "YandexNews/3.0",
            "robots_token": "YandexNews",
            "verified": "yes",
            "respects_robots": "yes",
            "risk": "neutral",
            "recommended_action": "allow",
            "confidence": "unknown",
            "source_type": "unknown",
            "short_description": "Yandex News crawler that indexes news articles for Yandex News.",
            "verification_method": "Verify the exact user-agent and confirm reverse DNS resolves under yandex.ru, yandex.net, or yandex.com.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: YandexNews\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"YandexNews\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://yandex.com/support/webmaster/en/robot-workings/check-yandex-robots"
                }
            ],
            "updated_at": "2026-04-01T00:55:33Z"
        },
        {
            "id": "yandexontodb",
            "name": "YandexOntoDB",
            "slug": "yandexontodb",
            "url": "https://botcrawl.com/bots/yandexontodb/",
            "status": "active",
            "operator": "Yandex",
            "company": "Yandex",
            "family": "Yandex",
            "category": "search",
            "kind": "crawler",
            "purpose": "indexing",
            "identity_type": "official-documented",
            "user_agents": [
                "YandexOntoDB",
                "Mozilla/5.0 (compatible; YandexOntoDB/1.0; +http://yandex.com/bots)",
                "YandexOntoDB Yandex robot"
            ],
            "primary_user_agent": "YandexOntoDB",
            "robots_token": "YandexOntoDB",
            "verified": "yes",
            "respects_robots": "yes",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "high",
            "source_type": "official",
            "short_description": "Yandex information-card robot.",
            "verification_method": "Verify with reverse DNS and forward DNS checks; valid Yandex hosts end in yandex.ru, yandex.net, or yandex.com.",
            "spoofing_risk": "Yandex user-agent strings are often spoofed. Verify the source host with reverse DNS and forward DNS before allow-listing.",
            "rules": {
                "robots": "User-agent: YandexOntoDB\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"YandexOntoDB\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://yandex.com/support/webmaster/en/robot-workings/check-yandex-robots"
                }
            ],
            "last_verified": "2026-04-29",
            "last_checked": "2026-04-29",
            "updated_at": "2026-04-30T09:15:55Z"
        },
        {
            "id": "yandexontodbapi",
            "name": "YandexOntoDBAPI",
            "slug": "yandexontodbapi",
            "url": "https://botcrawl.com/bots/yandexontodbapi/",
            "status": "active",
            "operator": "Yandex",
            "company": "Yandex",
            "family": "Yandex",
            "category": "search",
            "kind": "fetcher",
            "purpose": "site-owner-fetch",
            "identity_type": "official-documented",
            "user_agents": [
                "YandexOntoDBAPI",
                "Mozilla/5.0 (compatible; YandexOntoDBAPI/1.0; +http://yandex.com/bots)",
                "YandexOntoDBAPI Yandex robot"
            ],
            "primary_user_agent": "YandexOntoDBAPI",
            "robots_token": "YandexOntoDBAPI",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "high",
            "source_type": "official",
            "short_description": "Yandex information-card robot that downloads dynamic data.",
            "verification_method": "Verify with reverse DNS and forward DNS checks; valid Yandex hosts end in yandex.ru, yandex.net, or yandex.com.",
            "spoofing_risk": "Yandex user-agent strings are often spoofed. Verify the source host with reverse DNS and forward DNS before allow-listing.",
            "rules": {
                "robots": "User-agent: YandexOntoDBAPI\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"YandexOntoDBAPI\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://yandex.com/support/webmaster/en/robot-workings/check-yandex-robots"
                }
            ],
            "last_verified": "2026-04-29",
            "last_checked": "2026-04-29",
            "updated_at": "2026-04-30T09:15:55Z"
        },
        {
            "id": "yandexontodbparser",
            "name": "YandexOntoDBParser",
            "slug": "yandexontodbparser",
            "url": "https://botcrawl.com/bots/yandexontodbparser/",
            "status": "active",
            "operator": "Yandex",
            "company": "Yandex",
            "family": "Yandex",
            "category": "search",
            "kind": "unknown",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "YandexOntoDBParser/1.1",
                "Mozilla/5.0 (compatible; YandexOntoDBParser/1.1; +http://yandex.com/bots)",
                "Yandex Onto Bot"
            ],
            "primary_user_agent": "YandexOntoDBParser/1.1",
            "robots_token": "YandexOntoDBParser",
            "verified": "yes",
            "respects_robots": "yes",
            "risk": "neutral",
            "recommended_action": "allow",
            "confidence": "unknown",
            "source_type": "unknown",
            "short_description": "Yandex ontology database parser bot used for structured data extraction.",
            "verification_method": "Verify the exact user-agent and confirm reverse DNS resolves under yandex.ru, yandex.net, or yandex.com.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: YandexOntoDBParser\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"YandexOntoDBParser\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://yandex.com/support/webmaster/en/robot-workings/check-yandex-robots"
                }
            ],
            "updated_at": "2026-04-01T00:55:34Z"
        },
        {
            "id": "yandexpagechecker",
            "name": "YandexPagechecker",
            "slug": "yandexpagechecker",
            "url": "https://botcrawl.com/bots/yandexpagechecker/",
            "status": "active",
            "operator": "Yandex",
            "company": "Yandex",
            "family": "Yandex",
            "category": "search",
            "kind": "fetcher",
            "purpose": "site-owner-fetch",
            "identity_type": "official-documented",
            "user_agents": [
                "YandexPagechecker",
                "Mozilla/5.0 (compatible; YandexPagechecker/1.0; +http://yandex.com/bots)",
                "YandexPagechecker Yandex robot"
            ],
            "primary_user_agent": "YandexPagechecker",
            "robots_token": "YandexPagechecker",
            "verified": "yes",
            "respects_robots": "yes",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "high",
            "source_type": "official",
            "short_description": "Yandex structured-data validation robot.",
            "verification_method": "Verify with reverse DNS and forward DNS checks; valid Yandex hosts end in yandex.ru, yandex.net, or yandex.com.",
            "spoofing_risk": "Yandex user-agent strings are often spoofed. Verify the source host with reverse DNS and forward DNS before allow-listing.",
            "rules": {
                "robots": "User-agent: YandexPagechecker\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"YandexPagechecker\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://yandex.com/support/webmaster/en/robot-workings/check-yandex-robots"
                }
            ],
            "last_verified": "2026-04-29",
            "last_checked": "2026-04-29",
            "updated_at": "2026-04-30T09:15:55Z"
        },
        {
            "id": "yandexpartner",
            "name": "YandexPartner",
            "slug": "yandexpartner",
            "url": "https://botcrawl.com/bots/yandexpartner/",
            "status": "active",
            "operator": "Yandex",
            "company": "Yandex",
            "family": "Yandex",
            "category": "search",
            "kind": "crawler",
            "purpose": "site-owner-fetch",
            "identity_type": "official-documented",
            "user_agents": [
                "YandexPartner",
                "Mozilla/5.0 (compatible; YandexPartner/1.0; +http://yandex.com/bots)",
                "YandexPartner Yandex robot"
            ],
            "primary_user_agent": "YandexPartner",
            "robots_token": "YandexPartner",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "high",
            "source_type": "official",
            "short_description": "Yandex partner robot that downloads information about partner-site content.",
            "verification_method": "Verify with reverse DNS and forward DNS checks; valid Yandex hosts end in yandex.ru, yandex.net, or yandex.com.",
            "spoofing_risk": "Yandex user-agent strings are often spoofed. Verify the source host with reverse DNS and forward DNS before allow-listing.",
            "rules": {
                "robots": "User-agent: YandexPartner\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"YandexPartner\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://yandex.com/support/webmaster/en/robot-workings/check-yandex-robots"
                }
            ],
            "last_verified": "2026-04-29",
            "last_checked": "2026-04-29",
            "updated_at": "2026-04-30T09:15:55Z"
        },
        {
            "id": "yandexrca",
            "name": "YandexRCA",
            "slug": "yandexrca",
            "url": "https://botcrawl.com/bots/yandexrca/",
            "status": "active",
            "operator": "Yandex",
            "company": "Yandex",
            "family": "Yandex",
            "category": "search",
            "kind": "preview",
            "purpose": "site-owner-fetch",
            "identity_type": "official-documented",
            "user_agents": [
                "YandexRCA",
                "Mozilla/5.0 (compatible; YandexRCA/1.0; +http://yandex.com/bots)",
                "YandexRCA Yandex robot"
            ],
            "primary_user_agent": "YandexRCA",
            "robots_token": "YandexRCA",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "high",
            "source_type": "official",
            "short_description": "Yandex robot that collects data for generating previews.",
            "verification_method": "Verify with reverse DNS and forward DNS checks; valid Yandex hosts end in yandex.ru, yandex.net, or yandex.com.",
            "spoofing_risk": "Yandex user-agent strings are often spoofed. Verify the source host with reverse DNS and forward DNS before allow-listing.",
            "rules": {
                "robots": "User-agent: YandexRCA\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"YandexRCA\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://yandex.com/support/webmaster/en/robot-workings/check-yandex-robots"
                }
            ],
            "last_verified": "2026-04-29",
            "last_checked": "2026-04-29",
            "updated_at": "2026-04-30T09:15:55Z"
        },
        {
            "id": "yandexrenderresourcesbot",
            "name": "YandexRenderResourcesBot",
            "slug": "yandexrenderresourcesbot",
            "url": "https://botcrawl.com/bots/yandexrenderresourcesbot/",
            "status": "active",
            "operator": "Yandex",
            "company": "Yandex",
            "family": "Yandex",
            "category": "search",
            "kind": "fetcher",
            "purpose": "site-owner-fetch",
            "identity_type": "official-documented",
            "user_agents": [
                "YandexRenderResourcesBot",
                "Mozilla/5.0 (compatible; YandexRenderResourcesBot/1.0; +http://yandex.com/bots)",
                "YandexRenderResourcesBot Yandex robot"
            ],
            "primary_user_agent": "YandexRenderResourcesBot",
            "robots_token": "YandexRenderResourcesBot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "high",
            "source_type": "official",
            "short_description": "Yandex robot that loads resources for JavaScript page rendering.",
            "verification_method": "Verify with reverse DNS and forward DNS checks; valid Yandex hosts end in yandex.ru, yandex.net, or yandex.com.",
            "spoofing_risk": "Yandex user-agent strings are often spoofed. Verify the source host with reverse DNS and forward DNS before allow-listing.",
            "rules": {
                "robots": "User-agent: YandexRenderResourcesBot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"YandexRenderResourcesBot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://yandex.com/support/webmaster/en/robot-workings/check-yandex-robots"
                }
            ],
            "last_verified": "2026-04-29",
            "last_checked": "2026-04-29",
            "updated_at": "2026-04-30T09:15:55Z"
        },
        {
            "id": "yandexscreenshotbot",
            "name": "YandexScreenshotBot",
            "slug": "yandexscreenshotbot",
            "url": "https://botcrawl.com/bots/yandexscreenshotbot/",
            "status": "active",
            "operator": "Yandex",
            "company": "Yandex",
            "family": "Yandex",
            "category": "search",
            "kind": "fetcher",
            "purpose": "site-owner-fetch",
            "identity_type": "official-documented",
            "user_agents": [
                "YandexScreenshotBot",
                "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/W.X.Y.Z Safari/537.36 (compatible; YandexScreenshotBot/3.0; +http://yandex.com/bots)",
                "YandexScreenshotBot Yandex robot"
            ],
            "primary_user_agent": "YandexScreenshotBot",
            "robots_token": "YandexScreenshotBot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "high",
            "source_type": "official",
            "short_description": "Yandex robot that takes screenshots of pages.",
            "verification_method": "Verify with reverse DNS and forward DNS checks; valid Yandex hosts end in yandex.ru, yandex.net, or yandex.com.",
            "spoofing_risk": "Yandex user-agent strings are often spoofed. Verify the source host with reverse DNS and forward DNS before allow-listing.",
            "rules": {
                "robots": "User-agent: YandexScreenshotBot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"YandexScreenshotBot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://yandex.com/support/webmaster/en/robot-workings/check-yandex-robots"
                }
            ],
            "last_verified": "2026-04-29",
            "last_checked": "2026-04-29",
            "updated_at": "2026-04-30T09:15:56Z"
        },
        {
            "id": "yandexsearchshop",
            "name": "YandexSearchShop",
            "slug": "yandexsearchshop",
            "url": "https://botcrawl.com/bots/yandexsearchshop/",
            "status": "active",
            "operator": "Yandex",
            "company": "Yandex",
            "family": "Yandex",
            "category": "search",
            "kind": "fetcher",
            "purpose": "user-triggered",
            "identity_type": "official-documented",
            "user_agents": [
                "YandexSearchShop",
                "Mozilla/5.0 (compatible; YandexSearchShop/1.0; +http://yandex.com/bots)",
                "YandexSearchShop Yandex robot"
            ],
            "primary_user_agent": "YandexSearchShop",
            "robots_token": "YandexSearchShop",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "high",
            "source_type": "official",
            "short_description": "Yandex robot that downloads product catalogs in YML files by user request.",
            "verification_method": "Verify with reverse DNS and forward DNS checks; valid Yandex hosts end in yandex.ru, yandex.net, or yandex.com.",
            "spoofing_risk": "Yandex user-agent strings are often spoofed. Verify the source host with reverse DNS and forward DNS before allow-listing.",
            "rules": {
                "robots": "User-agent: YandexSearchShop\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"YandexSearchShop\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://yandex.com/support/webmaster/en/robot-workings/check-yandex-robots"
                }
            ],
            "last_verified": "2026-04-29",
            "last_checked": "2026-04-29",
            "updated_at": "2026-04-30T09:15:56Z"
        },
        {
            "id": "yandexsitelinks",
            "name": "YandexSitelinks",
            "slug": "yandexsitelinks",
            "url": "https://botcrawl.com/bots/yandexsitelinks/",
            "status": "active",
            "operator": "Yandex",
            "company": "Yandex",
            "family": "Yandex",
            "category": "search",
            "kind": "fetcher",
            "purpose": "site-owner-fetch",
            "identity_type": "official-documented",
            "user_agents": [
                "YandexSitelinks",
                "Mozilla/5.0 (compatible; YandexSitelinks; Dyatel; +http://yandex.com/bots)",
                "YandexSitelinks Yandex robot"
            ],
            "primary_user_agent": "YandexSitelinks",
            "robots_token": "YandexSitelinks",
            "verified": "yes",
            "respects_robots": "yes",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "high",
            "source_type": "official",
            "short_description": "Yandex robot that checks availability of pages used as sitelinks.",
            "verification_method": "Verify with reverse DNS and forward DNS checks; valid Yandex hosts end in yandex.ru, yandex.net, or yandex.com.",
            "spoofing_risk": "Yandex user-agent strings are often spoofed. Verify the source host with reverse DNS and forward DNS before allow-listing.",
            "rules": {
                "robots": "User-agent: YandexSitelinks\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"YandexSitelinks\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://yandex.com/support/webmaster/en/robot-workings/check-yandex-robots"
                }
            ],
            "last_verified": "2026-04-29",
            "last_checked": "2026-04-29",
            "updated_at": "2026-04-30T09:15:56Z"
        },
        {
            "id": "yandexspravbot",
            "name": "YandexSpravBot",
            "slug": "yandexspravbot",
            "url": "https://botcrawl.com/bots/yandexspravbot/",
            "status": "active",
            "operator": "Yandex",
            "company": "Yandex",
            "family": "Yandex",
            "category": "search",
            "kind": "crawler",
            "purpose": "indexing",
            "identity_type": "official-documented",
            "user_agents": [
                "YandexSpravBot",
                "Mozilla/5.0 (compatible; YandexSpravBot/1.0; +http://yandex.com/bots)",
                "YandexSpravBot Yandex robot"
            ],
            "primary_user_agent": "YandexSpravBot",
            "robots_token": "YandexSpravBot",
            "verified": "yes",
            "respects_robots": "yes",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "high",
            "source_type": "official",
            "short_description": "Yandex Business robot.",
            "verification_method": "Verify with reverse DNS and forward DNS checks; valid Yandex hosts end in yandex.ru, yandex.net, or yandex.com.",
            "spoofing_risk": "Yandex user-agent strings are often spoofed. Verify the source host with reverse DNS and forward DNS before allow-listing.",
            "rules": {
                "robots": "User-agent: YandexSpravBot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"YandexSpravBot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://yandex.com/support/webmaster/en/robot-workings/check-yandex-robots"
                }
            ],
            "last_verified": "2026-04-29",
            "last_checked": "2026-04-29",
            "updated_at": "2026-04-30T09:15:56Z"
        },
        {
            "id": "yandextracker",
            "name": "YandexTracker",
            "slug": "yandextracker",
            "url": "https://botcrawl.com/bots/yandextracker/",
            "status": "active",
            "operator": "Yandex",
            "company": "Yandex",
            "family": "Yandex",
            "category": "monitoring",
            "kind": "fetcher",
            "purpose": "site-owner-fetch",
            "identity_type": "official-documented",
            "user_agents": [
                "YandexTracker",
                "Mozilla/5.0 (compatible; YandexTracker/1.0; +http://yandex.com/bots)",
                "YandexTracker Yandex robot"
            ],
            "primary_user_agent": "YandexTracker",
            "robots_token": "YandexTracker",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "high",
            "source_type": "official",
            "short_description": "Yandex Tracker robot.",
            "verification_method": "Verify with reverse DNS and forward DNS checks; valid Yandex hosts end in yandex.ru, yandex.net, or yandex.com.",
            "spoofing_risk": "Yandex user-agent strings are often spoofed. Verify the source host with reverse DNS and forward DNS before allow-listing.",
            "rules": {
                "robots": "User-agent: YandexTracker\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"YandexTracker\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://yandex.com/support/webmaster/en/robot-workings/check-yandex-robots"
                }
            ],
            "last_verified": "2026-04-29",
            "last_checked": "2026-04-29",
            "updated_at": "2026-04-30T09:15:56Z"
        },
        {
            "id": "yandexturbo",
            "name": "YandexTurbo",
            "slug": "yandexturbo",
            "url": "https://botcrawl.com/bots/yandexturbo/",
            "status": "active",
            "operator": "Yandex",
            "company": "Yandex",
            "family": "Yandex",
            "category": "search",
            "kind": "unknown",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "YandexTurbo/1.0",
                "Mozilla/5.0 (compatible; YandexTurbo/1.0; +http://yandex.com/bots)",
                "Yandex Turbo Bot"
            ],
            "primary_user_agent": "YandexTurbo/1.0",
            "robots_token": "YandexTurbo",
            "verified": "yes",
            "respects_robots": "yes",
            "risk": "neutral",
            "recommended_action": "allow",
            "confidence": "unknown",
            "source_type": "unknown",
            "short_description": "Yandex Turbo bot used for creating and serving Turbo pages for faster mobile loading.",
            "verification_method": "Verify the exact user-agent and confirm reverse DNS resolves under yandex.ru, yandex.net, or yandex.com.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: YandexTurbo\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"YandexTurbo\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://yandex.com/support/webmaster/en/robot-workings/check-yandex-robots"
                }
            ],
            "updated_at": "2026-04-01T00:55:34Z"
        },
        {
            "id": "yandexuserproxy",
            "name": "YandexUserproxy",
            "slug": "yandexuserproxy",
            "url": "https://botcrawl.com/bots/yandexuserproxy/",
            "status": "active",
            "operator": "Yandex",
            "company": "Yandex",
            "family": "Yandex",
            "category": "search",
            "kind": "fetcher",
            "purpose": "user-triggered",
            "identity_type": "official-documented",
            "user_agents": [
                "YandexUserproxy",
                "Mozilla/5.0 (compatible; YandexUserproxy/1.0; +http://yandex.com/bots)",
                "YandexUserproxy Yandex robot"
            ],
            "primary_user_agent": "YandexUserproxy",
            "robots_token": "YandexUserproxy",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "high",
            "source_type": "official",
            "short_description": "Yandex proxy robot for user actions on Yandex services, including button clicks and online translation.",
            "verification_method": "Verify with reverse DNS and forward DNS checks; valid Yandex hosts end in yandex.ru, yandex.net, or yandex.com.",
            "spoofing_risk": "Yandex user-agent strings are often spoofed. Verify the source host with reverse DNS and forward DNS before allow-listing.",
            "rules": {
                "robots": "User-agent: YandexUserproxy\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"YandexUserproxy\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://yandex.com/support/webmaster/en/robot-workings/check-yandex-robots"
                }
            ],
            "last_verified": "2026-04-29",
            "last_checked": "2026-04-29",
            "updated_at": "2026-04-30T09:15:56Z"
        },
        {
            "id": "yandexverticals",
            "name": "YandexVerticals",
            "slug": "yandexverticals",
            "url": "https://botcrawl.com/bots/yandexverticals/",
            "status": "active",
            "operator": "Yandex",
            "company": "Yandex",
            "family": "Yandex",
            "category": "search",
            "kind": "crawler",
            "purpose": "indexing",
            "identity_type": "official-documented",
            "user_agents": [
                "YandexVerticals",
                "Mozilla/5.0 (compatible",
                "YandexVerticals/1.0",
                "+http://yandex.com/bots)",
                "YandexVerticals Yandex robot"
            ],
            "primary_user_agent": "YandexVerticals",
            "robots_token": "YandexVerticals",
            "verified": "yes",
            "respects_robots": "yes",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "high",
            "source_type": "official",
            "short_description": "Yandex Verticals robot for classifieds products such as Auto.ru, Yandex Realty, Yandex Jobs, and Yandex Reviews.",
            "verification_method": "Verify with reverse DNS and forward DNS checks; valid Yandex hosts end in yandex.ru, yandex.net, or yandex.com.",
            "spoofing_risk": "Yandex user-agent strings are often spoofed. Verify the source host with reverse DNS and forward DNS before allow-listing.",
            "rules": {
                "robots": "User-agent: YandexVerticals\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"YandexVerticals\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://yandex.com/support/webmaster/en/robot-workings/check-yandex-robots"
                }
            ],
            "last_verified": "2026-04-29",
            "last_checked": "2026-04-29",
            "updated_at": "2026-04-30T09:15:56Z"
        },
        {
            "id": "yandexvertis",
            "name": "YandexVertis",
            "slug": "yandexvertis",
            "url": "https://botcrawl.com/bots/yandexvertis/",
            "status": "active",
            "operator": "Yandex",
            "company": "Yandex",
            "family": "Yandex",
            "category": "search",
            "kind": "crawler",
            "purpose": "indexing",
            "identity_type": "official-documented",
            "user_agents": [
                "YandexVertis",
                "Mozilla/5.0 (compatible",
                "YandexVertis/1.0",
                "+http://yandex.com/bots)",
                "YandexVertis Yandex robot"
            ],
            "primary_user_agent": "YandexVertis",
            "robots_token": "YandexVertis",
            "verified": "yes",
            "respects_robots": "yes",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "high",
            "source_type": "official",
            "short_description": "Yandex search-verticals robot.",
            "verification_method": "Verify with reverse DNS and forward DNS checks; valid Yandex hosts end in yandex.ru, yandex.net, or yandex.com.",
            "spoofing_risk": "Yandex user-agent strings are often spoofed. Verify the source host with reverse DNS and forward DNS before allow-listing.",
            "rules": {
                "robots": "User-agent: YandexVertis\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"YandexVertis\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://yandex.com/support/webmaster/en/robot-workings/check-yandex-robots"
                }
            ],
            "last_verified": "2026-04-29",
            "last_checked": "2026-04-29",
            "updated_at": "2026-04-30T09:15:56Z"
        },
        {
            "id": "yandexvideo",
            "name": "YandexVideo",
            "slug": "yandexvideo",
            "url": "https://botcrawl.com/bots/yandexvideo/",
            "status": "active",
            "operator": "Yandex",
            "company": "Yandex",
            "family": "Yandex",
            "category": "search",
            "kind": "crawler",
            "purpose": "indexing",
            "identity_type": "official-documented",
            "user_agents": [
                "YandexVideo",
                "Mozilla/5.0 (compatible",
                "YandexVideo/3.0",
                "+http://yandex.com/bots)",
                "Yandex video crawler"
            ],
            "primary_user_agent": "YandexVideo",
            "robots_token": "YandexVideo",
            "verified": "yes",
            "respects_robots": "yes",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "high",
            "source_type": "official",
            "short_description": "Yandex crawler for video indexing.",
            "verification_method": "Verify with reverse DNS and forward DNS checks; valid Yandex hosts end in yandex.ru, yandex.net, or yandex.com.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: YandexVideo\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"YandexVideo\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://yandex.com/support/webmaster/en/robot-workings/check-yandex-robots"
                }
            ],
            "last_verified": "2026-04-01",
            "updated_at": "2026-04-30T09:15:53Z"
        },
        {
            "id": "yandexvideoparser",
            "name": "YandexVideoParser",
            "slug": "yandexvideoparser",
            "url": "https://botcrawl.com/bots/yandexvideoparser/",
            "status": "active",
            "operator": "Yandex",
            "company": "Yandex",
            "family": "Yandex",
            "category": "search",
            "kind": "crawler",
            "purpose": "indexing",
            "identity_type": "official-documented",
            "user_agents": [
                "YandexVideoParser",
                "Mozilla/5.0 (compatible",
                "YandexVideoParser/1.0",
                "+http://yandex.com/bots)",
                "YandexVideoParser Yandex robot"
            ],
            "primary_user_agent": "YandexVideoParser",
            "robots_token": "YandexVideoParser",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "high",
            "source_type": "official",
            "short_description": "Yandex robot that parses videos for Yandex video search.",
            "verification_method": "Verify with reverse DNS and forward DNS checks; valid Yandex hosts end in yandex.ru, yandex.net, or yandex.com.",
            "spoofing_risk": "Yandex user-agent strings are often spoofed. Verify the source host with reverse DNS and forward DNS before allow-listing.",
            "rules": {
                "robots": "User-agent: YandexVideoParser\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"YandexVideoParser\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://yandex.com/support/webmaster/en/robot-workings/check-yandex-robots"
                }
            ],
            "last_verified": "2026-04-29",
            "last_checked": "2026-04-29",
            "updated_at": "2026-04-30T09:15:56Z"
        },
        {
            "id": "yandexwebmaster",
            "name": "YandexWebmaster",
            "slug": "yandexwebmaster",
            "url": "https://botcrawl.com/bots/yandexwebmaster/",
            "status": "active",
            "operator": "Yandex",
            "company": "Yandex",
            "family": "Yandex",
            "category": "search",
            "kind": "fetcher",
            "purpose": "site-owner-fetch",
            "identity_type": "official-documented",
            "user_agents": [
                "YandexWebmaster",
                "Mozilla/5.0 (compatible",
                "YandexWebmaster/1.0",
                "+http://yandex.com/bots)",
                "YandexWebmaster Yandex robot"
            ],
            "primary_user_agent": "YandexWebmaster",
            "robots_token": "YandexWebmaster",
            "verified": "yes",
            "respects_robots": "yes",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "high",
            "source_type": "official",
            "short_description": "Yandex Webmaster robot.",
            "verification_method": "Verify with reverse DNS and forward DNS checks; valid Yandex hosts end in yandex.ru, yandex.net, or yandex.com.",
            "spoofing_risk": "Yandex user-agent strings are often spoofed. Verify the source host with reverse DNS and forward DNS before allow-listing.",
            "rules": {
                "robots": "User-agent: YandexWebmaster\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"YandexWebmaster\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://yandex.com/support/webmaster/en/robot-workings/check-yandex-robots"
                }
            ],
            "last_verified": "2026-04-29",
            "last_checked": "2026-04-29",
            "updated_at": "2026-04-30T09:15:56Z"
        },
        {
            "id": "yeti",
            "name": "Yeti",
            "slug": "yeti",
            "url": "https://botcrawl.com/bots/yeti/",
            "status": "active",
            "operator": "Naver",
            "company": "Naver",
            "family": "Naver",
            "category": "search",
            "kind": "crawler",
            "purpose": "indexing",
            "identity_type": "unknown",
            "user_agents": [
                "Yeti",
                "Mozilla/5.0 (compatible",
                "Yeti/1.1",
                "+http://naver.me/spd)",
                "Mozilla/5.0 (Windows NT 6.1",
                "Win64",
                "x64) AppleWebKit/537.36 (KHTML",
                "like Gecko) Chrome/63.0.3239.0 Safari/537.36 (compatible",
                "Yeti/"
            ],
            "primary_user_agent": "Yeti",
            "robots_token": "Yeti",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Yeti is the web crawler for Naver, a South Korean search engine. It indexes websites to provide search results and power other services on the Naver platform.",
            "short_description": "Yeti is the web crawler for Naver, a South Korean search engine.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Yeti\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Yeti\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://help.naver.com/support/contents/contents.help?serviceNo=19634&amp;categoryNo=19668&amp;_website_robots"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:53Z"
        },
        {
            "id": "yext-inc",
            "name": "Yext Inc",
            "slug": "yext-inc",
            "url": "https://botcrawl.com/bots/yext-inc/",
            "status": "active",
            "operator": "Yext",
            "company": "Yext",
            "family": "Yext",
            "category": "scraper",
            "kind": "crawler",
            "purpose": "general-crawl",
            "identity_type": "unknown",
            "user_agents": [
                "YextBot",
                "Mozilla/5.0 (X11",
                "Linux x86_64) AppleWebKit/537.36 (KHTML",
                "like Gecko) HeadlessChrome/87.0.4280.88 YextBot/Java Safari/537.36"
            ],
            "primary_user_agent": "YextBot",
            "robots_token": "YextBot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "The Yext Crawler provides Yext customers with a tool to retrieve data from their own websites.",
            "short_description": "The Yext Crawler provides Yext customers with a tool to retrieve data from their own websites.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: YextBot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"YextBot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.yext.com"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:53Z"
        },
        {
            "id": "ygs-group-falconer-scraper",
            "name": "YGS Group Falconer Scraper",
            "slug": "ygs-group-falconer-scraper",
            "url": "https://botcrawl.com/bots/ygs-group-falconer-scraper/",
            "status": "active",
            "operator": "YGS Group",
            "company": "YGS Group",
            "family": "YGS Group Falconer Scraper",
            "category": "ai",
            "kind": "crawler",
            "purpose": "general-crawl",
            "identity_type": "unknown",
            "user_agents": [
                "ygs-scraper-bot",
                "ygs-scraper-bot/1.0"
            ],
            "primary_user_agent": "ygs-scraper-bot",
            "robots_token": "ygs-scraper-bot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "A content based scraper only for partners we collaborate with who have given permission to have their website scraped.",
            "short_description": "A content based scraper only for partners we collaborate with who have given permission to have their website scraped.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: ygs-scraper-bot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"ygs-scraper-bot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://ygscontentlicensing.com/falconer/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:53Z"
        },
        {
            "id": "yisouspider",
            "name": "YisouSpider",
            "slug": "yisouspider",
            "url": "https://botcrawl.com/bots/yisouspider/",
            "status": "active",
            "operator": "Yisou",
            "company": "Yisou",
            "family": "Yisou",
            "category": "search",
            "kind": "unknown",
            "purpose": "general-crawl",
            "identity_type": "unknown",
            "user_agents": [
                "YisouSpider"
            ],
            "primary_user_agent": "YisouSpider",
            "robots_token": "YisouSpider",
            "verified": "unknown",
            "respects_robots": "unknown",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "unknown",
            "source_type": "unknown",
            "short_description": "Yisou crawler token surfaced in sogou robots.txt.",
            "verification_method": "This token is surfaced in a Sogou-owned robots.txt file, but the source used for this entry does not publish a dedicated verification method. Confirm behavior conservatively and do not rely on the user-agent string alone.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: YisouSpider\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"YisouSpider\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://weixin.sogou.com/robots.txt"
                }
            ],
            "updated_at": "2026-04-01T01:04:49Z"
        },
        {
            "id": "yokoy-group-webhooks",
            "name": "Yokoy Group Webhooks",
            "slug": "yokoy-group-webhooks",
            "url": "https://botcrawl.com/bots/yokoy-group-webhooks/",
            "status": "active",
            "operator": "Yokoy Group AG",
            "company": "Yokoy Group AG",
            "family": "Yokoy Group AG",
            "category": "unknown",
            "kind": "fetcher",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "YokoyGroupAG",
                "YokoyGroupAG/1.0",
                "YokoyGroupAG/"
            ],
            "primary_user_agent": "YokoyGroupAG",
            "robots_token": "YokoyGroupAG",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Yokoy is a spend management SAAS solution. Webhooks generate requests to book expenses or invoices to customer's ERP system whenever the processing and approval process has been completed in Yokoy.",
            "short_description": "Yokoy is a spend management SAAS solution.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: YokoyGroupAG\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"YokoyGroupAG\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://yokoy.io"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:53Z"
        },
        {
            "id": "youbot",
            "name": "YouBot",
            "slug": "youbot",
            "url": "https://botcrawl.com/bots/youbot/",
            "status": "active",
            "operator": "You.com",
            "company": "You.com",
            "family": "YouBot",
            "category": "search",
            "kind": "crawler",
            "purpose": "indexing",
            "identity_type": "unknown",
            "user_agents": [
                "YouBot",
                "Mozilla/5.0 AppleWebKit/537.36 (KHTML",
                "like Gecko",
                "compatible",
                "YouBot/1.0",
                "+https://docs.you.com/youbot",
                "env:prod) Chrome/X.X.X.X Safari/537.36"
            ],
            "primary_user_agent": "YouBot",
            "robots_token": "YouBot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "You.com Search Engine Crawler",
            "short_description": "You.com Search Engine Crawler",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: YouBot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"YouBot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://docs.you.com/youbot"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:53Z"
        },
        {
            "id": "youdaobot",
            "name": "YoudaoBot",
            "slug": "youdaobot",
            "url": "https://botcrawl.com/bots/youdaobot/",
            "status": "active",
            "operator": "Youdao",
            "company": "Youdao",
            "family": "Youdao",
            "category": "search",
            "kind": "unknown",
            "purpose": "general-crawl",
            "identity_type": "unknown",
            "user_agents": [
                "YoudaoBot"
            ],
            "primary_user_agent": "YoudaoBot",
            "robots_token": "YoudaoBot",
            "verified": "unknown",
            "respects_robots": "unknown",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "unknown",
            "source_type": "unknown",
            "short_description": "Youdao crawler token surfaced in sogou robots.txt.",
            "verification_method": "This token is surfaced in a Sogou-owned robots.txt file, but the source used for this entry does not publish a dedicated verification method. Confirm behavior conservatively and do not rely on the user-agent string alone.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: YoudaoBot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"YoudaoBot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://weixin.sogou.com/robots.txt"
                }
            ],
            "updated_at": "2026-04-01T01:04:48Z"
        },
        {
            "id": "yyspider",
            "name": "YYspider",
            "slug": "yyspider",
            "url": "https://botcrawl.com/bots/yyspider/",
            "status": "active",
            "operator": "YY",
            "company": "YY",
            "family": "YY",
            "category": "search",
            "kind": "unknown",
            "purpose": "general-crawl",
            "identity_type": "unknown",
            "user_agents": [
                "YYspider"
            ],
            "primary_user_agent": "YYspider",
            "robots_token": "YYspider",
            "verified": "unknown",
            "respects_robots": "unknown",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "unknown",
            "source_type": "unknown",
            "short_description": "Yy crawler token surfaced in baidu property robots.txt.",
            "verification_method": "This token is surfaced in a Baidu-owned robots.txt file, but the source used for this entry does not publish a dedicated verification method. Confirm behavior conservatively and do not rely on the user-agent string alone.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: YYspider\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"YYspider\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://map.baidu.com/robots.txt"
                }
            ],
            "updated_at": "2026-04-01T01:04:50Z"
        },
        {
            "id": "zanistabot",
            "name": "ZanistaBot",
            "slug": "zanistabot",
            "url": "https://botcrawl.com/bots/zanistabot/",
            "status": "active",
            "operator": "Zanista",
            "company": "Zanista",
            "family": "Zanista",
            "category": "ai",
            "kind": "unknown",
            "purpose": "indexing",
            "identity_type": "unknown",
            "user_agents": [
                "ZanistaBot",
                "Mozilla/5.0 (compatible",
                "ZanistaBot/1.0",
                "+https://zanista.ai)",
                "Zanista Bot"
            ],
            "primary_user_agent": "ZanistaBot",
            "robots_token": "ZanistaBot",
            "verified": "unknown",
            "respects_robots": "unknown",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "unknown",
            "source_type": "unknown",
            "short_description": "Zanista AI search crawler that indexes web content for its AI search platform.",
            "verification_method": "No official IP-range or reverse-DNS verification method is documented here. Match the exact user-agent and request behavior against the source documentation at https://zanista.ai.",
            "spoofing_risk": "User-agent strings can be spoofed. Verify the source before allow-listing or trusting a match.",
            "rules": {
                "robots": "User-agent: ZanistaBot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"ZanistaBot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://zanista.ai"
                }
            ],
            "updated_at": "2026-04-01T00:55:19Z"
        },
        {
            "id": "zapier",
            "name": "Zapier",
            "slug": "zapier",
            "url": "https://botcrawl.com/bots/zapier/",
            "status": "active",
            "operator": "Zapier Inc.",
            "company": "Zapier Inc.",
            "family": "Zapier Inc.",
            "category": "unknown",
            "kind": "fetcher",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "Zapier"
            ],
            "primary_user_agent": "Zapier",
            "robots_token": "Zapier",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Easy automation for busy people. Zapier moves info between your web apps automatically, so you can focus on your most important work.",
            "short_description": "Easy automation for busy people.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Zapier\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Zapier\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://zapier.com/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:53Z"
        },
        {
            "id": "zendesk-webhook",
            "name": "Zendesk Webhook",
            "slug": "zendesk-webhook",
            "url": "https://botcrawl.com/bots/zendesk-webhook/",
            "status": "active",
            "operator": "Zendesk",
            "company": "Zendesk",
            "family": "Zendesk",
            "category": "unknown",
            "kind": "fetcher",
            "purpose": "unknown",
            "identity_type": "unknown",
            "user_agents": [
                "Zendesk Webhook"
            ],
            "primary_user_agent": "Zendesk Webhook",
            "robots_token": "Zendesk Webhook",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Webhooks for development of Zendesk ticketing system and apps.",
            "short_description": "Webhooks for development of Zendesk ticketing system and apps.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Zendesk Webhook\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Zendesk Webhook\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://support.zendesk.com/"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:53Z"
        },
        {
            "id": "zipchatbot",
            "name": "ZipchatBot",
            "slug": "zipchatbot",
            "url": "https://botcrawl.com/bots/zipchatbot/",
            "status": "active",
            "operator": "Zipchat Inc",
            "company": "Zipchat Inc",
            "family": "Zipchat",
            "category": "ai",
            "kind": "agent",
            "purpose": "user-triggered",
            "identity_type": "unknown",
            "user_agents": [
                "ZipchatBot",
                "Mozilla/5.0 AppleWebKit/537.36 (KHTML",
                "like Gecko) compatible",
                "ZipchatBot/1.0",
                "+https://zipchat.ai/bot"
            ],
            "primary_user_agent": "ZipchatBot",
            "robots_token": "ZipchatBot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "neutral",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "AI assistant for e-commerce stores. Only crawls sites upon request from the site owner.",
            "short_description": "AI assistant for e-commerce stores. Only crawls sites upon request from the site owner.",
            "verification_method": "Validate the published identifier against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or stronger verification when available.",
            "rules": {
                "robots": "User-agent: ZipchatBot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"ZipchatBot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://www.zipchat.ai/bot"
                }
            ],
            "last_verified": "2026-04-19",
            "last_checked": "2026-04-19",
            "updated_at": "2026-04-30T09:15:54Z"
        },
        {
            "id": "zoominfo",
            "name": "ZoomInfo",
            "slug": "zoominfo",
            "url": "https://botcrawl.com/bots/zoominfo/",
            "status": "active",
            "operator": "ZoomInfo",
            "company": "ZoomInfo",
            "family": "ZoomInfo",
            "category": "search",
            "kind": "crawler",
            "purpose": "indexing",
            "identity_type": "unknown",
            "user_agents": [
                "ZoominfoBot",
                "ZoominfoBot (zoominfobot at zoominfo dot com)"
            ],
            "primary_user_agent": "ZoominfoBot",
            "robots_token": "ZoominfoBot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "Zoominfobot is an indexing robot for a web search engine, similar to Google. Created by Zoom Information Inc.(www.zoominfo.com), Zoominfobot’s patented technology continually scans millions of corporate websites, press releases, electronic news services, SEC filings and other online sources. Using advanced natural language processing algorithms, ZoomInfo has created a next generation search engine focused on finding pages with information about businesses and business professionals.",
            "short_description": "Zoominfobot is an indexing robot for a web search engine, similar to Google.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: ZoominfoBot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"ZoominfoBot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://zoominfo.com"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:53Z"
        },
        {
            "id": "zumbot",
            "name": "ZumBot",
            "slug": "zumbot",
            "url": "https://botcrawl.com/bots/zumbot/",
            "status": "active",
            "operator": "Zum Internet Corp",
            "company": "Zum Internet Corp",
            "family": "Zum Internet Corp",
            "category": "search",
            "kind": "crawler",
            "purpose": "indexing",
            "identity_type": "unknown",
            "user_agents": [
                "ZumBot",
                "Mozilla/5.0 (compatible",
                "ZumBot/1.0",
                "http://help.zum.com/inquiry)"
            ],
            "primary_user_agent": "ZumBot",
            "robots_token": "ZumBot",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "depends",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "ZumBot is a web crawler that indexes webpages for Zum Open Internet Search.",
            "short_description": "ZumBot is a web crawler that indexes webpages for Zum Open Internet Search.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: ZumBot\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"ZumBot\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "http://www.zuminternet.com/en"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:53Z"
        },
        {
            "id": "zvelo",
            "name": "Zvelo",
            "slug": "zvelo",
            "url": "https://botcrawl.com/bots/zvelo/",
            "status": "active",
            "operator": "Zvelo",
            "company": "Zvelo",
            "family": "Zvelo",
            "category": "monitoring",
            "kind": "monitor",
            "purpose": "site-owner-fetch",
            "identity_type": "unknown",
            "user_agents": [
                "Zvelo"
            ],
            "primary_user_agent": "Zvelo",
            "robots_token": "Zvelo",
            "verified": "yes",
            "respects_robots": "no",
            "risk": "safe",
            "recommended_action": "allow",
            "confidence": "medium",
            "source_type": "directory",
            "common_use": "zvelo fetches content for web categorization.",
            "short_description": "zvelo fetches content for web categorization.",
            "verification_method": "Validate the identifying user-agent or signature against the operator documentation before creating hard allow rules.",
            "spoofing_risk": "User-agent strings can be spoofed. For allow-listing or low-friction rules, pair the published identifier with operator documentation or cryptographic verification when available.",
            "rules": {
                "robots": "User-agent: Zvelo\nDisallow: /",
                "cloudflare": "(http.user_agent contains \"Zvelo\")"
            },
            "sources": [
                {
                    "type": "source",
                    "url": "https://zvelo.com"
                }
            ],
            "last_verified": "2026-04-01",
            "last_checked": "2026-04-01",
            "updated_at": "2026-04-30T09:15:53Z"
        }
    ]
}