# robots.txt for https://www.joobaa.com/
#
# Format notes:
#   * One directive per line; "#" starts a comment that runs to end of line.
#   * A crawler obeys ONLY the most specific User-agent group that matches it.
#     Groups do NOT inherit from "User-agent: *", so every rule a named bot
#     must follow has to be repeated inside that bot's own group.
#   * "Allow: /" is placed last in each group. Longest-match parsers
#     (RFC 9309, Google, Bing) are unaffected by order, while first-match
#     parsers would otherwise let a leading "Allow: /" override every
#     Disallow that follows it.
#   * Sitemap lines are group-independent and apply to all crawlers.

# ============================================
# DEFAULT RULES (ALL CRAWLERS WITHOUT THEIR OWN GROUP)
# ============================================
User-agent: *
Disallow: /admin/
Disallow: /includes/
Disallow: /config/
Disallow: /search?*
Disallow: /*?sort=*
Disallow: /*?filter=*
Disallow: /login?redirect=
Disallow: /register?redirect=
Disallow: /login?business_id=
Disallow: /login?redirect_source=
Disallow: /login?action=claim
Disallow: /register?business_id=
Disallow: /register?redirect_source=
Disallow: /register?action=claim

# --------------------------------------------
# HTTP URLs
# --------------------------------------------
# robots.txt rules are paths relative to the origin that served the file, so
# a scheme/host cannot be blocked from here. Redirect http:// to https://
# (301) at the web server instead. The lines below are kept for reference
# only and must stay commented out.
# Disallow: http://joobaa.com/
# Disallow: http://www.joobaa.com/

# --------------------------------------------
# BLOCK WRONG PINCODE URLs
# --------------------------------------------
# Block URLs with known wrong pincodes (like 9900, 0000, etc.)
Disallow: /*-9900/
Disallow: /*-0000/
Disallow: /*-9999/
Disallow: /*-000000/
Disallow: /*-999999/

Allow: /

# Crawl delay for bots (seconds; not every crawler honours this directive)
Crawl-delay: 1

# ============================================
# BUSINESS SITEMAPS (ALL HTTPS)
# ============================================
Sitemap: https://www.joobaa.com/sitemap.xml
Sitemap: https://www.joobaa.com/sitemap-premium.xml
Sitemap: https://www.joobaa.com/sitemap-featured.xml
Sitemap: https://www.joobaa.com/sitemap-basic.xml
Sitemap: https://www.joobaa.com/sitemap-pincode.xml
Sitemap: https://www.joobaa.com/sitemap-cities.xml
Sitemap: https://www.joobaa.com/sitemap-states.xml
Sitemap: https://www.joobaa.com/sitemap-districts.xml
Sitemap: https://www.joobaa.com/sitemap-categories.xml
Sitemap: https://www.joobaa.com/sitemap-subcategories.xml

# ============================================
# BLOG SITEMAPS
# ============================================
Sitemap: https://www.joobaa.com/sitemap-blogs.xml
Sitemap: https://www.joobaa.com/sitemap-blogs-categories.xml
Sitemap: https://www.joobaa.com/sitemap-blogs-subcategories.xml

# ============================================
# OTHER SITEMAPS
# ============================================
Sitemap: https://www.joobaa.com/sitemap-pages.xml

# Legacy sitemaps
Sitemap: https://www.joobaa.com/sitemap-business.xml
Sitemap: https://www.joobaa.com/sitemap.php

# ============================================
# YANDEX SPECIFIC (ALLOW ALL - RUSSIA)
# ============================================
# No Host directive is declared in this file; the canonical host should be
# signalled with 301 redirects rather than a robots.txt directive.
User-agent: Yandex
Disallow: /admin/
Disallow: /includes/
Disallow: /config/
Allow: /
Crawl-delay: 1

User-agent: YandexBot
Disallow: /admin/
Disallow: /includes/
Disallow: /config/
Allow: /
Crawl-delay: 1

User-agent: YandexImages
Disallow: /admin/
Disallow: /includes/
Allow: /
Crawl-delay: 2

# ============================================
# GOOGLEBOT SPECIFIC (ALLOW PAGINATION)
# ============================================
User-agent: Googlebot
Disallow: /admin/
Disallow: /includes/
Disallow: /config/
Disallow: /search?*
# EXPLICITLY ALLOW PAGINATION FOR GOOGLEBOT
Allow: /*?page=*
Disallow: /*?sort=*
Disallow: /*?filter=*
Disallow: /login?redirect=
Disallow: /register?redirect=
Disallow: /login?business_id=
Disallow: /login?redirect_source=
Disallow: /login?action=claim
Disallow: /register?business_id=
Disallow: /register?redirect_source=
Disallow: /register?action=claim
# Wrong-pincode URLs: repeated here because this group does not inherit the
# rules of "User-agent: *".
Disallow: /*-9900/
Disallow: /*-0000/
Disallow: /*-9999/
Disallow: /*-000000/
Disallow: /*-999999/
# Kept for reference only; a scheme/host cannot be blocked via robots.txt.
# Disallow: http://joobaa.com/
# Disallow: http://www.joobaa.com/
Allow: /
Crawl-delay: 1

# Googlebot-Image
User-agent: Googlebot-Image
Disallow: /admin/
Disallow: /includes/
Allow: /
Crawl-delay: 2

# ============================================
# BINGBOT SPECIFIC (ALLOW PAGINATION)
# ============================================
User-agent: Bingbot
Disallow: /admin/
Disallow: /includes/
Disallow: /config/
Disallow: /search?*
# EXPLICITLY ALLOW PAGINATION FOR BINGBOT
Allow: /*?page=*
Disallow: /*?sort=*
Disallow: /*?filter=*
Disallow: /login?redirect=
Disallow: /register?redirect=
Disallow: /login?business_id=
Disallow: /login?redirect_source=
Disallow: /login?action=claim
Disallow: /register?business_id=
Disallow: /register?redirect_source=
Disallow: /register?action=claim
# Wrong-pincode URLs: repeated here because this group does not inherit the
# rules of "User-agent: *".
Disallow: /*-9900/
Disallow: /*-0000/
Disallow: /*-9999/
Disallow: /*-000000/
Disallow: /*-999999/
# Kept for reference only; a scheme/host cannot be blocked via robots.txt.
# Disallow: http://joobaa.com/
# Disallow: http://www.joobaa.com/
Allow: /
Crawl-delay: 2

# ============================================
# BAD BOTS TO BLOCK
# ============================================
User-agent: AhrefsBot
Disallow: /

User-agent: SemrushBot
Disallow: /

User-agent: MJ12bot
Disallow: /

User-agent: DotBot
Disallow: /

User-agent: MegaIndex
Disallow: /

User-agent: BUbiNG
Disallow: /

User-agent: CCBot
Disallow: /

# ============================================
# SOCIAL MEDIA BOTS (ALLOW)
# ============================================
User-agent: FacebookExternalHit
Allow: /
Crawl-delay: 5

User-agent: Twitterbot
Allow: /
Crawl-delay: 5

User-agent: LinkedInBot
Allow: /
Crawl-delay: 5

User-agent: Pinterest
Allow: /
Crawl-delay: 5

# ============================================
# AI/SCRAPER BOTS TO BLOCK
# ============================================
User-agent: ChatGPT-User
Disallow: /

User-agent: GPTBot
Disallow: /

User-agent: Claude-Web
Disallow: /

User-agent: Anthropic-ai
Disallow: /

# ============================================
# ARCHIVE BOTS TO BLOCK
# ============================================
User-agent: archive.org_bot
Disallow: /

# ia_archiver is declared once, here (it was previously duplicated under
# "BAD BOTS TO BLOCK").
User-agent: ia_archiver
Disallow: /