summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- internal/middleware/extraheaders.go 7
-rw-r--r-- internal/web/robots.go 9
2 files changed, 16 insertions, 0 deletions
diff --git a/internal/middleware/extraheaders.go b/internal/middleware/extraheaders.go
index c75b65551..fb91bcc93 100644
--- a/internal/middleware/extraheaders.go
+++ b/internal/middleware/extraheaders.go
@@ -44,5 +44,12 @@ func ExtraHeaders() gin.HandlerFunc {
//
// See: https://github.com/patcg-individual-drafts/topics
c.Header("Permissions-Policy", "browsing-topics=()")
+
+ // Some AI scrapers respect the following tags to opt out
+ // of their crawling and datasets.
+ c.Header("X-Robots-Tag", "noimageai")
+ // c.Header calls .Set(), but we want to emit the header
+ // twice, not override it.
+ c.Writer.Header().Add("X-Robots-Tag", "noai")
}
}
diff --git a/internal/web/robots.go b/internal/web/robots.go
index 39708eb55..3309de97c 100644
--- a/internal/web/robots.go
+++ b/internal/web/robots.go
@@ -43,15 +43,24 @@ User-agent: Claude-Web
User-agent: cohere-ai
User-agent: Diffbot
User-agent: FacebookBot
+User-agent: facebookexternalhit
User-agent: FriendlyCrawler
User-agent: Google-Extended
User-agent: GoogleOther
+User-agent: GoogleOther-Image
+User-agent: GoogleOther-Video
User-agent: GPTBot
User-agent: ImagesiftBot
User-agent: img2dataset
+User-agent: Meta-ExternalAgent
+User-agent: OAI-SearchBot
User-agent: omgili
User-agent: omgilibot
User-agent: PerplexityBot
+User-agent: PetalBot
+User-agent: Scrapy
+User-agent: Timpibot
+User-agent: VelenPublicWebCrawler
User-agent: YouBot
Disallow: /