From fd670c6a279e2aa54822546536dbf88b45a93051 Mon Sep 17 00:00:00 2001
From: tobi <31960611+tsmethurst@users.noreply.github.com>
Date: Mon, 24 Feb 2025 11:17:18 +0100
Subject: [feature] Use ETag for robots.txt to prevent mishaps (#3829)

* [feature] Use ETag for robots.txt to prevent mishaps

* check incoming if-none-match header
---
 internal/api/robots.go        |  2 +-
 internal/api/robots/robots.go | 20 ++++++++++++++++++++
 internal/api/util/robots.go   |  5 +++++
 3 files changed, 26 insertions(+), 1 deletion(-)

diff --git a/internal/api/robots.go b/internal/api/robots.go
index 3ed8282f5..29e2823ef 100644
--- a/internal/api/robots.go
+++ b/internal/api/robots.go
@@ -39,7 +39,7 @@ func (rb *Robots) Route(r *router.Router, m ...gin.HandlerFunc) {
 	// https://www.rfc-editor.org/rfc/rfc9309.html#section-2.4
 	robotsGroup.Use(
 		middleware.CacheControl(middleware.CacheControlConfig{
-			Directives: []string{"public", "max-age=86400"},
+			Directives: []string{"public", "no-cache"},
 			Vary:       []string{"Accept-Encoding"},
 		}),
 	)
diff --git a/internal/api/robots/robots.go b/internal/api/robots/robots.go
index 98db4682d..b72a01ed6 100644
--- a/internal/api/robots/robots.go
+++ b/internal/api/robots/robots.go
@@ -49,9 +49,29 @@ func (m *Module) Route(attachHandler func(method string, path string, f ...gin.H
 }
 
 func (m *Module) robotsGETHandler(c *gin.Context) {
+	const ETag = "\"" + apiutil.RobotsTxtETag + "\""
+	c.Header("ETag", ETag)
+
+	if c.Request.Header.Get("If-None-Match") == ETag {
+		// Cached.
+		c.AbortWithStatus(http.StatusNotModified)
+		return
+	}
+
+	// Not cached, serve.
 	c.String(http.StatusOK, apiutil.RobotsTxt)
 }
 
 func (m *Module) robotsGETHandlerDisallowNodeInfo(c *gin.Context) {
+	const ETag = "\"" + apiutil.RobotsTxtDisallowNodeInfoETag + "\""
+	c.Header("ETag", ETag)
+
+	if c.Request.Header.Get("If-None-Match") == ETag {
+		// Cached.
+		c.AbortWithStatus(http.StatusNotModified)
+		return
+	}
+
+	// Not cached, serve.
 	c.String(http.StatusOK, apiutil.RobotsTxtDisallowNodeInfo)
 }
diff --git a/internal/api/util/robots.go b/internal/api/util/robots.go
index 49fb04561..32d510edd 100644
--- a/internal/api/util/robots.go
+++ b/internal/api/util/robots.go
@@ -130,4 +130,9 @@ Disallow: /.well-known/webfinger
 Disallow: /.well-known/nodeinfo
 Disallow: /nodeinfo/
 `
+
+	// MD5 hash of basic robots.txt.
+	RobotsTxtETag = `ce6729aacbb16fae3628210c04b462b7`
+	// MD5 hash of robots.txt with NodeInfo disallowed.
+	RobotsTxtDisallowNodeInfoETag = `a1e4ce6342978bc8d6c3e3dfab07cab4`
 )
-- 
cgit v1.2.3