summary | refs | log | tree | commit | diff
path: root/internal/web
diff options
context:
space:
mode:
Diffstat (limited to 'internal/web')
-rw-r--r-- internal/web/profile.go 2
-rw-r--r-- internal/web/robots.go 157
-rw-r--r-- internal/web/web.go 37
3 files changed, 19 insertions, 177 deletions
diff --git a/internal/web/profile.go b/internal/web/profile.go
index a6d96a9ea..cf12ca33a 100644
--- a/internal/web/profile.go
+++ b/internal/web/profile.go
@@ -103,7 +103,7 @@ func (m *Module) profileGETHandler(c *gin.Context) {
// index if account is discoverable.
var robotsMeta string
if targetAccount.Discoverable {
- robotsMeta = robotsMetaAllowSome
+ robotsMeta = apiutil.RobotsDirectivesAllowSome
}
// We need to change our response slightly if the
diff --git a/internal/web/robots.go b/internal/web/robots.go
deleted file mode 100644
index 524550642..000000000
--- a/internal/web/robots.go
+++ /dev/null
@@ -1,157 +0,0 @@
-// GoToSocial
-// Copyright (C) GoToSocial Authors admin@gotosocial.org
-// SPDX-License-Identifier: AGPL-3.0-or-later
-//
-// This program is free software: you can redistribute it and/or modify
-// it under the terms of the GNU Affero General Public License as published by
-// the Free Software Foundation, either version 3 of the License, or
-// (at your option) any later version.
-//
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-// GNU Affero General Public License for more details.
-//
-// You should have received a copy of the GNU Affero General Public License
-// along with this program. If not, see <http://www.gnu.org/licenses/>.
-
-package web
-
-import (
- "net/http"
-
- "github.com/gin-gonic/gin"
- "github.com/superseriousbusiness/gotosocial/internal/config"
-)
-
-const (
- robotsPath = "/robots.txt"
- robotsMetaAllowSome = "nofollow, noarchive, nositelinkssearchbox, max-image-preview:standard" // https://developers.google.com/search/docs/crawling-indexing/robots-meta-tag#robotsmeta
- robotsTxt = `# GoToSocial robots.txt -- to edit, see internal/web/robots.go
-# More info @ https://developers.google.com/search/docs/crawling-indexing/robots/intro
-
-# AI scrapers and the like.
-# https://github.com/ai-robots-txt/ai.robots.txt/
-User-agent: AI2Bot
-User-agent: Ai2Bot-Dolma
-User-agent: Amazonbot
-User-agent: anthropic-ai
-User-agent: Applebot
-User-agent: Applebot-Extended
-User-agent: Bytespider
-User-agent: CCBot
-User-agent: ChatGPT-User
-User-agent: ClaudeBot
-User-agent: Claude-Web
-User-agent: cohere-ai
-User-agent: cohere-training-data-crawler
-User-agent: Diffbot
-User-agent: DuckAssistBot
-User-agent: FacebookBot
-User-agent: FriendlyCrawler
-User-agent: Google-Extended
-User-agent: GoogleOther
-User-agent: GoogleOther-Image
-User-agent: GoogleOther-Video
-User-agent: GPTBot
-User-agent: iaskspider/2.0
-User-agent: ICC-Crawler
-User-agent: ImagesiftBot
-User-agent: img2dataset
-User-agent: ISSCyberRiskCrawler
-User-agent: Kangaroo Bot
-User-agent: Meta-ExternalAgent
-User-agent: Meta-ExternalFetcher
-User-agent: OAI-SearchBot
-User-agent: omgili
-User-agent: omgilibot
-User-agent: PanguBot
-User-agent: PerplexityBot
-User-agent: PetalBot
-User-agent: Scrapy
-User-agent: Sidetrade indexer bot
-User-agent: Timpibot
-User-agent: VelenPublicWebCrawler
-User-agent: Webzio-Extended
-User-agent: YouBot
-Disallow: /
-
-# Marketing/SEO "intelligence" data scrapers
-User-agent: AwarioRssBot
-User-agent: AwarioSmartBot
-User-agent: DataForSeoBot
-User-agent: magpie-crawler
-User-agent: Meltwater
-User-agent: peer39_crawler
-User-agent: peer39_crawler/1.0
-User-agent: PiplBot
-User-agent: scoop.it
-User-agent: Seekr
-Disallow: /
-
-# Well-known.dev crawler. Indexes stuff under /.well-known.
-# https://well-known.dev/about/
-User-agent: WellKnownBot
-Disallow: /
-
-# Rules for everything else.
-User-agent: *
-Crawl-delay: 500
-
-# API endpoints.
-Disallow: /api/
-
-# Auth/Sign in endpoints.
-Disallow: /auth/
-Disallow: /oauth/
-Disallow: /check_your_email
-Disallow: /wait_for_approval
-Disallow: /account_disabled
-Disallow: /signup
-
-# Fileserver/media.
-Disallow: /fileserver/
-
-# Fedi S2S API endpoints.
-Disallow: /users/
-Disallow: /emoji/
-
-# Settings panels.
-Disallow: /admin
-Disallow: /user
-Disallow: /settings/
-
-# Domain blocklist.
-Disallow: /about/suspended
-
-# Webfinger endpoint.
-Disallow: /.well-known/webfinger
-`
-
- robotsTxtNoNodeInfo = robotsTxt + `
-# Disallow nodeinfo
-Disallow: /.well-known/nodeinfo
-Disallow: /nodeinfo/
-`
-)
-
-// robotsGETHandler returns a decent robots.txt that prevents crawling
-// the api, auth pages, settings pages, etc.
-//
-// More granular robots meta tags are then applied for web pages
-// depending on user preferences (see internal/web).
-func (m *Module) robotsGETHandler(c *gin.Context) {
- // Allow caching for 24 hrs.
- // https://www.rfc-editor.org/rfc/rfc9309.html#section-2.4
- c.Header("Cache-Control", "public, max-age=86400")
-
- if config.GetInstanceStatsMode() == config.InstanceStatsModeServe {
- // Serve robots.txt as-is
- // without forbidding nodeinfo.
- c.String(http.StatusOK, robotsTxt)
- return
- }
-
- // Disallow scraping nodeinfo.
- c.String(http.StatusOK, robotsTxtNoNodeInfo)
-}
diff --git a/internal/web/web.go b/internal/web/web.go
index cfadc9283..e5d4db4c4 100644
--- a/internal/web/web.go
+++ b/internal/web/web.go
@@ -95,8 +95,6 @@ func (m *Module) Route(r *router.Router, mi ...gin.HandlerFunc) {
// Route static assets.
routeAssets(m, r, mi...)
- // Route all other endpoints + handlers.
- //
// Handlers that serve profiles and statuses should use
// the SignatureCheck middleware, so that requests with
// content-type application/activity+json can be served
@@ -108,24 +106,25 @@ func (m *Module) Route(r *router.Router, mi ...gin.HandlerFunc) {
profileGroup.Handle(http.MethodGet, "", m.profileGETHandler) // use empty path here since it's the base of the group
profileGroup.Handle(http.MethodGet, statusPath, m.threadGETHandler)
- // Individual web handlers requiring no specific middlewares.
- r.AttachHandler(http.MethodGet, "/", m.indexHandler) // front-page
- r.AttachHandler(http.MethodGet, settingsPathPrefix, m.SettingsPanelHandler)
- r.AttachHandler(http.MethodGet, settingsPanelGlob, m.SettingsPanelHandler)
- r.AttachHandler(http.MethodGet, customCSSPath, m.customCSSGETHandler)
- r.AttachHandler(http.MethodGet, instanceCustomCSSPath, m.instanceCustomCSSGETHandler)
- r.AttachHandler(http.MethodGet, rssFeedPath, m.rssFeedGETHandler)
- r.AttachHandler(http.MethodGet, confirmEmailPath, m.confirmEmailGETHandler)
- r.AttachHandler(http.MethodPost, confirmEmailPath, m.confirmEmailPOSTHandler)
- r.AttachHandler(http.MethodGet, robotsPath, m.robotsGETHandler)
- r.AttachHandler(http.MethodGet, aboutPath, m.aboutGETHandler)
- r.AttachHandler(http.MethodGet, loginPath, m.loginGETHandler)
- r.AttachHandler(http.MethodGet, domainBlockListPath, m.domainBlockListGETHandler)
- r.AttachHandler(http.MethodGet, tagsPath, m.tagGETHandler)
- r.AttachHandler(http.MethodGet, signupPath, m.signupGETHandler)
- r.AttachHandler(http.MethodPost, signupPath, m.signupPOSTHandler)
+ // Group for all other web handlers.
+ everythingElseGroup := r.AttachGroup("")
+ everythingElseGroup.Use(mi...)
+ everythingElseGroup.Handle(http.MethodGet, "/", m.indexHandler) // front-page
+ everythingElseGroup.Handle(http.MethodGet, settingsPathPrefix, m.SettingsPanelHandler)
+ everythingElseGroup.Handle(http.MethodGet, settingsPanelGlob, m.SettingsPanelHandler)
+ everythingElseGroup.Handle(http.MethodGet, customCSSPath, m.customCSSGETHandler)
+ everythingElseGroup.Handle(http.MethodGet, instanceCustomCSSPath, m.instanceCustomCSSGETHandler)
+ everythingElseGroup.Handle(http.MethodGet, rssFeedPath, m.rssFeedGETHandler)
+ everythingElseGroup.Handle(http.MethodGet, confirmEmailPath, m.confirmEmailGETHandler)
+ everythingElseGroup.Handle(http.MethodPost, confirmEmailPath, m.confirmEmailPOSTHandler)
+ everythingElseGroup.Handle(http.MethodGet, aboutPath, m.aboutGETHandler)
+ everythingElseGroup.Handle(http.MethodGet, loginPath, m.loginGETHandler)
+ everythingElseGroup.Handle(http.MethodGet, domainBlockListPath, m.domainBlockListGETHandler)
+ everythingElseGroup.Handle(http.MethodGet, tagsPath, m.tagGETHandler)
+ everythingElseGroup.Handle(http.MethodGet, signupPath, m.signupGETHandler)
+ everythingElseGroup.Handle(http.MethodPost, signupPath, m.signupPOSTHandler)
- // Redirects from old endpoints to for back compat.
+ // Redirects from old endpoints for back compat.
r.AttachHandler(http.MethodGet, "/auth/edit", func(c *gin.Context) { c.Redirect(http.StatusMovedPermanently, userPanelPath) })
r.AttachHandler(http.MethodGet, "/user", func(c *gin.Context) { c.Redirect(http.StatusMovedPermanently, userPanelPath) })
r.AttachHandler(http.MethodGet, "/admin", func(c *gin.Context) { c.Redirect(http.StatusMovedPermanently, adminPanelPath) })