diff options
Diffstat (limited to 'internal/web')
-rw-r--r-- | internal/web/profile.go | 2 | ||||
-rw-r--r-- | internal/web/robots.go | 157 | ||||
-rw-r--r-- | internal/web/web.go | 37 |
3 files changed, 19 insertions, 177 deletions
diff --git a/internal/web/profile.go b/internal/web/profile.go index a6d96a9ea..cf12ca33a 100644 --- a/internal/web/profile.go +++ b/internal/web/profile.go @@ -103,7 +103,7 @@ func (m *Module) profileGETHandler(c *gin.Context) { // index if account is discoverable. var robotsMeta string if targetAccount.Discoverable { - robotsMeta = robotsMetaAllowSome + robotsMeta = apiutil.RobotsDirectivesAllowSome } // We need to change our response slightly if the diff --git a/internal/web/robots.go b/internal/web/robots.go deleted file mode 100644 index 524550642..000000000 --- a/internal/web/robots.go +++ /dev/null @@ -1,157 +0,0 @@ -// GoToSocial -// Copyright (C) GoToSocial Authors admin@gotosocial.org -// SPDX-License-Identifier: AGPL-3.0-or-later -// -// This program is free software: you can redistribute it and/or modify -// it under the terms of the GNU Affero General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU Affero General Public License for more details. -// -// You should have received a copy of the GNU Affero General Public License -// along with this program. If not, see <http://www.gnu.org/licenses/>. - -package web - -import ( - "net/http" - - "github.com/gin-gonic/gin" - "github.com/superseriousbusiness/gotosocial/internal/config" -) - -const ( - robotsPath = "/robots.txt" - robotsMetaAllowSome = "nofollow, noarchive, nositelinkssearchbox, max-image-preview:standard" // https://developers.google.com/search/docs/crawling-indexing/robots-meta-tag#robotsmeta - robotsTxt = `# GoToSocial robots.txt -- to edit, see internal/web/robots.go -# More info @ https://developers.google.com/search/docs/crawling-indexing/robots/intro - -# AI scrapers and the like. -# https://github.com/ai-robots-txt/ai.robots.txt/ -User-agent: AI2Bot -User-agent: Ai2Bot-Dolma -User-agent: Amazonbot -User-agent: anthropic-ai -User-agent: Applebot -User-agent: Applebot-Extended -User-agent: Bytespider -User-agent: CCBot -User-agent: ChatGPT-User -User-agent: ClaudeBot -User-agent: Claude-Web -User-agent: cohere-ai -User-agent: cohere-training-data-crawler -User-agent: Diffbot -User-agent: DuckAssistBot -User-agent: FacebookBot -User-agent: FriendlyCrawler -User-agent: Google-Extended -User-agent: GoogleOther -User-agent: GoogleOther-Image -User-agent: GoogleOther-Video -User-agent: GPTBot -User-agent: iaskspider/2.0 -User-agent: ICC-Crawler -User-agent: ImagesiftBot -User-agent: img2dataset -User-agent: ISSCyberRiskCrawler -User-agent: Kangaroo Bot -User-agent: Meta-ExternalAgent -User-agent: Meta-ExternalFetcher -User-agent: OAI-SearchBot -User-agent: omgili -User-agent: omgilibot -User-agent: PanguBot -User-agent: PerplexityBot -User-agent: PetalBot -User-agent: Scrapy -User-agent: Sidetrade indexer bot -User-agent: Timpibot -User-agent: VelenPublicWebCrawler -User-agent: Webzio-Extended -User-agent: YouBot -Disallow: / - -# Marketing/SEO "intelligence" data scrapers -User-agent: AwarioRssBot -User-agent: AwarioSmartBot -User-agent: DataForSeoBot -User-agent: magpie-crawler -User-agent: Meltwater -User-agent: peer39_crawler -User-agent: peer39_crawler/1.0 -User-agent: PiplBot -User-agent: scoop.it -User-agent: Seekr -Disallow: / - -# Well-known.dev crawler. Indexes stuff under /.well-known. -# https://well-known.dev/about/ -User-agent: WellKnownBot -Disallow: / - -# Rules for everything else. -User-agent: * -Crawl-delay: 500 - -# API endpoints. -Disallow: /api/ - -# Auth/Sign in endpoints. -Disallow: /auth/ -Disallow: /oauth/ -Disallow: /check_your_email -Disallow: /wait_for_approval -Disallow: /account_disabled -Disallow: /signup - -# Fileserver/media. -Disallow: /fileserver/ - -# Fedi S2S API endpoints. -Disallow: /users/ -Disallow: /emoji/ - -# Settings panels. -Disallow: /admin -Disallow: /user -Disallow: /settings/ - -# Domain blocklist. -Disallow: /about/suspended - -# Webfinger endpoint. -Disallow: /.well-known/webfinger -` - - robotsTxtNoNodeInfo = robotsTxt + ` -# Disallow nodeinfo -Disallow: /.well-known/nodeinfo -Disallow: /nodeinfo/ -` -) - -// robotsGETHandler returns a decent robots.txt that prevents crawling -// the api, auth pages, settings pages, etc. -// -// More granular robots meta tags are then applied for web pages -// depending on user preferences (see internal/web). -func (m *Module) robotsGETHandler(c *gin.Context) { - // Allow caching for 24 hrs. - // https://www.rfc-editor.org/rfc/rfc9309.html#section-2.4 - c.Header("Cache-Control", "public, max-age=86400") - - if config.GetInstanceStatsMode() == config.InstanceStatsModeServe { - // Serve robots.txt as-is - // without forbidding nodeinfo. - c.String(http.StatusOK, robotsTxt) - return - } - - // Disallow scraping nodeinfo. - c.String(http.StatusOK, robotsTxtNoNodeInfo) -} diff --git a/internal/web/web.go b/internal/web/web.go index cfadc9283..e5d4db4c4 100644 --- a/internal/web/web.go +++ b/internal/web/web.go @@ -95,8 +95,6 @@ func (m *Module) Route(r *router.Router, mi ...gin.HandlerFunc) { // Route static assets. routeAssets(m, r, mi...) - // Route all other endpoints + handlers. - // // Handlers that serve profiles and statuses should use // the SignatureCheck middleware, so that requests with // content-type application/activity+json can be served @@ -108,24 +106,25 @@ func (m *Module) Route(r *router.Router, mi ...gin.HandlerFunc) { profileGroup.Handle(http.MethodGet, "", m.profileGETHandler) // use empty path here since it's the base of the group profileGroup.Handle(http.MethodGet, statusPath, m.threadGETHandler) - // Individual web handlers requiring no specific middlewares. - r.AttachHandler(http.MethodGet, "/", m.indexHandler) // front-page - r.AttachHandler(http.MethodGet, settingsPathPrefix, m.SettingsPanelHandler) - r.AttachHandler(http.MethodGet, settingsPanelGlob, m.SettingsPanelHandler) - r.AttachHandler(http.MethodGet, customCSSPath, m.customCSSGETHandler) - r.AttachHandler(http.MethodGet, instanceCustomCSSPath, m.instanceCustomCSSGETHandler) - r.AttachHandler(http.MethodGet, rssFeedPath, m.rssFeedGETHandler) - r.AttachHandler(http.MethodGet, confirmEmailPath, m.confirmEmailGETHandler) - r.AttachHandler(http.MethodPost, confirmEmailPath, m.confirmEmailPOSTHandler) - r.AttachHandler(http.MethodGet, robotsPath, m.robotsGETHandler) - r.AttachHandler(http.MethodGet, aboutPath, m.aboutGETHandler) - r.AttachHandler(http.MethodGet, loginPath, m.loginGETHandler) - r.AttachHandler(http.MethodGet, domainBlockListPath, m.domainBlockListGETHandler) - r.AttachHandler(http.MethodGet, tagsPath, m.tagGETHandler) - r.AttachHandler(http.MethodGet, signupPath, m.signupGETHandler) - r.AttachHandler(http.MethodPost, signupPath, m.signupPOSTHandler) + // Group for all other web handlers. + everythingElseGroup := r.AttachGroup("") + everythingElseGroup.Use(mi...) + everythingElseGroup.Handle(http.MethodGet, "/", m.indexHandler) // front-page + everythingElseGroup.Handle(http.MethodGet, settingsPathPrefix, m.SettingsPanelHandler) + everythingElseGroup.Handle(http.MethodGet, settingsPanelGlob, m.SettingsPanelHandler) + everythingElseGroup.Handle(http.MethodGet, customCSSPath, m.customCSSGETHandler) + everythingElseGroup.Handle(http.MethodGet, instanceCustomCSSPath, m.instanceCustomCSSGETHandler) + everythingElseGroup.Handle(http.MethodGet, rssFeedPath, m.rssFeedGETHandler) + everythingElseGroup.Handle(http.MethodGet, confirmEmailPath, m.confirmEmailGETHandler) + everythingElseGroup.Handle(http.MethodPost, confirmEmailPath, m.confirmEmailPOSTHandler) + everythingElseGroup.Handle(http.MethodGet, aboutPath, m.aboutGETHandler) + everythingElseGroup.Handle(http.MethodGet, loginPath, m.loginGETHandler) + everythingElseGroup.Handle(http.MethodGet, domainBlockListPath, m.domainBlockListGETHandler) + everythingElseGroup.Handle(http.MethodGet, tagsPath, m.tagGETHandler) + everythingElseGroup.Handle(http.MethodGet, signupPath, m.signupGETHandler) + everythingElseGroup.Handle(http.MethodPost, signupPath, m.signupPOSTHandler) - // Redirects from old endpoints to for back compat. + // Redirects from old endpoints for back compat. r.AttachHandler(http.MethodGet, "/auth/edit", func(c *gin.Context) { c.Redirect(http.StatusMovedPermanently, userPanelPath) }) r.AttachHandler(http.MethodGet, "/user", func(c *gin.Context) { c.Redirect(http.StatusMovedPermanently, userPanelPath) }) r.AttachHandler(http.MethodGet, "/admin", func(c *gin.Context) { c.Redirect(http.StatusMovedPermanently, adminPanelPath) }) |