From 941893a774c83802afdc4cc76e1d30c59b6c5585 Mon Sep 17 00:00:00 2001
From: tobi <31960611+tsmethurst@users.noreply.github.com>
Date: Mon, 2 Jan 2023 13:10:50 +0100
Subject: [chore] The Big Middleware and API Refactor (tm) (#1250)

* interim commit: start refactoring middlewares into package under router

* another interim commit, this is becoming a big job

* another fucking massive interim commit

* refactor bookmarks to new style

* ambassador, wiz zeze commits you are spoiling uz

* she compiles, we're getting there

* we're just normal men; we're just innocent men

* apiutil

* whoopsie

* i'm glad noone reads commit msgs haha :blob_sweat:

* use that weirdo go-bytesize library for maxMultipartMemory

* fix media module paths
---
 internal/web/robots.go | 42 ++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 40 insertions(+), 2 deletions(-)

(limited to 'internal/web/robots.go')

diff --git a/internal/web/robots.go b/internal/web/robots.go
index c3307d068..0babb31b7 100644
--- a/internal/web/robots.go
+++ b/internal/web/robots.go
@@ -18,7 +18,45 @@
 
 package web
 
-// https://developers.google.com/search/docs/crawling-indexing/robots-meta-tag#robotsmeta
+import (
+	"net/http"
+
+	"github.com/gin-gonic/gin"
+)
+
 const (
-	robotsAllowSome = "nofollow, noarchive, nositelinkssearchbox, max-image-preview:standard"
+	robotsPath          = "/robots.txt"
+	robotsMetaAllowSome = "nofollow, noarchive, nositelinkssearchbox, max-image-preview:standard" // https://developers.google.com/search/docs/crawling-indexing/robots-meta-tag#robotsmeta
+	robotsTxt           = `# GoToSocial robots.txt -- to edit, see internal/web/robots.go
+# more info @ https://developers.google.com/search/docs/crawling-indexing/robots/intro
+User-agent: *
+Crawl-delay: 500
+# api stuff
+Disallow: /api/
+# auth/login stuff
+Disallow: /auth/
+Disallow: /oauth/
+Disallow: /check_your_email
+Disallow: /wait_for_approval
+Disallow: /account_disabled
+# well known stuff
+Disallow: /.well-known/
+# files
+Disallow: /fileserver/
+# s2s AP stuff
+Disallow: /users/
+Disallow: /emoji/
+# panels
+Disallow: /admin
+Disallow: /user
+Disallow: /settings/`
 )
+
+// robotsGETHandler returns a decent robots.txt that prevents crawling
+// the api, auth pages, settings pages, etc.
+//
+// More granular robots meta tags are then applied for web pages
+// depending on user preferences (see internal/web).
+func (m *Module) robotsGETHandler(c *gin.Context) {
+	c.String(http.StatusOK, robotsTxt)
+}
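
This path-limited view doesn't show where robotsGETHandler actually gets attached to a route; that wiring happens elsewhere in the commit. A minimal sketch of what the registration could look like with gin, reusing the Module type, robotsPath constant, and handler from the diff above (the Route method and its *gin.Engine parameter are illustrative assumptions, not taken from this patch):

package web

import "github.com/gin-gonic/gin"

// Route is a hypothetical sketch of route registration: it binds
// GET /robots.txt to the handler added in this commit. The real
// registration lives outside this file, so the names here are assumed.
func (m *Module) Route(r *gin.Engine) {
	r.GET(robotsPath, m.robotsGETHandler)
}

With something like that in place, a GET to /robots.txt returns the robotsTxt string with a 200 status, while the separate robotsMetaAllowSome constant feeds the per-page robots meta tags mentioned in the handler's doc comment rather than robots.txt itself.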