From dd83ad053c0cde5b948cbfe34ec4864cf0a123e3 Mon Sep 17 00:00:00 2001 From: tobi <31960611+tsmethurst@users.noreply.github.com> Date: Thu, 29 Sep 2022 12:03:17 +0200 Subject: [feature] Add `meta robots` tag; allow robots to index profile card if user is Discoverable (#842) * rework robots.txt response * don't let robots snippet from statuses/threads * allow robots to index if user is Discoverable * add license text --- internal/api/security/robots.go | 46 ++++++++++++++++++++++++++++++++++++++--- 1 file changed, 43 insertions(+), 3 deletions(-) (limited to 'internal/api/security/robots.go') diff --git a/internal/api/security/robots.go b/internal/api/security/robots.go index 65056072a..5b8ba3c05 100644 --- a/internal/api/security/robots.go +++ b/internal/api/security/robots.go @@ -1,3 +1,21 @@ +/* + GoToSocial + Copyright (C) 2021-2022 GoToSocial Authors admin@gotosocial.org + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. 
+*/ + package security import ( @@ -7,11 +25,33 @@ import ( ) const robotsString = `User-agent: * -Disallow: / +Crawl-delay: 500 +# api stuff +Disallow: /api/ +# auth/login stuff +Disallow: /auth/ +Disallow: /oauth/ +Disallow: /check_your_email +Disallow: /wait_for_approval +Disallow: /account_disabled +# well known stuff +Disallow: /.well-known/ +# files +Disallow: /fileserver/ +# s2s AP stuff +Disallow: /users/ +Disallow: /emoji/ +# panels +Disallow: /admin +Disallow: /user +Disallow: /settings/ ` -// RobotsGETHandler returns the most restrictive possible robots.txt file in response to a call to /robots.txt. -// The response instructs bots with *any* user agent not to index the instance at all. +// RobotsGETHandler returns a decent robots.txt that prevents crawling +// the api, auth pages, settings pages, etc. +// +// More granular robots meta tags are then applied for web pages +// depending on user preferences (see internal/web). func (m *Module) RobotsGETHandler(c *gin.Context) { c.String(http.StatusOK, robotsString) } -- cgit v1.2.3