From d8c4d9fc5a62741f0c4c2b692a3a94874714bbcc Mon Sep 17 00:00:00 2001 From: kim Date: Mon, 28 Apr 2025 20:12:27 +0000 Subject: [feature] proof of work scraper deterrence (#4043) This adds a proof-of-work based scraper deterrence to GoToSocial's middleware stack on profile and status web pages. Heavily inspired by https://github.com/TecharoHQ/anubis, but massively stripped back for our own usecase. Todo: - ~~add configuration option so this is disabled by default~~ - ~~fix whatever weirdness is preventing this working with CSP (even in debug)~~ - ~~use our standard templating mechanism going through apiutil helper func~~ - ~~probably some absurdly small performance improvements to be made in pooling re-used hex encode / hash encode buffers~~ the web endpoints aren't as hot a path as API / ActivityPub, will leave as-is for now as it is already very minimal and well optimized - ~~verify the cryptographic assumptions re: using a portion of token as challenge data~~ this isn't a serious application of cryptography, if it turns out to be a problem we'll fix it, but it definitely should not be easily possible to guess a SHA256 hash from the first 1/4 of it even if mathematically it might make it a bit easier - ~~theme / make look nice??~~ - ~~add a spinner~~ - ~~add entry in example configuration~~ - ~~add documentation~~ Verification page originally based on https://github.com/LucienV1/powtect Co-authored-by: tobi Reviewed-on: https://codeberg.org/superseriousbusiness/gotosocial/pulls/4043 Reviewed-by: tobi Co-authored-by: kim Co-committed-by: kim --- internal/middleware/nollamas_test.go | 178 +++++++++++++++++++++++++++++++++++ 1 file changed, 178 insertions(+) create mode 100644 internal/middleware/nollamas_test.go (limited to 'internal/middleware/nollamas_test.go') diff --git a/internal/middleware/nollamas_test.go b/internal/middleware/nollamas_test.go new file mode 100644 index 000000000..92a044d32 --- /dev/null +++ b/internal/middleware/nollamas_test.go @@ -0,0 +1,178 @@ +// GoToSocial +// Copyright (C) GoToSocial Authors admin@gotosocial.org +// SPDX-License-Identifier: AGPL-3.0-or-later +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . + +package middleware_test + +import ( + "context" + "crypto/sha256" + "encoding/hex" + "io" + "net/http" + "net/http/httptest" + "slices" + "strconv" + "strings" + "testing" + + "code.superseriousbusiness.org/gotosocial/internal/api/model" + "code.superseriousbusiness.org/gotosocial/internal/config" + "code.superseriousbusiness.org/gotosocial/internal/gtserror" + "code.superseriousbusiness.org/gotosocial/internal/middleware" + "code.superseriousbusiness.org/gotosocial/internal/router" + "codeberg.org/gruf/go-byteutil" + "github.com/gin-gonic/gin" + "github.com/stretchr/testify/assert" +) + +func TestNoLLaMasMiddleware(t *testing.T) { + // Gin test engine. + e := gin.New() + + // Setup necessary configuration variables. + config.SetAdvancedScraperDeterrence(true) + config.SetWebTemplateBaseDir("../../web/template") + + // Load templates into engine. + err := router.LoadTemplates(e) + assert.NoError(t, err) + + // Add middleware to the gin engine handler stack. + middleware := middleware.NoLLaMas(getInstanceV1) + e.Use(middleware) + + // Set test handler we can + // easily check if was used. + e.Handle("GET", "/", testHandler) + + // Test with differing user-agents. + for _, userAgent := range []string{ + "CURL", + "Mozilla FireSox", + "Google Gnome", + } { + testNoLLaMasMiddleware(t, e, userAgent) + } +} + +func testNoLLaMasMiddleware(t *testing.T, e *gin.Engine, userAgent string) { + // Prepare a test request for gin engine. + r := httptest.NewRequest("GET", "/", nil) + r.Header.Set("User-Agent", userAgent) + rw := httptest.NewRecorder() + + // Pass req through + // engine handler. + e.ServeHTTP(rw, r) + + // Get http result. + res := rw.Result() + + // It should have been stopped + // by middleware and NOT used + // the expected test handler. + ok := usedTestHandler(res) + assert.False(t, ok) + + // Read entire response body. + b, err := io.ReadAll(res.Body) + if err != nil { + panic(err) + } + + var difficulty uint64 + var challenge string + + // Parse output body and find the challenge / difficulty. + for _, line := range strings.Split(string(b), "\n") { + line = strings.TrimSpace(line) + switch { + case strings.HasPrefix(line, "data-nollamas-challenge=\""): + line = line[25:] + line = line[:len(line)-1] + challenge = line + case strings.HasPrefix(line, "data-nollamas-difficulty=\""): + line = line[26:] + line = line[:len(line)-1] + var err error + difficulty, err = strconv.ParseUint(line, 10, 8) + assert.NoError(t, err) + } + } + + // Ensure valid posed challenge. + assert.NotZero(t, difficulty) + assert.NotEmpty(t, challenge) + + // Prepare a test request for gin engine. + r = httptest.NewRequest("GET", "/", nil) + r.Header.Set("User-Agent", userAgent) + rw = httptest.NewRecorder() + + // Now compute and set solution query paramater. + solution := computeSolution(challenge, difficulty) + r.URL.RawQuery = "nollamas_solution=" + solution + + // Pass req through + // engine handler. + e.ServeHTTP(rw, r) + + // Get http result. + res = rw.Result() + + // Should have received redirect. + uri, err := res.Location() + assert.NoError(t, err) + assert.Equal(t, uri.String(), "/") + + // Ensure our expected solution cookie (to bypass challenge) was set. + ok = slices.ContainsFunc(res.Cookies(), func(c *http.Cookie) bool { + return c.Name == "gts-nollamas" + }) + assert.True(t, ok) +} + +// computeSolution does the functional equivalent of our nollamas workerTask.js. +func computeSolution(challenge string, difficulty uint64) string { +outer: + for i := 0; ; i++ { + solution := strconv.Itoa(i) + combined := challenge + solution + hash := sha256.Sum256(byteutil.S2B(combined)) + encoded := hex.EncodeToString(hash[:]) + for i := range difficulty { + if encoded[i] != '0' { + continue outer + } + } + return solution + } +} + +// usedTestHandler returns whether testHandler() was used. +func usedTestHandler(res *http.Response) bool { + return res.Header.Get("test-handler") == "ok" +} + +func testHandler(c *gin.Context) { + c.Writer.Header().Set("test-handler", "ok") + c.Writer.WriteHeader(http.StatusOK) +} + +func getInstanceV1(context.Context) (*model.InstanceV1, gtserror.WithCode) { + return &model.InstanceV1{}, nil +} -- cgit v1.2.3