summaryrefslogtreecommitdiff
path: root/internal/middleware
diff options
context:
space:
mode:
authorLibravatar kim <grufwub@gmail.com>2025-04-28 20:12:27 +0000
committerLibravatar kim <gruf@noreply.codeberg.org>2025-04-28 20:12:27 +0000
commitd8c4d9fc5a62741f0c4c2b692a3a94874714bbcc (patch)
treeb64e5f1a635149db4b549fecd09437e9874572ad /internal/middleware
parent[chore/docs] add symmetry to the politics (#4081) (diff)
downloadgotosocial-d8c4d9fc5a62741f0c4c2b692a3a94874714bbcc.tar.xz
[feature] proof of work scraper deterrence (#4043)
This adds a proof-of-work based scraper deterrence to GoToSocial's middleware stack on profile and status web pages. Heavily inspired by https://github.com/TecharoHQ/anubis, but massively stripped back for our own use case.

Todo:
- ~~add configuration option so this is disabled by default~~
- ~~fix whatever weirdness is preventing this working with CSP (even in debug)~~
- ~~use our standard templating mechanism going through apiutil helper func~~
- ~~probably some absurdly small performance improvements to be made in pooling re-used hex encode / hash encode buffers~~ the web endpoints aren't as hot a path as API / ActivityPub, will leave as-is for now as it is already very minimal and well optimized
- ~~verify the cryptographic assumptions re: using a portion of token as challenge data~~ this isn't a serious application of cryptography, if it turns out to be a problem we'll fix it, but it definitely should not be easily possible to guess a SHA256 hash from the first 1/4 of it even if mathematically it might make it a bit easier
- ~~theme / make look nice??~~
- ~~add a spinner~~
- ~~add entry in example configuration~~
- ~~add documentation~~

Verification page originally based on https://github.com/LucienV1/powtect

Co-authored-by: tobi <tobi.smethurst@protonmail.com>
Reviewed-on: https://codeberg.org/superseriousbusiness/gotosocial/pulls/4043
Reviewed-by: tobi <tsmethurst@noreply.codeberg.org>
Co-authored-by: kim <grufwub@gmail.com>
Co-committed-by: kim <grufwub@gmail.com>
Diffstat (limited to 'internal/middleware')
-rw-r--r--internal/middleware/headerfilter.go15
-rw-r--r--internal/middleware/nollamas.go309
-rw-r--r--internal/middleware/nollamas_test.go178
3 files changed, 487 insertions, 15 deletions
diff --git a/internal/middleware/headerfilter.go b/internal/middleware/headerfilter.go
index 6fb9fc996..0fd2a8877 100644
--- a/internal/middleware/headerfilter.go
+++ b/internal/middleware/headerfilter.go
@@ -150,11 +150,6 @@ func isHeaderBlocked(state *state.State, c *gin.Context) (bool, error) {
}
if key != "" {
- // if expr != "" {
- // // TODO: replace expvar with build
- // // taggable metrics types in State{}.
- // }
-
// A header was matched against!
// i.e. this request is blocked.
return true, nil
@@ -185,11 +180,6 @@ func isHeaderAllowed(state *state.State, c *gin.Context) (bool, error) {
}
if key != "" {
- // if expr != "" {
- // // TODO: replace expvar with build
- // // taggable metrics types in State{}.
- // }
-
// A header was matched against!
// i.e. this request is allowed.
return true, nil
@@ -220,11 +210,6 @@ func isHeaderNotAllowed(state *state.State, c *gin.Context) (bool, error) {
}
if key != "" {
- // if expr != "" {
- // // TODO: replace expvar with build
- // // taggable metrics types in State{}.
- // }
-
// A header was matched against!
// i.e. request is NOT allowed.
return true, nil
diff --git a/internal/middleware/nollamas.go b/internal/middleware/nollamas.go
new file mode 100644
index 000000000..7f01c5afc
--- /dev/null
+++ b/internal/middleware/nollamas.go
@@ -0,0 +1,309 @@
+// GoToSocial
+// Copyright (C) GoToSocial Authors admin@gotosocial.org
+// SPDX-License-Identifier: AGPL-3.0-or-later
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Affero General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Affero General Public License for more details.
+//
+// You should have received a copy of the GNU Affero General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+package middleware
+
+import (
+ "context"
+ "crypto/rand"
+ "crypto/sha256"
+ "crypto/subtle"
+ "encoding/hex"
+ "hash"
+ "io"
+ "net/http"
+ "time"
+
+ apimodel "code.superseriousbusiness.org/gotosocial/internal/api/model"
+ apiutil "code.superseriousbusiness.org/gotosocial/internal/api/util"
+ "code.superseriousbusiness.org/gotosocial/internal/config"
+ "code.superseriousbusiness.org/gotosocial/internal/gtscontext"
+ "code.superseriousbusiness.org/gotosocial/internal/gtserror"
+ "code.superseriousbusiness.org/gotosocial/internal/log"
+ "code.superseriousbusiness.org/gotosocial/internal/oauth"
+ "codeberg.org/gruf/go-byteutil"
+ "github.com/gin-gonic/gin"
+)
+
+// NoLLaMas builds a piece of gin middleware that deters bots and
+// scrapers on the routes it guards by demanding a proof-of-work.
+//
+// Each HTTP client is posed a challenge that is unique to it, yet
+// deterministic within an hour TTL. On submitting a valid solution
+// the client is handed a cookie allowing it to bypass the check for
+// the remainder of that TTL. The intent is to make scraping of the
+// guarded endpoints economically unfeasible at an absurdly minimal
+// performance cost; the downside is that clients must have javascript
+// enabled in order to pass the check.
+//
+// If the advanced scraper deterrence config option is not enabled,
+// the returned handler does nothing. The provided getInstanceV1 func
+// is stored for use when rendering the challenge page template.
+//
+// Panics if random data for the token seed cannot be read.
+//
+// Heavily inspired by: https://github.com/TecharoHQ/anubis
+func NoLLaMas(getInstanceV1 func(context.Context) (*apimodel.InstanceV1, gtserror.WithCode)) gin.HandlerFunc {
+	if !config.GetAdvancedScraperDeterrence() {
+		// Deterrence is switched off,
+		// hand back a no-op handler.
+		return func(*gin.Context) {}
+	}
+
+	// Fill a fresh token seed
+	// buffer with random data.
+	seed := make([]byte, 32)
+	if _, err := io.ReadFull(rand.Reader, seed); err != nil {
+		panic(err)
+	}
+
+	// Assemble middleware state.
+	m := &nollamas{
+		seed:          seed,
+		ttl:           time.Hour,
+		diff:          4,
+		getInstanceV1: getInstanceV1,
+	}
+
+	return m.Serve
+}
+
+// hashWithBufs bundles a hash together with
+// the scratch buffers needed to first produce
+// a hashsum, and to then hex encode that sum.
+type hashWithBufs struct {
+	hash hash.Hash
+
+	// hbuf receives the raw hashsum,
+	// ebuf the hex encoded form of it.
+	hbuf, ebuf []byte
+}
+
+// nollamas holds the state backing the
+// handler returned from NoLLaMas().
+type nollamas struct {
+	// seed is the unique token seed,
+	// hashed into every generated token.
+	seed []byte
+
+	// ttl is the period to which time is
+	// rounded during token generation, and
+	// the max age set on the success cookie.
+	ttl time.Duration
+
+	// diff is the challenge difficulty, i.e.
+	// the number of leading '0' characters a
+	// hex encoded solution hash must have.
+	diff uint8
+
+	// getInstanceV1 provides the instance
+	// information that is required for our
+	// challenge page template rendering.
+	getInstanceV1 func(ctx context.Context) (*apimodel.InstanceV1, gtserror.WithCode)
+}
+
+// Serve is the gin handler func implementing the proof-of-work check.
+//
+// A request is passed straight on to the next handler when it:
+//   - is not a 'GET' request
+//   - has a value stored under oauth.SessionAuthorizedToken in its context
+//   - has an HTTP signature stored in its context
+//   - carries a "gts-nollamas" cookie equal to the success token
+//     currently expected for its user-agent + client IP
+//
+// Any other request is aborted, and is either:
+//   - served the challenge page, when the "nollamas_solution" query
+//     parameter is empty, longer than 20 bytes, or not a valid solution
+//   - redirected (307) to the same URI with the solution parameter
+//     dropped and the success cookie set, when the solution is valid
+func (m *nollamas) Serve(c *gin.Context) {
+	if c.Request.Method != http.MethodGet {
+		// Only interested in protecting
+		// crawlable 'GET' endpoints.
+		c.Next()
+		return
+	}
+
+	// Extract request context.
+	ctx := c.Request.Context()
+
+	if ctx.Value(oauth.SessionAuthorizedToken) != nil {
+		// Don't guard against requests
+		// providing valid OAuth tokens.
+		c.Next()
+		return
+	}
+
+	if gtscontext.HTTPSignature(ctx) != "" {
+		// Don't guard against requests
+		// providing HTTP signatures.
+		c.Next()
+		return
+	}
+
+	// i.e. outputted hash slice length.
+	const hashLen = sha256.Size
+
+	// i.e. hex.EncodedLen(hashLen).
+	const encodedHashLen = 2 * hashLen
+
+	// Prepare hash + buffers, these get
+	// shared between the token generation
+	// and the later solution check below.
+	hash := hashWithBufs{
+		hash: sha256.New(),
+		hbuf: make([]byte, 0, hashLen),
+		ebuf: make([]byte, encodedHashLen),
+	}
+
+	// Extract client fingerprint data.
+	userAgent := c.GetHeader("User-Agent")
+	clientIP := c.ClientIP()
+
+	// Generate a unique token for this request,
+	// only valid for a period of now +- m.ttl.
+	token := m.token(&hash, userAgent, clientIP)
+
+	// For unique challenge string just use a
+	// single portion of their 'success' token.
+	// SHA256 is not yet cracked, this is not an
+	// application of a hash requiring serious
+	// cryptographic security and it rotates on
+	// a TTL basis, so it should be fine.
+	challenge := token[:len(token)/4]
+
+	// Check for a provided success token. An
+	// error here only indicates a missing cookie,
+	// which leaves us with an empty string that
+	// can never match the generated token below.
+	cookie, _ := c.Cookie("gts-nollamas")
+
+	// Check whether passed cookie
+	// is the expected success token.
+	if subtle.ConstantTimeCompare(
+		byteutil.S2B(token),
+		byteutil.S2B(cookie),
+	) == 1 {
+
+		// They passed us a valid, expected
+		// token. They already passed checks.
+		c.Next()
+		return
+	}
+
+	// Prepare new log entry.
+	l := log.WithContext(ctx).
+		WithField("userAgent", userAgent).
+		WithField("challenge", challenge)
+
+	// Extract and parse query.
+	query := c.Request.URL.Query()
+
+	// Check query to see if an in-progress
+	// challenge solution has been provided.
+	nonce := query.Get("nollamas_solution")
+	if nonce == "" || len(nonce) > 20 {
+
+		// noting that here, 20 is
+		// max integer string len.
+		//
+		// An invalid solution string, just
+		// present them with new challenge.
+		l.Info("posing new challenge")
+		m.renderChallenge(c, challenge)
+		return
+	}
+
+	// Reset the hash, it still holds
+	// state from the token generation.
+	hash.hash.Reset()
+
+	// Check challenge+nonce as possible solution.
+	if !m.checkChallenge(&hash, challenge, nonce) {
+
+		// They failed challenge,
+		// re-present challenge page.
+		l.Info("invalid solution provided")
+		m.renderChallenge(c, challenge)
+		return
+	}
+
+	// The nonce is arbitrary client-provided
+	// text (it is only length-checked above),
+	// so log it quoted to ensure that it cannot
+	// inject newlines / control chars into logs.
+	l.Infof("challenge passed: %q", nonce)
+
+	// Don't pass to further
+	// handlers, we'll redirect.
+	c.Abort()
+
+	// Drop solution query and encode.
+	query.Del("nollamas_solution")
+	c.Request.URL.RawQuery = query.Encode()
+
+	// They passed the challenge! Set success token cookie
+	// and redirect them back to the page they requested,
+	// where the cookie will get them past the above check.
+	//
+	// NOTE(review): the cookie is set with both the 'secure'
+	// and 'httpOnly' flags disabled. Confirm whether anything
+	// client-side needs to read it, else consider httpOnly.
+	c.SetCookie("gts-nollamas", token, int(m.ttl/time.Second), "", "", false, false)
+	c.Redirect(http.StatusTemporaryRedirect, c.Request.URL.RequestURI())
+}
+
+// renderChallenge aborts the handler chain and responds with the
+// templated proof-of-work page for the given challenge string. If
+// instance information cannot be fetched, the returned error is
+// instead passed through to the API error handler.
+func (m *nollamas) renderChallenge(c *gin.Context, challenge string) {
+	// From here the client only ever
+	// gets our challenge page, and no
+	// further handlers in the chain.
+	c.Abort()
+
+	// Templating vars require current instance information.
+	instance, errWithCode := m.getInstanceV1(c.Request.Context())
+	if errWithCode != nil {
+		apiutil.ErrorHandler(c, errWithCode, m.getInstanceV1)
+		return
+	}
+
+	// Page stylesheets; the fork-awesome sheet
+	// is included to get nice loading spinner.
+	stylesheets := []string{
+		"/assets/dist/nollamas.css",
+		"/assets/Fork-Awesome/css/fork-awesome.min.css",
+	}
+
+	// Page script, marked as deferred.
+	scripts := []apiutil.JavascriptEntry{{
+		Src:   "/assets/dist/nollamas.js",
+		Defer: true,
+	}}
+
+	// Challenge parameters
+	// passed to the template.
+	extra := map[string]any{
+		"challenge":  challenge,
+		"difficulty": m.diff,
+	}
+
+	// Write templated challenge response to client.
+	apiutil.TemplateWebPage(c, apiutil.WebPage{
+		Template:    "nollamas.tmpl",
+		Instance:    instance,
+		Stylesheets: stylesheets,
+		Extra:       extra,
+		Javascript:  scripts,
+	})
+}
+
+// token generates the hex encoded 'success' token for an HTTP client
+// with given user-agent and IP. The token is the hashsum of, in order:
+// our seed, the difficulty, the current unix time rounded to the TTL
+// (as 8 big-endian bytes), the user-agent and finally the client IP.
+//
+// The passed hash is written to as-is (it is NOT reset here), and its
+// buffers are overwritten with the raw and the encoded sum respectively.
+func (m *nollamas) token(hash *hashWithBufs, userAgent, clientIP string) string {
+	h := hash.hash
+
+	// Start from our unique seed, ensuring
+	// tokens generated for a given http client
+	// are cryptographically unique, yet still
+	// deterministic.
+	h.Write(m.seed)
+
+	// The difficulty level is part of
+	// the input so that existing tokens
+	// invalidate if the config changes.
+	h.Write([]byte{m.diff})
+
+	// The current time rounded to TTL is
+	// part of the input too, meaning that a
+	// single token comparison by the caller
+	// also takes care of handling expiries.
+	now := time.Now().Round(m.ttl).Unix()
+
+	// Serialize time, most
+	// significant byte first.
+	var stamp [8]byte
+	for i := range stamp {
+		stamp[i] = byte(now >> (56 - 8*i))
+	}
+	h.Write(stamp[:])
+
+	// Lastly, the unique client request data.
+	h.Write(byteutil.S2B(userAgent))
+	h.Write(byteutil.S2B(clientIP))
+
+	// Sum into the hash buffer, hex encode
+	// into the encode buffer and return a
+	// string copy of the encoded output.
+	hash.hbuf = h.Sum(hash.hbuf[:0])
+	hex.Encode(hash.ebuf, hash.hbuf)
+	return string(hash.ebuf)
+}
+
+// checkChallenge returns whether the given nonce is a valid solution
+// to the given challenge, i.e. whether the hex encoded hashsum of
+// challenge+nonce starts with at least 'm.diff' many '0' characters.
+//
+// The passed hash is written to as-is, so the caller is expected to
+// have reset it beforehand. Its buffers are overwritten in the process.
+func (m *nollamas) checkChallenge(hash *hashWithBufs, challenge, nonce string) bool {
+	// Hash and encode input challenge with
+	// proposed nonce as a possible solution.
+	hash.hash.Write(byteutil.S2B(challenge))
+	hash.hash.Write(byteutil.S2B(nonce))
+	hash.hbuf = hash.hash.Sum(hash.hbuf[:0])
+	hex.Encode(hash.ebuf, hash.hbuf)
+	solution := hash.ebuf
+
+	if int(m.diff) > len(solution) {
+		// A difficulty beyond the encoded
+		// hash length cannot ever be satisfied,
+		// fail instead of indexing out-of-range.
+		return false
+	}
+
+	// Check that the first 'diff'
+	// many chars are indeed zeroes.
+	for _, ch := range solution[:m.diff] {
+		if ch != '0' {
+			return false
+		}
+	}
+
+	return true
+}
diff --git a/internal/middleware/nollamas_test.go b/internal/middleware/nollamas_test.go
new file mode 100644
index 000000000..92a044d32
--- /dev/null
+++ b/internal/middleware/nollamas_test.go
@@ -0,0 +1,178 @@
+// GoToSocial
+// Copyright (C) GoToSocial Authors admin@gotosocial.org
+// SPDX-License-Identifier: AGPL-3.0-or-later
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Affero General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Affero General Public License for more details.
+//
+// You should have received a copy of the GNU Affero General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+package middleware_test
+
+import (
+ "context"
+ "crypto/sha256"
+ "encoding/hex"
+ "io"
+ "net/http"
+ "net/http/httptest"
+ "slices"
+ "strconv"
+ "strings"
+ "testing"
+
+ "code.superseriousbusiness.org/gotosocial/internal/api/model"
+ "code.superseriousbusiness.org/gotosocial/internal/config"
+ "code.superseriousbusiness.org/gotosocial/internal/gtserror"
+ "code.superseriousbusiness.org/gotosocial/internal/middleware"
+ "code.superseriousbusiness.org/gotosocial/internal/router"
+ "codeberg.org/gruf/go-byteutil"
+ "github.com/gin-gonic/gin"
+ "github.com/stretchr/testify/assert"
+)
+
+// TestNoLLaMasMiddleware runs the NoLLaMas middleware through a full
+// challenge -> solution round trip, for a handful of user-agents.
+func TestNoLLaMasMiddleware(t *testing.T) {
+	// Gin test engine.
+	e := gin.New()
+
+	// Setup necessary configuration variables.
+	config.SetAdvancedScraperDeterrence(true)
+	config.SetWebTemplateBaseDir("../../web/template")
+
+	// Load templates into engine. Without them
+	// the challenge page cannot be rendered, so
+	// there is no use in continuing on failure.
+	if err := router.LoadTemplates(e); err != nil {
+		t.Fatalf("error loading templates: %v", err)
+	}
+
+	// Add middleware to the gin engine handler stack.
+	// (named so as not to shadow the imported package).
+	handler := middleware.NoLLaMas(getInstanceV1)
+	e.Use(handler)
+
+	// Set test handler we can
+	// easily check if was used.
+	e.Handle("GET", "/", testHandler)
+
+	// Test with differing user-agents.
+	for _, userAgent := range []string{
+		"CURL",
+		"Mozilla FireSox",
+		"Google Gnome",
+	} {
+		testNoLLaMasMiddleware(t, e, userAgent)
+	}
+}
+
+// testNoLLaMasMiddleware performs a full round trip through
+// the middleware in given engine, using given user-agent:
+//
+//  1. a plain request, expected to be stopped with a challenge page
+//  2. a request carrying a computed solution, expected to be
+//     redirected to "/" with the success cookie set
+//  3. a request carrying that cookie, expected to reach testHandler
+func testNoLLaMasMiddleware(t *testing.T, e *gin.Engine, userAgent string) {
+	t.Helper()
+
+	// Prepare a test request for gin engine.
+	r := httptest.NewRequest("GET", "/", nil)
+	r.Header.Set("User-Agent", userAgent)
+	rw := httptest.NewRecorder()
+
+	// Pass req through
+	// engine handler.
+	e.ServeHTTP(rw, r)
+
+	// Get http result.
+	res := rw.Result()
+
+	// It should have been stopped
+	// by middleware and NOT used
+	// the expected test handler.
+	assert.False(t, usedTestHandler(res))
+
+	// Read entire response body.
+	b, err := io.ReadAll(res.Body)
+	_ = res.Body.Close()
+	if err != nil {
+		t.Fatalf("error reading response body: %v", err)
+	}
+
+	var difficulty uint64
+	var challenge string
+
+	// Parse output body and find the challenge / difficulty,
+	// which are both given as quoted html attribute values.
+	for _, line := range strings.Split(string(b), "\n") {
+		line = strings.TrimSpace(line)
+		if value, ok := strings.CutPrefix(line, "data-nollamas-challenge=\""); ok {
+			challenge, _, _ = strings.Cut(value, "\"")
+		} else if value, ok := strings.CutPrefix(line, "data-nollamas-difficulty=\""); ok {
+			value, _, _ = strings.Cut(value, "\"")
+			var err error
+			difficulty, err = strconv.ParseUint(value, 10, 8)
+			assert.NoError(t, err)
+		}
+	}
+
+	// Ensure valid posed challenge, without
+	// one there is no solution to compute.
+	okDifficulty := assert.NotZero(t, difficulty)
+	okChallenge := assert.NotEmpty(t, challenge)
+	if !okDifficulty || !okChallenge {
+		return
+	}
+
+	// Prepare a test request for gin engine.
+	r = httptest.NewRequest("GET", "/", nil)
+	r.Header.Set("User-Agent", userAgent)
+	rw = httptest.NewRecorder()
+
+	// Now compute and set solution query parameter.
+	solution := computeSolution(challenge, difficulty)
+	r.URL.RawQuery = "nollamas_solution=" + solution
+
+	// Pass req through
+	// engine handler.
+	e.ServeHTTP(rw, r)
+
+	// Get http result.
+	res = rw.Result()
+	_ = res.Body.Close()
+
+	// Should have received redirect. Only look
+	// at the location when one was actually set,
+	// as it is otherwise returned as a nil URL.
+	assert.Equal(t, http.StatusTemporaryRedirect, res.StatusCode)
+	uri, err := res.Location()
+	if assert.NoError(t, err) {
+		assert.Equal(t, "/", uri.String())
+	}
+
+	// Ensure our expected solution cookie (to bypass challenge) was set.
+	cookies := res.Cookies()
+	idx := slices.IndexFunc(cookies, func(c *http.Cookie) bool {
+		return c.Name == "gts-nollamas"
+	})
+	if !assert.True(t, idx >= 0) {
+		return
+	}
+
+	// Prepare a final test request for gin
+	// engine, now providing solution cookie.
+	r = httptest.NewRequest("GET", "/", nil)
+	r.Header.Set("User-Agent", userAgent)
+	r.AddCookie(&http.Cookie{
+		Name:  cookies[idx].Name,
+		Value: cookies[idx].Value,
+	})
+	rw = httptest.NewRecorder()
+
+	// Pass req through
+	// engine handler.
+	e.ServeHTTP(rw, r)
+
+	// Get http result.
+	res = rw.Result()
+	_ = res.Body.Close()
+
+	// With the cookie set it should have
+	// bypassed the challenge and this time
+	// reached the expected test handler.
+	assert.True(t, usedTestHandler(res))
+}
+
+// computeSolution does the functional equivalent of our nollamas workerTask.js:
+// it tries the decimal strings "0", "1", "2", ... in turn, and returns the first
+// one for which the hex encoded SHA256 of challenge+string starts with at least
+// 'difficulty' many '0' characters.
+func computeSolution(challenge string, difficulty uint64) string {
+	for nonce := 0; ; nonce++ {
+		// Hash and encode challenge with
+		// next nonce as candidate solution.
+		solution := strconv.Itoa(nonce)
+		sum := sha256.Sum256(byteutil.S2B(challenge + solution))
+		encoded := hex.EncodeToString(sum[:])
+
+		// Count leading zero chars, stopping
+		// at the required number of them.
+		var zeroes uint64
+		for zeroes < difficulty && encoded[zeroes] == '0' {
+			zeroes++
+		}
+
+		if zeroes == difficulty {
+			return solution
+		}
+	}
+}
+
+// usedTestHandler reports whether the given response
+// carries the "test-handler: ok" marker header, i.e.
+// whether it was produced by testHandler().
+func usedTestHandler(res *http.Response) bool {
+	marker := res.Header.Get("test-handler")
+	return marker == "ok"
+}
+
+// testHandler is a minimal gin handler that marks its
+// response with a "test-handler: ok" header and a 200
+// status code, for detection by usedTestHandler().
+func testHandler(c *gin.Context) {
+	hdr := c.Writer.Header()
+	hdr.Set("test-handler", "ok")
+	c.Writer.WriteHeader(http.StatusOK)
+}
+
+// getInstanceV1 is a stub instance getter to pass to
+// the middleware, always returning an empty instance
+// model and no error.
+func getInstanceV1(context.Context) (*model.InstanceV1, gtserror.WithCode) {
+	var instance model.InstanceV1
+	return &instance, nil
+}