summaryrefslogtreecommitdiff
path: root/internal/util/namestring.go
diff options
context:
space:
mode:
authorLibravatar Daenney <daenney@users.noreply.github.com>2023-09-11 18:38:31 +0200
committerLibravatar GitHub <noreply@github.com>2023-09-11 18:38:31 +0200
commit2cac5a4613ab51a5ac33a16cb54bb1210be9e8ce (patch)
tree96db54305c3208bb6458c015ac774b229561596a /internal/util/namestring.go
parent[chore] bump bun v1.1.14 -> v1.1.15 (#2195) (diff)
downloadgotosocial-2cac5a4613ab51a5ac33a16cb54bb1210be9e8ce.tar.xz
[feature] Support Actor URIs for webfinger queries (#2187)
* [feature] Support Actor URIs for webfinger queries It's now possible to pass an Actor URI as the resource to query for when doing a webfinger query. The code now extracts the username and domain from the URI. The URI needs to be fully qualified, including having a scheme of http or https to be recognised as such. The acct scheme is handled as we used to, including dealing with an erroneous leading @ on the username. We retain the ability to handle resources without a scheme by parsing them again with the acct scheme if the original parse failed. This can happen due to parsing ambiguities when dealing with a string like user@domain.tld:port. * [bugfix] Remove debugging changes * [chore] Make TestExtractNamestring table-driven * [chore] Unnest Trim and Split for readability
Diffstat (limited to 'internal/util/namestring.go')
-rw-r--r--internal/util/namestring.go85
1 files changed, 75 insertions, 10 deletions
diff --git a/internal/util/namestring.go b/internal/util/namestring.go
index 6109f8ebb..e510fe43f 100644
--- a/internal/util/namestring.go
+++ b/internal/util/namestring.go
@@ -19,6 +19,7 @@ package util
import (
"fmt"
+ "net/url"
"strings"
"github.com/superseriousbusiness/gotosocial/internal/regexes"
@@ -40,19 +41,83 @@ func ExtractNamestringParts(mention string) (username, host string, err error) {
}
}
-// ExtractWebfingerParts returns username test_user and
-// domain example.org from a string like acct:test_user@example.org,
-// or acct:@test_user@example.org.
+// ExtractWebfingerParts returns the username and domain from either an
+// account query or an actor URI.
//
-// If nothing is extracted, it will return an error.
+// All implementations in the wild generate webfinger account resource
+// queries with the "acct" scheme and without a leading "@" on the username.
+// This is also the format the "subject" in a webfinger response adheres to.
+//
+// Despite this fact, we're being permissive about a single leading @. This
+// makes a query for acct:user@domain.tld and acct:@user@domain.tld
+// equivalent. But a query for acct:@@user@domain.tld will have its username
+// returned with the @ prefix.
+//
+// We also permit a resource of user@domain.tld or @user@domain.tld, without
+// a scheme. In that case it gets interpreted as if it was using the "acct"
+// scheme.
+//
+// When parsing fails, an error is returned.
func ExtractWebfingerParts(webfinger string) (username, host string, err error) {
- // remove the acct: prefix if it's present
- webfinger = strings.TrimPrefix(webfinger, "acct:")
+ orig := webfinger
+
+ u, oerr := url.ParseRequestURI(webfinger)
+ if oerr != nil {
+ // Most likely reason for failing to parse is if the "acct" scheme was
+ // missing but a :port was included. So try an extra time with the scheme.
+ u, err = url.ParseRequestURI("acct:" + webfinger)
+ if err != nil {
+ return "", "", fmt.Errorf("failed to parse %s with acct sheme: %w", orig, oerr)
+ }
+ }
+
+ if u.Scheme == "http" || u.Scheme == "https" {
+ return ExtractWebfingerPartsFromURI(u)
+ }
+
+ if u.Scheme != "acct" {
+ return "", "", fmt.Errorf("unsupported scheme: %s for resource: %s", u.Scheme, orig)
+ }
+
+ stripped := strings.TrimPrefix(u.Opaque, "@")
+ userDomain := strings.Split(stripped, "@")
+ if len(userDomain) != 2 {
+ return "", "", fmt.Errorf("failed to extract user and domain from: %s", orig)
+ }
+ return userDomain[0], userDomain[1], nil
+}
+
+// ExtractWebfingerPartsFromURI returns the user and domain extracted from
+// the passed in URI. The URI should be an actor URI.
+//
+// The domain returned is the hostname, and the user will be extracted
+// from either /@test_user or /users/test_user. These two paths match the
+// "aliases" we include in our webfinger response and are also present in
+// our "links".
+//
+// Like with ExtractWebfingerParts, we're being permissive about a single
+// leading @.
+//
+// Errors are returned in case we end up with an empty domain or username.
+func ExtractWebfingerPartsFromURI(uri *url.URL) (username, host string, err error) {
+ host = uri.Host
+ if host == "" {
+ return "", "", fmt.Errorf("failed to extract domain from: %s", uri)
+ }
+
+ // strip any leading slashes
+ path := strings.TrimLeft(uri.Path, "/")
+ segs := strings.Split(path, "/")
+ if segs[0] == "users" {
+ username = segs[1]
+ } else {
+ username = segs[0]
+ }
- // prepend an @ if necessary
- if webfinger[0] != '@' {
- webfinger = "@" + webfinger
+ username = strings.TrimPrefix(username, "@")
+ if username == "" {
+ return "", "", fmt.Errorf("failed to extract username from: %s", uri)
}
- return ExtractNamestringParts(webfinger)
+ return
}