summaryrefslogtreecommitdiff
path: root/vendor/github.com/rivo/uniseg/gen_properties.go
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/github.com/rivo/uniseg/gen_properties.go')
-rw-r--r--vendor/github.com/rivo/uniseg/gen_properties.go261
1 files changed, 261 insertions, 0 deletions
diff --git a/vendor/github.com/rivo/uniseg/gen_properties.go b/vendor/github.com/rivo/uniseg/gen_properties.go
new file mode 100644
index 000000000..8992d2c5f
--- /dev/null
+++ b/vendor/github.com/rivo/uniseg/gen_properties.go
@@ -0,0 +1,261 @@
+//go:build generate
+
+// This program generates a property file in Go file from Unicode Character
+// Database auxiliary data files. The command line arguments are as follows:
+//
+// 1. The name of the Unicode data file (just the filename, without extension).
+// Can be "-" (to skip) if the emoji flag is included.
+// 2. The name of the locally generated Go file.
+// 3. The name of the slice mapping code points to properties.
+// 4. The name of the generator, for logging purposes.
+// 5. (Optional) Flags, comma-separated. The following flags are available:
+// - "emojis=<property>": include the specified emoji properties (e.g.
+// "Extended_Pictographic").
+// - "gencat": include general category properties.
+//
+//go:generate go run gen_properties.go auxiliary/GraphemeBreakProperty graphemeproperties.go graphemeCodePoints graphemes emojis=Extended_Pictographic
+//go:generate go run gen_properties.go auxiliary/WordBreakProperty wordproperties.go workBreakCodePoints words emojis=Extended_Pictographic
+//go:generate go run gen_properties.go auxiliary/SentenceBreakProperty sentenceproperties.go sentenceBreakCodePoints sentences
+//go:generate go run gen_properties.go LineBreak lineproperties.go lineBreakCodePoints lines gencat
+//go:generate go run gen_properties.go EastAsianWidth eastasianwidth.go eastAsianWidth eastasianwidth
+//go:generate go run gen_properties.go - emojipresentation.go emojiPresentation emojipresentation emojis=Emoji_Presentation
+package main
+
+import (
+ "bufio"
+ "bytes"
+ "errors"
+ "fmt"
+ "go/format"
+ "io/ioutil"
+ "log"
+ "net/http"
+ "os"
+ "regexp"
+ "sort"
+ "strconv"
+ "strings"
+ "time"
+)
+
+// We want to test against a specific version rather than the latest. When the
+// package is upgraded to a new version, change these to generate new tests.
+const (
+ propertyURL = `https://www.unicode.org/Public/15.0.0/ucd/%s.txt`
+ emojiURL = `https://unicode.org/Public/15.0.0/ucd/emoji/emoji-data.txt`
+)
+
+// The regular expression for a line containing a code point range property.
+var propertyPattern = regexp.MustCompile(`^([0-9A-F]{4,6})(\.\.([0-9A-F]{4,6}))?\s*;\s*([A-Za-z0-9_]+)\s*#\s(.+)$`)
+
+func main() {
+ if len(os.Args) < 5 {
+ fmt.Println("Not enough arguments, see code for details")
+ os.Exit(1)
+ }
+
+ log.SetPrefix("gen_properties (" + os.Args[4] + "): ")
+ log.SetFlags(0)
+
+ // Parse flags.
+ flags := make(map[string]string)
+ if len(os.Args) >= 6 {
+ for _, flag := range strings.Split(os.Args[5], ",") {
+ flagFields := strings.Split(flag, "=")
+ if len(flagFields) == 1 {
+ flags[flagFields[0]] = "yes"
+ } else {
+ flags[flagFields[0]] = flagFields[1]
+ }
+ }
+ }
+
+ // Parse the text file and generate Go source code from it.
+ _, includeGeneralCategory := flags["gencat"]
+ var mainURL string
+ if os.Args[1] != "-" {
+ mainURL = fmt.Sprintf(propertyURL, os.Args[1])
+ }
+ src, err := parse(mainURL, flags["emojis"], includeGeneralCategory)
+ if err != nil {
+ log.Fatal(err)
+ }
+
+ // Format the Go code.
+ formatted, err := format.Source([]byte(src))
+ if err != nil {
+ log.Fatal("gofmt:", err)
+ }
+
+ // Save it to the (local) target file.
+ log.Print("Writing to ", os.Args[2])
+ if err := ioutil.WriteFile(os.Args[2], formatted, 0644); err != nil {
+ log.Fatal(err)
+ }
+}
+
+// parse parses the Unicode Properties text files located at the given URLs and
+// returns their equivalent Go source code to be used in the uniseg package. If
+// "emojiProperty" is not an empty string, emoji code points for that emoji
+// property (e.g. "Extended_Pictographic") will be included. In those cases, you
+// may pass an empty "propertyURL" to skip parsing the main properties file. If
+// "includeGeneralCategory" is true, the Unicode General Category property will
+// be extracted from the comments and included in the output.
+func parse(propertyURL, emojiProperty string, includeGeneralCategory bool) (string, error) {
+ if propertyURL == "" && emojiProperty == "" {
+ return "", errors.New("no properties to parse")
+ }
+
+ // Temporary buffer to hold properties.
+ var properties [][4]string
+
+ // Open the first URL.
+ if propertyURL != "" {
+ log.Printf("Parsing %s", propertyURL)
+ res, err := http.Get(propertyURL)
+ if err != nil {
+ return "", err
+ }
+ in1 := res.Body
+ defer in1.Close()
+
+ // Parse it.
+ scanner := bufio.NewScanner(in1)
+ num := 0
+ for scanner.Scan() {
+ num++
+ line := strings.TrimSpace(scanner.Text())
+
+ // Skip comments and empty lines.
+ if strings.HasPrefix(line, "#") || line == "" {
+ continue
+ }
+
+ // Everything else must be a code point range, a property and a comment.
+ from, to, property, comment, err := parseProperty(line)
+ if err != nil {
+ return "", fmt.Errorf("%s line %d: %v", os.Args[4], num, err)
+ }
+ properties = append(properties, [4]string{from, to, property, comment})
+ }
+ if err := scanner.Err(); err != nil {
+ return "", err
+ }
+ }
+
+ // Open the second URL.
+ if emojiProperty != "" {
+ log.Printf("Parsing %s", emojiURL)
+ res, err := http.Get(emojiURL)
+ if err != nil {
+ return "", err
+ }
+ in2 := res.Body
+ defer in2.Close()
+
+ // Parse it.
+ scanner := bufio.NewScanner(in2)
+ num := 0
+ for scanner.Scan() {
+ num++
+ line := scanner.Text()
+
+ // Skip comments, empty lines, and everything not containing
+ // "Extended_Pictographic".
+ if strings.HasPrefix(line, "#") || line == "" || !strings.Contains(line, emojiProperty) {
+ continue
+ }
+
+ // Everything else must be a code point range, a property and a comment.
+ from, to, property, comment, err := parseProperty(line)
+ if err != nil {
+ return "", fmt.Errorf("emojis line %d: %v", num, err)
+ }
+ properties = append(properties, [4]string{from, to, property, comment})
+ }
+ if err := scanner.Err(); err != nil {
+ return "", err
+ }
+ }
+
+ // Avoid overflow during binary search.
+ if len(properties) >= 1<<31 {
+ return "", errors.New("too many properties")
+ }
+
+ // Sort properties.
+ sort.Slice(properties, func(i, j int) bool {
+ left, _ := strconv.ParseUint(properties[i][0], 16, 64)
+ right, _ := strconv.ParseUint(properties[j][0], 16, 64)
+ return left < right
+ })
+
+ // Header.
+ var (
+ buf bytes.Buffer
+ emojiComment string
+ )
+ columns := 3
+ if includeGeneralCategory {
+ columns = 4
+ }
+ if emojiURL != "" {
+ emojiComment = `
+// and
+// ` + emojiURL + `
+// ("Extended_Pictographic" only)`
+ }
+ buf.WriteString(`// Code generated via go generate from gen_properties.go. DO NOT EDIT.
+
+package uniseg
+
+// ` + os.Args[3] + ` are taken from
+// ` + propertyURL + emojiComment + `
+// on ` + time.Now().Format("January 2, 2006") + `. See https://www.unicode.org/license.html for the Unicode
+// license agreement.
+var ` + os.Args[3] + ` = [][` + strconv.Itoa(columns) + `]int{
+ `)
+
+ // Properties.
+ for _, prop := range properties {
+ if includeGeneralCategory {
+ generalCategory := "gc" + prop[3][:2]
+ if generalCategory == "gcL&" {
+ generalCategory = "gcLC"
+ }
+ prop[3] = prop[3][3:]
+ fmt.Fprintf(&buf, "{0x%s,0x%s,%s,%s}, // %s\n", prop[0], prop[1], translateProperty("pr", prop[2]), generalCategory, prop[3])
+ } else {
+ fmt.Fprintf(&buf, "{0x%s,0x%s,%s}, // %s\n", prop[0], prop[1], translateProperty("pr", prop[2]), prop[3])
+ }
+ }
+
+ // Tail.
+ buf.WriteString("}")
+
+ return buf.String(), nil
+}
+
+// parseProperty parses a line of the Unicode properties text file containing a
+// property for a code point range and returns it along with its comment.
+func parseProperty(line string) (from, to, property, comment string, err error) {
+ fields := propertyPattern.FindStringSubmatch(line)
+ if fields == nil {
+ err = errors.New("no property found")
+ return
+ }
+ from = fields[1]
+ to = fields[3]
+ if to == "" {
+ to = from
+ }
+ property = fields[4]
+ comment = fields[5]
+ return
+}
+
+// translateProperty translates a property name as used in the Unicode data file
+// to a variable used in the Go code.
+func translateProperty(prefix, property string) string {
+ return prefix + strings.ReplaceAll(property, "_", "")
+}