diff options
| author | 2025-01-14 13:10:39 +0000 | |
|---|---|---|
| committer | 2025-01-14 13:10:39 +0000 | |
| commit | 4d423102c14de9e9328f1852db539d9561a3cad9 (patch) | |
| tree | 6df5905f53ad7eadbfa9840939989253bfb4b199 /vendor/github.com/gabriel-vasile | |
| parent | [bugfix] migration to cleanup dropped status edits (#3637) (diff) | |
| download | gotosocial-4d423102c14de9e9328f1852db539d9561a3cad9.tar.xz | |
[chore]: Bump github.com/gin-contrib/gzip from 1.0.1 to 1.1.0 (#3639)
Bumps [github.com/gin-contrib/gzip](https://github.com/gin-contrib/gzip) from 1.0.1 to 1.1.0.
- [Release notes](https://github.com/gin-contrib/gzip/releases)
- [Changelog](https://github.com/gin-contrib/gzip/blob/master/.goreleaser.yaml)
- [Commits](https://github.com/gin-contrib/gzip/compare/v1.0.1...v1.1.0)
---
updated-dependencies:
- dependency-name: github.com/gin-contrib/gzip
dependency-type: direct:production
update-type: version-update:semver-minor
...
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Diffstat (limited to 'vendor/github.com/gabriel-vasile')
15 files changed, 321 insertions, 219 deletions
diff --git a/vendor/github.com/gabriel-vasile/mimetype/LICENSE b/vendor/github.com/gabriel-vasile/mimetype/LICENSE index 6aac070c7..13b61daa5 100644 --- a/vendor/github.com/gabriel-vasile/mimetype/LICENSE +++ b/vendor/github.com/gabriel-vasile/mimetype/LICENSE @@ -1,6 +1,6 @@ MIT License -Copyright (c) 2018-2020 Gabriel Vasile +Copyright (c) 2018 Gabriel Vasile Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/vendor/github.com/gabriel-vasile/mimetype/README.md b/vendor/github.com/gabriel-vasile/mimetype/README.md index 231b29190..aa88b4bda 100644 --- a/vendor/github.com/gabriel-vasile/mimetype/README.md +++ b/vendor/github.com/gabriel-vasile/mimetype/README.md @@ -16,9 +16,6 @@ <a href="https://goreportcard.com/report/github.com/gabriel-vasile/mimetype"> <img alt="Go report card" src="https://goreportcard.com/badge/github.com/gabriel-vasile/mimetype"> </a> - <a href="https://codecov.io/gh/gabriel-vasile/mimetype"> - <img alt="Code coverage" src="https://codecov.io/gh/gabriel-vasile/mimetype/branch/master/graph/badge.svg?token=qcfJF1kkl2"/> - </a> <a href="LICENSE"> <img alt="License" src="https://img.shields.io/badge/License-MIT-green.svg"> </a> @@ -84,7 +81,7 @@ To prevent loading entire files into memory, when detecting from a or from a [file](https://pkg.go.dev/github.com/gabriel-vasile/mimetype#DetectFile) **mimetype** limits itself to reading only the header of the input. <div align="center"> - <img alt="structure" src="https://github.com/gabriel-vasile/mimetype/blob/420a05228c6a6efbb6e6f080168a25663414ff36/mimetype.gif?raw=true" width="88%"> + <img alt="how project is structured" src="https://raw.githubusercontent.com/gabriel-vasile/mimetype/master/testdata/gif.gif" width="88%"> </div> ## Performance diff --git a/vendor/github.com/gabriel-vasile/mimetype/internal/json/json.go b/vendor/github.com/gabriel-vasile/mimetype/internal/json/json.go index ee39349ae..5b2ecee44 100644 --- a/vendor/github.com/gabriel-vasile/mimetype/internal/json/json.go +++ b/vendor/github.com/gabriel-vasile/mimetype/internal/json/json.go @@ -34,6 +34,7 @@ package json import ( "fmt" + "sync" ) type ( @@ -73,10 +74,31 @@ type ( } ) +var scannerPool = sync.Pool{ + New: func() any { + return &scanner{} + }, +} + +func newScanner() *scanner { + s := scannerPool.Get().(*scanner) + s.reset() + return s +} + +func freeScanner(s *scanner) { + // Avoid hanging on to too much memory in extreme cases. + if len(s.parseState) > 1024 { + s.parseState = nil + } + scannerPool.Put(s) +} + // Scan returns the number of bytes scanned and if there was any error // in trying to reach the end of data. func Scan(data []byte) (int, error) { - s := &scanner{} + s := newScanner() + defer freeScanner(s) _ = checkValid(data, s) return s.index, s.err } @@ -84,7 +106,6 @@ func Scan(data []byte) (int, error) { // checkValid verifies that data is valid JSON-encoded data. // scan is passed in for use by checkValid to avoid an allocation. func checkValid(data []byte, scan *scanner) error { - scan.reset() for _, c := range data { scan.index++ if scan.step(scan, c) == scanError { @@ -105,6 +126,8 @@ func (s *scanner) reset() { s.step = stateBeginValue s.parseState = s.parseState[0:0] s.err = nil + s.endTop = false + s.index = 0 } // eof tells the scanner that the end of input has been reached. diff --git a/vendor/github.com/gabriel-vasile/mimetype/internal/magic/archive.go b/vendor/github.com/gabriel-vasile/mimetype/internal/magic/archive.go index fec11f080..b59042c6f 100644 --- a/vendor/github.com/gabriel-vasile/mimetype/internal/magic/archive.go +++ b/vendor/github.com/gabriel-vasile/mimetype/internal/magic/archive.go @@ -74,51 +74,85 @@ func CRX(raw []byte, limit uint32) bool { } // Tar matches a (t)ape (ar)chive file. +// Tar files are divided into 512 bytes records. First record contains a 257 +// bytes header padded with NUL. func Tar(raw []byte, _ uint32) bool { - // The "magic" header field for files in in UStar (POSIX IEEE P1003.1) archives - // has the prefix "ustar". The values of the remaining bytes in this field vary - // by archiver implementation. - if len(raw) >= 512 && bytes.HasPrefix(raw[257:], []byte{0x75, 0x73, 0x74, 0x61, 0x72}) { - return true + const sizeRecord = 512 + + // The structure of a tar header: + // type TarHeader struct { + // Name [100]byte + // Mode [8]byte + // Uid [8]byte + // Gid [8]byte + // Size [12]byte + // Mtime [12]byte + // Chksum [8]byte + // Linkflag byte + // Linkname [100]byte + // Magic [8]byte + // Uname [32]byte + // Gname [32]byte + // Devmajor [8]byte + // Devminor [8]byte + // } + + if len(raw) < sizeRecord { + return false } + raw = raw[:sizeRecord] - if len(raw) < 256 { + // First 100 bytes of the header represent the file name. + // Check if file looks like Gentoo GLEP binary package. + if bytes.Contains(raw[:100], []byte("/gpkg-1\x00")) { return false } - // The older v7 format has no "magic" field, and therefore must be identified - // with heuristics based on legal ranges of values for other header fields: - // https://www.nationalarchives.gov.uk/PRONOM/Format/proFormatSearch.aspx?status=detailReport&id=385&strPageToDisplay=signatures - rules := []struct { - min, max uint8 - i int - }{ - {0x21, 0xEF, 0}, - {0x30, 0x37, 105}, - {0x20, 0x37, 106}, - {0x00, 0x00, 107}, - {0x30, 0x37, 113}, - {0x20, 0x37, 114}, - {0x00, 0x00, 115}, - {0x30, 0x37, 121}, - {0x20, 0x37, 122}, - {0x00, 0x00, 123}, - {0x30, 0x37, 134}, - {0x30, 0x37, 146}, - {0x30, 0x37, 153}, - {0x00, 0x37, 154}, + // Get the checksum recorded into the file. + recsum := tarParseOctal(raw[148:156]) + if recsum == -1 { + return false } - for _, r := range rules { - if raw[r.i] < r.min || raw[r.i] > r.max { - return false + sum1, sum2 := tarChksum(raw) + return recsum == sum1 || recsum == sum2 +} + +// tarParseOctal converts octal string to decimal int. +func tarParseOctal(b []byte) int64 { + // Because unused fields are filled with NULs, we need to skip leading NULs. + // Fields may also be padded with spaces or NULs. + // So we remove leading and trailing NULs and spaces to be sure. + b = bytes.Trim(b, " \x00") + + if len(b) == 0 { + return -1 + } + ret := int64(0) + for _, b := range b { + if b == 0 { + break + } + if !(b >= '0' && b <= '7') { + return -1 } + ret = (ret << 3) | int64(b-'0') } + return ret +} - for _, i := range []uint8{135, 147, 155} { - if raw[i] != 0x00 && raw[i] != 0x20 { - return false +// tarChksum computes the checksum for the header block b. +// The actual checksum is written to same b block after it has been calculated. +// Before calculation the bytes from b reserved for checksum have placeholder +// value of ASCII space 0x20. +// POSIX specifies a sum of the unsigned byte values, but the Sun tar used +// signed byte values. We compute and return both. +func tarChksum(b []byte) (unsigned, signed int64) { + for i, c := range b { + if 148 <= i && i < 156 { + c = ' ' // Treat the checksum field itself as all spaces. } + unsigned += int64(c) + signed += int64(int8(c)) } - - return true + return unsigned, signed } diff --git a/vendor/github.com/gabriel-vasile/mimetype/internal/magic/binary.go b/vendor/github.com/gabriel-vasile/mimetype/internal/magic/binary.go index f1e944987..769732018 100644 --- a/vendor/github.com/gabriel-vasile/mimetype/internal/magic/binary.go +++ b/vendor/github.com/gabriel-vasile/mimetype/internal/magic/binary.go @@ -21,6 +21,10 @@ var ( SWF = prefix([]byte("CWS"), []byte("FWS"), []byte("ZWS")) // Torrent has bencoded text in the beginning. Torrent = prefix([]byte("d8:announce")) + // PAR1 matches a parquet file. + Par1 = prefix([]byte{0x50, 0x41, 0x52, 0x31}) + // CBOR matches a Concise Binary Object Representation https://cbor.io/ + CBOR = prefix([]byte{0xD9, 0xD9, 0xF7}) ) // Java bytecode and Mach-O binaries share the same magic number. @@ -32,7 +36,7 @@ func classOrMachOFat(in []byte) bool { return false } - return bytes.HasPrefix(in, []byte{0xCA, 0xFE, 0xBA, 0xBE}) + return binary.BigEndian.Uint32(in) == macho.MagicFat } // Class matches a java class file. @@ -42,7 +46,7 @@ func Class(raw []byte, limit uint32) bool { // MachO matches Mach-O binaries format. func MachO(raw []byte, limit uint32) bool { - if classOrMachOFat(raw) && raw[7] < 20 { + if classOrMachOFat(raw) && raw[7] < 0x14 { return true } @@ -154,11 +158,11 @@ func Marc(raw []byte, limit uint32) bool { // the GL transmission Format (glTF). // GLB uses little endian and its header structure is as follows: // -// <-- 12-byte header --> -// | magic | version | length | -// | (uint32) | (uint32) | (uint32) | -// | \x67\x6C\x54\x46 | \x01\x00\x00\x00 | ... | -// | g l T F | 1 | ... | +// <-- 12-byte header --> +// | magic | version | length | +// | (uint32) | (uint32) | (uint32) | +// | \x67\x6C\x54\x46 | \x01\x00\x00\x00 | ... | +// | g l T F | 1 | ... | // // Visit [glTF specification] and [IANA glTF entry] for more details. // @@ -170,14 +174,15 @@ var Glb = prefix([]byte("\x67\x6C\x54\x46\x02\x00\x00\x00"), // TzIf matches a Time Zone Information Format (TZif) file. // See more: https://tools.ietf.org/id/draft-murchison-tzdist-tzif-00.html#rfc.section.3 // Its header structure is shown below: -// +---------------+---+ -// | magic (4) | <-+-- version (1) -// +---------------+---+---------------------------------------+ -// | [unused - reserved for future use] (15) | -// +---------------+---------------+---------------+-----------+ -// | isutccnt (4) | isstdcnt (4) | leapcnt (4) | -// +---------------+---------------+---------------+ -// | timecnt (4) | typecnt (4) | charcnt (4) | +// +// +---------------+---+ +// | magic (4) | <-+-- version (1) +// +---------------+---+---------------------------------------+ +// | [unused - reserved for future use] (15) | +// +---------------+---------------+---------------+-----------+ +// | isutccnt (4) | isstdcnt (4) | leapcnt (4) | +// +---------------+---------------+---------------+ +// | timecnt (4) | typecnt (4) | charcnt (4) | func TzIf(raw []byte, limit uint32) bool { // File is at least 44 bytes (header size). if len(raw) < 44 { diff --git a/vendor/github.com/gabriel-vasile/mimetype/internal/magic/ftyp.go b/vendor/github.com/gabriel-vasile/mimetype/internal/magic/ftyp.go index 6575b4aec..ac727139e 100644 --- a/vendor/github.com/gabriel-vasile/mimetype/internal/magic/ftyp.go +++ b/vendor/github.com/gabriel-vasile/mimetype/internal/magic/ftyp.go @@ -1,22 +1,14 @@ package magic -import "bytes" +import ( + "bytes" +) var ( // AVIF matches an AV1 Image File Format still or animated. // Wikipedia page seems outdated listing image/avif-sequence for animations. // https://github.com/AOMediaCodec/av1-avif/issues/59 AVIF = ftyp([]byte("avif"), []byte("avis")) - // Mp4 matches an MP4 file. - Mp4 = ftyp( - []byte("avc1"), []byte("dash"), []byte("iso2"), []byte("iso3"), - []byte("iso4"), []byte("iso5"), []byte("iso6"), []byte("isom"), - []byte("mmp4"), []byte("mp41"), []byte("mp42"), []byte("mp4v"), - []byte("mp71"), []byte("MSNV"), []byte("NDAS"), []byte("NDSC"), - []byte("NSDC"), []byte("NSDH"), []byte("NDSM"), []byte("NDSP"), - []byte("NDSS"), []byte("NDXC"), []byte("NDXH"), []byte("NDXM"), - []byte("NDXP"), []byte("NDXS"), []byte("F4V "), []byte("F4P "), - ) // ThreeGP matches a 3GPP file. ThreeGP = ftyp( []byte("3gp1"), []byte("3gp2"), []byte("3gp3"), []byte("3gp4"), @@ -53,6 +45,17 @@ var ( Heif = ftyp([]byte("mif1"), []byte("heim"), []byte("heis"), []byte("avic")) // HeifSequence matches a High Efficiency Image File Format (HEIF) file sequence. HeifSequence = ftyp([]byte("msf1"), []byte("hevm"), []byte("hevs"), []byte("avcs")) + // Mj2 matches a Motion JPEG 2000 file: https://en.wikipedia.org/wiki/Motion_JPEG_2000. + Mj2 = ftyp([]byte("mj2s"), []byte("mjp2"), []byte("MFSM"), []byte("MGSV")) + // Dvb matches a Digital Video Broadcasting file: https://dvb.org. + // https://cconcolato.github.io/mp4ra/filetype.html + // https://github.com/file/file/blob/512840337ead1076519332d24fefcaa8fac36e06/magic/Magdir/animation#L135-L154 + Dvb = ftyp( + []byte("dby1"), []byte("dsms"), []byte("dts1"), []byte("dts2"), + []byte("dts3"), []byte("dxo "), []byte("dmb1"), []byte("dmpf"), + []byte("drc1"), []byte("dv1a"), []byte("dv1b"), []byte("dv2a"), + []byte("dv2b"), []byte("dv3a"), []byte("dv3b"), []byte("dvr1"), + []byte("dvt1"), []byte("emsg")) // TODO: add support for remaining video formats at ftyps.com. ) @@ -86,3 +89,21 @@ func QuickTime(raw []byte, _ uint32) bool { } return bytes.Equal(raw[:8], []byte("\x00\x00\x00\x08wide")) } + +// Mp4 detects an .mp4 file. Mp4 detections only does a basic ftyp check. +// Mp4 has many registered and unregistered code points so it's hard to keep track +// of all. Detection will default on video/mp4 for all ftyp files. +// ISO_IEC_14496-12 is the specification for the iso container. +func Mp4(raw []byte, _ uint32) bool { + if len(raw) < 12 { + return false + } + // ftyps are made out of boxes. The first 4 bytes of the box represent + // its size in big-endian uint32. First box is the ftyp box and it is small + // in size. Check most significant byte is 0 to filter out false positive + // text files that happen to contain the string "ftyp" at index 4. + if raw[0] != 0 { + return false + } + return bytes.Equal(raw[4:8], []byte("ftyp")) +} diff --git a/vendor/github.com/gabriel-vasile/mimetype/internal/magic/magic.go b/vendor/github.com/gabriel-vasile/mimetype/internal/magic/magic.go index 34b84f401..a34c60984 100644 --- a/vendor/github.com/gabriel-vasile/mimetype/internal/magic/magic.go +++ b/vendor/github.com/gabriel-vasile/mimetype/internal/magic/magic.go @@ -154,7 +154,7 @@ func ftyp(sigs ...[]byte) Detector { return false } for _, s := range sigs { - if bytes.Equal(raw[4:12], append([]byte("ftyp"), s...)) { + if bytes.Equal(raw[8:12], s) { return true } } @@ -239,3 +239,13 @@ func min(a, b int) int { } return b } + +type readBuf []byte + +func (b *readBuf) advance(n int) bool { + if n < 0 || len(*b) < n { + return false + } + *b = (*b)[n:] + return true +} diff --git a/vendor/github.com/gabriel-vasile/mimetype/internal/magic/ms_office.go b/vendor/github.com/gabriel-vasile/mimetype/internal/magic/ms_office.go index 5964ce596..7d60e22e2 100644 --- a/vendor/github.com/gabriel-vasile/mimetype/internal/magic/ms_office.go +++ b/vendor/github.com/gabriel-vasile/mimetype/internal/magic/ms_office.go @@ -5,58 +5,19 @@ import ( "encoding/binary" ) -var ( - xlsxSigFiles = []string{ - "xl/worksheets/", - "xl/drawings/", - "xl/theme/", - "xl/_rels/", - "xl/styles.xml", - "xl/workbook.xml", - "xl/sharedStrings.xml", - } - docxSigFiles = []string{ - "word/media/", - "word/_rels/document.xml.rels", - "word/document.xml", - "word/styles.xml", - "word/fontTable.xml", - "word/settings.xml", - "word/numbering.xml", - "word/header", - "word/footer", - } - pptxSigFiles = []string{ - "ppt/slides/", - "ppt/media/", - "ppt/slideLayouts/", - "ppt/theme/", - "ppt/slideMasters/", - "ppt/tags/", - "ppt/notesMasters/", - "ppt/_rels/", - "ppt/handoutMasters/", - "ppt/notesSlides/", - "ppt/presentation.xml", - "ppt/tableStyles.xml", - "ppt/presProps.xml", - "ppt/viewProps.xml", - } -) - // Xlsx matches a Microsoft Excel 2007 file. func Xlsx(raw []byte, limit uint32) bool { - return zipContains(raw, xlsxSigFiles...) + return zipContains(raw, []byte("xl/"), true) } // Docx matches a Microsoft Word 2007 file. func Docx(raw []byte, limit uint32) bool { - return zipContains(raw, docxSigFiles...) + return zipContains(raw, []byte("word/"), true) } // Pptx matches a Microsoft PowerPoint 2007 file. func Pptx(raw []byte, limit uint32) bool { - return zipContains(raw, pptxSigFiles...) + return zipContains(raw, []byte("ppt/"), true) } // Ole matches an Open Linking and Embedding file. diff --git a/vendor/github.com/gabriel-vasile/mimetype/internal/magic/text.go b/vendor/github.com/gabriel-vasile/mimetype/internal/magic/text.go index e2a03caf5..cf6446397 100644 --- a/vendor/github.com/gabriel-vasile/mimetype/internal/magic/text.go +++ b/vendor/github.com/gabriel-vasile/mimetype/internal/magic/text.go @@ -1,7 +1,6 @@ package magic import ( - "bufio" "bytes" "strings" "time" @@ -121,7 +120,7 @@ var ( []byte("/usr/bin/env wish"), ) // Rtf matches a Rich Text Format file. - Rtf = prefix([]byte("{\\rtf1")) + Rtf = prefix([]byte("{\\rtf")) ) // Text matches a plain text file. @@ -234,9 +233,10 @@ func GeoJSON(raw []byte, limit uint32) bool { // types. func NdJSON(raw []byte, limit uint32) bool { lCount, hasObjOrArr := 0, false - sc := bufio.NewScanner(dropLastLine(raw, limit)) - for sc.Scan() { - l := sc.Bytes() + raw = dropLastLine(raw, limit) + var l []byte + for len(raw) != 0 { + l, raw = scanLine(raw) // Empty lines are allowed in NDJSON. if l = trimRWS(trimLWS(l)); len(l) == 0 { continue @@ -301,20 +301,15 @@ func Svg(raw []byte, limit uint32) bool { } // Srt matches a SubRip file. -func Srt(in []byte, _ uint32) bool { - s := bufio.NewScanner(bytes.NewReader(in)) - if !s.Scan() { - return false - } - // First line must be 1. - if s.Text() != "1" { - return false - } +func Srt(raw []byte, _ uint32) bool { + line, raw := scanLine(raw) - if !s.Scan() { + // First line must be 1. + if string(line) != "1" { return false } - secondLine := s.Text() + line, raw = scanLine(raw) + secondLine := string(line) // Timestamp format (e.g: 00:02:16,612 --> 00:02:19,376) limits secondLine // length to exactly 29 characters. if len(secondLine) != 29 { @@ -325,14 +320,12 @@ func Srt(in []byte, _ uint32) bool { if strings.Contains(secondLine, ".") { return false } - // For Go <1.17, comma is not recognised as a decimal separator by `time.Parse`. - secondLine = strings.ReplaceAll(secondLine, ",", ".") // Second line must be a time range. ts := strings.Split(secondLine, " --> ") if len(ts) != 2 { return false } - const layout = "15:04:05.000" + const layout = "15:04:05,000" t0, err := time.Parse(layout, ts[0]) if err != nil { return false @@ -345,8 +338,9 @@ func Srt(in []byte, _ uint32) bool { return false } + line, _ = scanLine(raw) // A third line must exist and not be empty. This is the actual subtitle text. - return s.Scan() && len(s.Bytes()) != 0 + return len(line) != 0 } // Vtt matches a Web Video Text Tracks (WebVTT) file. See @@ -373,3 +367,15 @@ func Vtt(raw []byte, limit uint32) bool { return bytes.Equal(raw, []byte{0xEF, 0xBB, 0xBF, 0x57, 0x45, 0x42, 0x56, 0x54, 0x54}) || // UTF-8 BOM and "WEBVTT" bytes.Equal(raw, []byte{0x57, 0x45, 0x42, 0x56, 0x54, 0x54}) // "WEBVTT" } + +// dropCR drops a terminal \r from the data. +func dropCR(data []byte) []byte { + if len(data) > 0 && data[len(data)-1] == '\r' { + return data[0 : len(data)-1] + } + return data +} +func scanLine(b []byte) (line, remainder []byte) { + line, remainder, _ = bytes.Cut(b, []byte("\n")) + return dropCR(line), remainder +} diff --git a/vendor/github.com/gabriel-vasile/mimetype/internal/magic/text_csv.go b/vendor/github.com/gabriel-vasile/mimetype/internal/magic/text_csv.go index 84ed64928..6083ba8c0 100644 --- a/vendor/github.com/gabriel-vasile/mimetype/internal/magic/text_csv.go +++ b/vendor/github.com/gabriel-vasile/mimetype/internal/magic/text_csv.go @@ -1,12 +1,28 @@ package magic import ( + "bufio" "bytes" "encoding/csv" "errors" "io" + "sync" ) +// A bufio.Reader pool to alleviate problems with memory allocations. +var readerPool = sync.Pool{ + New: func() any { + // Initiate with empty source reader. + return bufio.NewReader(nil) + }, +} + +func newReader(r io.Reader) *bufio.Reader { + br := readerPool.Get().(*bufio.Reader) + br.Reset(r) + return br +} + // Csv matches a comma-separated values file. func Csv(raw []byte, limit uint32) bool { return sv(raw, ',', limit) @@ -18,7 +34,11 @@ func Tsv(raw []byte, limit uint32) bool { } func sv(in []byte, comma rune, limit uint32) bool { - r := csv.NewReader(dropLastLine(in, limit)) + in = dropLastLine(in, limit) + + br := newReader(bytes.NewReader(in)) + defer readerPool.Put(br) + r := csv.NewReader(br) r.Comma = comma r.ReuseRecord = true r.LazyQuotes = true @@ -44,20 +64,14 @@ func sv(in []byte, comma rune, limit uint32) bool { // mimetype limits itself to ReadLimit bytes when performing a detection. // This means, for file formats like CSV for NDJSON, the last line of the input // can be an incomplete line. -func dropLastLine(b []byte, cutAt uint32) io.Reader { - if cutAt == 0 { - return bytes.NewReader(b) +func dropLastLine(b []byte, readLimit uint32) []byte { + if readLimit == 0 || uint32(len(b)) < readLimit { + return b } - if uint32(len(b)) >= cutAt { - for i := cutAt - 1; i > 0; i-- { - if b[i] == '\n' { - return bytes.NewReader(b[:i]) - } + for i := len(b) - 1; i > 0; i-- { + if b[i] == '\n' { + return b[:i] } - - // No newline was found between the 0 index and cutAt. - return bytes.NewReader(b[:cutAt]) } - - return bytes.NewReader(b) + return b } diff --git a/vendor/github.com/gabriel-vasile/mimetype/internal/magic/zip.go b/vendor/github.com/gabriel-vasile/mimetype/internal/magic/zip.go index dabee947b..f866113fd 100644 --- a/vendor/github.com/gabriel-vasile/mimetype/internal/magic/zip.go +++ b/vendor/github.com/gabriel-vasile/mimetype/internal/magic/zip.go @@ -3,7 +3,6 @@ package magic import ( "bytes" "encoding/binary" - "strings" ) var ( @@ -43,50 +42,71 @@ func Zip(raw []byte, limit uint32) bool { // Jar matches a Java archive file. func Jar(raw []byte, limit uint32) bool { - return zipContains(raw, "META-INF/MANIFEST.MF") + return zipContains(raw, []byte("META-INF/MANIFEST.MF"), false) } -// zipTokenizer holds the source zip file and scanned index. -type zipTokenizer struct { - in []byte - i int // current index -} +func zipContains(raw, sig []byte, msoCheck bool) bool { + b := readBuf(raw) + pk := []byte("PK\003\004") + if len(b) < 0x1E { + return false + } -// next returns the next file name from the zip headers. -// https://web.archive.org/web/20191129114319/https://users.cs.jmu.edu/buchhofp/forensics/formats/pkzip.html -func (t *zipTokenizer) next() (fileName string) { - if t.i > len(t.in) { - return + if !b.advance(0x1E) { + return false } - in := t.in[t.i:] - // pkSig is the signature of the zip local file header. - pkSig := []byte("PK\003\004") - pkIndex := bytes.Index(in, pkSig) - // 30 is the offset of the file name in the header. - fNameOffset := pkIndex + 30 - // end if signature not found or file name offset outside of file. - if pkIndex == -1 || fNameOffset > len(in) { - return + if bytes.HasPrefix(b, sig) { + return true } - fNameLen := int(binary.LittleEndian.Uint16(in[pkIndex+26 : pkIndex+28])) - if fNameLen <= 0 || fNameOffset+fNameLen > len(in) { - return - } - t.i += fNameOffset + fNameLen - return string(in[fNameOffset : fNameOffset+fNameLen]) -} + if msoCheck { + skipFiles := [][]byte{ + []byte("[Content_Types].xml"), + []byte("_rels/.rels"), + []byte("docProps"), + []byte("customXml"), + []byte("[trash]"), + } -// zipContains returns true if the zip file headers from in contain any of the paths. -func zipContains(in []byte, paths ...string) bool { - t := zipTokenizer{in: in} - for i, tok := 0, t.next(); tok != ""; i, tok = i+1, t.next() { - for p := range paths { - if strings.HasPrefix(tok, paths[p]) { - return true + hasSkipFile := false + for _, sf := range skipFiles { + if bytes.HasPrefix(b, sf) { + hasSkipFile = true + break } } + if !hasSkipFile { + return false + } + } + + searchOffset := binary.LittleEndian.Uint32(raw[18:]) + 49 + if !b.advance(int(searchOffset)) { + return false } + nextHeader := bytes.Index(raw[searchOffset:], pk) + if !b.advance(nextHeader) { + return false + } + if bytes.HasPrefix(b, sig) { + return true + } + + for i := 0; i < 4; i++ { + if !b.advance(0x1A) { + return false + } + nextHeader = bytes.Index(b, pk) + if nextHeader == -1 { + return false + } + if !b.advance(nextHeader + 0x1E) { + return false + } + if bytes.HasPrefix(b, sig) { + return true + } + } return false } diff --git a/vendor/github.com/gabriel-vasile/mimetype/mimetype.gif b/vendor/github.com/gabriel-vasile/mimetype/mimetype.gif Binary files differdeleted file mode 100644 index c3e808767..000000000 --- a/vendor/github.com/gabriel-vasile/mimetype/mimetype.gif +++ /dev/null diff --git a/vendor/github.com/gabriel-vasile/mimetype/mimetype.go b/vendor/github.com/gabriel-vasile/mimetype/mimetype.go index 1b5909b75..d8d512b80 100644 --- a/vendor/github.com/gabriel-vasile/mimetype/mimetype.go +++ b/vendor/github.com/gabriel-vasile/mimetype/mimetype.go @@ -7,14 +7,15 @@ package mimetype import ( "io" - "io/ioutil" "mime" "os" "sync/atomic" ) +var defaultLimit uint32 = 3072 + // readLimit is the maximum number of bytes from the input used when detecting. -var readLimit uint32 = 3072 +var readLimit uint32 = defaultLimit // Detect returns the MIME type found from the provided byte slice. // @@ -48,7 +49,7 @@ func DetectReader(r io.Reader) (*MIME, error) { // Using atomic because readLimit can be written at the same time in other goroutine. l := atomic.LoadUint32(&readLimit) if l == 0 { - in, err = ioutil.ReadAll(r) + in, err = io.ReadAll(r) if err != nil { return errMIME, err } @@ -103,6 +104,7 @@ func EqualsAny(s string, mimes ...string) bool { // SetLimit sets the maximum number of bytes read from input when detecting the MIME type. // Increasing the limit provides better detection for file formats which store // their magical numbers towards the end of the file: docx, pptx, xlsx, etc. +// During detection data is read in a single block of size limit, i.e. it is not buffered. // A limit of 0 means the whole input file will be used. func SetLimit(limit uint32) { // Using atomic because readLimit can be read at the same time in other goroutine. diff --git a/vendor/github.com/gabriel-vasile/mimetype/supported_mimes.md b/vendor/github.com/gabriel-vasile/mimetype/supported_mimes.md index 5ec6f6b65..a45a3021b 100644 --- a/vendor/github.com/gabriel-vasile/mimetype/supported_mimes.md +++ b/vendor/github.com/gabriel-vasile/mimetype/supported_mimes.md @@ -1,4 +1,4 @@ -## 173 Supported MIME types +## 177 Supported MIME types This file is automatically generated when running tests. Do not edit manually. Extension | MIME type | Aliases @@ -75,21 +75,28 @@ Extension | MIME type | Aliases **.au** | audio/basic | - **.mpeg** | video/mpeg | - **.mov** | video/quicktime | - -**.mqv** | video/quicktime | - **.mp4** | video/mp4 | - -**.webm** | video/webm | audio/webm +**.avif** | image/avif | - **.3gp** | video/3gpp | video/3gp, audio/3gpp **.3g2** | video/3gpp2 | video/3g2, audio/3gpp2 +**.mp4** | audio/mp4 | audio/x-mp4a +**.mqv** | video/quicktime | - +**.m4a** | audio/x-m4a | - +**.m4v** | video/x-m4v | - +**.heic** | image/heic | - +**.heic** | image/heic-sequence | - +**.heif** | image/heif | - +**.heif** | image/heif-sequence | - +**.mj2** | video/mj2 | - +**.dvb** | video/vnd.dvb.file | - +**.webm** | video/webm | audio/webm **.avi** | video/x-msvideo | video/avi, video/msvideo **.flv** | video/x-flv | - **.mkv** | video/x-matroska | - **.asf** | video/x-ms-asf | video/asf, video/x-ms-wmv **.aac** | audio/aac | - **.voc** | audio/x-unknown | - -**.mp4** | audio/mp4 | audio/x-m4a, audio/x-mp4a -**.m4a** | audio/x-m4a | - **.m3u** | application/vnd.apple.mpegurl | audio/mpegurl -**.m4v** | video/x-m4v | - **.rmvb** | application/vnd.rn-realmedia-vbr | - **.gz** | application/gzip | application/x-gzip, application/x-gunzip, application/gzipped, application/gzip-compressed, application/x-gzip-compressed, gzip/document **.class** | application/x-java-applet | - @@ -111,6 +118,7 @@ Extension | MIME type | Aliases **.mobi** | application/x-mobipocket-ebook | - **.lit** | application/x-ms-reader | - **.bpg** | image/bpg | - +**.cbor** | application/cbor | - **.sqlite** | application/vnd.sqlite3 | application/x-sqlite3 **.dwg** | image/vnd.dwg | image/x-dwg, application/acad, application/x-acad, application/autocad_dwg, application/dwg, application/x-dwg, application/x-autocad, drawing/dwg **.nes** | application/vnd.nintendo.snes.rom | - @@ -118,10 +126,6 @@ Extension | MIME type | Aliases **.macho** | application/x-mach-binary | - **.qcp** | audio/qcelp | - **.icns** | image/x-icns | - -**.heic** | image/heic | - -**.heic** | image/heic-sequence | - -**.heif** | image/heif | - -**.heif** | image/heif-sequence | - **.hdr** | image/vnd.radiance | - **.mrc** | application/marc | - **.mdb** | application/x-msaccess | - @@ -138,13 +142,13 @@ Extension | MIME type | Aliases **.pat** | image/x-gimp-pat | - **.gbr** | image/x-gimp-gbr | - **.glb** | model/gltf-binary | - -**.avif** | image/avif | - **.cab** | application/x-installshield | - **.jxr** | image/jxr | image/vnd.ms-photo +**.parquet** | application/vnd.apache.parquet | application/x-parquet **.txt** | text/plain | - **.html** | text/html | - **.svg** | image/svg+xml | - -**.xml** | text/xml | - +**.xml** | text/xml | application/xml **.rss** | application/rss+xml | text/rss **.atom** | application/atom+xml | - **.x3d** | model/x3d+xml | - @@ -159,7 +163,7 @@ Extension | MIME type | Aliases **.xfdf** | application/vnd.adobe.xfdf | - **.owl** | application/owl+xml | - **.php** | text/x-php | - -**.js** | application/javascript | application/x-javascript, text/javascript +**.js** | text/javascript | application/x-javascript, application/javascript **.lua** | text/x-lua | - **.pl** | text/x-perl | - **.py** | text/x-python | text/x-script.python, application/x-python @@ -167,7 +171,7 @@ Extension | MIME type | Aliases **.geojson** | application/geo+json | - **.har** | application/json | - **.ndjson** | application/x-ndjson | - -**.rtf** | text/rtf | - +**.rtf** | text/rtf | application/rtf **.srt** | application/x-subrip | application/x-srt, text/x-srt **.tcl** | text/x-tcl | application/x-tcl **.csv** | text/csv | - diff --git a/vendor/github.com/gabriel-vasile/mimetype/tree.go b/vendor/github.com/gabriel-vasile/mimetype/tree.go index 253bd0064..771e302bc 100644 --- a/vendor/github.com/gabriel-vasile/mimetype/tree.go +++ b/vendor/github.com/gabriel-vasile/mimetype/tree.go @@ -18,14 +18,13 @@ import ( var root = newMIME("application/octet-stream", "", func([]byte, uint32) bool { return true }, xpm, sevenZ, zip, pdf, fdf, ole, ps, psd, p7s, ogg, png, jpg, jxl, jp2, jpx, - jpm, jxs, gif, webp, exe, elf, ar, tar, xar, bz2, fits, tiff, bmp, ico, mp3, flac, - midi, ape, musePack, amr, wav, aiff, au, mpeg, quickTime, mqv, mp4, webM, - threeGP, threeG2, avi, flv, mkv, asf, aac, voc, aMp4, m4a, m3u, m4v, rmvb, - gzip, class, swf, crx, ttf, woff, woff2, otf, ttc, eot, wasm, shx, dbf, dcm, rar, - djvu, mobi, lit, bpg, sqlite3, dwg, nes, lnk, macho, qcp, icns, heic, - heicSeq, heif, heifSeq, hdr, mrc, mdb, accdb, zstd, cab, rpm, xz, lzip, - torrent, cpio, tzif, xcf, pat, gbr, glb, avif, cabIS, jxr, - // Keep text last because it is the slowest check + jpm, jxs, gif, webp, exe, elf, ar, tar, xar, bz2, fits, tiff, bmp, ico, mp3, + flac, midi, ape, musePack, amr, wav, aiff, au, mpeg, quickTime, mp4, webM, + avi, flv, mkv, asf, aac, voc, m3u, rmvb, gzip, class, swf, crx, ttf, woff, + woff2, otf, ttc, eot, wasm, shx, dbf, dcm, rar, djvu, mobi, lit, bpg, cbor, + sqlite3, dwg, nes, lnk, macho, qcp, icns, hdr, mrc, mdb, accdb, zstd, cab, + rpm, xz, lzip, torrent, cpio, tzif, xcf, pat, gbr, glb, cabIS, jxr, parquet, + // Keep text last because it is the slowest check. text, ) @@ -77,18 +76,19 @@ var ( oggAudio = newMIME("audio/ogg", ".oga", magic.OggAudio) oggVideo = newMIME("video/ogg", ".ogv", magic.OggVideo) text = newMIME("text/plain", ".txt", magic.Text, html, svg, xml, php, js, lua, perl, python, json, ndJSON, rtf, srt, tcl, csv, tsv, vCard, iCalendar, warc, vtt) - xml = newMIME("text/xml", ".xml", magic.XML, rss, atom, x3d, kml, xliff, collada, gml, gpx, tcx, amf, threemf, xfdf, owl2) - json = newMIME("application/json", ".json", magic.JSON, geoJSON, har) - har = newMIME("application/json", ".har", magic.HAR) - csv = newMIME("text/csv", ".csv", magic.Csv) - tsv = newMIME("text/tab-separated-values", ".tsv", magic.Tsv) - geoJSON = newMIME("application/geo+json", ".geojson", magic.GeoJSON) - ndJSON = newMIME("application/x-ndjson", ".ndjson", magic.NdJSON) - html = newMIME("text/html", ".html", magic.HTML) - php = newMIME("text/x-php", ".php", magic.Php) - rtf = newMIME("text/rtf", ".rtf", magic.Rtf) - js = newMIME("application/javascript", ".js", magic.Js). - alias("application/x-javascript", "text/javascript") + xml = newMIME("text/xml", ".xml", magic.XML, rss, atom, x3d, kml, xliff, collada, gml, gpx, tcx, amf, threemf, xfdf, owl2). + alias("application/xml") + json = newMIME("application/json", ".json", magic.JSON, geoJSON, har) + har = newMIME("application/json", ".har", magic.HAR) + csv = newMIME("text/csv", ".csv", magic.Csv) + tsv = newMIME("text/tab-separated-values", ".tsv", magic.Tsv) + geoJSON = newMIME("application/geo+json", ".geojson", magic.GeoJSON) + ndJSON = newMIME("application/x-ndjson", ".ndjson", magic.NdJSON) + html = newMIME("text/html", ".html", magic.HTML) + php = newMIME("text/x-php", ".php", magic.Php) + rtf = newMIME("text/rtf", ".rtf", magic.Rtf).alias("application/rtf") + js = newMIME("text/javascript", ".js", magic.Js). + alias("application/x-javascript", "application/javascript") srt = newMIME("application/x-subrip", ".srt", magic.Srt). alias("application/x-srt", "text/x-srt") vtt = newMIME("text/vtt", ".vtt", magic.Vtt) @@ -156,12 +156,14 @@ var ( aac = newMIME("audio/aac", ".aac", magic.AAC) voc = newMIME("audio/x-unknown", ".voc", magic.Voc) aMp4 = newMIME("audio/mp4", ".mp4", magic.AMp4). - alias("audio/x-m4a", "audio/x-mp4a") + alias("audio/x-mp4a") m4a = newMIME("audio/x-m4a", ".m4a", magic.M4a) m3u = newMIME("application/vnd.apple.mpegurl", ".m3u", magic.M3u). alias("audio/mpegurl") m4v = newMIME("video/x-m4v", ".m4v", magic.M4v) - mp4 = newMIME("video/mp4", ".mp4", magic.Mp4) + mj2 = newMIME("video/mj2", ".mj2", magic.Mj2) + dvb = newMIME("video/vnd.dvb.file", ".dvb", magic.Dvb) + mp4 = newMIME("video/mp4", ".mp4", magic.Mp4, avif, threeGP, threeG2, aMp4, mqv, m4a, m4v, heic, heicSeq, heif, heifSeq, mj2, dvb) webM = newMIME("video/webm", ".webm", magic.WebM). alias("audio/webm") mpeg = newMIME("video/mpeg", ".mpeg", magic.Mpeg) @@ -257,4 +259,7 @@ var ( xfdf = newMIME("application/vnd.adobe.xfdf", ".xfdf", magic.Xfdf) glb = newMIME("model/gltf-binary", ".glb", magic.Glb) jxr = newMIME("image/jxr", ".jxr", magic.Jxr).alias("image/vnd.ms-photo") + parquet = newMIME("application/vnd.apache.parquet", ".parquet", magic.Par1). + alias("application/x-parquet") + cbor = newMIME("application/cbor", ".cbor", magic.CBOR) ) |
