+
+## Features
+- fast and precise MIME type and file extension detection
+- long list of [supported MIME types](supported_mimes.md)
+- possibility to [extend](https://pkg.go.dev/github.com/gabriel-vasile/mimetype#example-package-Extend) with other file formats (see the sketch after this list)
+- common file formats are prioritized
+- [text vs. binary files differentiation](https://pkg.go.dev/github.com/gabriel-vasile/mimetype#example-package-TextVsBinary)
+- no external dependencies
+- safe for concurrent usage
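+
+A minimal sketch of extending detection; the `0xCA 0xFE 0xCA 0xFE` signature and the
+`application/x-custom` MIME type below are made up for illustration:
+```go
+package main
+
+import (
+	"bytes"
+	"fmt"
+
+	"github.com/gabriel-vasile/mimetype"
+)
+
+func main() {
+	// Hypothetical detector for a made-up format.
+	custom := func(raw []byte, limit uint32) bool {
+		return bytes.HasPrefix(raw, []byte{0xCA, 0xFE, 0xCA, 0xFE})
+	}
+	mimetype.Extend(custom, "application/x-custom", ".custom")
+
+	mtype := mimetype.Detect([]byte{0xCA, 0xFE, 0xCA, 0xFE, 0x00})
+	fmt.Println(mtype.String(), mtype.Extension())
+}
+```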
+
+## Install
+```bash
+go get github.com/gabriel-vasile/mimetype
+```
+
+## Usage
+```go
+mtype := mimetype.Detect([]byte)
+// OR
+mtype, err := mimetype.DetectReader(io.Reader)
+// OR
+mtype, err := mimetype.DetectFile("/path/to/file")
+fmt.Println(mtype.String(), mtype.Extension())
+```
+See the [runnable Go Playground examples](https://pkg.go.dev/github.com/gabriel-vasile/mimetype#pkg-overview).
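+
+A detected type can also be compared against expected MIME types with `Is`,
+which matches common aliases as well. A short sketch (the file path is only
+illustrative):
+```go
+mtype, err := mimetype.DetectFile("/path/to/upload")
+if err != nil {
+	log.Fatal(err)
+}
+if mtype.Is("application/pdf") {
+	fmt.Println("got a PDF with extension", mtype.Extension())
+}
+```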
+
+Caution: only use libraries like **mimetype** as a last resort. Content type detection
+using magic numbers is slow, inaccurate, and non-standard. Most of the time,
+protocols have methods for specifying such metadata; e.g., the `Content-Type` header
+in HTTP and SMTP.
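+
+In practice this means: use the metadata the protocol already gives you and fall
+back to detection only when it is missing or unhelpful. A rough sketch for an
+HTTP upload handler (the handler itself is illustrative, not part of the library):
+```go
+func handleUpload(w http.ResponseWriter, r *http.Request) {
+	contentType := r.Header.Get("Content-Type")
+	if contentType == "" || contentType == "application/octet-stream" {
+		// No usable metadata from the client; fall back to sniffing.
+		// Note that DetectReader consumes the bytes it reads from r.Body.
+		mtype, err := mimetype.DetectReader(r.Body)
+		if err != nil {
+			http.Error(w, err.Error(), http.StatusBadRequest)
+			return
+		}
+		contentType = mtype.String()
+	}
+	fmt.Fprintln(w, "detected:", contentType)
+}
+```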
+
+## FAQ
+Q: My file is in the list of [supported MIME types](supported_mimes.md) but
+it is not correctly detected. What should I do?
+
+A: Some file formats (often Microsoft Office documents) keep their signatures
+towards the end of the file. Try increasing the number of bytes used for detection
+with:
+```go
+mimetype.SetLimit(1024*1024) // Set limit to 1MB.
+// or
+mimetype.SetLimit(0) // No limit, whole file content used.
+mimetype.DetectFile("file.doc")
+```
+If increasing the limit does not help, please
+[open an issue](https://github.com/gabriel-vasile/mimetype/issues/new?assignees=&labels=&template=mismatched-mime-type-detected.md&title=).
+
+## Tests
+In addition to unit tests,
+[mimetype_tests](https://github.com/gabriel-vasile/mimetype_tests) compares the
+library with the [Unix file utility](https://en.wikipedia.org/wiki/File_(command))
+for around 50 000 sample files. Check the latest comparison results
+[here](https://github.com/gabriel-vasile/mimetype_tests/actions).
+
+## Benchmarks
+Benchmarks for each file format are run when a PR is opened. The results can
+be seen on the [workflows page](https://github.com/gabriel-vasile/mimetype/actions/workflows/benchmark.yml).
+Performance improvements are welcome but correctness is prioritized.
+
+## Structure
+**mimetype** uses a hierarchical structure to keep the MIME type detection logic.
+This reduces the number of calls needed for detecting the file type. The reason
+behind this choice is that there are file formats used as containers for other
+file formats. For example, Microsoft Office files are just zip archives,
+containing specific metadata files. Once a file has been identified as a
+zip, there is no need to check if it is a text file, but it is worth checking if
+it is a Microsoft Office file.
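+
+This hierarchy is exposed through the `Parent` method, so a detected type can be
+walked up to its container formats. A small sketch (the .docx path is illustrative):
+```go
+mtype, err := mimetype.DetectFile("/path/to/report.docx")
+if err != nil {
+	log.Fatal(err)
+}
+for m := mtype; m != nil; m = m.Parent() {
+	fmt.Println(m.String())
+}
+// Expected to print something like:
+// application/vnd.openxmlformats-officedocument.wordprocessingml.document
+// application/zip
+// application/octet-stream
+```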
+
+To prevent loading entire files into memory, when detecting from a
+[reader](https://pkg.go.dev/github.com/gabriel-vasile/mimetype#DetectReader)
+or from a [file](https://pkg.go.dev/github.com/gabriel-vasile/mimetype#DetectFile),
+**mimetype** limits itself to reading only the header of the input.
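+
+Because detection from a reader consumes the header bytes it reads, callers that
+need the full stream afterwards can buffer the header themselves and stitch it back
+together with standard io plumbing. A possible pattern, not part of the library
+(`src` is assumed to be some io.Reader):
+```go
+header := make([]byte, 3072) // 3072 bytes matches the library's default detection limit.
+n, err := io.ReadFull(src, header)
+if err != nil && err != io.ErrUnexpectedEOF && err != io.EOF {
+	log.Fatal(err)
+}
+mtype := mimetype.Detect(header[:n])
+
+// Rebuild a reader containing the already-read header plus the rest of src.
+full := io.MultiReader(bytes.NewReader(header[:n]), src)
+fmt.Println(mtype.String())
+// Pass full to whatever needs the entire content.
+_ = full
+```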
+
+
+
+
+## Contributing
+Contributions are welcome. When submitting a PR for the detection of
+a new file format, please make sure to add a record to the list of test cases
+in [mimetype_test.go](mimetype_test.go). For complex formats, a sample file can
+also be added to the [testdata](testdata) directory.
diff --git a/vendor/github.com/gabriel-vasile/mimetype/internal/charset/charset.go b/vendor/github.com/gabriel-vasile/mimetype/internal/charset/charset.go
new file mode 100644
index 0000000..8c5a05e
--- /dev/null
+++ b/vendor/github.com/gabriel-vasile/mimetype/internal/charset/charset.go
@@ -0,0 +1,283 @@
+package charset
+
+import (
+ "bytes"
+ "unicode/utf8"
+
+ "github.com/gabriel-vasile/mimetype/internal/markup"
+ "github.com/gabriel-vasile/mimetype/internal/scan"
+)
+
+const (
+ F = 0 /* character never appears in text */
+ T = 1 /* character appears in plain ASCII text */
+ I = 2 /* character appears in ISO-8859 text */
+ X = 3 /* character appears in non-ISO extended ASCII (Mac, IBM PC) */
+)
+
+var (
+ boms = []struct {
+ bom []byte
+ enc string
+ }{
+ {[]byte{0xEF, 0xBB, 0xBF}, "utf-8"},
+ {[]byte{0x00, 0x00, 0xFE, 0xFF}, "utf-32be"},
+ {[]byte{0xFF, 0xFE, 0x00, 0x00}, "utf-32le"},
+ {[]byte{0xFE, 0xFF}, "utf-16be"},
+ {[]byte{0xFF, 0xFE}, "utf-16le"},
+ }
+
+ // https://github.com/file/file/blob/fa93fb9f7d21935f1c7644c47d2975d31f12b812/src/encoding.c#L241
+ textChars = [256]byte{
+ /* BEL BS HT LF VT FF CR */
+ F, F, F, F, F, F, F, T, T, T, T, T, T, T, F, F, /* 0x0X */
+ /* ESC */
+ F, F, F, F, F, F, F, F, F, F, F, T, F, F, F, F, /* 0x1X */
+ T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x2X */
+ T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x3X */
+ T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x4X */
+ T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x5X */
+ T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x6X */
+ T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, F, /* 0x7X */
+ /* NEL */
+ X, X, X, X, X, T, X, X, X, X, X, X, X, X, X, X, /* 0x8X */
+ X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, /* 0x9X */
+ I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xaX */
+ I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xbX */
+ I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xcX */
+ I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xdX */
+ I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xeX */
+ I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xfX */
+ }
+)
+
+// FromBOM returns the charset declared in the BOM of content.
+func FromBOM(content []byte) string {
+ for _, b := range boms {
+ if bytes.HasPrefix(content, b.bom) {
+ return b.enc
+ }
+ }
+ return ""
+}
+
+// FromPlain returns the charset of a plain text. It relies on BOM presence
+// and it falls back on checking each byte in content.
+func FromPlain(content []byte) string {
+ if len(content) == 0 {
+ return ""
+ }
+ if cset := FromBOM(content); cset != "" {
+ return cset
+ }
+ origContent := content
+ // Try to detect UTF-8.
+ // First eliminate any partial rune at the end.
+ for i := len(content) - 1; i >= 0 && i > len(content)-4; i-- {
+ b := content[i]
+ if b < 0x80 {
+ break
+ }
+ if utf8.RuneStart(b) {
+ content = content[:i]
+ break
+ }
+ }
+ hasHighBit := false
+ for _, c := range content {
+ if c >= 0x80 {
+ hasHighBit = true
+ break
+ }
+ }
+ if hasHighBit && utf8.Valid(content) {
+ return "utf-8"
+ }
+
+ // ASCII is a subset of UTF8. Follow W3C recommendation and replace with UTF8.
+ if ascii(origContent) {
+ return "utf-8"
+ }
+
+ return latin(origContent)
+}
+
+func latin(content []byte) string {
+ hasControlBytes := false
+ for _, b := range content {
+ t := textChars[b]
+ if t != T && t != I {
+ return ""
+ }
+ if b >= 0x80 && b <= 0x9F {
+ hasControlBytes = true
+ }
+ }
+ // Code range 0x80 to 0x9F is reserved for control characters in ISO-8859-1
+ // (so-called C1 Controls). Windows 1252, however, has printable punctuation
+ // characters in this range.
+ if hasControlBytes {
+ return "windows-1252"
+ }
+ return "iso-8859-1"
+}
+
+func ascii(content []byte) bool {
+ for _, b := range content {
+ if textChars[b] != T {
+ return false
+ }
+ }
+ return true
+}
+
+// FromXML returns the charset of an XML document. It relies on the XML
+// header <?xml version="1.0" encoding="UTF-8"?> and falls back on the plain
+// text content.
+func FromXML(content []byte) string {
+ if cset := fromXML(content); cset != "" {
+ return cset
+ }
+ return FromPlain(content)
+}
+func fromXML(s scan.Bytes) string {
+ xml := []byte("<?xml")
+
+// FromHTML returns the charset of an HTML document. It first looks if a BOM is
+// present and if so uses it to determine the charset. If no BOM is present,
+// it relies on the meta tag <meta charset="UTF-8"> and falls back on the
+// plain text content.
+func FromHTML(content []byte) string {
+ if cset := FromBOM(content); cset != "" {
+ return cset
+ }
+ if cset := fromHTML(content); cset != "" {
+ return cset
+ }
+ return FromPlain(content)
+}
+
+func fromHTML(s scan.Bytes) string {
+ const (
+ dontKnow = iota
+ doNeedPragma
+ doNotNeedPragma
+ )
+ meta := []byte(" 0 && line[n-1] == '\r' {
+ return line[:n-1], false // drop \r at end of line
+ }
+
+ // This line is problematic. The logic from CountFields comes from
+ // encoding/csv.Reader which relies on mutating the input bytes.
+ // https://github.com/golang/go/blob/b3251514531123d7fd007682389bce7428d159a0/src/encoding/csv/reader.go#L275-L279
+ // To avoid mutating the input, we return cutShort. #680
+ if n >= 2 && line[n-2] == '\r' && line[n-1] == '\n' {
+ return line[:n-2], true
+ }
+ return line, false
+}
+
+// CountFields reads one CSV line and counts how many records that line contained.
+// hasMore reports whether there are more lines in the input.
+// collectIndexes makes CountFields return a list of indexes where CSV fields
+// start in the line. These indexes are used to test the correctness against the
+// encoding/csv parser.
+func (r *Parser) CountFields(collectIndexes bool) (fields int, fieldPos []int, hasMore bool) {
+ finished := false
+ var line scan.Bytes
+ cutShort := false
+ for {
+ line, cutShort = r.readLine()
+ if finished {
+ return 0, nil, false
+ }
+ finished = len(r.s) == 0 && len(line) == 0
+ if len(line) == lengthNL(line) {
+ line = nil
+ continue // Skip empty lines.
+ }
+ if len(line) > 0 && line[0] == r.comment {
+ line = nil
+ continue
+ }
+ break
+ }
+
+ indexes := []int{}
+ originalLine := line
+parseField:
+ for {
+ if len(line) == 0 || line[0] != '"' { // non-quoted string field
+ fields++
+ if collectIndexes {
+ indexes = append(indexes, len(originalLine)-len(line))
+ }
+ i := bytes.IndexByte(line, r.comma)
+ if i >= 0 {
+ line.Advance(i + 1) // 1 to get over ending comma
+ continue parseField
+ }
+ break parseField
+ } else { // Quoted string field.
+ if collectIndexes {
+ indexes = append(indexes, len(originalLine)-len(line))
+ }
+ line.Advance(1) // get over starting quote
+ for {
+ i := bytes.IndexByte(line, '"')
+ if i >= 0 {
+ line.Advance(i + 1) // 1 for ending quote
+ switch rn := line.Peek(); {
+ case rn == '"':
+ line.Advance(1)
+ case rn == r.comma:
+ line.Advance(1)
+ fields++
+ continue parseField
+ case lengthNL(line) == len(line):
+ fields++
+ break parseField
+ }
+ } else if len(line) > 0 || cutShort {
+ line, cutShort = r.readLine()
+ originalLine = line
+ } else {
+ fields++
+ break parseField
+ }
+ }
+ }
+ }
+
+ return fields, indexes, fields != 0
+}
+
+// lengthNL reports the number of bytes for the trailing \n.
+func lengthNL(b []byte) int {
+ if len(b) > 0 && b[len(b)-1] == '\n' {
+ return 1
+ }
+ return 0
+}
diff --git a/vendor/github.com/gabriel-vasile/mimetype/internal/json/parser.go b/vendor/github.com/gabriel-vasile/mimetype/internal/json/parser.go
new file mode 100644
index 0000000..4bc8617
--- /dev/null
+++ b/vendor/github.com/gabriel-vasile/mimetype/internal/json/parser.go
@@ -0,0 +1,478 @@
+package json
+
+import (
+ "bytes"
+ "sync"
+)
+
+const (
+ QueryNone = "json"
+ QueryGeo = "geo"
+ QueryHAR = "har"
+ QueryGLTF = "gltf"
+ maxRecursion = 4096
+)
+
+var queries = map[string][]query{
+ QueryNone: nil,
+ QueryGeo: {{
+ SearchPath: [][]byte{[]byte("type")},
+ SearchVals: [][]byte{
+ []byte(`"Feature"`),
+ []byte(`"FeatureCollection"`),
+ []byte(`"Point"`),
+ []byte(`"LineString"`),
+ []byte(`"Polygon"`),
+ []byte(`"MultiPoint"`),
+ []byte(`"MultiLineString"`),
+ []byte(`"MultiPolygon"`),
+ []byte(`"GeometryCollection"`),
+ },
+ }},
+ QueryHAR: {{
+ SearchPath: [][]byte{[]byte("log"), []byte("version")},
+ }, {
+ SearchPath: [][]byte{[]byte("log"), []byte("creator")},
+ }, {
+ SearchPath: [][]byte{[]byte("log"), []byte("entries")},
+ }},
+ QueryGLTF: {{
+ SearchPath: [][]byte{[]byte("asset"), []byte("version")},
+ SearchVals: [][]byte{[]byte(`"1.0"`), []byte(`"2.0"`)},
+ }},
+}
+
+var parserPool = sync.Pool{
+ New: func() any {
+ return &parserState{maxRecursion: maxRecursion}
+ },
+}
+
+// parserState holds the state of JSON parsing. The number of inspected bytes,
+// the current path inside the JSON object, etc.
+type parserState struct {
+ // ib represents the number of inspected bytes.
+ // Because mimetype limits itself to only reading the header of the file,
+ // it means sometimes the input JSON can be truncated. In that case, we want
+ // to still detect it as JSON, even if it's invalid/truncated.
+ // When ib == len(input) it means the JSON was valid (at least the header).
+ ib int
+ maxRecursion int
+ // currPath keeps track of the JSON keys parsed up to the current point.
+ // It works only for JSON objects. JSON arrays are ignored
+ // mainly because the functionality is not needed.
+ currPath [][]byte
+ // firstToken stores the first JSON token encountered in input.
+ // TODO: performance would be better if we would stop parsing as soon
+ // as we see that first token is not what we are interested in.
+ firstToken int
+ // querySatisfied is true if both path and value of any queries passed to
+ // consumeAny are satisfied.
+ querySatisfied bool
+}
+
+// query holds information about a combination of {"key": "val"} that we're trying
+// to search for inside the JSON.
+type query struct {
+ // SearchPath represents the whole path to look for inside the JSON.
+ // ex: [][]byte{[]byte("foo"), []byte("bar")} matches {"foo": {"bar": "baz"}}
+ SearchPath [][]byte
+ // SearchVals represents values to look for when the SearchPath is found.
+ // Each SearchVal element is tried until one of them matches (logical OR.)
+ SearchVals [][]byte
+}
+
+func eq(path1, path2 [][]byte) bool {
+ if len(path1) != len(path2) {
+ return false
+ }
+ for i := range path1 {
+ if !bytes.Equal(path1[i], path2[i]) {
+ return false
+ }
+ }
+ return true
+}
+
+// LooksLikeObjectOrArray reports if first non white space character from raw
+// is either { or [. Parsing raw as JSON is a heavy operation. When receiving some
+// text input we can skip parsing if the input does not even look like JSON.
+func LooksLikeObjectOrArray(raw []byte) bool {
+ for i := range raw {
+ if isSpace(raw[i]) {
+ continue
+ }
+ return raw[i] == '{' || raw[i] == '['
+ }
+
+ return false
+}
+
+// Parse will take out a parser from the pool depending on queryType and tries
+// to parse raw bytes as JSON.
+func Parse(queryType string, raw []byte) (parsed, inspected, firstToken int, querySatisfied bool) {
+ p := parserPool.Get().(*parserState)
+ defer func() {
+ // Avoid hanging on to too much memory in extreme input cases.
+ if len(p.currPath) > 128 {
+ p.currPath = nil
+ }
+ parserPool.Put(p)
+ }()
+ p.reset()
+
+ qs := queries[queryType]
+ got := p.consumeAny(raw, qs, 0)
+ return got, p.ib, p.firstToken, p.querySatisfied
+}
+
+func (p *parserState) reset() {
+ p.ib = 0
+ p.currPath = p.currPath[0:0]
+ p.firstToken = TokInvalid
+ p.querySatisfied = false
+}
+
+func (p *parserState) consumeSpace(b []byte) (n int) {
+ for len(b) > 0 && isSpace(b[0]) {
+ b = b[1:]
+ n++
+ p.ib++
+ }
+ return n
+}
+
+func (p *parserState) consumeConst(b, cnst []byte) int {
+ lb := len(b)
+ for i, c := range cnst {
+ if lb > i && b[i] == c {
+ p.ib++
+ } else {
+ return 0
+ }
+ }
+ return len(cnst)
+}
+
+func (p *parserState) consumeString(b []byte) (n int) {
+ var c byte
+ for len(b[n:]) > 0 {
+ c, n = b[n], n+1
+ p.ib++
+ switch c {
+ case '\\':
+ if len(b[n:]) == 0 {
+ return 0
+ }
+ switch b[n] {
+ case '"', '\\', '/', 'b', 'f', 'n', 'r', 't':
+ n++
+ p.ib++
+ continue
+ case 'u':
+ n++
+ p.ib++
+ for j := 0; j < 4 && len(b[n:]) > 0; j++ {
+ if !isXDigit(b[n]) {
+ return 0
+ }
+ n++
+ p.ib++
+ }
+ continue
+ default:
+ return 0
+ }
+ case '"':
+ return n
+ default:
+ continue
+ }
+ }
+ return 0
+}
+
+func (p *parserState) consumeNumber(b []byte) (n int) {
+ got := false
+ var i int
+
+ if len(b) == 0 {
+ goto out
+ }
+ if b[0] == '-' {
+ b, i = b[1:], i+1
+ p.ib++
+ }
+
+ for len(b) > 0 {
+ if !isDigit(b[0]) {
+ break
+ }
+ got = true
+ b, i = b[1:], i+1
+ p.ib++
+ }
+ if len(b) == 0 {
+ goto out
+ }
+ if b[0] == '.' {
+ b, i = b[1:], i+1
+ p.ib++
+ }
+ for len(b) > 0 {
+ if !isDigit(b[0]) {
+ break
+ }
+ got = true
+ b, i = b[1:], i+1
+ p.ib++
+ }
+ if len(b) == 0 {
+ goto out
+ }
+ if got && (b[0] == 'e' || b[0] == 'E') {
+ b, i = b[1:], i+1
+ p.ib++
+ got = false
+ if len(b) == 0 {
+ goto out
+ }
+ if b[0] == '+' || b[0] == '-' {
+ b, i = b[1:], i+1
+ p.ib++
+ }
+ for len(b) > 0 {
+ if !isDigit(b[0]) {
+ break
+ }
+ got = true
+ b, i = b[1:], i+1
+ p.ib++
+ }
+ }
+out:
+ if got {
+ return i
+ }
+ return 0
+}
+
+func (p *parserState) consumeArray(b []byte, qs []query, lvl int) (n int) {
+ p.appendPath([]byte{'['}, qs)
+ if len(b) == 0 {
+ return 0
+ }
+
+ for n < len(b) {
+ n += p.consumeSpace(b[n:])
+ if len(b[n:]) == 0 {
+ return 0
+ }
+ if b[n] == ']' {
+ p.ib++
+ p.popLastPath(qs)
+ return n + 1
+ }
+ innerParsed := p.consumeAny(b[n:], qs, lvl)
+ if innerParsed == 0 {
+ return 0
+ }
+ n += innerParsed
+ if len(b[n:]) == 0 {
+ return 0
+ }
+ switch b[n] {
+ case ',':
+ n += 1
+ p.ib++
+ continue
+ case ']':
+ p.ib++
+ return n + 1
+ default:
+ return 0
+ }
+ }
+ return 0
+}
+
+func queryPathMatch(qs []query, path [][]byte) int {
+ for i := range qs {
+ if eq(qs[i].SearchPath, path) {
+ return i
+ }
+ }
+ return -1
+}
+
+// appendPath will append a path fragment if queries is not empty.
+// If we don't need query functionality (just checking if a JSON is valid),
+// then we can skip keeping track of the path we're currently in.
+func (p *parserState) appendPath(path []byte, qs []query) {
+ if len(qs) != 0 {
+ p.currPath = append(p.currPath, path)
+ }
+}
+func (p *parserState) popLastPath(qs []query) {
+ if len(qs) != 0 {
+ p.currPath = p.currPath[:len(p.currPath)-1]
+ }
+}
+
+func (p *parserState) consumeObject(b []byte, qs []query, lvl int) (n int) {
+ for n < len(b) {
+ n += p.consumeSpace(b[n:])
+ if len(b[n:]) == 0 {
+ return 0
+ }
+ if b[n] == '}' {
+ p.ib++
+ return n + 1
+ }
+ if b[n] != '"' {
+ return 0
+ } else {
+ n += 1
+ p.ib++
+ }
+ // queryMatched stores the index of the query satisfying the current path.
+ queryMatched := -1
+ if keyLen := p.consumeString(b[n:]); keyLen == 0 {
+ return 0
+ } else {
+ p.appendPath(b[n:n+keyLen-1], qs)
+ if !p.querySatisfied {
+ queryMatched = queryPathMatch(qs, p.currPath)
+ }
+ n += keyLen
+ }
+ n += p.consumeSpace(b[n:])
+ if len(b[n:]) == 0 {
+ return 0
+ }
+ if b[n] != ':' {
+ return 0
+ } else {
+ n += 1
+ p.ib++
+ }
+ n += p.consumeSpace(b[n:])
+ if len(b[n:]) == 0 {
+ return 0
+ }
+
+ if valLen := p.consumeAny(b[n:], qs, lvl); valLen == 0 {
+ return 0
+ } else {
+ if queryMatched != -1 {
+ q := qs[queryMatched]
+ if len(q.SearchVals) == 0 {
+ p.querySatisfied = true
+ }
+ for _, val := range q.SearchVals {
+ if bytes.Equal(val, bytes.TrimSpace(b[n:n+valLen])) {
+ p.querySatisfied = true
+ }
+ }
+ }
+ n += valLen
+ }
+ if len(b[n:]) == 0 {
+ return 0
+ }
+ switch b[n] {
+ case ',':
+ p.popLastPath(qs)
+ n++
+ p.ib++
+ continue
+ case '}':
+ p.popLastPath(qs)
+ p.ib++
+ return n + 1
+ default:
+ return 0
+ }
+ }
+ return 0
+}
+
+func (p *parserState) consumeAny(b []byte, qs []query, lvl int) (n int) {
+ // Avoid too much recursion.
+ if p.maxRecursion != 0 && lvl > p.maxRecursion {
+ return 0
+ }
+ if len(qs) == 0 {
+ p.querySatisfied = true
+ }
+ n += p.consumeSpace(b)
+ if len(b[n:]) == 0 {
+ return 0
+ }
+
+ var t, rv int
+ switch b[n] {
+ case '"':
+ n++
+ p.ib++
+ rv = p.consumeString(b[n:])
+ t = TokString
+ case '[':
+ n++
+ p.ib++
+ rv = p.consumeArray(b[n:], qs, lvl+1)
+ t = TokArray
+ case '{':
+ n++
+ p.ib++
+ rv = p.consumeObject(b[n:], qs, lvl+1)
+ t = TokObject
+ case 't':
+ rv = p.consumeConst(b[n:], []byte("true"))
+ t = TokTrue
+ case 'f':
+ rv = p.consumeConst(b[n:], []byte("false"))
+ t = TokFalse
+ case 'n':
+ rv = p.consumeConst(b[n:], []byte("null"))
+ t = TokNull
+ default:
+ rv = p.consumeNumber(b[n:])
+ t = TokNumber
+ }
+ if lvl == 0 {
+ p.firstToken = t
+ }
+ if rv <= 0 {
+ return n
+ }
+ n += rv
+ n += p.consumeSpace(b[n:])
+ return n
+}
+
+func isSpace(c byte) bool {
+ return c == ' ' || c == '\t' || c == '\r' || c == '\n'
+}
+func isDigit(c byte) bool {
+ return '0' <= c && c <= '9'
+}
+
+func isXDigit(c byte) bool {
+ if isDigit(c) {
+ return true
+ }
+ return ('a' <= c && c <= 'f') || ('A' <= c && c <= 'F')
+}
+
+const (
+ TokInvalid = 0
+ TokNull = 1 << iota
+ TokTrue
+ TokFalse
+ TokNumber
+ TokString
+ TokArray
+ TokObject
+ TokComma
+)
diff --git a/vendor/github.com/gabriel-vasile/mimetype/internal/magic/archive.go b/vendor/github.com/gabriel-vasile/mimetype/internal/magic/archive.go
new file mode 100644
index 0000000..dd7f241
--- /dev/null
+++ b/vendor/github.com/gabriel-vasile/mimetype/internal/magic/archive.go
@@ -0,0 +1,163 @@
+package magic
+
+import (
+ "bytes"
+ "encoding/binary"
+)
+
+var (
+ // SevenZ matches a 7z archive.
+ SevenZ = prefix([]byte{0x37, 0x7A, 0xBC, 0xAF, 0x27, 0x1C})
+ // Gzip matches gzip files based on http://www.zlib.org/rfc-gzip.html#header-trailer.
+ Gzip = prefix([]byte{0x1f, 0x8b})
+ // Fits matches a Flexible Image Transport System file.
+ Fits = prefix([]byte{
+ 0x53, 0x49, 0x4D, 0x50, 0x4C, 0x45, 0x20, 0x20, 0x3D, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x54,
+ })
+ // Xar matches an eXtensible ARchive format file.
+ Xar = prefix([]byte{0x78, 0x61, 0x72, 0x21})
+ // Bz2 matches a bzip2 file.
+ Bz2 = prefix([]byte{0x42, 0x5A, 0x68})
+ // Ar matches an ar (Unix) archive file.
+ Ar = prefix([]byte{0x21, 0x3C, 0x61, 0x72, 0x63, 0x68, 0x3E})
+ // Deb matches a Debian package file.
+ Deb = offset([]byte{
+ 0x64, 0x65, 0x62, 0x69, 0x61, 0x6E, 0x2D,
+ 0x62, 0x69, 0x6E, 0x61, 0x72, 0x79,
+ }, 8)
+ // Warc matches a Web ARChive file.
+ Warc = prefix([]byte("WARC/1.0"), []byte("WARC/1.1"))
+ // Cab matches a Microsoft Cabinet archive file.
+ Cab = prefix([]byte("MSCF\x00\x00\x00\x00"))
+ // Xz matches an xz compressed stream based on https://tukaani.org/xz/xz-file-format.txt.
+ Xz = prefix([]byte{0xFD, 0x37, 0x7A, 0x58, 0x5A, 0x00})
+ // Lzip matches an Lzip compressed file.
+ Lzip = prefix([]byte{0x4c, 0x5a, 0x49, 0x50})
+ // RPM matches an RPM or Delta RPM package file.
+ RPM = prefix([]byte{0xed, 0xab, 0xee, 0xdb}, []byte("drpm"))
+ // Cpio matches a cpio archive file.
+ Cpio = prefix([]byte("070707"), []byte("070701"), []byte("070702"))
+ // RAR matches a RAR archive file.
+ RAR = prefix([]byte("Rar!\x1A\x07\x00"), []byte("Rar!\x1A\x07\x01\x00"))
+)
+
+// InstallShieldCab matches an InstallShield Cabinet archive file.
+func InstallShieldCab(raw []byte, _ uint32) bool {
+ return len(raw) > 7 &&
+ bytes.Equal(raw[0:4], []byte("ISc(")) &&
+ raw[6] == 0 &&
+ (raw[7] == 1 || raw[7] == 2 || raw[7] == 4)
+}
+
+// Zstd matches a Zstandard archive file.
+// https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md
+func Zstd(raw []byte, limit uint32) bool {
+ if len(raw) < 4 {
+ return false
+ }
+ sig := binary.LittleEndian.Uint32(raw)
+ // Check for Zstandard frames and skippable frames.
+ return (sig >= 0xFD2FB522 && sig <= 0xFD2FB528) ||
+ (sig >= 0x184D2A50 && sig <= 0x184D2A5F)
+}
+
+// CRX matches a Chrome extension file: a zip archive prepended by a package header.
+func CRX(raw []byte, limit uint32) bool {
+ const minHeaderLen = 16
+ if len(raw) < minHeaderLen || !bytes.HasPrefix(raw, []byte("Cr24")) {
+ return false
+ }
+ pubkeyLen := binary.LittleEndian.Uint32(raw[8:12])
+ sigLen := binary.LittleEndian.Uint32(raw[12:16])
+ zipOffset := minHeaderLen + pubkeyLen + sigLen
+ if uint32(len(raw)) < zipOffset {
+ return false
+ }
+ return Zip(raw[zipOffset:], limit)
+}
+
+// Tar matches a (t)ape (ar)chive file.
+// Tar files are divided into 512-byte records. The first record contains a
+// 257-byte header padded with NUL.
+func Tar(raw []byte, _ uint32) bool {
+ const sizeRecord = 512
+
+ // The structure of a tar header:
+ // type TarHeader struct {
+ // Name [100]byte
+ // Mode [8]byte
+ // Uid [8]byte
+ // Gid [8]byte
+ // Size [12]byte
+ // Mtime [12]byte
+ // Chksum [8]byte
+ // Linkflag byte
+ // Linkname [100]byte
+ // Magic [8]byte
+ // Uname [32]byte
+ // Gname [32]byte
+ // Devmajor [8]byte
+ // Devminor [8]byte
+ // }
+
+ if len(raw) < sizeRecord {
+ return false
+ }
+ raw = raw[:sizeRecord]
+
+ // First 100 bytes of the header represent the file name.
+ // Check if file looks like Gentoo GLEP binary package.
+ if bytes.Contains(raw[:100], []byte("/gpkg-1\x00")) {
+ return false
+ }
+
+ // Get the checksum recorded into the file.
+ recsum := tarParseOctal(raw[148:156])
+ if recsum == -1 {
+ return false
+ }
+ sum1, sum2 := tarChksum(raw)
+ return recsum == sum1 || recsum == sum2
+}
+
+// tarParseOctal converts octal string to decimal int.
+func tarParseOctal(b []byte) int64 {
+ // Because unused fields are filled with NULs, we need to skip leading NULs.
+ // Fields may also be padded with spaces or NULs.
+ // So we remove leading and trailing NULs and spaces to be sure.
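+ // For example, tarParseOctal([]byte("0000644 \x00")) returns 420 (octal 644).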
+ b = bytes.Trim(b, " \x00")
+
+ if len(b) == 0 {
+ return -1
+ }
+ ret := int64(0)
+ for _, b := range b {
+ if b == 0 {
+ break
+ }
+ if b < '0' || b > '7' {
+ return -1
+ }
+ ret = (ret << 3) | int64(b-'0')
+ }
+ return ret
+}
+
+// tarChksum computes the checksum for the header block b.
+// The actual checksum is written to same b block after it has been calculated.
+// Before calculation the bytes from b reserved for checksum have placeholder
+// value of ASCII space 0x20.
+// POSIX specifies a sum of the unsigned byte values, but the Sun tar used
+// signed byte values. We compute and return both.
+func tarChksum(b []byte) (unsigned, signed int64) {
+ for i, c := range b {
+ if 148 <= i && i < 156 {
+ c = ' ' // Treat the checksum field itself as all spaces.
+ }
+ unsigned += int64(c)
+ signed += int64(int8(c))
+ }
+ return unsigned, signed
+}
diff --git a/vendor/github.com/gabriel-vasile/mimetype/internal/magic/audio.go b/vendor/github.com/gabriel-vasile/mimetype/internal/magic/audio.go
new file mode 100644
index 0000000..d17e324
--- /dev/null
+++ b/vendor/github.com/gabriel-vasile/mimetype/internal/magic/audio.go
@@ -0,0 +1,76 @@
+package magic
+
+import (
+ "bytes"
+ "encoding/binary"
+)
+
+var (
+ // Flac matches a Free Lossless Audio Codec file.
+ Flac = prefix([]byte("\x66\x4C\x61\x43\x00\x00\x00\x22"))
+ // Midi matches a Musical Instrument Digital Interface file.
+ Midi = prefix([]byte("\x4D\x54\x68\x64"))
+ // Ape matches a Monkey's Audio file.
+ Ape = prefix([]byte("\x4D\x41\x43\x20\x96\x0F\x00\x00\x34\x00\x00\x00\x18\x00\x00\x00\x90\xE3"))
+ // MusePack matches a Musepack file.
+ MusePack = prefix([]byte("MPCK"))
+ // Au matches a Sun Microsystems au file.
+ Au = prefix([]byte("\x2E\x73\x6E\x64"))
+ // Amr matches an Adaptive Multi-Rate file.
+ Amr = prefix([]byte("\x23\x21\x41\x4D\x52"))
+ // Voc matches a Creative Voice file.
+ Voc = prefix([]byte("Creative Voice File"))
+ // M3u matches a Playlist file.
+ M3u = prefix([]byte("#EXTM3U"))
+ // AAC matches an Advanced Audio Coding file.
+ AAC = prefix([]byte{0xFF, 0xF1}, []byte{0xFF, 0xF9})
+)
+
+// Mp3 matches an mp3 file.
+func Mp3(raw []byte, limit uint32) bool {
+ if len(raw) < 3 {
+ return false
+ }
+
+ if bytes.HasPrefix(raw, []byte("ID3")) {
+ // MP3s with an ID3v2 tag will start with "ID3"
+ // ID3v1 tags, however, appear at the end of the file.
+ return true
+ }
+
+ // Match MP3 files without tags
+ switch binary.BigEndian.Uint16(raw[:2]) & 0xFFFE {
+ case 0xFFFA:
+ // MPEG ADTS, layer III, v1
+ return true
+ case 0xFFF2:
+ // MPEG ADTS, layer III, v2
+ return true
+ case 0xFFE2:
+ // MPEG ADTS, layer III, v2.5
+ return true
+ }
+
+ return false
+}
+
+// Wav matches a Waveform Audio File Format file.
+func Wav(raw []byte, limit uint32) bool {
+ return len(raw) > 12 &&
+ bytes.Equal(raw[:4], []byte("RIFF")) &&
+ bytes.Equal(raw[8:12], []byte{0x57, 0x41, 0x56, 0x45})
+}
+
+// Aiff matches Audio Interchange File Format file.
+func Aiff(raw []byte, limit uint32) bool {
+ return len(raw) > 12 &&
+ bytes.Equal(raw[:4], []byte{0x46, 0x4F, 0x52, 0x4D}) &&
+ bytes.Equal(raw[8:12], []byte{0x41, 0x49, 0x46, 0x46})
+}
+
+// Qcp matches a Qualcomm Pure Voice file.
+func Qcp(raw []byte, limit uint32) bool {
+ return len(raw) > 12 &&
+ bytes.Equal(raw[:4], []byte("RIFF")) &&
+ bytes.Equal(raw[8:12], []byte("QLCM"))
+}
diff --git a/vendor/github.com/gabriel-vasile/mimetype/internal/magic/binary.go b/vendor/github.com/gabriel-vasile/mimetype/internal/magic/binary.go
new file mode 100644
index 0000000..70599b3
--- /dev/null
+++ b/vendor/github.com/gabriel-vasile/mimetype/internal/magic/binary.go
@@ -0,0 +1,203 @@
+package magic
+
+import (
+ "bytes"
+ "debug/macho"
+ "encoding/binary"
+)
+
+var (
+ // Lnk matches Microsoft lnk binary format.
+ Lnk = prefix([]byte{0x4C, 0x00, 0x00, 0x00, 0x01, 0x14, 0x02, 0x00})
+ // Wasm matches a web assembly File Format file.
+ Wasm = prefix([]byte{0x00, 0x61, 0x73, 0x6D})
+ // Exe matches a Windows/DOS executable file.
+ Exe = prefix([]byte{0x4D, 0x5A})
+ // Elf matches an Executable and Linkable Format file.
+ Elf = prefix([]byte{0x7F, 0x45, 0x4C, 0x46})
+ // Nes matches a Nintendo Entertainment system ROM file.
+ Nes = prefix([]byte{0x4E, 0x45, 0x53, 0x1A})
+ // SWF matches an Adobe Flash swf file.
+ SWF = prefix([]byte("CWS"), []byte("FWS"), []byte("ZWS"))
+ // Torrent has bencoded text in the beginning.
+ Torrent = prefix([]byte("d8:announce"))
+ // PAR1 matches a parquet file.
+ Par1 = prefix([]byte{0x50, 0x41, 0x52, 0x31})
+ // CBOR matches a Concise Binary Object Representation https://cbor.io/
+ CBOR = prefix([]byte{0xD9, 0xD9, 0xF7})
+)
+
+// Java bytecode and Mach-O binaries share the same magic number.
+// More info here https://github.com/threatstack/libmagic/blob/master/magic/Magdir/cafebabe
+func classOrMachOFat(in []byte) bool {
+ // There should be at least 8 bytes for both of them because the only way to
+ // quickly distinguish them is by comparing byte at position 7
+ if len(in) < 8 {
+ return false
+ }
+
+ return binary.BigEndian.Uint32(in) == macho.MagicFat
+}
+
+// Class matches a java class file.
+func Class(raw []byte, limit uint32) bool {
+ return classOrMachOFat(raw) && raw[7] > 30
+}
+
+// MachO matches Mach-O binaries format.
+func MachO(raw []byte, limit uint32) bool {
+ if classOrMachOFat(raw) && raw[7] < 0x14 {
+ return true
+ }
+
+ if len(raw) < 4 {
+ return false
+ }
+
+ be := binary.BigEndian.Uint32(raw)
+ le := binary.LittleEndian.Uint32(raw)
+
+ return be == macho.Magic32 ||
+ le == macho.Magic32 ||
+ be == macho.Magic64 ||
+ le == macho.Magic64
+}
+
+// Dbf matches a dBase file.
+// https://www.dbase.com/Knowledgebase/INT/db7_file_fmt.htm
+func Dbf(raw []byte, limit uint32) bool {
+ if len(raw) < 68 {
+ return false
+ }
+
+ // 3rd and 4th bytes contain the last update month and day of month.
+ if raw[2] == 0 || raw[2] > 12 || raw[3] == 0 || raw[3] > 31 {
+ return false
+ }
+
+ // 12, 13, 30, 31 are reserved bytes and always filled with 0x00.
+ if raw[12] != 0x00 || raw[13] != 0x00 || raw[30] != 0x00 || raw[31] != 0x00 {
+ return false
+ }
+ // Production MDX flag;
+ // 0x01 if a production .MDX file exists for this table;
+ // 0x00 if no .MDX file exists.
+ if raw[28] > 0x01 {
+ return false
+ }
+
+ // dbf type is dictated by the first byte.
+ dbfTypes := []byte{
+ 0x02, 0x03, 0x04, 0x05, 0x30, 0x31, 0x32, 0x42, 0x62, 0x7B, 0x82,
+ 0x83, 0x87, 0x8A, 0x8B, 0x8E, 0xB3, 0xCB, 0xE5, 0xF5, 0xF4, 0xFB,
+ }
+ for _, b := range dbfTypes {
+ if raw[0] == b {
+ return true
+ }
+ }
+
+ return false
+}
+
+// ElfObj matches an object file.
+func ElfObj(raw []byte, limit uint32) bool {
+ return len(raw) > 17 && ((raw[16] == 0x01 && raw[17] == 0x00) ||
+ (raw[16] == 0x00 && raw[17] == 0x01))
+}
+
+// ElfExe matches an executable file.
+func ElfExe(raw []byte, limit uint32) bool {
+ return len(raw) > 17 && ((raw[16] == 0x02 && raw[17] == 0x00) ||
+ (raw[16] == 0x00 && raw[17] == 0x02))
+}
+
+// ElfLib matches a shared library file.
+func ElfLib(raw []byte, limit uint32) bool {
+ return len(raw) > 17 && ((raw[16] == 0x03 && raw[17] == 0x00) ||
+ (raw[16] == 0x00 && raw[17] == 0x03))
+}
+
+// ElfDump matches a core dump file.
+func ElfDump(raw []byte, limit uint32) bool {
+ return len(raw) > 17 && ((raw[16] == 0x04 && raw[17] == 0x00) ||
+ (raw[16] == 0x00 && raw[17] == 0x04))
+}
+
+// Dcm matches a DICOM medical format file.
+func Dcm(raw []byte, limit uint32) bool {
+ return len(raw) > 131 &&
+ bytes.Equal(raw[128:132], []byte{0x44, 0x49, 0x43, 0x4D})
+}
+
+// Marc matches a MARC21 (MAchine-Readable Cataloging) file.
+func Marc(raw []byte, limit uint32) bool {
+ // File is at least 24 bytes ("leader" field size).
+ if len(raw) < 24 {
+ return false
+ }
+
+ // Fixed bytes at offset 20.
+ if !bytes.Equal(raw[20:24], []byte("4500")) {
+ return false
+ }
+
+ // First 5 bytes are ASCII digits.
+ for i := 0; i < 5; i++ {
+ if raw[i] < '0' || raw[i] > '9' {
+ return false
+ }
+ }
+
+ // Field terminator is present in first 2048 bytes.
+ return bytes.Contains(raw[:min(2048, len(raw))], []byte{0x1E})
+}
+
+// GLB matches a glTF model format file.
+// GLB is the binary file format representation of 3D models saved in
+// the GL transmission Format (glTF).
+// GLB uses little endian and its header structure is as follows:
+//
+// <-- 12-byte header -->
+// | magic | version | length |
+// | (uint32) | (uint32) | (uint32) |
+// | \x67\x6C\x54\x46 | \x01\x00\x00\x00 | ... |
+// | g l T F | 1 | ... |
+//
+// Visit [glTF specification] and [IANA glTF entry] for more details.
+//
+// [glTF specification]: https://registry.khronos.org/glTF/specs/2.0/glTF-2.0.html
+// [IANA glTF entry]: https://www.iana.org/assignments/media-types/model/gltf-binary
+var GLB = prefix([]byte("\x67\x6C\x54\x46\x02\x00\x00\x00"),
+ []byte("\x67\x6C\x54\x46\x01\x00\x00\x00"))
+
+// TzIf matches a Time Zone Information Format (TZif) file.
+// See more: https://tools.ietf.org/id/draft-murchison-tzdist-tzif-00.html#rfc.section.3
+// Its header structure is shown below:
+//
+// +---------------+---+
+// | magic (4) | <-+-- version (1)
+// +---------------+---+---------------------------------------+
+// | [unused - reserved for future use] (15) |
+// +---------------+---------------+---------------+-----------+
+// | isutccnt (4) | isstdcnt (4) | leapcnt (4) |
+// +---------------+---------------+---------------+
+// | timecnt (4) | typecnt (4) | charcnt (4) |
+func TzIf(raw []byte, limit uint32) bool {
+ // File is at least 44 bytes (header size).
+ if len(raw) < 44 {
+ return false
+ }
+
+ if !bytes.HasPrefix(raw, []byte("TZif")) {
+ return false
+ }
+
+ // Field "typecnt" MUST not be zero.
+ if binary.BigEndian.Uint32(raw[36:40]) == 0 {
+ return false
+ }
+
+ // Version has to be NUL (0x00), '2' (0x32) or '3' (0x33).
+ return raw[4] == 0x00 || raw[4] == 0x32 || raw[4] == 0x33
+}
diff --git a/vendor/github.com/gabriel-vasile/mimetype/internal/magic/database.go b/vendor/github.com/gabriel-vasile/mimetype/internal/magic/database.go
new file mode 100644
index 0000000..cb1fed1
--- /dev/null
+++ b/vendor/github.com/gabriel-vasile/mimetype/internal/magic/database.go
@@ -0,0 +1,13 @@
+package magic
+
+var (
+ // Sqlite matches an SQLite database file.
+ Sqlite = prefix([]byte{
+ 0x53, 0x51, 0x4c, 0x69, 0x74, 0x65, 0x20, 0x66,
+ 0x6f, 0x72, 0x6d, 0x61, 0x74, 0x20, 0x33, 0x00,
+ })
+ // MsAccessAce matches a Microsoft Access database file.
+ MsAccessAce = offset([]byte("Standard ACE DB"), 4)
+ // MsAccessMdb matches legacy Microsoft Access database file (JET, 2003 and earlier).
+ MsAccessMdb = offset([]byte("Standard Jet DB"), 4)
+)
diff --git a/vendor/github.com/gabriel-vasile/mimetype/internal/magic/document.go b/vendor/github.com/gabriel-vasile/mimetype/internal/magic/document.go
new file mode 100644
index 0000000..7f9308d
--- /dev/null
+++ b/vendor/github.com/gabriel-vasile/mimetype/internal/magic/document.go
@@ -0,0 +1,83 @@
+package magic
+
+import (
+ "bytes"
+ "encoding/binary"
+)
+
+var (
+ // Fdf matches a Forms Data Format file.
+ Fdf = prefix([]byte("%FDF"))
+ // Mobi matches a Mobi file.
+ Mobi = offset([]byte("BOOKMOBI"), 60)
+ // Lit matches a Microsoft Lit file.
+ Lit = prefix([]byte("ITOLITLS"))
+)
+
+// PDF matches a Portable Document Format file.
+// The %PDF- header should be the first thing inside the file but many
+// implementations don't follow the rule. The PDF spec at Appendix H says the
+// signature can be prepended by anything.
+// https://bugs.astron.com/view.php?id=446
+func PDF(raw []byte, _ uint32) bool {
+ raw = raw[:min(len(raw), 1024)]
+ return bytes.Contains(raw, []byte("%PDF-"))
+}
+
+// DjVu matches a DjVu file.
+func DjVu(raw []byte, _ uint32) bool {
+ if len(raw) < 12 {
+ return false
+ }
+ if !bytes.HasPrefix(raw, []byte{0x41, 0x54, 0x26, 0x54, 0x46, 0x4F, 0x52, 0x4D}) {
+ return false
+ }
+ return bytes.HasPrefix(raw[12:], []byte("DJVM")) ||
+ bytes.HasPrefix(raw[12:], []byte("DJVU")) ||
+ bytes.HasPrefix(raw[12:], []byte("DJVI")) ||
+ bytes.HasPrefix(raw[12:], []byte("THUM"))
+}
+
+// P7s matches a .p7s signature file (PEM, Base64).
+func P7s(raw []byte, _ uint32) bool {
+ // Check for PEM Encoding.
+ if bytes.HasPrefix(raw, []byte("-----BEGIN PKCS7")) {
+ return true
+ }
+ // Check if DER Encoding is long enough.
+ if len(raw) < 20 {
+ return false
+ }
+ // Magic Bytes for the signedData ASN.1 encoding.
+ startHeader := [][]byte{{0x30, 0x80}, {0x30, 0x81}, {0x30, 0x82}, {0x30, 0x83}, {0x30, 0x84}}
+ signedDataMatch := []byte{0x06, 0x09, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x07}
+ // Check if Header is correct. There are multiple valid headers.
+ for i, match := range startHeader {
+ // If first bytes match, then check for ASN.1 Object Type.
+ if bytes.HasPrefix(raw, match) {
+ if bytes.HasPrefix(raw[i+2:], signedDataMatch) {
+ return true
+ }
+ }
+ }
+
+ return false
+}
+
+// Lotus123 matches a Lotus 1-2-3 spreadsheet document.
+func Lotus123(raw []byte, _ uint32) bool {
+ if len(raw) <= 20 {
+ return false
+ }
+ version := binary.BigEndian.Uint32(raw)
+ if version == 0x00000200 {
+ return raw[6] != 0 && raw[7] == 0
+ }
+
+ return version == 0x00001a00 && raw[20] > 0 && raw[20] < 32
+}
+
+// CHM matches a Microsoft Compiled HTML Help file.
+func CHM(raw []byte, _ uint32) bool {
+ return bytes.HasPrefix(raw, []byte("ITSF\003\000\000\000\x60\000\000\000"))
+}
diff --git a/vendor/github.com/gabriel-vasile/mimetype/internal/magic/font.go b/vendor/github.com/gabriel-vasile/mimetype/internal/magic/font.go
new file mode 100644
index 0000000..43af282
--- /dev/null
+++ b/vendor/github.com/gabriel-vasile/mimetype/internal/magic/font.go
@@ -0,0 +1,39 @@
+package magic
+
+import (
+ "bytes"
+)
+
+var (
+ // Woff matches a Web Open Font Format file.
+ Woff = prefix([]byte("wOFF"))
+ // Woff2 matches a Web Open Font Format version 2 file.
+ Woff2 = prefix([]byte("wOF2"))
+ // Otf matches an OpenType font file.
+ Otf = prefix([]byte{0x4F, 0x54, 0x54, 0x4F, 0x00})
+)
+
+// Ttf matches a TrueType font file.
+func Ttf(raw []byte, limit uint32) bool {
+ if !bytes.HasPrefix(raw, []byte{0x00, 0x01, 0x00, 0x00}) {
+ return false
+ }
+ return !MsAccessAce(raw, limit) && !MsAccessMdb(raw, limit)
+}
+
+// Eot matches an Embedded OpenType font file.
+func Eot(raw []byte, limit uint32) bool {
+ return len(raw) > 35 &&
+ bytes.Equal(raw[34:36], []byte{0x4C, 0x50}) &&
+ (bytes.Equal(raw[8:11], []byte{0x02, 0x00, 0x01}) ||
+ bytes.Equal(raw[8:11], []byte{0x01, 0x00, 0x00}) ||
+ bytes.Equal(raw[8:11], []byte{0x02, 0x00, 0x02}))
+}
+
+// Ttc matches a TrueType Collection font file.
+func Ttc(raw []byte, limit uint32) bool {
+ return len(raw) > 7 &&
+ bytes.HasPrefix(raw, []byte("ttcf")) &&
+ (bytes.Equal(raw[4:8], []byte{0x00, 0x01, 0x00, 0x00}) ||
+ bytes.Equal(raw[4:8], []byte{0x00, 0x02, 0x00, 0x00}))
+}
diff --git a/vendor/github.com/gabriel-vasile/mimetype/internal/magic/ftyp.go b/vendor/github.com/gabriel-vasile/mimetype/internal/magic/ftyp.go
new file mode 100644
index 0000000..ac72713
--- /dev/null
+++ b/vendor/github.com/gabriel-vasile/mimetype/internal/magic/ftyp.go
@@ -0,0 +1,109 @@
+package magic
+
+import (
+ "bytes"
+)
+
+var (
+ // AVIF matches an AV1 Image File Format still or animated.
+ // Wikipedia page seems outdated listing image/avif-sequence for animations.
+ // https://github.com/AOMediaCodec/av1-avif/issues/59
+ AVIF = ftyp([]byte("avif"), []byte("avis"))
+ // ThreeGP matches a 3GPP file.
+ ThreeGP = ftyp(
+ []byte("3gp1"), []byte("3gp2"), []byte("3gp3"), []byte("3gp4"),
+ []byte("3gp5"), []byte("3gp6"), []byte("3gp7"), []byte("3gs7"),
+ []byte("3ge6"), []byte("3ge7"), []byte("3gg6"),
+ )
+ // ThreeG2 matches a 3GPP2 file.
+ ThreeG2 = ftyp(
+ []byte("3g24"), []byte("3g25"), []byte("3g26"), []byte("3g2a"),
+ []byte("3g2b"), []byte("3g2c"), []byte("KDDI"),
+ )
+ // AMp4 matches an audio MP4 file.
+ AMp4 = ftyp(
+ // audio for Adobe Flash Player 9+
+ []byte("F4A "), []byte("F4B "),
+ // Apple iTunes AAC-LC (.M4A) Audio
+ []byte("M4B "), []byte("M4P "),
+ // MPEG-4 (.MP4) for SonyPSP
+ []byte("MSNV"),
+ // Nero Digital AAC Audio
+ []byte("NDAS"),
+ )
+ // Mqv matches a Sony / Mobile QuickTime file.
+ Mqv = ftyp([]byte("mqt "))
+ // M4a matches an audio M4A file.
+ M4a = ftyp([]byte("M4A "))
+ // M4v matches an Apple M4V video file.
+ M4v = ftyp([]byte("M4V "), []byte("M4VH"), []byte("M4VP"))
+ // Heic matches a High Efficiency Image Coding (HEIC) file.
+ Heic = ftyp([]byte("heic"), []byte("heix"))
+ // HeicSequence matches a High Efficiency Image Coding (HEIC) file sequence.
+ HeicSequence = ftyp([]byte("hevc"), []byte("hevx"))
+ // Heif matches a High Efficiency Image File Format (HEIF) file.
+ Heif = ftyp([]byte("mif1"), []byte("heim"), []byte("heis"), []byte("avic"))
+ // HeifSequence matches a High Efficiency Image File Format (HEIF) file sequence.
+ HeifSequence = ftyp([]byte("msf1"), []byte("hevm"), []byte("hevs"), []byte("avcs"))
+ // Mj2 matches a Motion JPEG 2000 file: https://en.wikipedia.org/wiki/Motion_JPEG_2000.
+ Mj2 = ftyp([]byte("mj2s"), []byte("mjp2"), []byte("MFSM"), []byte("MGSV"))
+ // Dvb matches a Digital Video Broadcasting file: https://dvb.org.
+ // https://cconcolato.github.io/mp4ra/filetype.html
+ // https://github.com/file/file/blob/512840337ead1076519332d24fefcaa8fac36e06/magic/Magdir/animation#L135-L154
+ Dvb = ftyp(
+ []byte("dby1"), []byte("dsms"), []byte("dts1"), []byte("dts2"),
+ []byte("dts3"), []byte("dxo "), []byte("dmb1"), []byte("dmpf"),
+ []byte("drc1"), []byte("dv1a"), []byte("dv1b"), []byte("dv2a"),
+ []byte("dv2b"), []byte("dv3a"), []byte("dv3b"), []byte("dvr1"),
+ []byte("dvt1"), []byte("emsg"))
+ // TODO: add support for remaining video formats at ftyps.com.
+)
+
+// QuickTime matches a QuickTime File Format file.
+// https://www.loc.gov/preservation/digital/formats/fdd/fdd000052.shtml
+// https://developer.apple.com/library/archive/documentation/QuickTime/QTFF/QTFFChap1/qtff1.html#//apple_ref/doc/uid/TP40000939-CH203-38190
+// https://github.com/apache/tika/blob/0f5570691133c75ac4472c3340354a6c4080b104/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml#L7758-L7777
+func QuickTime(raw []byte, _ uint32) bool {
+ if len(raw) < 12 {
+ return false
+ }
+ // First 4 bytes represent the size of the atom as unsigned int.
+ // Next 4 bytes are the type of the atom.
+ // For `ftyp` atoms check if first byte in size is 0, otherwise, a text file
+ // which happens to contain 'ftypqt ' at index 4 will trigger a false positive.
+ if bytes.Equal(raw[4:12], []byte("ftypqt ")) ||
+ bytes.Equal(raw[4:12], []byte("ftypmoov")) {
+ return raw[0] == 0x00
+ }
+ basicAtomTypes := [][]byte{
+ []byte("moov\x00"),
+ []byte("mdat\x00"),
+ []byte("free\x00"),
+ []byte("skip\x00"),
+ []byte("pnot\x00"),
+ }
+ for _, a := range basicAtomTypes {
+ if bytes.Equal(raw[4:9], a) {
+ return true
+ }
+ }
+ return bytes.Equal(raw[:8], []byte("\x00\x00\x00\x08wide"))
+}
+
+// Mp4 detects an .mp4 file. Mp4 detection only does a basic ftyp check.
+// Mp4 has many registered and unregistered code points so it's hard to keep track
+// of them all. Detection defaults to video/mp4 for all ftyp files.
+// ISO_IEC_14496-12 is the specification for the iso container.
+func Mp4(raw []byte, _ uint32) bool {
+ if len(raw) < 12 {
+ return false
+ }
+ // ftyps are made out of boxes. The first 4 bytes of the box represent
+ // its size in big-endian uint32. First box is the ftyp box and it is small
+ // in size. Check most significant byte is 0 to filter out false positive
+ // text files that happen to contain the string "ftyp" at index 4.
+ if raw[0] != 0 {
+ return false
+ }
+ return bytes.Equal(raw[4:8], []byte("ftyp"))
+}
diff --git a/vendor/github.com/gabriel-vasile/mimetype/internal/magic/geo.go b/vendor/github.com/gabriel-vasile/mimetype/internal/magic/geo.go
new file mode 100644
index 0000000..cade91f
--- /dev/null
+++ b/vendor/github.com/gabriel-vasile/mimetype/internal/magic/geo.go
@@ -0,0 +1,55 @@
+package magic
+
+import (
+ "bytes"
+ "encoding/binary"
+)
+
+// Shp matches a shape format file.
+// https://www.esri.com/library/whitepapers/pdfs/shapefile.pdf
+func Shp(raw []byte, limit uint32) bool {
+ if len(raw) < 112 {
+ return false
+ }
+
+ if binary.BigEndian.Uint32(raw[0:4]) != 9994 ||
+ binary.BigEndian.Uint32(raw[4:8]) != 0 ||
+ binary.BigEndian.Uint32(raw[8:12]) != 0 ||
+ binary.BigEndian.Uint32(raw[12:16]) != 0 ||
+ binary.BigEndian.Uint32(raw[16:20]) != 0 ||
+ binary.BigEndian.Uint32(raw[20:24]) != 0 ||
+ binary.LittleEndian.Uint32(raw[28:32]) != 1000 {
+ return false
+ }
+
+ shapeTypes := []int{
+ 0, // Null shape
+ 1, // Point
+ 3, // Polyline
+ 5, // Polygon
+ 8, // MultiPoint
+ 11, // PointZ
+ 13, // PolylineZ
+ 15, // PolygonZ
+ 18, // MultiPointZ
+ 21, // PointM
+ 23, // PolylineM
+ 25, // PolygonM
+ 28, // MultiPointM
+ 31, // MultiPatch
+ }
+
+ for _, st := range shapeTypes {
+ if st == int(binary.LittleEndian.Uint32(raw[108:112])) {
+ return true
+ }
+ }
+
+ return false
+}
+
+// Shx matches a shape index format file.
+// https://www.esri.com/library/whitepapers/pdfs/shapefile.pdf
+func Shx(raw []byte, limit uint32) bool {
+ return bytes.HasPrefix(raw, []byte{0x00, 0x00, 0x27, 0x0A})
+}
diff --git a/vendor/github.com/gabriel-vasile/mimetype/internal/magic/image.go b/vendor/github.com/gabriel-vasile/mimetype/internal/magic/image.go
new file mode 100644
index 0000000..0eb7e95
--- /dev/null
+++ b/vendor/github.com/gabriel-vasile/mimetype/internal/magic/image.go
@@ -0,0 +1,110 @@
+package magic
+
+import "bytes"
+
+var (
+ // Png matches a Portable Network Graphics file.
+ // https://www.w3.org/TR/PNG/
+ Png = prefix([]byte{0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A})
+ // Apng matches an Animated Portable Network Graphics file.
+ // https://wiki.mozilla.org/APNG_Specification
+ Apng = offset([]byte("acTL"), 37)
+ // Jpg matches a Joint Photographic Experts Group file.
+ Jpg = prefix([]byte{0xFF, 0xD8, 0xFF})
+ // Jp2 matches a JPEG 2000 Image file (ISO 15444-1).
+ Jp2 = jpeg2k([]byte{0x6a, 0x70, 0x32, 0x20})
+ // Jpx matches a JPEG 2000 Image file (ISO 15444-2).
+ Jpx = jpeg2k([]byte{0x6a, 0x70, 0x78, 0x20})
+ // Jpm matches a JPEG 2000 Image file (ISO 15444-6).
+ Jpm = jpeg2k([]byte{0x6a, 0x70, 0x6D, 0x20})
+ // Gif matches a Graphics Interchange Format file.
+ Gif = prefix([]byte("GIF87a"), []byte("GIF89a"))
+ // Bmp matches a bitmap image file.
+ Bmp = prefix([]byte{0x42, 0x4D})
+ // Ps matches a PostScript file.
+ Ps = prefix([]byte("%!PS-Adobe-"))
+ // Psd matches a Photoshop Document file.
+ Psd = prefix([]byte("8BPS"))
+ // Ico matches an ICO file.
+ Ico = prefix([]byte{0x00, 0x00, 0x01, 0x00}, []byte{0x00, 0x00, 0x02, 0x00})
+ // Icns matches an ICNS (Apple Icon Image format) file.
+ Icns = prefix([]byte("icns"))
+ // Tiff matches a Tagged Image File Format file.
+ Tiff = prefix([]byte{0x49, 0x49, 0x2A, 0x00}, []byte{0x4D, 0x4D, 0x00, 0x2A})
+ // Bpg matches a Better Portable Graphics file.
+ Bpg = prefix([]byte{0x42, 0x50, 0x47, 0xFB})
+ // Xcf matches GIMP image data.
+ Xcf = prefix([]byte("gimp xcf"))
+ // Pat matches GIMP pattern data.
+ Pat = offset([]byte("GPAT"), 20)
+ // Gbr matches GIMP brush data.
+ Gbr = offset([]byte("GIMP"), 20)
+ // Hdr matches Radiance HDR image.
+ // https://web.archive.org/web/20060913152809/http://local.wasp.uwa.edu.au/~pbourke/dataformats/pic/
+ Hdr = prefix([]byte("#?RADIANCE\n"))
+ // Xpm matches X PixMap image data.
+ Xpm = prefix([]byte{0x2F, 0x2A, 0x20, 0x58, 0x50, 0x4D, 0x20, 0x2A, 0x2F})
+ // Jxs matches a JPEG XS coded image file (ISO/IEC 21122-3).
+ Jxs = prefix([]byte{0x00, 0x00, 0x00, 0x0C, 0x4A, 0x58, 0x53, 0x20, 0x0D, 0x0A, 0x87, 0x0A})
+ // Jxr matches Microsoft HD JXR photo file.
+ Jxr = prefix([]byte{0x49, 0x49, 0xBC, 0x01})
+)
+
+func jpeg2k(sig []byte) Detector {
+ return func(raw []byte, _ uint32) bool {
+ if len(raw) < 24 {
+ return false
+ }
+
+ if !bytes.Equal(raw[4:8], []byte{0x6A, 0x50, 0x20, 0x20}) &&
+ !bytes.Equal(raw[4:8], []byte{0x6A, 0x50, 0x32, 0x20}) {
+ return false
+ }
+ return bytes.Equal(raw[20:24], sig)
+ }
+}
+
+// Webp matches a WebP file.
+func Webp(raw []byte, _ uint32) bool {
+ return len(raw) > 12 &&
+ bytes.Equal(raw[0:4], []byte("RIFF")) &&
+ bytes.Equal(raw[8:12], []byte{0x57, 0x45, 0x42, 0x50})
+}
+
+// Dwg matches a CAD drawing file.
+func Dwg(raw []byte, _ uint32) bool {
+ if len(raw) < 6 || raw[0] != 0x41 || raw[1] != 0x43 {
+ return false
+ }
+ dwgVersions := [][]byte{
+ {0x31, 0x2E, 0x34, 0x30},
+ {0x31, 0x2E, 0x35, 0x30},
+ {0x32, 0x2E, 0x31, 0x30},
+ {0x31, 0x30, 0x30, 0x32},
+ {0x31, 0x30, 0x30, 0x33},
+ {0x31, 0x30, 0x30, 0x34},
+ {0x31, 0x30, 0x30, 0x36},
+ {0x31, 0x30, 0x30, 0x39},
+ {0x31, 0x30, 0x31, 0x32},
+ {0x31, 0x30, 0x31, 0x34},
+ {0x31, 0x30, 0x31, 0x35},
+ {0x31, 0x30, 0x31, 0x38},
+ {0x31, 0x30, 0x32, 0x31},
+ {0x31, 0x30, 0x32, 0x34},
+ {0x31, 0x30, 0x33, 0x32},
+ }
+
+ for _, d := range dwgVersions {
+ if bytes.Equal(raw[2:6], d) {
+ return true
+ }
+ }
+
+ return false
+}
+
+// Jxl matches JPEG XL image file.
+func Jxl(raw []byte, _ uint32) bool {
+ return bytes.HasPrefix(raw, []byte{0xFF, 0x0A}) ||
+ bytes.HasPrefix(raw, []byte("\x00\x00\x00\x0cJXL\x20\x0d\x0a\x87\x0a"))
+}
diff --git a/vendor/github.com/gabriel-vasile/mimetype/internal/magic/magic.go b/vendor/github.com/gabriel-vasile/mimetype/internal/magic/magic.go
new file mode 100644
index 0000000..5fe435b
--- /dev/null
+++ b/vendor/github.com/gabriel-vasile/mimetype/internal/magic/magic.go
@@ -0,0 +1,212 @@
+// Package magic holds the matching functions used to find MIME types.
+package magic
+
+import (
+ "bytes"
+ "fmt"
+
+ "github.com/gabriel-vasile/mimetype/internal/scan"
+)
+
+type (
+ // Detector receives the raw data of a file and returns whether the data
+ // meets any conditions. The limit parameter is an upper limit to the number
+ // of bytes received and is used to tell if the byte slice represents the
+ // whole file or is just the header of a file: len(raw) < limit or len(raw) == limit.
+ Detector func(raw []byte, limit uint32) bool
+ xmlSig struct {
+ // the local name of the root tag
+ localName []byte
+ // the namespace of the XML document
+ xmlns []byte
+ }
+)
+
+// prefix creates a Detector which returns true if any of the provided signatures
+// is the prefix of the raw input.
+func prefix(sigs ...[]byte) Detector {
+ return func(raw []byte, limit uint32) bool {
+ for _, s := range sigs {
+ if bytes.HasPrefix(raw, s) {
+ return true
+ }
+ }
+ return false
+ }
+}
+
+// offset creates a Detector which returns true if the provided signature can be
+// found at offset in the raw input.
+func offset(sig []byte, offset int) Detector {
+ return func(raw []byte, limit uint32) bool {
+ return len(raw) > offset && bytes.HasPrefix(raw[offset:], sig)
+ }
+}
+
+// ciPrefix is like prefix but the check is case insensitive.
+func ciPrefix(sigs ...[]byte) Detector {
+ return func(raw []byte, limit uint32) bool {
+ for _, s := range sigs {
+ if ciCheck(s, raw) {
+ return true
+ }
+ }
+ return false
+ }
+}
+func ciCheck(sig, raw []byte) bool {
+ if len(raw) < len(sig)+1 {
+ return false
+ }
+ // perform case insensitive check
+ for i, b := range sig {
+ db := raw[i]
+ if 'A' <= b && b <= 'Z' {
+ db &= 0xDF
+ }
+ if b != db {
+ return false
+ }
+ }
+
+ return true
+}
+
+// xml creates a Detector which returns true if any of the provided XML signatures
+// matches the raw input.
+func xml(sigs ...xmlSig) Detector {
+ return func(raw []byte, limit uint32) bool {
+ b := scan.Bytes(raw)
+ b.TrimLWS()
+ if len(b) == 0 {
+ return false
+ }
+ for _, s := range sigs {
+ if xmlCheck(s, b) {
+ return true
+ }
+ }
+ return false
+ }
+}
+func xmlCheck(sig xmlSig, raw []byte) bool {
+ raw = raw[:min(len(raw), 512)]
+
+ if len(sig.localName) == 0 {
+ return bytes.Index(raw, sig.xmlns) > 0
+ }
+ if len(sig.xmlns) == 0 {
+ return bytes.Index(raw, sig.localName) > 0
+ }
+
+ localNameIndex := bytes.Index(raw, sig.localName)
+ return localNameIndex != -1 && localNameIndex < bytes.Index(raw, sig.xmlns)
+}
+
+// markup creates a Detector which returns true if any of the HTML signatures
+// matches the raw input.
+func markup(sigs ...[]byte) Detector {
+ return func(raw []byte, limit uint32) bool {
+ b := scan.Bytes(raw)
+ if bytes.HasPrefix(b, []byte{0xEF, 0xBB, 0xBF}) {
+ // We skip the UTF-8 BOM if present to ensure we correctly
+ // process any leading whitespace. The presence of the BOM
+ // is taken into account during charset detection in charset.go.
+ b.Advance(3)
+ }
+ b.TrimLWS()
+ if len(b) == 0 {
+ return false
+ }
+ for _, s := range sigs {
+ if markupCheck(s, b) {
+ return true
+ }
+ }
+ return false
+ }
+}
+func markupCheck(sig, raw []byte) bool {
+ if len(raw) < len(sig)+1 {
+ return false
+ }
+
+ // perform case insensitive check
+ for i, b := range sig {
+ db := raw[i]
+ if 'A' <= b && b <= 'Z' {
+ db &= 0xDF
+ }
+ if b != db {
+ return false
+ }
+ }
+ // Next byte must be space or right angle bracket.
+ if db := raw[len(sig)]; !scan.ByteIsWS(db) && db != '>' {
+ return false
+ }
+
+ return true
+}
+
+// ftyp creates a Detector which returns true if any of the FTYP signatures
+// matches the raw input.
+func ftyp(sigs ...[]byte) Detector {
+ return func(raw []byte, limit uint32) bool {
+ if len(raw) < 12 {
+ return false
+ }
+ for _, s := range sigs {
+ if bytes.Equal(raw[8:12], s) {
+ return true
+ }
+ }
+ return false
+ }
+}
+
+func newXMLSig(localName, xmlns string) xmlSig {
+ ret := xmlSig{xmlns: []byte(xmlns)}
+ if localName != "" {
+ ret.localName = []byte(fmt.Sprintf("<%s", localName))
+ }
+
+ return ret
+}
+
+// A valid shebang starts with the "#!" characters,
+// followed by any number of spaces,
+// followed by the path to the interpreter,
+// and, optionally, followed by the arguments for the interpreter.
+//
+// Ex:
+//
+// #! /usr/bin/env php
+//
+// /usr/bin/env is the interpreter, php is the first and only argument.
+func shebang(sigs ...[]byte) Detector {
+ return func(raw []byte, limit uint32) bool {
+ b := scan.Bytes(raw)
+ line := b.Line()
+ for _, s := range sigs {
+ if shebangCheck(s, line) {
+ return true
+ }
+ }
+ return false
+ }
+}
+
+func shebangCheck(sig []byte, raw scan.Bytes) bool {
+ if len(raw) < len(sig)+2 {
+ return false
+ }
+ if raw[0] != '#' || raw[1] != '!' {
+ return false
+ }
+
+ raw.Advance(2) // skip #! we checked above
+ raw.TrimLWS()
+ raw.TrimRWS()
+ return bytes.Equal(raw, sig)
+}
diff --git a/vendor/github.com/gabriel-vasile/mimetype/internal/magic/ms_office.go b/vendor/github.com/gabriel-vasile/mimetype/internal/magic/ms_office.go
new file mode 100644
index 0000000..c912823
--- /dev/null
+++ b/vendor/github.com/gabriel-vasile/mimetype/internal/magic/ms_office.go
@@ -0,0 +1,211 @@
+package magic
+
+import (
+ "bytes"
+ "encoding/binary"
+)
+
+// Xlsx matches a Microsoft Excel 2007 file.
+func Xlsx(raw []byte, limit uint32) bool {
+ return msoxml(raw, zipEntries{{
+ name: []byte("xl/"),
+ dir: true,
+ }}, 100)
+}
+
+// Docx matches a Microsoft Word 2007 file.
+func Docx(raw []byte, limit uint32) bool {
+ return msoxml(raw, zipEntries{{
+ name: []byte("word/"),
+ dir: true,
+ }}, 100)
+}
+
+// Pptx matches a Microsoft PowerPoint 2007 file.
+func Pptx(raw []byte, limit uint32) bool {
+ return msoxml(raw, zipEntries{{
+ name: []byte("ppt/"),
+ dir: true,
+ }}, 100)
+}
+
+// Visio matches a Microsoft Visio 2013+ file.
+func Visio(raw []byte, limit uint32) bool {
+ return msoxml(raw, zipEntries{{
+ name: []byte("visio/"),
+ dir: true,
+ }}, 100)
+}
+
+// Ole matches an Object Linking and Embedding file.
+//
+// https://en.wikipedia.org/wiki/Object_Linking_and_Embedding
+func Ole(raw []byte, limit uint32) bool {
+ return bytes.HasPrefix(raw, []byte{0xD0, 0xCF, 0x11, 0xE0, 0xA1, 0xB1, 0x1A, 0xE1})
+}
+
+// Aaf matches an Advanced Authoring Format file.
+// See: https://pyaaf.readthedocs.io/en/latest/about.html
+// See: https://en.wikipedia.org/wiki/Advanced_Authoring_Format
+func Aaf(raw []byte, limit uint32) bool {
+ if len(raw) < 31 {
+ return false
+ }
+ return bytes.HasPrefix(raw[8:], []byte{0x41, 0x41, 0x46, 0x42, 0x0D, 0x00, 0x4F, 0x4D}) &&
+ (raw[30] == 0x09 || raw[30] == 0x0C)
+}
+
+// Doc matches a Microsoft Word 97-2003 file.
+// See: https://github.com/decalage2/oletools/blob/412ee36ae45e70f42123e835871bac956d958461/oletools/common/clsid.py
+func Doc(raw []byte, _ uint32) bool {
+ clsids := [][]byte{
+ // Microsoft Word 97-2003 Document (Word.Document.8)
+ {0x06, 0x09, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x46},
+ // Microsoft Word 6.0-7.0 Document (Word.Document.6)
+ {0x00, 0x09, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x46},
+ // Microsoft Word Picture (Word.Picture.8)
+ {0x07, 0x09, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x46},
+ }
+
+ for _, clsid := range clsids {
+ if matchOleClsid(raw, clsid) {
+ return true
+ }
+ }
+
+ return false
+}
+
+// Ppt matches a Microsoft PowerPoint 97-2003 file or a PowerPoint 95 presentation.
+func Ppt(raw []byte, limit uint32) bool {
+ // The root CLSID test is the safest way to identify OLE files; however, the
+ // format often places the root CLSID at the end of the file.
+ if matchOleClsid(raw, []byte{
+ 0x10, 0x8d, 0x81, 0x64, 0x9b, 0x4f, 0xcf, 0x11,
+ 0x86, 0xea, 0x00, 0xaa, 0x00, 0xb9, 0x29, 0xe8,
+ }) || matchOleClsid(raw, []byte{
+ 0x70, 0xae, 0x7b, 0xea, 0x3b, 0xfb, 0xcd, 0x11,
+ 0xa9, 0x03, 0x00, 0xaa, 0x00, 0x51, 0x0e, 0xa3,
+ }) {
+ return true
+ }
+
+ lin := len(raw)
+ if lin < 520 {
+ return false
+ }
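+ // Fall back to PowerPoint-specific records at offset 512, the first sector
+ // after the compound file header.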
+ pptSubHeaders := [][]byte{
+ {0xA0, 0x46, 0x1D, 0xF0},
+ {0x00, 0x6E, 0x1E, 0xF0},
+ {0x0F, 0x00, 0xE8, 0x03},
+ }
+ for _, h := range pptSubHeaders {
+ if bytes.HasPrefix(raw[512:], h) {
+ return true
+ }
+ }
+
+ if bytes.HasPrefix(raw[512:], []byte{0xFD, 0xFF, 0xFF, 0xFF}) &&
+ raw[518] == 0x00 && raw[519] == 0x00 {
+ return true
+ }
+
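+ // Search the region [1152, min(4096, len)) for the UTF-16LE-encoded
+ // "PowerPoint Document" marker.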
+ return lin > 1152 && bytes.Contains(raw[1152:min(4096, lin)],
+ []byte("P\x00o\x00w\x00e\x00r\x00P\x00o\x00i\x00n\x00t\x00 D\x00o\x00c\x00u\x00m\x00e\x00n\x00t"))
+}
+
+// Xls matches a Microsoft Excel 97-2003 file.
+func Xls(raw []byte, limit uint32) bool {
+ // The root CLSID test is the safest way to identify OLE files; however, the
+ // format often places the root CLSID at the end of the file.
+ if matchOleClsid(raw, []byte{
+ 0x10, 0x08, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00,
+ }) || matchOleClsid(raw, []byte{
+ 0x20, 0x08, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00,
+ }) {
+ return true
+ }
+
+ lin := len(raw)
+ if lin < 520 {
+ return false
+ }
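+ // Fall back to Excel-specific records at offset 512, the first sector
+ // after the compound file header.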
+ xlsSubHeaders := [][]byte{
+ {0x09, 0x08, 0x10, 0x00, 0x00, 0x06, 0x05, 0x00},
+ {0xFD, 0xFF, 0xFF, 0xFF, 0x10},
+ {0xFD, 0xFF, 0xFF, 0xFF, 0x1F},
+ {0xFD, 0xFF, 0xFF, 0xFF, 0x22},
+ {0xFD, 0xFF, 0xFF, 0xFF, 0x23},
+ {0xFD, 0xFF, 0xFF, 0xFF, 0x28},
+ {0xFD, 0xFF, 0xFF, 0xFF, 0x29},
+ }
+ for _, h := range xlsSubHeaders {
+ if bytes.HasPrefix(raw[512:], h) {
+ return true
+ }
+ }
+
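+ // Search the region [1152, min(4096, len)) for the UTF-16LE-encoded
+ // "WksSSWorkBook" marker.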
+ return lin > 1152 && bytes.Contains(raw[1152:min(4096, lin)],
+ []byte("W\x00k\x00s\x00S\x00S\x00W\x00o\x00r\x00k\x00B\x00o\x00o\x00k"))
+}
+
+// Pub matches a Microsoft Publisher file.
+func Pub(raw []byte, limit uint32) bool {
+ return matchOleClsid(raw, []byte{
+ 0x01, 0x12, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0xC0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x46,
+ })
+}
+
+// Msg matches a Microsoft Outlook email file.
+func Msg(raw []byte, limit uint32) bool {
+ return matchOleClsid(raw, []byte{
+ 0x0B, 0x0D, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0xC0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x46,
+ })
+}
+
+// Msi matches a Microsoft Windows Installer file.
+// http://fileformats.archiveteam.org/wiki/Microsoft_Compound_File
+func Msi(raw []byte, limit uint32) bool {
+ return matchOleClsid(raw, []byte{
+ 0x84, 0x10, 0x0C, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0xC0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x46,
+ })
+}
+
+// One matches a Microsoft OneNote file.
+func One(raw []byte, limit uint32) bool {
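+ // The bytes encode the OneNote file header GUID
+ // {7B5C52E4-D88C-4DA7-AEB1-5378D02996D3}.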
+ return bytes.HasPrefix(raw, []byte{
+ 0xe4, 0x52, 0x5c, 0x7b, 0x8c, 0xd8, 0xa7, 0x4d,
+ 0xae, 0xb1, 0x53, 0x78, 0xd0, 0x29, 0x96, 0xd3,
+ })
+}
+
+// Helper to match by a specific CLSID of a compound file.
+//
+// http://fileformats.archiveteam.org/wiki/Microsoft_Compound_File
+func matchOleClsid(in []byte, clsid []byte) bool {
+ // Microsoft Compound files v3 have a sector length of 512, while v4 has 4096.
+ // Change sector offset depending on file version.
+ // https://www.loc.gov/preservation/digital/formats/fdd/fdd000392.shtml
+ sectorLength := 512
+ if len(in) < sectorLength {
+ return false
+ }
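+ // Bytes 26:28 hold the little-endian major version; version 4 documents use 4096-byte sectors.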
+ if in[26] == 0x04 && in[27] == 0x00 {
+ sectorLength = 4096
+ }
+
+ // SecID of first sector of the directory stream.
+ firstSecID := int(binary.LittleEndian.Uint32(in[48:52]))
+
+ // Expected offset of CLSID for root storage object.
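+ // Sector n starts at byte (n+1)*sectorLength because the header occupies the first
+ // sector; the CLSID field sits at offset 80 within the 128-byte root directory entry.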
+ clsidOffset := sectorLength*(1+firstSecID) + 80
+
+ if len(in) <= clsidOffset+16 {
+ return false
+ }
+
+ return bytes.HasPrefix(in[clsidOffset:], clsid)
+}
diff --git a/vendor/github.com/gabriel-vasile/mimetype/internal/magic/netpbm.go b/vendor/github.com/gabriel-vasile/mimetype/internal/magic/netpbm.go
new file mode 100644
index 0000000..4baa257
--- /dev/null
+++ b/vendor/github.com/gabriel-vasile/mimetype/internal/magic/netpbm.go
@@ -0,0 +1,111 @@
+package magic
+
+import (
+ "bytes"
+ "strconv"
+
+ "github.com/gabriel-vasile/mimetype/internal/scan"
+)
+
+// NetPBM matches a Netpbm Portable BitMap ASCII/Binary file.
+//
+// See: https://en.wikipedia.org/wiki/Netpbm
+func NetPBM(raw []byte, _ uint32) bool {
+ return netp(raw, "P1\n", "P4\n")
+}
+
+// NetPGM matches a Netpbm Portable GrayMap ASCII/Binary file.
+//
+// See: https://en.wikipedia.org/wiki/Netpbm
+func NetPGM(raw []byte, _ uint32) bool {
+ return netp(raw, "P2\n", "P5\n")
+}
+
+// NetPPM matches a Netpbm Portable PixMap ASCII/Binary file.
+//
+// See: https://en.wikipedia.org/wiki/Netpbm
+func NetPPM(raw []byte, _ uint32) bool {
+ return netp(raw, "P3\n", "P6\n")
+}
+
+// NetPAM matches a Netpbm Portable Arbitrary Map file.
+//
+// See: https://en.wikipedia.org/wiki/Netpbm
+func NetPAM(raw []byte, _ uint32) bool {
+ if !bytes.HasPrefix(raw, []byte("P7\n")) {
+ return false
+ }
+ w, h, d, m, e := false, false, false, false, false
+ s := scan.Bytes(raw)
+ var l scan.Bytes
+ // Read line by line.
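+ // The 128-line cap bounds the scan on malformed or header-less input.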
+ for i := 0; i < 128; i++ {
+ l = s.Line()
+ // If the line is empty or a comment, skip.
+ if len(l) == 0 || l.Peek() == '#' {
+ if len(s) == 0 {
+ return false
+ }
+ continue
+ } else if bytes.HasPrefix(l, []byte("TUPLTYPE")) {
+ continue
+ } else if bytes.HasPrefix(l, []byte("WIDTH ")) {
+ w = true
+ } else if bytes.HasPrefix(l, []byte("HEIGHT ")) {
+ h = true
+ } else if bytes.HasPrefix(l, []byte("DEPTH ")) {
+ d = true
+ } else if bytes.HasPrefix(l, []byte("MAXVAL ")) {
+ m = true
+ } else if bytes.HasPrefix(l, []byte("ENDHDR")) {
+ e = true
+ }
+ // When ENDHDR is reached, return true only if all four required headers
+ // (WIDTH, HEIGHT, DEPTH, and MAXVAL) were seen.
+ if e {
+ return w && h && d && m
+ }
+ }
+ return false
+}
+
+func netp(s scan.Bytes, prefixes ...string) bool {
+ foundPrefix := ""
+ for _, p := range prefixes {
+ if bytes.HasPrefix(s, []byte(p)) {
+ foundPrefix = p
+ }
+ }
+ if foundPrefix == "" {
+ return false
+ }
+ s.Advance(len(foundPrefix)) // jump over P1, P2, P3, etc.
+
+ var l scan.Bytes
+ // Read line by line.
+ for i := 0; i < 128; i++ {
+ l = s.Line()
+ // If the line is a comment, skip.
+ if l.Peek() == '#' {
+ continue
+ }
+ // Skip any leading whitespace on the line.
+ for scan.ByteIsWS(l.Peek()) {
+ l.Advance(1)
+ }
+ if len(s) == 0 || len(l) > 0 {
+ break
+ }
+ }
+
+ // At this point l should hold the two integers denoting the image width and height.
+ width := l.PopUntil(scan.ASCIISpaces...)
+ for scan.ByteIsWS(l.Peek()) {
+ l.Advance(1)
+ }
+ height := l.PopUntil(scan.ASCIISpaces...)
+
+ w, errw := strconv.ParseInt(string(width), 10, 64)
+ h, errh := strconv.ParseInt(string(height), 10, 64)
+ return errw == nil && errh == nil && w > 0 && h > 0
+}
diff --git a/vendor/github.com/gabriel-vasile/mimetype/internal/magic/ogg.go b/vendor/github.com/gabriel-vasile/mimetype/internal/magic/ogg.go
new file mode 100644
index 0000000..bb4cd78
--- /dev/null
+++ b/vendor/github.com/gabriel-vasile/mimetype/internal/magic/ogg.go
@@ -0,0 +1,42 @@
+package magic
+
+import (
+ "bytes"
+)
+
+/*
+ NOTE:
+
+ In May 2003, two Internet RFCs were published relating to the format.
+ The Ogg bitstream was defined in RFC 3533 (which is classified as
+ 'informative') and its Internet content type (application/ogg) in RFC
+ 3534 (which is, as of 2006, a proposed standard protocol). In
+ September 2008, RFC 3534 was obsoleted by RFC 5334, which added
+ content types video/ogg, audio/ogg and filename extensions .ogx, .ogv,
+ .oga, .spx.
+
+ See:
+ https://tools.ietf.org/html/rfc3533
+ https://developer.mozilla.org/en-US/docs/Web/HTTP/Configuring_servers_for_Ogg_media#Serve_media_with_the_correct_MIME_type
+ https://github.com/file/file/blob/master/magic/Magdir/vorbis
+*/
+
+// Ogg matches an Ogg file.
+func Ogg(raw []byte, limit uint32) bool {
+ return bytes.HasPrefix(raw, []byte("\x4F\x67\x67\x53\x00"))
+}
+
+// OggAudio matches an Ogg audio file.
+func OggAudio(raw []byte, limit uint32) bool {
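+ // The first Ogg page carries the codec identification header; with the 27-byte
+ // page header plus a 1-byte segment table, the codec magic starts at offset 28.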
+ return len(raw) >= 37 && (bytes.HasPrefix(raw[28:], []byte("\x7fFLAC")) ||
+ bytes.HasPrefix(raw[28:], []byte("\x01vorbis")) ||
+ bytes.HasPrefix(raw[28:], []byte("OpusHead")) ||
+ bytes.HasPrefix(raw[28:], []byte("Speex\x20\x20\x20")))
+}
+
+// OggVideo matches an Ogg video file.
+func OggVideo(raw []byte, limit uint32) bool {
+ return len(raw) >= 37 && (bytes.HasPrefix(raw[28:], []byte("\x80theora")) ||
+ bytes.HasPrefix(raw[28:], []byte("fishead\x00")) ||
+ bytes.HasPrefix(raw[28:], []byte("\x01video\x00\x00\x00"))) // OGM video
+}
diff --git a/vendor/github.com/gabriel-vasile/mimetype/internal/magic/text.go b/vendor/github.com/gabriel-vasile/mimetype/internal/magic/text.go
new file mode 100644
index 0000000..1841ee8
--- /dev/null
+++ b/vendor/github.com/gabriel-vasile/mimetype/internal/magic/text.go
@@ -0,0 +1,411 @@
+package magic
+
+import (
+ "bytes"
+ "time"
+
+ "github.com/gabriel-vasile/mimetype/internal/charset"
+ "github.com/gabriel-vasile/mimetype/internal/json"
+ mkup "github.com/gabriel-vasile/mimetype/internal/markup"
+ "github.com/gabriel-vasile/mimetype/internal/scan"
+)
+
+var (
+ // HTML matches a Hypertext Markup Language file.
+ HTML = markup(
+ []byte(" 0
+}
+
+// NdJSON matches a newline-delimited JSON file. Every complete line in raw
+// must be a valid JSON document containing one of the valid JSON data types.
+func NdJSON(raw []byte, limit uint32) bool {
+ lCount, objOrArr := 0, 0
+
+ s := scan.Bytes(raw)
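+ // The input may have been cut off at the detection limit, so a trailing
+ // incomplete line is dropped rather than treated as invalid JSON.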
+ s.DropLastLine(limit)
+ var l scan.Bytes
+ for len(s) != 0 {
+ l = s.Line()
+ _, inspected, firstToken, _ := json.Parse(json.QueryNone, l)
+ if len(l) != inspected {
+ return false
+ }
+ if firstToken == json.TokArray || firstToken == json.TokObject {
+ objOrArr++
+ }
+ lCount++
+ }
+
+ return lCount > 1 && objOrArr > 0
+}
+
+// Svg matches an SVG file.
+func Svg(raw []byte, limit uint32) bool {
+ return svgWithoutXMLDeclaration(raw) || svgWithXMLDeclaration(raw)
+}
+
+// svgWithoutXMLDeclaration matches an SVG image that does not have an XML declaration.
+// Example:
+//
+// <svg xmlns="http://www.w3.org/2000/svg" width="100" height="100">
+// <circle cx="50" cy="50" r="40"/></svg>
+func svgWithoutXMLDeclaration(s scan.Bytes) bool {
+ for scan.ByteIsWS(s.Peek()) {
+ s.Advance(1)
+ }
+ for mkup.SkipAComment(&s) {
+ }
+ if !bytes.HasPrefix(s, []byte("<svg")) {