package magic import ( "bufio" "bytes" "strings" "time" "github.com/gabriel-vasile/mimetype/internal/charset" "github.com/gabriel-vasile/mimetype/internal/json" ) var ( // HTML matches a Hypertext Markup Language file. HTML = markup( []byte(" 0 } // GeoJSON matches a RFC 7946 GeoJSON file. // // GeoJSON detection implies searching for key:value pairs like: `"type": "Feature"` // in the input. // BUG(gabriel-vasile): The "type" key should be searched for in the root object. func GeoJSON(raw []byte, limit uint32) bool { raw = trimLWS(raw) if len(raw) == 0 { return false } // GeoJSON is always a JSON object, not a JSON array or any other JSON value. if raw[0] != '{' { return false } s := []byte(`"type"`) si, sl := bytes.Index(raw, s), len(s) if si == -1 { return false } // If the "type" string is the suffix of the input, // there is no need to search for the value of the key. if si+sl == len(raw) { return false } // Skip the "type" part. raw = raw[si+sl:] // Skip any whitespace before the colon. raw = trimLWS(raw) // Check for colon. if len(raw) == 0 || raw[0] != ':' { return false } // Skip any whitespace after the colon. raw = trimLWS(raw[1:]) geoJSONTypes := [][]byte{ []byte(`"Feature"`), []byte(`"FeatureCollection"`), []byte(`"Point"`), []byte(`"LineString"`), []byte(`"Polygon"`), []byte(`"MultiPoint"`), []byte(`"MultiLineString"`), []byte(`"MultiPolygon"`), []byte(`"GeometryCollection"`), } for _, t := range geoJSONTypes { if bytes.HasPrefix(raw, t) { return true } } return false } // NdJSON matches a Newline delimited JSON file. All complete lines from raw // must be valid JSON documents meaning they contain one of the valid JSON data // types. func NdJSON(raw []byte, limit uint32) bool { lCount, hasObjOrArr := 0, false sc := bufio.NewScanner(dropLastLine(raw, limit)) for sc.Scan() { l := sc.Bytes() // Empty lines are allowed in NDJSON. 
if l = trimRWS(trimLWS(l)); len(l) == 0 { continue } _, err := json.Scan(l) if err != nil { return false } if l[0] == '[' || l[0] == '{' { hasObjOrArr = true } lCount++ } return lCount > 1 && hasObjOrArr } // HAR matches a HAR Spec file. // Spec: http://www.softwareishard.com/blog/har-12-spec/ func HAR(raw []byte, limit uint32) bool { s := []byte(`"log"`) si, sl := bytes.Index(raw, s), len(s) if si == -1 { return false } // If the "log" string is the suffix of the input, // there is no need to search for the value of the key. if si+sl == len(raw) { return false } // Skip the "log" part. raw = raw[si+sl:] // Skip any whitespace before the colon. raw = trimLWS(raw) // Check for colon. if len(raw) == 0 || raw[0] != ':' { return false } // Skip any whitespace after the colon. raw = trimLWS(raw[1:]) harJSONTypes := [][]byte{ []byte(`"version"`), []byte(`"creator"`), []byte(`"entries"`), } for _, t := range harJSONTypes { si := bytes.Index(raw, t) if si > -1 { return true } } return false } // Svg matches a SVG file. func Svg(raw []byte, limit uint32) bool { return bytes.Contains(raw, []byte(" 00:02:19,376) limits secondLine // length to exactly 29 characters. if len(secondLine) != 29 { return false } // Decimal separator of fractional seconds in the timestamps must be a // comma, not a period. if strings.Contains(secondLine, ".") { return false } // For Go <1.17, comma is not recognised as a decimal separator by `time.Parse`. secondLine = strings.ReplaceAll(secondLine, ",", ".") // Second line must be a time range. ts := strings.Split(secondLine, " --> ") if len(ts) != 2 { return false } const layout = "15:04:05.000" t0, err := time.Parse(layout, ts[0]) if err != nil { return false } t1, err := time.Parse(layout, ts[1]) if err != nil { return false } if t0.After(t1) { return false } // A third line must exist and not be empty. This is the actual subtitle text. return s.Scan() && len(s.Bytes()) != 0 } // Vtt matches a Web Video Text Tracks (WebVTT) file. 
// See https://www.iana.org/assignments/media-types/text/vtt.
//
// The input matches when, after an optional UTF-8 byte order mark, it starts
// with the ASCII signature "WEBVTT" and that signature is either the end of
// the input or is immediately followed by a line feed, a carriage return, a
// space or a horizontal tab. The limit parameter is not used.
func Vtt(raw []byte, limit uint32) bool {
	// Drop one leading UTF-8 BOM, if present. TrimPrefix returns raw
	// unchanged when the BOM is absent, so both variants share the checks
	// below.
	rest := bytes.TrimPrefix(raw, []byte{0xEF, 0xBB, 0xBF})

	const sig = "WEBVTT"
	if len(rest) < len(sig) || string(rest[:len(sig)]) != sig {
		return false
	}

	// Exact match: nothing follows the signature.
	if len(rest) == len(sig) {
		return true
	}

	// Prefix match: the signature must be terminated by whitespace, so that
	// inputs such as "WEBVTTX" are rejected.
	switch rest[len(sig)] {
	case '\n', '\r', ' ', '\t':
		return true
	}
	return false
}