// Package magic holds the matching functions used to find MIME types. package magic import ( "bytes" "fmt" ) type ( // Detector receiveѕ the raw data of a file and returns whether the data // meets any conditions. The limit parameter is an upper limit to the number // of bytes received and is used to tell if the byte slice represents the // whole file or is just the header of a file: len(raw) < limit or len(raw)>limit. Detector func(raw []byte, limit uint32) bool xmlSig struct { // the local name of the root tag localName []byte // the namespace of the XML document xmlns []byte } ) // prefix creates a Detector which returns true if any of the provided signatures // is the prefix of the raw input. func prefix(sigs ...[]byte) Detector { return func(raw []byte, limit uint32) bool { for _, s := range sigs { if bytes.HasPrefix(raw, s) { return true } } return false } } // offset creates a Detector which returns true if the provided signature can be // found at offset in the raw input. func offset(sig []byte, offset int) Detector { return func(raw []byte, limit uint32) bool { return len(raw) > offset && bytes.HasPrefix(raw[offset:], sig) } } // ciPrefix is like prefix but the check is case insensitive. func ciPrefix(sigs ...[]byte) Detector { return func(raw []byte, limit uint32) bool { for _, s := range sigs { if ciCheck(s, raw) { return true } } return false } } func ciCheck(sig, raw []byte) bool { if len(raw) < len(sig)+1 { return false } // perform case insensitive check for i, b := range sig { db := raw[i] if 'A' <= b && b <= 'Z' { db &= 0xDF } if b != db { return false } } return true } // xml creates a Detector which returns true if any of the provided XML signatures // matches the raw input. func xml(sigs ...xmlSig) Detector { return func(raw []byte, limit uint32) bool { raw = trimLWS(raw) if len(raw) == 0 { return false } for _, s := range sigs { if xmlCheck(s, raw) { return true } } return false } } func xmlCheck(sig xmlSig, raw []byte) bool { raw = raw[:min(len(raw), 512)] if len(sig.localName) == 0 { return bytes.Index(raw, sig.xmlns) > 0 } if len(sig.xmlns) == 0 { return bytes.Index(raw, sig.localName) > 0 } localNameIndex := bytes.Index(raw, sig.localName) return localNameIndex != -1 && localNameIndex < bytes.Index(raw, sig.xmlns) } // markup creates a Detector which returns true is any of the HTML signatures // matches the raw input. func markup(sigs ...[]byte) Detector { return func(raw []byte, limit uint32) bool { if bytes.HasPrefix(raw, []byte{0xEF, 0xBB, 0xBF}) { // We skip the UTF-8 BOM if present to ensure we correctly // process any leading whitespace. The presence of the BOM // is taken into account during charset detection in charset.go. raw = trimLWS(raw[3:]) } else { raw = trimLWS(raw) } if len(raw) == 0 { return false } for _, s := range sigs { if markupCheck(s, raw) { return true } } return false } } func markupCheck(sig, raw []byte) bool { if len(raw) < len(sig)+1 { return false } // perform case insensitive check for i, b := range sig { db := raw[i] if 'A' <= b && b <= 'Z' { db &= 0xDF } if b != db { return false } } // Next byte must be space or right angle bracket. if db := raw[len(sig)]; db != ' ' && db != '>' { return false } return true } // ftyp creates a Detector which returns true if any of the FTYP signatures // matches the raw input. func ftyp(sigs ...[]byte) Detector { return func(raw []byte, limit uint32) bool { if len(raw) < 12 { return false } for _, s := range sigs { if bytes.Equal(raw[4:12], append([]byte("ftyp"), s...)) { return true } } return false } } func newXMLSig(localName, xmlns string) xmlSig { ret := xmlSig{xmlns: []byte(xmlns)} if localName != "" { ret.localName = []byte(fmt.Sprintf("<%s", localName)) } return ret } // A valid shebang starts with the "#!" characters, // followed by any number of spaces, // followed by the path to the interpreter, // and, optionally, followed by the arguments for the interpreter. // // Ex: // #! /usr/bin/env php // /usr/bin/env is the interpreter, php is the first and only argument. func shebang(sigs ...[]byte) Detector { return func(raw []byte, limit uint32) bool { for _, s := range sigs { if shebangCheck(s, firstLine(raw)) { return true } } return false } } func shebangCheck(sig, raw []byte) bool { if len(raw) < len(sig)+2 { return false } if raw[0] != '#' || raw[1] != '!' { return false } return bytes.Equal(trimLWS(trimRWS(raw[2:])), sig) } // trimLWS trims whitespace from beginning of the input. func trimLWS(in []byte) []byte { firstNonWS := 0 for ; firstNonWS < len(in) && isWS(in[firstNonWS]); firstNonWS++ { } return in[firstNonWS:] } // trimRWS trims whitespace from the end of the input. func trimRWS(in []byte) []byte { lastNonWS := len(in) - 1 for ; lastNonWS > 0 && isWS(in[lastNonWS]); lastNonWS-- { } return in[:lastNonWS+1] } func firstLine(in []byte) []byte { lineEnd := 0 for ; lineEnd < len(in) && in[lineEnd] != '\n'; lineEnd++ { } return in[:lineEnd] } func isWS(b byte) bool { return b == '\t' || b == '\n' || b == '\x0c' || b == '\r' || b == ' ' } func min(a, b int) int { if a < b { return a } return b }