package mimetype

import (
	"mime"

	"github.com/gabriel-vasile/mimetype/internal/charset"
	"github.com/gabriel-vasile/mimetype/internal/magic"
)

// MIME struct holds information about a file format: the string representation
|
|
// of the MIME type, the extension and the parent file format.
|
|
type MIME struct {
|
|
mime string
|
|
aliases []string
|
|
extension string
|
|
// detector receives the raw input and a limit for the number of bytes it is
|
|
// allowed to check. It returns whether the input matches a signature or not.
|
|
detector magic.Detector
|
|
children []*MIME
|
|
parent *MIME
|
|
}
|
|
|
|
// String returns the string representation of the MIME type, e.g., "application/zip".
|
|
func (m *MIME) String() string {
|
|
return m.mime
|
|
}
|
|
|
|
// Extension returns the file extension associated with the MIME type.
|
|
// It includes the leading dot, as in ".html". When the file format does not
|
|
// have an extension, the empty string is returned.
|
|
func (m *MIME) Extension() string {
|
|
return m.extension
|
|
}
|
|
|
|
// Parent returns the parent MIME type from the hierarchy.
|
|
// Each MIME type has a non-nil parent, except for the root MIME type.
|
|
//
|
|
// For example, the application/json and text/html MIME types have text/plain as
|
|
// their parent because they are text files who happen to contain JSON or HTML.
|
|
// Another example is the ZIP format, which is used as container
|
|
// for Microsoft Office files, EPUB files, JAR files, and others.
|
|
func (m *MIME) Parent() *MIME {
|
|
return m.parent
|
|
}
|
|
|
|
// Is checks whether this MIME type, or any of its aliases, is equal to the
|
|
// expected MIME type. MIME type equality test is done on the "type/subtype"
|
|
// section, ignores any optional MIME parameters, ignores any leading and
|
|
// trailing whitespace, and is case insensitive.
|
|
func (m *MIME) Is(expectedMIME string) bool {
|
|
// Parsing is needed because some detected MIME types contain parameters
|
|
// that need to be stripped for the comparison.
|
|
expectedMIME, _, _ = mime.ParseMediaType(expectedMIME)
|
|
found, _, _ := mime.ParseMediaType(m.mime)
|
|
|
|
if expectedMIME == found {
|
|
return true
|
|
}
|
|
|
|
for _, alias := range m.aliases {
|
|
if alias == expectedMIME {
|
|
return true
|
|
}
|
|
}
|
|
|
|
return false
|
|
}
|
|
|
|
func newMIME(
|
|
mime, extension string,
|
|
detector magic.Detector,
|
|
children ...*MIME) *MIME {
|
|
m := &MIME{
|
|
mime: mime,
|
|
extension: extension,
|
|
detector: detector,
|
|
children: children,
|
|
}
|
|
|
|
for _, c := range children {
|
|
c.parent = m
|
|
}
|
|
|
|
return m
|
|
}
|
|
|
|
func (m *MIME) alias(aliases ...string) *MIME {
|
|
m.aliases = aliases
|
|
return m
|
|
}
|
|
|
|
// match does a depth-first search on the signature tree. It returns the deepest
|
|
// successful node for which all the children detection functions fail.
|
|
func (m *MIME) match(in []byte, readLimit uint32) *MIME {
|
|
for _, c := range m.children {
|
|
if c.detector(in, readLimit) {
|
|
return c.match(in, readLimit)
|
|
}
|
|
}
|
|
|
|
needsCharset := map[string]func([]byte) string{
|
|
"text/plain": charset.FromPlain,
|
|
"text/html": charset.FromHTML,
|
|
"text/xml": charset.FromXML,
|
|
}
|
|
// ps holds optional MIME parameters.
|
|
ps := map[string]string{}
|
|
if f, ok := needsCharset[m.mime]; ok {
|
|
if cset := f(in); cset != "" {
|
|
ps["charset"] = cset
|
|
}
|
|
}
|
|
|
|
return m.cloneHierarchy(ps)
|
|
}
|
|
|
|
// flatten transforms an hierarchy of MIMEs into a slice of MIMEs.
|
|
func (m *MIME) flatten() []*MIME {
|
|
out := []*MIME{m}
|
|
for _, c := range m.children {
|
|
out = append(out, c.flatten()...)
|
|
}
|
|
|
|
return out
|
|
}
|
|
|
|
// clone creates a new MIME with the provided optional MIME parameters.
|
|
func (m *MIME) clone(ps map[string]string) *MIME {
|
|
clonedMIME := m.mime
|
|
if len(ps) > 0 {
|
|
clonedMIME = mime.FormatMediaType(m.mime, ps)
|
|
}
|
|
|
|
return &MIME{
|
|
mime: clonedMIME,
|
|
aliases: m.aliases,
|
|
extension: m.extension,
|
|
}
|
|
}
|
|
|
|
// cloneHierarchy creates a clone of m and all its ancestors. The optional MIME
|
|
// parameters are set on the last child of the hierarchy.
|
|
func (m *MIME) cloneHierarchy(ps map[string]string) *MIME {
|
|
ret := m.clone(ps)
|
|
lastChild := ret
|
|
for p := m.Parent(); p != nil; p = p.Parent() {
|
|
pClone := p.clone(nil)
|
|
lastChild.parent = pClone
|
|
lastChild = pClone
|
|
}
|
|
|
|
return ret
|
|
}
|
|
|
|
func (m *MIME) lookup(mime string) *MIME {
|
|
for _, n := range append(m.aliases, m.mime) {
|
|
if n == mime {
|
|
return m
|
|
}
|
|
}
|
|
|
|
for _, c := range m.children {
|
|
if m := c.lookup(mime); m != nil {
|
|
return m
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// Extend adds detection for a sub-format. The detector is a function
|
|
// returning true when the raw input file satisfies a signature.
|
|
// The sub-format will be detected if all the detectors in the parent chain return true.
|
|
// The extension should include the leading dot, as in ".html".
|
|
func (m *MIME) Extend(detector func(raw []byte, limit uint32) bool, mime, extension string, aliases ...string) {
|
|
c := &MIME{
|
|
mime: mime,
|
|
extension: extension,
|
|
detector: detector,
|
|
parent: m,
|
|
aliases: aliases,
|
|
}
|
|
|
|
mu.Lock()
|
|
m.children = append([]*MIME{c}, m.children...)
|
|
mu.Unlock()
|
|
}
|