You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
141 lines
2.9 KiB
141 lines
2.9 KiB
package xml2json
|
|
|
|
import (
	"encoding/xml"
	"io"
	"strings"
	"unicode"

	"golang.org/x/net/html/charset"
)
|
|
|
|
// Default key prefixes. Decode installs them on a Decoder whose corresponding
// prefix is still the empty string.
const (
	// attrPrefix is the default prefix put in front of an XML attribute's
	// local name when the attribute is added as a child node.
	attrPrefix = "-"

	// contentPrefix is the default value for Decoder.contentPrefix.
	contentPrefix = "#"
)
|
|
|
|
// Decoder reads an XML document from an input stream and decodes it into a
// tree of nodes.
type Decoder struct {
	// r is the stream the XML document is read from.
	r io.Reader
	// err is not referenced by the decoding code in this part of the file.
	err error

	// attributePrefix is prepended to attribute names when attributes are
	// added as child nodes; contentPrefix is the companion prefix for element
	// content. Decode replaces an empty value with the package default.
	attributePrefix, contentPrefix string
}
|
|
|
|
type element struct {
|
|
parent *element
|
|
n *Node
|
|
label string
|
|
}
|
|
|
|
func (dec *Decoder) SetAttributePrefix(prefix string) {
|
|
dec.attributePrefix = prefix
|
|
}
|
|
|
|
func (dec *Decoder) SetContentPrefix(prefix string) {
|
|
dec.contentPrefix = prefix
|
|
}
|
|
|
|
func (dec *Decoder) DecodeWithCustomPrefixes(root *Node, contentPrefix string, attributePrefix string) error {
|
|
dec.contentPrefix = contentPrefix
|
|
dec.attributePrefix = attributePrefix
|
|
return dec.Decode(root)
|
|
}
|
|
|
|
// NewDecoder returns a new decoder that reads from r.
|
|
func NewDecoder(r io.Reader) *Decoder {
|
|
return &Decoder{r: r}
|
|
}
|
|
|
|
// Decode reads the next JSON-encoded value from its
|
|
// input and stores it in the value pointed to by v.
|
|
func (dec *Decoder) Decode(root *Node) error {
|
|
|
|
if dec.contentPrefix == "" {
|
|
dec.contentPrefix = contentPrefix
|
|
}
|
|
if dec.attributePrefix == "" {
|
|
dec.attributePrefix = attrPrefix
|
|
}
|
|
|
|
xmlDec := xml.NewDecoder(dec.r)
|
|
|
|
// That will convert the charset if the provided XML is non-UTF-8
|
|
xmlDec.CharsetReader = charset.NewReaderLabel
|
|
|
|
// Create first element from the root node
|
|
elem := &element{
|
|
parent: nil,
|
|
n: root,
|
|
}
|
|
|
|
for {
|
|
t, _ := xmlDec.Token()
|
|
if t == nil {
|
|
break
|
|
}
|
|
|
|
switch se := t.(type) {
|
|
case xml.StartElement:
|
|
// Build new a new current element and link it to its parent
|
|
elem = &element{
|
|
parent: elem,
|
|
n: &Node{},
|
|
label: se.Name.Local,
|
|
}
|
|
|
|
// Extract attributes as children
|
|
for _, a := range se.Attr {
|
|
elem.n.AddChild(dec.attributePrefix+a.Name.Local, &Node{Data: a.Value})
|
|
}
|
|
case xml.CharData:
|
|
// Extract XML data (if any)
|
|
elem.n.Data = trimNonGraphic(string(xml.CharData(se)))
|
|
case xml.EndElement:
|
|
// And add it to its parent list
|
|
if elem.parent != nil {
|
|
elem.parent.n.AddChild(elem.label, elem.n)
|
|
}
|
|
|
|
// Then change the current element to its parent
|
|
elem = elem.parent
|
|
}
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// trimNonGraphic returns a slice of the string s, with all leading and
// trailing non graphic characters and spaces removed. Characters between the
// first and last retained character are left exactly as they are, and the
// result is the empty string when s contains nothing to keep.
//
// Graphic characters include letters, marks, numbers, punctuation, symbols,
// and spaces, from categories L, M, N, P, S, Zs.
// Spacing characters are set by category Z and property Pattern_White_Space.
func trimNonGraphic(s string) string {
	// strings.TrimFunc returns a sub-slice of s, so no intermediate rune
	// slices are built and the bytes of s are never re-encoded.
	return strings.TrimFunc(s, func(r rune) bool {
		return !unicode.IsGraphic(r) || unicode.IsSpace(r)
	})
}
|