You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
weishi/vendor/github.com/basgys/goxml2json/encoder.go

198 lines
4.2 KiB

package xml2json
import (
"bytes"
"io"
"unicode/utf8"
)
// An Encoder writes JSON objects to an output stream.
type Encoder struct {
w io.Writer
err error
contentPrefix string
attributePrefix string
}
// NewEncoder returns a new encoder that writes to w.
func NewEncoder(w io.Writer) *Encoder {
return &Encoder{w: w}
}
func (enc *Encoder) SetAttributePrefix(prefix string) {
enc.attributePrefix = prefix
}
func (enc *Encoder) SetContentPrefix(prefix string) {
enc.contentPrefix = prefix
}
func (enc *Encoder) EncodeWithCustomPrefixes(root *Node, contentPrefix string, attributePrefix string) error {
enc.contentPrefix = contentPrefix
enc.attributePrefix = attributePrefix
return enc.Encode(root)
}
// Encode writes the JSON encoding of v to the stream
func (enc *Encoder) Encode(root *Node) error {
if enc.err != nil {
return enc.err
}
if root == nil {
return nil
}
if enc.contentPrefix == "" {
enc.contentPrefix = contentPrefix
}
if enc.attributePrefix == "" {
enc.attributePrefix = attrPrefix
}
enc.err = enc.format(root, 0)
// Terminate each value with a newline.
// This makes the output look a little nicer
// when debugging, and some kind of space
// is required if the encoded value was a number,
// so that the reader knows there aren't more
// digits coming.
enc.write("\n")
return enc.err
}
func (enc *Encoder) format(n *Node, lvl int) error {
if n.IsComplex() {
enc.write("{")
// Add data as an additional attibute (if any)
if len(n.Data) > 0 {
enc.write("\"")
enc.write(enc.contentPrefix)
enc.write("content")
enc.write("\": ")
enc.write(sanitiseString(n.Data))
enc.write(", ")
}
i := 0
tot := len(n.Children)
for label, children := range n.Children {
enc.write("\"")
enc.write(label)
enc.write("\": ")
if len(children) > 1 {
// Array
enc.write("[")
for j, c := range children {
enc.format(c, lvl+1)
if j < len(children)-1 {
enc.write(", ")
}
}
enc.write("]")
} else {
// Map
enc.format(children[0], lvl+1)
}
if i < tot-1 {
enc.write(", ")
}
i++
}
enc.write("}")
} else {
// TODO : Extract data type
enc.write(sanitiseString(n.Data))
}
return nil
}
func (enc *Encoder) write(s string) {
enc.w.Write([]byte(s))
}
// https://golang.org/src/encoding/json/encode.go?s=5584:5627#L788
var hex = "0123456789abcdef"
func sanitiseString(s string) string {
var buf bytes.Buffer
buf.WriteByte('"')
start := 0
for i := 0; i < len(s); {
if b := s[i]; b < utf8.RuneSelf {
if 0x20 <= b && b != '\\' && b != '"' && b != '<' && b != '>' && b != '&' {
i++
continue
}
if start < i {
buf.WriteString(s[start:i])
}
switch b {
case '\\', '"':
buf.WriteByte('\\')
buf.WriteByte(b)
case '\n':
buf.WriteByte('\\')
buf.WriteByte('n')
case '\r':
buf.WriteByte('\\')
buf.WriteByte('r')
case '\t':
buf.WriteByte('\\')
buf.WriteByte('t')
default:
// This encodes bytes < 0x20 except for \n and \r,
// as well as <, > and &. The latter are escaped because they
// can lead to security holes when user-controlled strings
// are rendered into JSON and served to some browsers.
buf.WriteString(`\u00`)
buf.WriteByte(hex[b>>4])
buf.WriteByte(hex[b&0xF])
}
i++
start = i
continue
}
c, size := utf8.DecodeRuneInString(s[i:])
if c == utf8.RuneError && size == 1 {
if start < i {
buf.WriteString(s[start:i])
}
buf.WriteString(`\ufffd`)
i += size
start = i
continue
}
// U+2028 is LINE SEPARATOR.
// U+2029 is PARAGRAPH SEPARATOR.
// They are both technically valid characters in JSON strings,
// but don't work in JSONP, which has to be evaluated as JavaScript,
// and can lead to security holes there. It is valid JSON to
// escape them, so we do so unconditionally.
// See http://timelessrepo.com/json-isnt-a-javascript-subset for discussion.
if c == '\u2028' || c == '\u2029' {
if start < i {
buf.WriteString(s[start:i])
}
buf.WriteString(`\u202`)
buf.WriteByte(hex[c&0xF])
i += size
start = i
continue
}
i += size
}
if start < len(s) {
buf.WriteString(s[start:])
}
buf.WriteByte('"')
return buf.String()
}