add xml decode

master
李光春 2 years ago
parent 41eb0ee8e0
commit 78ff79fc69

@ -4,7 +4,6 @@ go 1.18
require (
github.com/basgys/goxml2json v1.1.0
github.com/beevik/etree v1.1.0
github.com/dtapps/gotime v1.0.1
go.mongodb.org/mongo-driver v1.9.0
)

@ -1,7 +1,5 @@
github.com/basgys/goxml2json v1.1.0 h1:4ln5i4rseYfXNd86lGEB+Vi652IsIXIvggKM/BhUKVw=
github.com/basgys/goxml2json v1.1.0/go.mod h1:wH7a5Np/Q4QoECFIU8zTQlZwZkrilY0itPfecMw41Dw=
github.com/beevik/etree v1.1.0 h1:T0xke/WvNtMoCqgzPhkX2r4rjY3GDZFi+FjpRZY2Jbs=
github.com/beevik/etree v1.1.0/go.mod h1:r8Aw8JqVegEf0w2fDnATrX9VpkMcyFeM0FhwO62wh+A=
github.com/bitly/go-simplejson v0.5.0 h1:6IH+V8/tVMab511d5bn4M7EwGXZf9Hj6i2xSwkNEM+Y=
github.com/bitly/go-simplejson v0.5.0/go.mod h1:cXHtHw4XUPsvGaxgjIAn8PhEWG9NfngEKAMDJEczWVA=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=

@ -1,14 +0,0 @@
language: go
sudo: false
go:
- 1.11.x
- tip
matrix:
allow_failures:
- go: tip
script:
- go vet ./...
- go test -v ./...

@ -1,10 +0,0 @@
Brett Vickers (beevik)
Felix Geisendörfer (felixge)
Kamil Kisiel (kisielk)
Graham King (grahamking)
Matt Smith (ma314smith)
Michal Jemala (michaljemala)
Nicolas Piganeau (npiganeau)
Chris Brown (ccbrown)
Earncef Sequeira (earncef)
Gabriel de Labachelerie (wuzuf)

@ -1,24 +0,0 @@
Copyright 2015-2019 Brett Vickers. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY COPYRIGHT HOLDER ``AS IS'' AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL COPYRIGHT HOLDER OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

@ -1,205 +0,0 @@
[![Build Status](https://travis-ci.org/beevik/etree.svg?branch=master)](https://travis-ci.org/beevik/etree)
[![GoDoc](https://godoc.org/github.com/beevik/etree?status.svg)](https://godoc.org/github.com/beevik/etree)
etree
=====
The etree package is a lightweight, pure go package that expresses XML in
the form of an element tree. Its design was inspired by the Python
[ElementTree](http://docs.python.org/2/library/xml.etree.elementtree.html)
module.
Some of the package's capabilities and features:
* Represents XML documents as trees of elements for easy traversal.
* Imports, serializes, modifies or creates XML documents from scratch.
* Writes and reads XML to/from files, byte slices, strings and io interfaces.
* Performs simple or complex searches with lightweight XPath-like query APIs.
* Auto-indents XML using spaces or tabs for better readability.
* Implemented in pure go; depends only on standard go libraries.
* Built on top of the go [encoding/xml](http://golang.org/pkg/encoding/xml)
package.
### Creating an XML document
The following example creates an XML document from scratch using the etree
package and outputs its indented contents to stdout.
```go
doc := etree.NewDocument()
doc.CreateProcInst("xml", `version="1.0" encoding="UTF-8"`)
doc.CreateProcInst("xml-stylesheet", `type="text/xsl" href="style.xsl"`)
people := doc.CreateElement("People")
people.CreateComment("These are all known people")
jon := people.CreateElement("Person")
jon.CreateAttr("name", "Jon")
sally := people.CreateElement("Person")
sally.CreateAttr("name", "Sally")
doc.Indent(2)
doc.WriteTo(os.Stdout)
```
Output:
```xml
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="style.xsl"?>
<People>
<!--These are all known people-->
<Person name="Jon"/>
<Person name="Sally"/>
</People>
```
### Reading an XML file
Suppose you have a file on disk called `bookstore.xml` containing the
following data:
```xml
<bookstore xmlns:p="urn:schemas-books-com:prices">
<book category="COOKING">
<title lang="en">Everyday Italian</title>
<author>Giada De Laurentiis</author>
<year>2005</year>
<p:price>30.00</p:price>
</book>
<book category="CHILDREN">
<title lang="en">Harry Potter</title>
<author>J K. Rowling</author>
<year>2005</year>
<p:price>29.99</p:price>
</book>
<book category="WEB">
<title lang="en">XQuery Kick Start</title>
<author>James McGovern</author>
<author>Per Bothner</author>
<author>Kurt Cagle</author>
<author>James Linn</author>
<author>Vaidyanathan Nagarajan</author>
<year>2003</year>
<p:price>49.99</p:price>
</book>
<book category="WEB">
<title lang="en">Learning XML</title>
<author>Erik T. Ray</author>
<year>2003</year>
<p:price>39.95</p:price>
</book>
</bookstore>
```
This code reads the file's contents into an etree document.
```go
doc := etree.NewDocument()
if err := doc.ReadFromFile("bookstore.xml"); err != nil {
panic(err)
}
```
You can also read XML from a string, a byte slice, or an `io.Reader`.
### Processing elements and attributes
This example illustrates several ways to access elements and attributes using
etree selection queries.
```go
root := doc.SelectElement("bookstore")
fmt.Println("ROOT element:", root.Tag)
for _, book := range root.SelectElements("book") {
fmt.Println("CHILD element:", book.Tag)
if title := book.SelectElement("title"); title != nil {
lang := title.SelectAttrValue("lang", "unknown")
fmt.Printf(" TITLE: %s (%s)\n", title.Text(), lang)
}
for _, attr := range book.Attr {
fmt.Printf(" ATTR: %s=%s\n", attr.Key, attr.Value)
}
}
```
Output:
```
ROOT element: bookstore
CHILD element: book
TITLE: Everyday Italian (en)
ATTR: category=COOKING
CHILD element: book
TITLE: Harry Potter (en)
ATTR: category=CHILDREN
CHILD element: book
TITLE: XQuery Kick Start (en)
ATTR: category=WEB
CHILD element: book
TITLE: Learning XML (en)
ATTR: category=WEB
```
### Path queries
This example uses etree's path functions to select all book titles that fall
into the category of 'WEB'. The double-slash prefix in the path causes the
search for book elements to occur recursively; book elements may appear at any
level of the XML hierarchy.
```go
for _, t := range doc.FindElements("//book[@category='WEB']/title") {
fmt.Println("Title:", t.Text())
}
```
Output:
```
Title: XQuery Kick Start
Title: Learning XML
```
This example finds the first book element under the root bookstore element and
outputs the tag and text of each of its child elements.
```go
for _, e := range doc.FindElements("./bookstore/book[1]/*") {
fmt.Printf("%s: %s\n", e.Tag, e.Text())
}
```
Output:
```
title: Everyday Italian
author: Giada De Laurentiis
year: 2005
price: 30.00
```
This example finds all books with a price of 49.99 and outputs their titles.
```go
path := etree.MustCompilePath("./bookstore/book[p:price='49.99']/title")
for _, e := range doc.FindElementsPath(path) {
fmt.Println(e.Text())
}
```
Output:
```
XQuery Kick Start
```
Note that this example uses the FindElementsPath function, which takes as an
argument a pre-compiled path object. Use precompiled paths when you plan to
search with the same path more than once.
### Other features
These are just a few examples of the things the etree package can do. See the
[documentation](http://godoc.org/github.com/beevik/etree) for a complete
description of its capabilities.
### Contributing
This project accepts contributions. Just fork the repo and submit a pull
request!

@ -1,109 +0,0 @@
Release v1.1.0
==============
**New Features**
* New attribute helpers.
* Added the `Element.SortAttrs` method, which lexicographically sorts an
element's attributes by key.
* New `ReadSettings` properties.
* Added `Entity` for the support of custom entity maps.
* New `WriteSettings` properties.
* Added `UseCRLF` to allow the output of CR-LF newlines instead of the
default LF newlines. This is useful on Windows systems.
* Additional support for text and CDATA sections.
* The `Element.Text` method now returns the concatenation of all consecutive
character data tokens immediately following an element's opening tag.
* Added `Element.SetCData` to replace the character data immediately
following an element's opening tag with a CDATA section.
* Added `Element.CreateCData` to create and add a CDATA section child
`CharData` token to an element.
* Added `Element.CreateText` to create and add a child text `CharData` token
to an element.
* Added `NewCData` to create a parentless CDATA section `CharData` token.
* Added `NewText` to create a parentless text `CharData`
token.
* Added `CharData.IsCData` to detect if the token contains a CDATA section.
* Added `CharData.IsWhitespace` to detect if the token contains whitespace
inserted by one of the document Indent functions.
* Modified `Element.SetText` so that it replaces a run of consecutive
character data tokens following the element's opening tag (instead of just
the first one).
* New "tail text" support.
* Added the `Element.Tail` method, which returns the text immediately
following an element's closing tag.
* Added the `Element.SetTail` method, which modifies the text immediately
following an element's closing tag.
* New element child insertion and removal methods.
* Added the `Element.InsertChildAt` method, which inserts a new child token
before the specified child token index.
* Added the `Element.RemoveChildAt` method, which removes the child token at
the specified child token index.
* New element and attribute queries.
* Added the `Element.Index` method, which returns the element's index within
its parent element's child token list.
* Added the `Element.NamespaceURI` method to return the namespace URI
associated with an element.
* Added the `Attr.NamespaceURI` method to return the namespace URI
associated with an element.
* Added the `Attr.Element` method to return the element that an attribute
belongs to.
* New Path filter functions.
* Added `[local-name()='val']` to keep elements whose unprefixed tag matches
the desired value.
* Added `[name()='val']` to keep elements whose full tag matches the desired
value.
* Added `[namespace-prefix()='val']` to keep elements whose namespace prefix
matches the desired value.
* Added `[namespace-uri()='val']` to keep elements whose namespace URI
matches the desired value.
**Bug Fixes**
* A default XML `CharSetReader` is now used to prevent failed parsing of XML
documents using certain encodings.
([Issue](https://github.com/beevik/etree/issues/53)).
* All characters are now properly escaped according to XML parsing rules.
([Issue](https://github.com/beevik/etree/issues/55)).
* The `Document.Indent` and `Document.IndentTabs` functions no longer insert
empty string `CharData` tokens.
**Deprecated**
* `Element`
* The `InsertChild` method is deprecated. Use `InsertChildAt` instead.
* The `CreateCharData` method is deprecated. Use `CreateText` instead.
* `CharData`
* The `NewCharData` method is deprecated. Use `NewText` instead.
Release v1.0.1
==============
**Changes**
* Added support for absolute etree Path queries. An absolute path begins with
`/` or `//` and begins its search from the element's document root.
* Added [`GetPath`](https://godoc.org/github.com/beevik/etree#Element.GetPath)
and [`GetRelativePath`](https://godoc.org/github.com/beevik/etree#Element.GetRelativePath)
functions to the [`Element`](https://godoc.org/github.com/beevik/etree#Element)
type.
**Breaking changes**
* A path starting with `//` is now interpreted as an absolute path.
Previously, it was interpreted as a relative path starting from the element
whose
[`FindElement`](https://godoc.org/github.com/beevik/etree#Element.FindElement)
method was called. To remain compatible with this release, all paths
prefixed with `//` should be prefixed with `.//` when called from any
element other than the document's root.
* [**edit 2/1/2019**]: Minor releases should not contain breaking changes.
Even though this breaking change was very minor, it was a mistake to include
it in this minor release. In the future, all breaking changes will be
limited to major releases (e.g., version 2.0.0).
Release v1.0.0
==============
Initial release.

File diff suppressed because it is too large Load Diff

@ -1,276 +0,0 @@
// Copyright 2015-2019 Brett Vickers.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package etree
import (
"bufio"
"io"
"strings"
"unicode/utf8"
)
// A simple stack
type stack struct {
data []interface{}
}
func (s *stack) empty() bool {
return len(s.data) == 0
}
func (s *stack) push(value interface{}) {
s.data = append(s.data, value)
}
func (s *stack) pop() interface{} {
value := s.data[len(s.data)-1]
s.data[len(s.data)-1] = nil
s.data = s.data[:len(s.data)-1]
return value
}
func (s *stack) peek() interface{} {
return s.data[len(s.data)-1]
}
// A fifo is a simple first-in-first-out queue.
type fifo struct {
data []interface{}
head, tail int
}
func (f *fifo) add(value interface{}) {
if f.len()+1 >= len(f.data) {
f.grow()
}
f.data[f.tail] = value
if f.tail++; f.tail == len(f.data) {
f.tail = 0
}
}
func (f *fifo) remove() interface{} {
value := f.data[f.head]
f.data[f.head] = nil
if f.head++; f.head == len(f.data) {
f.head = 0
}
return value
}
func (f *fifo) len() int {
if f.tail >= f.head {
return f.tail - f.head
}
return len(f.data) - f.head + f.tail
}
func (f *fifo) grow() {
c := len(f.data) * 2
if c == 0 {
c = 4
}
buf, count := make([]interface{}, c), f.len()
if f.tail >= f.head {
copy(buf[0:count], f.data[f.head:f.tail])
} else {
hindex := len(f.data) - f.head
copy(buf[0:hindex], f.data[f.head:])
copy(buf[hindex:count], f.data[:f.tail])
}
f.data, f.head, f.tail = buf, 0, count
}
// countReader implements a proxy reader that counts the number of
// bytes read from its encapsulated reader.
type countReader struct {
r io.Reader
bytes int64
}
func newCountReader(r io.Reader) *countReader {
return &countReader{r: r}
}
func (cr *countReader) Read(p []byte) (n int, err error) {
b, err := cr.r.Read(p)
cr.bytes += int64(b)
return b, err
}
// countWriter implements a proxy writer that counts the number of
// bytes written by its encapsulated writer.
type countWriter struct {
w io.Writer
bytes int64
}
func newCountWriter(w io.Writer) *countWriter {
return &countWriter{w: w}
}
func (cw *countWriter) Write(p []byte) (n int, err error) {
b, err := cw.w.Write(p)
cw.bytes += int64(b)
return b, err
}
// isWhitespace returns true if the byte slice contains only
// whitespace characters.
func isWhitespace(s string) bool {
for i := 0; i < len(s); i++ {
if c := s[i]; c != ' ' && c != '\t' && c != '\n' && c != '\r' {
return false
}
}
return true
}
// spaceMatch returns true if namespace a is the empty string
// or if namespace a equals namespace b.
func spaceMatch(a, b string) bool {
switch {
case a == "":
return true
default:
return a == b
}
}
// spaceDecompose breaks a namespace:tag identifier at the ':'
// and returns the two parts.
func spaceDecompose(str string) (space, key string) {
colon := strings.IndexByte(str, ':')
if colon == -1 {
return "", str
}
return str[:colon], str[colon+1:]
}
// Strings used by indentCRLF and indentLF
const (
indentSpaces = "\r\n "
indentTabs = "\r\n\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t"
)
// indentCRLF returns a CRLF newline followed by n copies of the first
// non-CRLF character in the source string.
func indentCRLF(n int, source string) string {
switch {
case n < 0:
return source[:2]
case n < len(source)-1:
return source[:n+2]
default:
return source + strings.Repeat(source[2:3], n-len(source)+2)
}
}
// indentLF returns a LF newline followed by n copies of the first non-LF
// character in the source string.
func indentLF(n int, source string) string {
switch {
case n < 0:
return source[1:2]
case n < len(source)-1:
return source[1 : n+2]
default:
return source[1:] + strings.Repeat(source[2:3], n-len(source)+2)
}
}
// nextIndex returns the index of the next occurrence of sep in s,
// starting from offset. It returns -1 if the sep string is not found.
func nextIndex(s, sep string, offset int) int {
switch i := strings.Index(s[offset:], sep); i {
case -1:
return -1
default:
return offset + i
}
}
// isInteger returns true if the string s contains an integer.
func isInteger(s string) bool {
for i := 0; i < len(s); i++ {
if (s[i] < '0' || s[i] > '9') && !(i == 0 && s[i] == '-') {
return false
}
}
return true
}
type escapeMode byte
const (
escapeNormal escapeMode = iota
escapeCanonicalText
escapeCanonicalAttr
)
// escapeString writes an escaped version of a string to the writer.
func escapeString(w *bufio.Writer, s string, m escapeMode) {
var esc []byte
last := 0
for i := 0; i < len(s); {
r, width := utf8.DecodeRuneInString(s[i:])
i += width
switch r {
case '&':
esc = []byte("&amp;")
case '<':
esc = []byte("&lt;")
case '>':
if m == escapeCanonicalAttr {
continue
}
esc = []byte("&gt;")
case '\'':
if m != escapeNormal {
continue
}
esc = []byte("&apos;")
case '"':
if m == escapeCanonicalText {
continue
}
esc = []byte("&quot;")
case '\t':
if m != escapeCanonicalAttr {
continue
}
esc = []byte("&#x9;")
case '\n':
if m != escapeCanonicalAttr {
continue
}
esc = []byte("&#xA;")
case '\r':
if m == escapeNormal {
continue
}
esc = []byte("&#xD;")
default:
if !isInCharacterRange(r) || (r == 0xFFFD && width == 1) {
esc = []byte("\uFFFD")
break
}
continue
}
w.WriteString(s[last : i-width])
w.Write(esc)
last = i
}
w.WriteString(s[last:])
}
func isInCharacterRange(r rune) bool {
return r == 0x09 ||
r == 0x0A ||
r == 0x0D ||
r >= 0x20 && r <= 0xD7FF ||
r >= 0xE000 && r <= 0xFFFD ||
r >= 0x10000 && r <= 0x10FFFF
}

@ -1,582 +0,0 @@
// Copyright 2015-2019 Brett Vickers.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package etree
import (
"strconv"
"strings"
)
/*
A Path is a string that represents a search path through an etree starting
from the document root or an arbitrary element. Paths are used with the
Element object's Find* methods to locate and return desired elements.
A Path consists of a series of slash-separated "selectors", each of which may
be modified by one or more bracket-enclosed "filters". Selectors are used to
traverse the etree from element to element, while filters are used to narrow
the list of candidate elements at each node.
Although etree Path strings are similar to XPath strings
(https://www.w3.org/TR/1999/REC-xpath-19991116/), they have a more limited set
of selectors and filtering options.
The following selectors are supported by etree Path strings:
. Select the current element.
.. Select the parent of the current element.
* Select all child elements of the current element.
/ Select the root element when used at the start of a path.
// Select all descendants of the current element.
tag Select all child elements with a name matching the tag.
The following basic filters are supported by etree Path strings:
[@attrib] Keep elements with an attribute named attrib.
[@attrib='val'] Keep elements with an attribute named attrib and value matching val.
[tag] Keep elements with a child element named tag.
[tag='val'] Keep elements with a child element named tag and text matching val.
[n] Keep the n-th element, where n is a numeric index starting from 1.
The following function filters are also supported:
[text()] Keep elements with non-empty text.
[text()='val'] Keep elements whose text matches val.
[local-name()='val'] Keep elements whose un-prefixed tag matches val.
[name()='val'] Keep elements whose full tag exactly matches val.
[namespace-prefix()='val'] Keep elements whose namespace prefix matches val.
[namespace-uri()='val'] Keep elements whose namespace URI matches val.
Here are some examples of Path strings:
- Select the bookstore child element of the root element:
/bookstore
- Beginning from the root element, select the title elements of all
descendant book elements having a 'category' attribute of 'WEB':
//book[@category='WEB']/title
- Beginning from the current element, select the first descendant
book element with a title child element containing the text 'Great
Expectations':
.//book[title='Great Expectations'][1]
- Beginning from the current element, select all child elements of
book elements with an attribute 'language' set to 'english':
./book/*[@language='english']
- Beginning from the current element, select all child elements of
book elements containing the text 'special':
./book/*[text()='special']
- Beginning from the current element, select all descendant book
elements whose title child element has a 'language' attribute of 'french':
.//book/title[@language='french']/..
- Beginning from the current element, select all book elements
belonging to the http://www.w3.org/TR/html4/ namespace:
.//book[namespace-uri()='http://www.w3.org/TR/html4/']
*/
type Path struct {
segments []segment
}
// ErrPath is returned by path functions when an invalid etree path is provided.
type ErrPath string
// Error returns the string describing a path error.
func (err ErrPath) Error() string {
return "etree: " + string(err)
}
// CompilePath creates an optimized version of an XPath-like string that
// can be used to query elements in an element tree.
func CompilePath(path string) (Path, error) {
var comp compiler
segments := comp.parsePath(path)
if comp.err != ErrPath("") {
return Path{nil}, comp.err
}
return Path{segments}, nil
}
// MustCompilePath creates an optimized version of an XPath-like string that
// can be used to query elements in an element tree. Panics if an error
// occurs. Use this function to create Paths when you know the path is
// valid (i.e., if it's hard-coded).
func MustCompilePath(path string) Path {
p, err := CompilePath(path)
if err != nil {
panic(err)
}
return p
}
// A segment is a portion of a path between "/" characters.
// It contains one selector and zero or more [filters].
type segment struct {
sel selector
filters []filter
}
func (seg *segment) apply(e *Element, p *pather) {
seg.sel.apply(e, p)
for _, f := range seg.filters {
f.apply(p)
}
}
// A selector selects XML elements for consideration by the
// path traversal.
type selector interface {
apply(e *Element, p *pather)
}
// A filter pares down a list of candidate XML elements based
// on a path filter in [brackets].
type filter interface {
apply(p *pather)
}
// A pather is helper object that traverses an element tree using
// a Path object. It collects and deduplicates all elements matching
// the path query.
type pather struct {
queue fifo
results []*Element
inResults map[*Element]bool
candidates []*Element
scratch []*Element // used by filters
}
// A node represents an element and the remaining path segments that
// should be applied against it by the pather.
type node struct {
e *Element
segments []segment
}
func newPather() *pather {
return &pather{
results: make([]*Element, 0),
inResults: make(map[*Element]bool),
candidates: make([]*Element, 0),
scratch: make([]*Element, 0),
}
}
// traverse follows the path from the element e, collecting
// and then returning all elements that match the path's selectors
// and filters.
func (p *pather) traverse(e *Element, path Path) []*Element {
for p.queue.add(node{e, path.segments}); p.queue.len() > 0; {
p.eval(p.queue.remove().(node))
}
return p.results
}
// eval evalutes the current path node by applying the remaining
// path's selector rules against the node's element.
func (p *pather) eval(n node) {
p.candidates = p.candidates[0:0]
seg, remain := n.segments[0], n.segments[1:]
seg.apply(n.e, p)
if len(remain) == 0 {
for _, c := range p.candidates {
if in := p.inResults[c]; !in {
p.inResults[c] = true
p.results = append(p.results, c)
}
}
} else {
for _, c := range p.candidates {
p.queue.add(node{c, remain})
}
}
}
// A compiler generates a compiled path from a path string.
type compiler struct {
err ErrPath
}
// parsePath parses an XPath-like string describing a path
// through an element tree and returns a slice of segment
// descriptors.
func (c *compiler) parsePath(path string) []segment {
// If path ends with //, fix it
if strings.HasSuffix(path, "//") {
path = path + "*"
}
var segments []segment
// Check for an absolute path
if strings.HasPrefix(path, "/") {
segments = append(segments, segment{new(selectRoot), []filter{}})
path = path[1:]
}
// Split path into segments
for _, s := range splitPath(path) {
segments = append(segments, c.parseSegment(s))
if c.err != ErrPath("") {
break
}
}
return segments
}
func splitPath(path string) []string {
pieces := make([]string, 0)
start := 0
inquote := false
for i := 0; i+1 <= len(path); i++ {
if path[i] == '\'' {
inquote = !inquote
} else if path[i] == '/' && !inquote {
pieces = append(pieces, path[start:i])
start = i + 1
}
}
return append(pieces, path[start:])
}
// parseSegment parses a path segment between / characters.
func (c *compiler) parseSegment(path string) segment {
pieces := strings.Split(path, "[")
seg := segment{
sel: c.parseSelector(pieces[0]),
filters: []filter{},
}
for i := 1; i < len(pieces); i++ {
fpath := pieces[i]
if fpath[len(fpath)-1] != ']' {
c.err = ErrPath("path has invalid filter [brackets].")
break
}
seg.filters = append(seg.filters, c.parseFilter(fpath[:len(fpath)-1]))
}
return seg
}
// parseSelector parses a selector at the start of a path segment.
func (c *compiler) parseSelector(path string) selector {
switch path {
case ".":
return new(selectSelf)
case "..":
return new(selectParent)
case "*":
return new(selectChildren)
case "":
return new(selectDescendants)
default:
return newSelectChildrenByTag(path)
}
}
var fnTable = map[string]struct {
hasFn func(e *Element) bool
getValFn func(e *Element) string
}{
"local-name": {nil, (*Element).name},
"name": {nil, (*Element).FullTag},
"namespace-prefix": {nil, (*Element).namespacePrefix},
"namespace-uri": {nil, (*Element).NamespaceURI},
"text": {(*Element).hasText, (*Element).Text},
}
// parseFilter parses a path filter contained within [brackets].
func (c *compiler) parseFilter(path string) filter {
if len(path) == 0 {
c.err = ErrPath("path contains an empty filter expression.")
return nil
}
// Filter contains [@attr='val'], [fn()='val'], or [tag='val']?
eqindex := strings.Index(path, "='")
if eqindex >= 0 {
rindex := nextIndex(path, "'", eqindex+2)
if rindex != len(path)-1 {
c.err = ErrPath("path has mismatched filter quotes.")
return nil
}
key := path[:eqindex]
value := path[eqindex+2 : rindex]
switch {
case key[0] == '@':
return newFilterAttrVal(key[1:], value)
case strings.HasSuffix(key, "()"):
fn := key[:len(key)-2]
if t, ok := fnTable[fn]; ok && t.getValFn != nil {
return newFilterFuncVal(t.getValFn, value)
}
c.err = ErrPath("path has unknown function " + fn)
return nil
default:
return newFilterChildText(key, value)
}
}
// Filter contains [@attr], [N], [tag] or [fn()]
switch {
case path[0] == '@':
return newFilterAttr(path[1:])
case strings.HasSuffix(path, "()"):
fn := path[:len(path)-2]
if t, ok := fnTable[fn]; ok && t.hasFn != nil {
return newFilterFunc(t.hasFn)
}
c.err = ErrPath("path has unknown function " + fn)
return nil
case isInteger(path):
pos, _ := strconv.Atoi(path)
switch {
case pos > 0:
return newFilterPos(pos - 1)
default:
return newFilterPos(pos)
}
default:
return newFilterChild(path)
}
}
// selectSelf selects the current element into the candidate list.
type selectSelf struct{}
func (s *selectSelf) apply(e *Element, p *pather) {
p.candidates = append(p.candidates, e)
}
// selectRoot selects the element's root node.
type selectRoot struct{}
func (s *selectRoot) apply(e *Element, p *pather) {
root := e
for root.parent != nil {
root = root.parent
}
p.candidates = append(p.candidates, root)
}
// selectParent selects the element's parent into the candidate list.
type selectParent struct{}
func (s *selectParent) apply(e *Element, p *pather) {
if e.parent != nil {
p.candidates = append(p.candidates, e.parent)
}
}
// selectChildren selects the element's child elements into the
// candidate list.
type selectChildren struct{}
func (s *selectChildren) apply(e *Element, p *pather) {
for _, c := range e.Child {
if c, ok := c.(*Element); ok {
p.candidates = append(p.candidates, c)
}
}
}
// selectDescendants selects all descendant child elements
// of the element into the candidate list.
type selectDescendants struct{}
func (s *selectDescendants) apply(e *Element, p *pather) {
var queue fifo
for queue.add(e); queue.len() > 0; {
e := queue.remove().(*Element)
p.candidates = append(p.candidates, e)
for _, c := range e.Child {
if c, ok := c.(*Element); ok {
queue.add(c)
}
}
}
}
// selectChildrenByTag selects into the candidate list all child
// elements of the element having the specified tag.
type selectChildrenByTag struct {
space, tag string
}
func newSelectChildrenByTag(path string) *selectChildrenByTag {
s, l := spaceDecompose(path)
return &selectChildrenByTag{s, l}
}
func (s *selectChildrenByTag) apply(e *Element, p *pather) {
for _, c := range e.Child {
if c, ok := c.(*Element); ok && spaceMatch(s.space, c.Space) && s.tag == c.Tag {
p.candidates = append(p.candidates, c)
}
}
}
// filterPos filters the candidate list, keeping only the
// candidate at the specified index.
type filterPos struct {
index int
}
func newFilterPos(pos int) *filterPos {
return &filterPos{pos}
}
func (f *filterPos) apply(p *pather) {
if f.index >= 0 {
if f.index < len(p.candidates) {
p.scratch = append(p.scratch, p.candidates[f.index])
}
} else {
if -f.index <= len(p.candidates) {
p.scratch = append(p.scratch, p.candidates[len(p.candidates)+f.index])
}
}
p.candidates, p.scratch = p.scratch, p.candidates[0:0]
}
// filterAttr filters the candidate list for elements having
// the specified attribute.
type filterAttr struct {
space, key string
}
func newFilterAttr(str string) *filterAttr {
s, l := spaceDecompose(str)
return &filterAttr{s, l}
}
func (f *filterAttr) apply(p *pather) {
for _, c := range p.candidates {
for _, a := range c.Attr {
if spaceMatch(f.space, a.Space) && f.key == a.Key {
p.scratch = append(p.scratch, c)
break
}
}
}
p.candidates, p.scratch = p.scratch, p.candidates[0:0]
}
// filterAttrVal filters the candidate list for elements having
// the specified attribute with the specified value.
type filterAttrVal struct {
space, key, val string
}
func newFilterAttrVal(str, value string) *filterAttrVal {
s, l := spaceDecompose(str)
return &filterAttrVal{s, l, value}
}
func (f *filterAttrVal) apply(p *pather) {
for _, c := range p.candidates {
for _, a := range c.Attr {
if spaceMatch(f.space, a.Space) && f.key == a.Key && f.val == a.Value {
p.scratch = append(p.scratch, c)
break
}
}
}
p.candidates, p.scratch = p.scratch, p.candidates[0:0]
}
// filterFunc filters the candidate list for elements satisfying a custom
// boolean function.
type filterFunc struct {
fn func(e *Element) bool
}
func newFilterFunc(fn func(e *Element) bool) *filterFunc {
return &filterFunc{fn}
}
func (f *filterFunc) apply(p *pather) {
for _, c := range p.candidates {
if f.fn(c) {
p.scratch = append(p.scratch, c)
}
}
p.candidates, p.scratch = p.scratch, p.candidates[0:0]
}
// filterFuncVal filters the candidate list for elements containing a value
// matching the result of a custom function.
type filterFuncVal struct {
fn func(e *Element) string
val string
}
func newFilterFuncVal(fn func(e *Element) string, value string) *filterFuncVal {
return &filterFuncVal{fn, value}
}
func (f *filterFuncVal) apply(p *pather) {
for _, c := range p.candidates {
if f.fn(c) == f.val {
p.scratch = append(p.scratch, c)
}
}
p.candidates, p.scratch = p.scratch, p.candidates[0:0]
}
// filterChild filters the candidate list for elements having
// a child element with the specified tag.
type filterChild struct {
space, tag string
}
func newFilterChild(str string) *filterChild {
s, l := spaceDecompose(str)
return &filterChild{s, l}
}
func (f *filterChild) apply(p *pather) {
for _, c := range p.candidates {
for _, cc := range c.Child {
if cc, ok := cc.(*Element); ok &&
spaceMatch(f.space, cc.Space) &&
f.tag == cc.Tag {
p.scratch = append(p.scratch, c)
}
}
}
p.candidates, p.scratch = p.scratch, p.candidates[0:0]
}
// filterChildText filters the candidate list for elements having
// a child element with the specified tag and text.
type filterChildText struct {
space, tag, text string
}
func newFilterChildText(str, text string) *filterChildText {
s, l := spaceDecompose(str)
return &filterChildText{s, l, text}
}
func (f *filterChildText) apply(p *pather) {
for _, c := range p.candidates {
for _, cc := range c.Child {
if cc, ok := cc.(*Element); ok &&
spaceMatch(f.space, cc.Space) &&
f.tag == cc.Tag &&
f.text == cc.Text() {
p.scratch = append(p.scratch, c)
}
}
}
p.candidates, p.scratch = p.scratch, p.candidates[0:0]
}

@ -1,9 +1,6 @@
# github.com/basgys/goxml2json v1.1.0
## explicit
github.com/basgys/goxml2json
# github.com/beevik/etree v1.1.0
## explicit
github.com/beevik/etree
# github.com/bitly/go-simplejson v0.5.0
## explicit
# github.com/dtapps/gotime v1.0.1

@ -1 +1,16 @@
package gomongo
import (
"encoding/json"
"github.com/basgys/goxml2json"
"strings"
)
// XmlDecodeNoError Xml解码不报错
func XmlDecodeNoError(b []byte) map[string]interface{} {
xtj := strings.NewReader(string(b))
jtx, _ := xml2json.Convert(xtj)
var data map[string]interface{}
_ = json.Unmarshal(jtx.Bytes(), &data)
return data
}

Loading…
Cancel
Save