parent
41eb0ee8e0
commit
78ff79fc69
@ -1,14 +0,0 @@
|
||||
language: go
|
||||
sudo: false
|
||||
|
||||
go:
|
||||
- 1.11.x
|
||||
- tip
|
||||
|
||||
matrix:
|
||||
allow_failures:
|
||||
- go: tip
|
||||
|
||||
script:
|
||||
- go vet ./...
|
||||
- go test -v ./...
|
@ -1,10 +0,0 @@
|
||||
Brett Vickers (beevik)
|
||||
Felix Geisendörfer (felixge)
|
||||
Kamil Kisiel (kisielk)
|
||||
Graham King (grahamking)
|
||||
Matt Smith (ma314smith)
|
||||
Michal Jemala (michaljemala)
|
||||
Nicolas Piganeau (npiganeau)
|
||||
Chris Brown (ccbrown)
|
||||
Earncef Sequeira (earncef)
|
||||
Gabriel de Labachelerie (wuzuf)
|
@ -1,24 +0,0 @@
|
||||
Copyright 2015-2019 Brett Vickers. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY COPYRIGHT HOLDER ``AS IS'' AND ANY
|
||||
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL COPYRIGHT HOLDER OR
|
||||
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
@ -1,205 +0,0 @@
|
||||
[![Build Status](https://travis-ci.org/beevik/etree.svg?branch=master)](https://travis-ci.org/beevik/etree)
|
||||
[![GoDoc](https://godoc.org/github.com/beevik/etree?status.svg)](https://godoc.org/github.com/beevik/etree)
|
||||
|
||||
etree
|
||||
=====
|
||||
|
||||
The etree package is a lightweight, pure Go package that expresses XML in
|
||||
the form of an element tree. Its design was inspired by the Python
|
||||
[ElementTree](http://docs.python.org/2/library/xml.etree.elementtree.html)
|
||||
module.
|
||||
|
||||
Some of the package's capabilities and features:
|
||||
|
||||
* Represents XML documents as trees of elements for easy traversal.
|
||||
* Imports, serializes, modifies or creates XML documents from scratch.
|
||||
* Writes and reads XML to/from files, byte slices, strings and io interfaces.
|
||||
* Performs simple or complex searches with lightweight XPath-like query APIs.
|
||||
* Auto-indents XML using spaces or tabs for better readability.
|
||||
* Implemented in pure Go; depends only on the Go standard library.
|
||||
* Built on top of the go [encoding/xml](http://golang.org/pkg/encoding/xml)
|
||||
package.
|
||||
|
||||
### Creating an XML document
|
||||
|
||||
The following example creates an XML document from scratch using the etree
|
||||
package and outputs its indented contents to stdout.
|
||||
```go
|
||||
doc := etree.NewDocument()
|
||||
doc.CreateProcInst("xml", `version="1.0" encoding="UTF-8"`)
|
||||
doc.CreateProcInst("xml-stylesheet", `type="text/xsl" href="style.xsl"`)
|
||||
|
||||
people := doc.CreateElement("People")
|
||||
people.CreateComment("These are all known people")
|
||||
|
||||
jon := people.CreateElement("Person")
|
||||
jon.CreateAttr("name", "Jon")
|
||||
|
||||
sally := people.CreateElement("Person")
|
||||
sally.CreateAttr("name", "Sally")
|
||||
|
||||
doc.Indent(2)
|
||||
doc.WriteTo(os.Stdout)
|
||||
```
|
||||
|
||||
Output:
|
||||
```xml
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<?xml-stylesheet type="text/xsl" href="style.xsl"?>
|
||||
<People>
|
||||
<!--These are all known people-->
|
||||
<Person name="Jon"/>
|
||||
<Person name="Sally"/>
|
||||
</People>
|
||||
```
|
||||
|
||||
### Reading an XML file
|
||||
|
||||
Suppose you have a file on disk called `bookstore.xml` containing the
|
||||
following data:
|
||||
|
||||
```xml
|
||||
<bookstore xmlns:p="urn:schemas-books-com:prices">
|
||||
|
||||
<book category="COOKING">
|
||||
<title lang="en">Everyday Italian</title>
|
||||
<author>Giada De Laurentiis</author>
|
||||
<year>2005</year>
|
||||
<p:price>30.00</p:price>
|
||||
</book>
|
||||
|
||||
<book category="CHILDREN">
|
||||
<title lang="en">Harry Potter</title>
|
||||
<author>J K. Rowling</author>
|
||||
<year>2005</year>
|
||||
<p:price>29.99</p:price>
|
||||
</book>
|
||||
|
||||
<book category="WEB">
|
||||
<title lang="en">XQuery Kick Start</title>
|
||||
<author>James McGovern</author>
|
||||
<author>Per Bothner</author>
|
||||
<author>Kurt Cagle</author>
|
||||
<author>James Linn</author>
|
||||
<author>Vaidyanathan Nagarajan</author>
|
||||
<year>2003</year>
|
||||
<p:price>49.99</p:price>
|
||||
</book>
|
||||
|
||||
<book category="WEB">
|
||||
<title lang="en">Learning XML</title>
|
||||
<author>Erik T. Ray</author>
|
||||
<year>2003</year>
|
||||
<p:price>39.95</p:price>
|
||||
</book>
|
||||
|
||||
</bookstore>
|
||||
```
|
||||
|
||||
This code reads the file's contents into an etree document.
|
||||
```go
|
||||
doc := etree.NewDocument()
|
||||
if err := doc.ReadFromFile("bookstore.xml"); err != nil {
|
||||
panic(err)
|
||||
}
|
||||
```
|
||||
|
||||
You can also read XML from a string, a byte slice, or an `io.Reader`.
|
||||
|
||||
### Processing elements and attributes
|
||||
|
||||
This example illustrates several ways to access elements and attributes using
|
||||
etree selection queries.
|
||||
```go
|
||||
root := doc.SelectElement("bookstore")
|
||||
fmt.Println("ROOT element:", root.Tag)
|
||||
|
||||
for _, book := range root.SelectElements("book") {
|
||||
fmt.Println("CHILD element:", book.Tag)
|
||||
if title := book.SelectElement("title"); title != nil {
|
||||
lang := title.SelectAttrValue("lang", "unknown")
|
||||
fmt.Printf(" TITLE: %s (%s)\n", title.Text(), lang)
|
||||
}
|
||||
for _, attr := range book.Attr {
|
||||
fmt.Printf(" ATTR: %s=%s\n", attr.Key, attr.Value)
|
||||
}
|
||||
}
|
||||
```
|
||||
Output:
|
||||
```
|
||||
ROOT element: bookstore
|
||||
CHILD element: book
|
||||
TITLE: Everyday Italian (en)
|
||||
ATTR: category=COOKING
|
||||
CHILD element: book
|
||||
TITLE: Harry Potter (en)
|
||||
ATTR: category=CHILDREN
|
||||
CHILD element: book
|
||||
TITLE: XQuery Kick Start (en)
|
||||
ATTR: category=WEB
|
||||
CHILD element: book
|
||||
TITLE: Learning XML (en)
|
||||
ATTR: category=WEB
|
||||
```
|
||||
|
||||
### Path queries
|
||||
|
||||
This example uses etree's path functions to select all book titles that fall
|
||||
into the category of 'WEB'. The double-slash prefix in the path causes the
|
||||
search for book elements to occur recursively; book elements may appear at any
|
||||
level of the XML hierarchy.
|
||||
```go
|
||||
for _, t := range doc.FindElements("//book[@category='WEB']/title") {
|
||||
fmt.Println("Title:", t.Text())
|
||||
}
|
||||
```
|
||||
|
||||
Output:
|
||||
```
|
||||
Title: XQuery Kick Start
|
||||
Title: Learning XML
|
||||
```
|
||||
|
||||
This example finds the first book element under the root bookstore element and
|
||||
outputs the tag and text of each of its child elements.
|
||||
```go
|
||||
for _, e := range doc.FindElements("./bookstore/book[1]/*") {
|
||||
fmt.Printf("%s: %s\n", e.Tag, e.Text())
|
||||
}
|
||||
```
|
||||
|
||||
Output:
|
||||
```
|
||||
title: Everyday Italian
|
||||
author: Giada De Laurentiis
|
||||
year: 2005
|
||||
price: 30.00
|
||||
```
|
||||
|
||||
This example finds all books with a price of 49.99 and outputs their titles.
|
||||
```go
|
||||
path := etree.MustCompilePath("./bookstore/book[p:price='49.99']/title")
|
||||
for _, e := range doc.FindElementsPath(path) {
|
||||
fmt.Println(e.Text())
|
||||
}
|
||||
```
|
||||
|
||||
Output:
|
||||
```
|
||||
XQuery Kick Start
|
||||
```
|
||||
|
||||
Note that this example uses the FindElementsPath function, which takes as an
|
||||
argument a pre-compiled path object. Use pre-compiled paths when you plan to
|
||||
search with the same path more than once.
|
||||
|
||||
### Other features
|
||||
|
||||
These are just a few examples of the things the etree package can do. See the
|
||||
[documentation](http://godoc.org/github.com/beevik/etree) for a complete
|
||||
description of its capabilities.
|
||||
|
||||
### Contributing
|
||||
|
||||
This project accepts contributions. Just fork the repo and submit a pull
|
||||
request!
|
@ -1,109 +0,0 @@
|
||||
Release v1.1.0
|
||||
==============
|
||||
|
||||
**New Features**
|
||||
|
||||
* New attribute helpers.
|
||||
* Added the `Element.SortAttrs` method, which lexicographically sorts an
|
||||
element's attributes by key.
|
||||
* New `ReadSettings` properties.
|
||||
* Added `Entity` for the support of custom entity maps.
|
||||
* New `WriteSettings` properties.
|
||||
* Added `UseCRLF` to allow the output of CR-LF newlines instead of the
|
||||
default LF newlines. This is useful on Windows systems.
|
||||
* Additional support for text and CDATA sections.
|
||||
* The `Element.Text` method now returns the concatenation of all consecutive
|
||||
character data tokens immediately following an element's opening tag.
|
||||
* Added `Element.SetCData` to replace the character data immediately
|
||||
following an element's opening tag with a CDATA section.
|
||||
* Added `Element.CreateCData` to create and add a CDATA section child
|
||||
`CharData` token to an element.
|
||||
* Added `Element.CreateText` to create and add a child text `CharData` token
|
||||
to an element.
|
||||
* Added `NewCData` to create a parentless CDATA section `CharData` token.
|
||||
* Added `NewText` to create a parentless text `CharData`
|
||||
token.
|
||||
* Added `CharData.IsCData` to detect if the token contains a CDATA section.
|
||||
* Added `CharData.IsWhitespace` to detect if the token contains whitespace
|
||||
inserted by one of the document Indent functions.
|
||||
* Modified `Element.SetText` so that it replaces a run of consecutive
|
||||
character data tokens following the element's opening tag (instead of just
|
||||
the first one).
|
||||
* New "tail text" support.
|
||||
* Added the `Element.Tail` method, which returns the text immediately
|
||||
following an element's closing tag.
|
||||
* Added the `Element.SetTail` method, which modifies the text immediately
|
||||
following an element's closing tag.
|
||||
* New element child insertion and removal methods.
|
||||
* Added the `Element.InsertChildAt` method, which inserts a new child token
|
||||
before the specified child token index.
|
||||
* Added the `Element.RemoveChildAt` method, which removes the child token at
|
||||
the specified child token index.
|
||||
* New element and attribute queries.
|
||||
* Added the `Element.Index` method, which returns the element's index within
|
||||
its parent element's child token list.
|
||||
* Added the `Element.NamespaceURI` method to return the namespace URI
|
||||
associated with an element.
|
||||
* Added the `Attr.NamespaceURI` method to return the namespace URI
|
||||
associated with an attribute.
|
||||
* Added the `Attr.Element` method to return the element that an attribute
|
||||
belongs to.
|
||||
* New Path filter functions.
|
||||
* Added `[local-name()='val']` to keep elements whose unprefixed tag matches
|
||||
the desired value.
|
||||
* Added `[name()='val']` to keep elements whose full tag matches the desired
|
||||
value.
|
||||
* Added `[namespace-prefix()='val']` to keep elements whose namespace prefix
|
||||
matches the desired value.
|
||||
* Added `[namespace-uri()='val']` to keep elements whose namespace URI
|
||||
matches the desired value.
|
||||
|
||||
**Bug Fixes**
|
||||
|
||||
* A default XML `CharSetReader` is now used to prevent failed parsing of XML
|
||||
documents using certain encodings.
|
||||
([Issue](https://github.com/beevik/etree/issues/53)).
|
||||
* All characters are now properly escaped according to XML parsing rules.
|
||||
([Issue](https://github.com/beevik/etree/issues/55)).
|
||||
* The `Document.Indent` and `Document.IndentTabs` functions no longer insert
|
||||
empty string `CharData` tokens.
|
||||
|
||||
**Deprecated**
|
||||
|
||||
* `Element`
|
||||
* The `InsertChild` method is deprecated. Use `InsertChildAt` instead.
|
||||
* The `CreateCharData` method is deprecated. Use `CreateText` instead.
|
||||
* `CharData`
|
||||
* The `NewCharData` function is deprecated. Use `NewText` instead.
|
||||
|
||||
|
||||
Release v1.0.1
|
||||
==============
|
||||
|
||||
**Changes**
|
||||
|
||||
* Added support for absolute etree Path queries. An absolute path begins with
|
||||
`/` or `//` and begins its search from the element's document root.
|
||||
* Added [`GetPath`](https://godoc.org/github.com/beevik/etree#Element.GetPath)
|
||||
and [`GetRelativePath`](https://godoc.org/github.com/beevik/etree#Element.GetRelativePath)
|
||||
functions to the [`Element`](https://godoc.org/github.com/beevik/etree#Element)
|
||||
type.
|
||||
|
||||
**Breaking changes**
|
||||
|
||||
* A path starting with `//` is now interpreted as an absolute path.
|
||||
Previously, it was interpreted as a relative path starting from the element
|
||||
whose
|
||||
[`FindElement`](https://godoc.org/github.com/beevik/etree#Element.FindElement)
|
||||
method was called. To remain compatible with this release, all paths
|
||||
prefixed with `//` should be prefixed with `.//` when called from any
|
||||
element other than the document's root.
|
||||
* [**edit 2/1/2019**]: Minor releases should not contain breaking changes.
|
||||
Even though this breaking change was very minor, it was a mistake to include
|
||||
it in this minor release. In the future, all breaking changes will be
|
||||
limited to major releases (e.g., version 2.0.0).
|
||||
|
||||
Release v1.0.0
|
||||
==============
|
||||
|
||||
Initial release.
|
File diff suppressed because it is too large
Load Diff
@ -1,276 +0,0 @@
|
||||
// Copyright 2015-2019 Brett Vickers.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package etree
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"io"
|
||||
"strings"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
// stack is a minimal last-in-first-out container of arbitrary values.
// Its zero value is an empty stack.
type stack struct {
	data []interface{}
}

// empty reports whether the stack currently holds no values.
func (s *stack) empty() bool {
	return len(s.data) == 0
}

// push places value on top of the stack.
func (s *stack) push(value interface{}) {
	s.data = append(s.data, value)
}

// pop removes the top value and returns it. Calling pop on an empty stack
// panics with an index-out-of-range error.
func (s *stack) pop() interface{} {
	top := len(s.data) - 1
	value := s.data[top]
	// Clear the vacated slot so the backing array no longer references value.
	s.data[top] = nil
	s.data = s.data[:top]
	return value
}

// peek returns the top value without removing it. Calling peek on an empty
// stack panics with an index-out-of-range error.
func (s *stack) peek() interface{} {
	top := len(s.data) - 1
	return s.data[top]
}
|
||||
|
||||
// fifo is a first-in-first-out queue stored in a circular buffer. head is
// the index of the oldest value and tail is the index where the next value
// will be written. Its zero value is an empty queue.
type fifo struct {
	data       []interface{}
	head, tail int
}

// add appends value to the back of the queue, enlarging the buffer first
// when fewer than two slots are free.
func (f *fifo) add(value interface{}) {
	if f.len()+1 >= len(f.data) {
		f.grow()
	}
	f.data[f.tail] = value
	f.tail = (f.tail + 1) % len(f.data)
}

// remove takes the oldest value off the front of the queue and returns it.
// The caller must ensure the queue is not empty.
func (f *fifo) remove() interface{} {
	oldest := f.data[f.head]
	// Clear the slot so the buffer no longer references the removed value.
	f.data[f.head] = nil
	f.head = (f.head + 1) % len(f.data)
	return oldest
}

// len returns the number of values currently queued.
func (f *fifo) len() int {
	if f.tail < f.head {
		// The live region wraps around the end of the buffer.
		return len(f.data) - f.head + f.tail
	}
	return f.tail - f.head
}

// grow doubles the buffer capacity (starting at 4) and moves the queued
// values to the front of the new buffer in queue order.
func (f *fifo) grow() {
	capacity := 2 * len(f.data)
	if capacity == 0 {
		capacity = 4
	}
	count := f.len()
	buf := make([]interface{}, capacity)
	if f.tail < f.head {
		// Wrapped: copy the segment from head to the end, then the
		// segment from the start up to tail.
		n := copy(buf, f.data[f.head:])
		copy(buf[n:count], f.data[:f.tail])
	} else {
		copy(buf[:count], f.data[f.head:f.tail])
	}
	f.data = buf
	f.head = 0
	f.tail = count
}
|
||||
|
||||
// countReader wraps an io.Reader and keeps a running total of the bytes
// read through it.
type countReader struct {
	r     io.Reader
	bytes int64
}

// newCountReader returns a countReader that reads from r, with its byte
// count starting at zero.
func newCountReader(r io.Reader) *countReader {
	cr := new(countReader)
	cr.r = r
	return cr
}

// Read forwards to the wrapped reader and adds the number of bytes it
// reported to the running total, then returns the wrapped reader's results
// unchanged.
func (cr *countReader) Read(p []byte) (n int, err error) {
	n, err = cr.r.Read(p)
	cr.bytes += int64(n)
	return n, err
}
|
||||
|
||||
// countWriter wraps an io.Writer and keeps a running total of the bytes
// the wrapped writer reports as written.
type countWriter struct {
	w     io.Writer
	bytes int64
}

// newCountWriter returns a countWriter that writes to w, with its byte
// count starting at zero.
func newCountWriter(w io.Writer) *countWriter {
	cw := new(countWriter)
	cw.w = w
	return cw
}

// Write forwards to the wrapped writer and adds the number of bytes it
// reported to the running total, then returns the wrapped writer's results
// unchanged.
func (cw *countWriter) Write(p []byte) (n int, err error) {
	n, err = cw.w.Write(p)
	cw.bytes += int64(n)
	return n, err
}
|
||||
|
||||
// isWhitespace reports whether every byte of s is a space, tab, line feed
// or carriage return. The empty string is considered whitespace.
func isWhitespace(s string) bool {
	for i := 0; i < len(s); i++ {
		switch s[i] {
		case ' ', '\t', '\n', '\r':
			// Whitespace; keep scanning.
		default:
			return false
		}
	}
	return true
}
|
||||
|
||||
// spaceMatch reports whether namespace a matches namespace b. An empty a
// acts as a wildcard and matches any b; otherwise the two must be equal.
func spaceMatch(a, b string) bool {
	return a == "" || a == b
}
|
||||
|
||||
// spaceDecompose splits a "namespace:tag" identifier at its first ':' and
// returns the part before it as space and the part after it as key. When
// str contains no ':', space is empty and key is the whole string.
func spaceDecompose(str string) (space, key string) {
	if i := strings.IndexByte(str, ':'); i >= 0 {
		return str[:i], str[i+1:]
	}
	return "", str
}
|
||||
|
||||
// Padding templates used by indentCRLF and indentLF. Each template is a
// CRLF newline followed by a run of one indent character. Indent widths up
// to the length of the run are produced by slicing the template; longer
// widths are built by repeating the indent character.
//
// The space run is 16 characters long to mirror the 16-tab run below, so
// common indent widths take the slicing path.
const (
	indentSpaces = "\r\n                "
	indentTabs   = "\r\n\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t"
)
|
||||
|
||||
// indentCRLF returns a CRLF newline followed by n copies of the indent
// character found at source[2]. source must begin with "\r\n" followed by
// at least one indent character. A negative n yields the bare newline.
func indentCRLF(n int, source string) string {
	if n < 0 {
		return source[:2]
	}
	end := n + 2 // newline plus n indent characters
	if end <= len(source) {
		// The template is long enough; slice it.
		return source[:end]
	}
	// Extend the template with the extra indent characters required.
	return source + strings.Repeat(source[2:3], end-len(source))
}
|
||||
|
||||
// indentLF returns an LF newline followed by n copies of the indent
// character found at source[2]. source uses the same CRLF-prefixed layout
// as for indentCRLF; the leading '\r' is skipped. A negative n yields the
// bare newline.
func indentLF(n int, source string) string {
	if n < 0 {
		return source[1:2]
	}
	end := n + 2 // index just past the n-th indent character
	if end <= len(source) {
		// The template is long enough; slice it.
		return source[1:end]
	}
	// Extend the template with the extra indent characters required.
	return source[1:] + strings.Repeat(source[2:3], end-len(source))
}
|
||||
|
||||
// nextIndex returns the index within s of the first occurrence of sep at or
// after offset, or -1 when sep does not occur there. offset must not
// exceed len(s).
func nextIndex(s, sep string, offset int) int {
	rel := strings.Index(s[offset:], sep)
	if rel < 0 {
		return -1
	}
	// Convert the position within the suffix back to a position in s.
	return offset + rel
}
|
||||
|
||||
// isInteger reports whether s is a decimal integer: an optional leading
// '-' followed by one or more ASCII digits. The empty string and a lone
// "-" contain no digits and are therefore not integers.
func isInteger(s string) bool {
	i := 0
	if len(s) > 0 && s[0] == '-' {
		i = 1
	}
	if i == len(s) {
		// Nothing left after the optional sign.
		return false
	}
	for ; i < len(s); i++ {
		if s[i] < '0' || s[i] > '9' {
			return false
		}
	}
	return true
}
|
||||
|
||||
// escapeMode selects which characters escapeString replaces.
type escapeMode byte

const (
	// escapeNormal escapes & < > ' and ", and leaves tab, line feed and
	// carriage return untouched.
	escapeNormal escapeMode = iota
	// escapeCanonicalText escapes & < > and carriage return; quotes, tab
	// and line feed are written as-is.
	escapeCanonicalText
	// escapeCanonicalAttr escapes & < ", tab, line feed and carriage
	// return; > and ' are written as-is.
	escapeCanonicalAttr
)

// escapeString writes s to w, replacing characters that need escaping under
// mode m with the corresponding XML entity or character reference. Any rune
// outside the range accepted by isInCharacterRange, and any byte that is
// not valid UTF-8, is replaced with U+FFFD. Write errors are not returned
// here; bufio.Writer retains the first error and reports it from Flush.
func escapeString(w *bufio.Writer, s string, m escapeMode) {
	var esc string
	last := 0 // start of the pending run of bytes that need no escaping
	for i := 0; i < len(s); {
		r, width := utf8.DecodeRuneInString(s[i:])
		i += width
		switch r {
		case '&':
			esc = "&amp;"
		case '<':
			esc = "&lt;"
		case '>':
			if m == escapeCanonicalAttr {
				continue
			}
			esc = "&gt;"
		case '\'':
			if m != escapeNormal {
				continue
			}
			esc = "&apos;"
		case '"':
			if m == escapeCanonicalText {
				continue
			}
			esc = "&quot;"
		case '\t':
			if m != escapeCanonicalAttr {
				continue
			}
			esc = "&#x9;"
		case '\n':
			if m != escapeCanonicalAttr {
				continue
			}
			esc = "&#xA;"
		case '\r':
			if m == escapeNormal {
				continue
			}
			esc = "&#xD;"
		default:
			// DecodeRuneInString reports an invalid byte as U+FFFD with
			// width 1; a genuine U+FFFD in the input is three bytes wide.
			if isInCharacterRange(r) && !(r == 0xFFFD && width == 1) {
				continue
			}
			esc = "\uFFFD"
		}
		// Flush the unescaped run before this rune, then its replacement.
		w.WriteString(s[last : i-width])
		w.WriteString(esc)
		last = i
	}
	w.WriteString(s[last:])
}

// isInCharacterRange reports whether r is tab, line feed, carriage return,
// or lies in one of the ranges 0x20-0xD7FF, 0xE000-0xFFFD or
// 0x10000-0x10FFFF.
func isInCharacterRange(r rune) bool {
	return r == 0x09 ||
		r == 0x0A ||
		r == 0x0D ||
		r >= 0x20 && r <= 0xD7FF ||
		r >= 0xE000 && r <= 0xFFFD ||
		r >= 0x10000 && r <= 0x10FFFF
}
|
@ -1,582 +0,0 @@
|
||||
// Copyright 2015-2019 Brett Vickers.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package etree
|
||||
|
||||
import (
|
||||
"strconv"
|
||||
"strings"
|
||||
)
|
||||
|
||||
/*
|
||||
A Path is a string that represents a search path through an etree starting
|
||||
from the document root or an arbitrary element. Paths are used with the
|
||||
Element object's Find* methods to locate and return desired elements.
|
||||
|
||||
A Path consists of a series of slash-separated "selectors", each of which may
|
||||
be modified by one or more bracket-enclosed "filters". Selectors are used to
|
||||
traverse the etree from element to element, while filters are used to narrow
|
||||
the list of candidate elements at each node.
|
||||
|
||||
Although etree Path strings are similar to XPath strings
|
||||
(https://www.w3.org/TR/1999/REC-xpath-19991116/), they have a more limited set
|
||||
of selectors and filtering options.
|
||||
|
||||
The following selectors are supported by etree Path strings:
|
||||
|
||||
. Select the current element.
|
||||
.. Select the parent of the current element.
|
||||
* Select all child elements of the current element.
|
||||
/ Select the root element when used at the start of a path.
|
||||
// Select all descendants of the current element.
|
||||
tag Select all child elements with a name matching the tag.
|
||||
|
||||
The following basic filters are supported by etree Path strings:
|
||||
|
||||
[@attrib] Keep elements with an attribute named attrib.
|
||||
[@attrib='val'] Keep elements with an attribute named attrib and value matching val.
|
||||
[tag] Keep elements with a child element named tag.
|
||||
[tag='val'] Keep elements with a child element named tag and text matching val.
|
||||
[n] Keep the n-th element, where n is a numeric index starting from 1.
|
||||
|
||||
The following function filters are also supported:
|
||||
|
||||
[text()] Keep elements with non-empty text.
|
||||
[text()='val'] Keep elements whose text matches val.
|
||||
[local-name()='val'] Keep elements whose un-prefixed tag matches val.
|
||||
[name()='val'] Keep elements whose full tag exactly matches val.
|
||||
[namespace-prefix()='val'] Keep elements whose namespace prefix matches val.
|
||||
[namespace-uri()='val'] Keep elements whose namespace URI matches val.
|
||||
|
||||
Here are some examples of Path strings:
|
||||
|
||||
- Select the bookstore child element of the root element:
|
||||
/bookstore
|
||||
|
||||
- Beginning from the root element, select the title elements of all
|
||||
descendant book elements having a 'category' attribute of 'WEB':
|
||||
//book[@category='WEB']/title
|
||||
|
||||
- Beginning from the current element, select the first descendant
|
||||
book element with a title child element containing the text 'Great
|
||||
Expectations':
|
||||
.//book[title='Great Expectations'][1]
|
||||
|
||||
- Beginning from the current element, select all child elements of
|
||||
book elements with an attribute 'language' set to 'english':
|
||||
./book/*[@language='english']
|
||||
|
||||
- Beginning from the current element, select all child elements of
|
||||
book elements containing the text 'special':
|
||||
./book/*[text()='special']
|
||||
|
||||
- Beginning from the current element, select all descendant book
|
||||
elements whose title child element has a 'language' attribute of 'french':
|
||||
.//book/title[@language='french']/..
|
||||
|
||||
- Beginning from the current element, select all book elements
|
||||
belonging to the http://www.w3.org/TR/html4/ namespace:
|
||||
.//book[namespace-uri()='http://www.w3.org/TR/html4/']
|
||||
|
||||
*/
|
||||
type Path struct {
|
||||
segments []segment
|
||||
}
|
||||
|
||||
// ErrPath is the error type returned by path functions when they are given
// an invalid etree path. Its string value is the error description.
type ErrPath string

// Error implements the error interface. It returns the description
// prefixed with "etree: ".
func (err ErrPath) Error() string {
	const prefix = "etree: "
	return prefix + string(err)
}
|
||||
|
||||
// CompilePath creates an optimized version of an XPath-like string that
|
||||
// can be used to query elements in an element tree.
|
||||
func CompilePath(path string) (Path, error) {
|
||||
var comp compiler
|
||||
segments := comp.parsePath(path)
|
||||
if comp.err != ErrPath("") {
|
||||
return Path{nil}, comp.err
|
||||
}
|
||||
return Path{segments}, nil
|
||||
}
|
||||
|
||||
// MustCompilePath creates an optimized version of an XPath-like string that
|
||||
// can be used to query elements in an element tree. Panics if an error
|
||||
// occurs. Use this function to create Paths when you know the path is
|
||||
// valid (i.e., if it's hard-coded).
|
||||
func MustCompilePath(path string) Path {
|
||||
p, err := CompilePath(path)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
return p
|
||||
}
|
||||
|
||||
// A segment is a portion of a path between "/" characters.
|
||||
// It contains one selector and zero or more [filters].
|
||||
type segment struct {
|
||||
sel selector
|
||||
filters []filter
|
||||
}
|
||||
|
||||
func (seg *segment) apply(e *Element, p *pather) {
|
||||
seg.sel.apply(e, p)
|
||||
for _, f := range seg.filters {
|
||||
f.apply(p)
|
||||
}
|
||||
}
|
||||
|
||||
// A selector selects XML elements for consideration by the
|
||||
// path traversal.
|
||||
type selector interface {
|
||||
apply(e *Element, p *pather)
|
||||
}
|
||||
|
||||
// A filter pares down a list of candidate XML elements based
|
||||
// on a path filter in [brackets].
|
||||
type filter interface {
|
||||
apply(p *pather)
|
||||
}
|
||||
|
||||
// A pather is helper object that traverses an element tree using
|
||||
// a Path object. It collects and deduplicates all elements matching
|
||||
// the path query.
|
||||
type pather struct {
|
||||
queue fifo
|
||||
results []*Element
|
||||
inResults map[*Element]bool
|
||||
candidates []*Element
|
||||
scratch []*Element // used by filters
|
||||
}
|
||||
|
||||
// A node represents an element and the remaining path segments that
|
||||
// should be applied against it by the pather.
|
||||
type node struct {
|
||||
e *Element
|
||||
segments []segment
|
||||
}
|
||||
|
||||
func newPather() *pather {
|
||||
return &pather{
|
||||
results: make([]*Element, 0),
|
||||
inResults: make(map[*Element]bool),
|
||||
candidates: make([]*Element, 0),
|
||||
scratch: make([]*Element, 0),
|
||||
}
|
||||
}
|
||||
|
||||
// traverse follows the path from the element e, collecting
|
||||
// and then returning all elements that match the path's selectors
|
||||
// and filters.
|
||||
func (p *pather) traverse(e *Element, path Path) []*Element {
|
||||
for p.queue.add(node{e, path.segments}); p.queue.len() > 0; {
|
||||
p.eval(p.queue.remove().(node))
|
||||
}
|
||||
return p.results
|
||||
}
|
||||
|
||||
// eval evalutes the current path node by applying the remaining
|
||||
// path's selector rules against the node's element.
|
||||
func (p *pather) eval(n node) {
|
||||
p.candidates = p.candidates[0:0]
|
||||
seg, remain := n.segments[0], n.segments[1:]
|
||||
seg.apply(n.e, p)
|
||||
|
||||
if len(remain) == 0 {
|
||||
for _, c := range p.candidates {
|
||||
if in := p.inResults[c]; !in {
|
||||
p.inResults[c] = true
|
||||
p.results = append(p.results, c)
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for _, c := range p.candidates {
|
||||
p.queue.add(node{c, remain})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// A compiler generates a compiled path from a path string.
|
||||
type compiler struct {
|
||||
err ErrPath
|
||||
}
|
||||
|
||||
// parsePath parses an XPath-like string describing a path
|
||||
// through an element tree and returns a slice of segment
|
||||
// descriptors.
|
||||
func (c *compiler) parsePath(path string) []segment {
|
||||
// If path ends with //, fix it
|
||||
if strings.HasSuffix(path, "//") {
|
||||
path = path + "*"
|
||||
}
|
||||
|
||||
var segments []segment
|
||||
|
||||
// Check for an absolute path
|
||||
if strings.HasPrefix(path, "/") {
|
||||
segments = append(segments, segment{new(selectRoot), []filter{}})
|
||||
path = path[1:]
|
||||
}
|
||||
|
||||
// Split path into segments
|
||||
for _, s := range splitPath(path) {
|
||||
segments = append(segments, c.parseSegment(s))
|
||||
if c.err != ErrPath("") {
|
||||
break
|
||||
}
|
||||
}
|
||||
return segments
|
||||
}
|
||||
|
||||
// splitPath breaks path into the pieces separated by '/' characters. A '/'
// that appears between single quotes is not treated as a separator. The
// result always contains at least one piece, which may be empty.
func splitPath(path string) []string {
	pieces := []string{}
	start, quoted := 0, false
	for i := 0; i < len(path); i++ {
		switch c := path[i]; {
		case c == '\'':
			quoted = !quoted
		case c == '/' && !quoted:
			pieces = append(pieces, path[start:i])
			start = i + 1
		}
	}
	return append(pieces, path[start:])
}
|
||||
|
||||
// parseSegment parses a path segment between / characters.
|
||||
func (c *compiler) parseSegment(path string) segment {
|
||||
pieces := strings.Split(path, "[")
|
||||
seg := segment{
|
||||
sel: c.parseSelector(pieces[0]),
|
||||
filters: []filter{},
|
||||
}
|
||||
for i := 1; i < len(pieces); i++ {
|
||||
fpath := pieces[i]
|
||||
if fpath[len(fpath)-1] != ']' {
|
||||
c.err = ErrPath("path has invalid filter [brackets].")
|
||||
break
|
||||
}
|
||||
seg.filters = append(seg.filters, c.parseFilter(fpath[:len(fpath)-1]))
|
||||
}
|
||||
return seg
|
||||
}
|
||||
|
||||
// parseSelector parses a selector at the start of a path segment.
|
||||
func (c *compiler) parseSelector(path string) selector {
|
||||
switch path {
|
||||
case ".":
|
||||
return new(selectSelf)
|
||||
case "..":
|
||||
return new(selectParent)
|
||||
case "*":
|
||||
return new(selectChildren)
|
||||
case "":
|
||||
return new(selectDescendants)
|
||||
default:
|
||||
return newSelectChildrenByTag(path)
|
||||
}
|
||||
}
|
||||
|
||||
var fnTable = map[string]struct {
|
||||
hasFn func(e *Element) bool
|
||||
getValFn func(e *Element) string
|
||||
}{
|
||||
"local-name": {nil, (*Element).name},
|
||||
"name": {nil, (*Element).FullTag},
|
||||
"namespace-prefix": {nil, (*Element).namespacePrefix},
|
||||
"namespace-uri": {nil, (*Element).NamespaceURI},
|
||||
"text": {(*Element).hasText, (*Element).Text},
|
||||
}
|
||||
|
||||
// parseFilter parses a path filter contained within [brackets].
|
||||
func (c *compiler) parseFilter(path string) filter {
|
||||
if len(path) == 0 {
|
||||
c.err = ErrPath("path contains an empty filter expression.")
|
||||
return nil
|
||||
}
|
||||
|
||||
// Filter contains [@attr='val'], [fn()='val'], or [tag='val']?
|
||||
eqindex := strings.Index(path, "='")
|
||||
if eqindex >= 0 {
|
||||
rindex := nextIndex(path, "'", eqindex+2)
|
||||
if rindex != len(path)-1 {
|
||||
c.err = ErrPath("path has mismatched filter quotes.")
|
||||
return nil
|
||||
}
|
||||
|
||||
key := path[:eqindex]
|
||||
value := path[eqindex+2 : rindex]
|
||||
|
||||
switch {
|
||||
case key[0] == '@':
|
||||
return newFilterAttrVal(key[1:], value)
|
||||
case strings.HasSuffix(key, "()"):
|
||||
fn := key[:len(key)-2]
|
||||
if t, ok := fnTable[fn]; ok && t.getValFn != nil {
|
||||
return newFilterFuncVal(t.getValFn, value)
|
||||
}
|
||||
c.err = ErrPath("path has unknown function " + fn)
|
||||
return nil
|
||||
default:
|
||||
return newFilterChildText(key, value)
|
||||
}
|
||||
}
|
||||
|
||||
// Filter contains [@attr], [N], [tag] or [fn()]
|
||||
switch {
|
||||
case path[0] == '@':
|
||||
return newFilterAttr(path[1:])
|
||||
case strings.HasSuffix(path, "()"):
|
||||
fn := path[:len(path)-2]
|
||||
if t, ok := fnTable[fn]; ok && t.hasFn != nil {
|
||||
return newFilterFunc(t.hasFn)
|
||||
}
|
||||
c.err = ErrPath("path has unknown function " + fn)
|
||||
return nil
|
||||
case isInteger(path):
|
||||
pos, _ := strconv.Atoi(path)
|
||||
switch {
|
||||
case pos > 0:
|
||||
return newFilterPos(pos - 1)
|
||||
default:
|
||||
return newFilterPos(pos)
|
||||
}
|
||||
default:
|
||||
return newFilterChild(path)
|
||||
}
|
||||
}
|
||||
|
||||
// selectSelf selects the current element into the candidate list.
|
||||
type selectSelf struct{}
|
||||
|
||||
func (s *selectSelf) apply(e *Element, p *pather) {
|
||||
p.candidates = append(p.candidates, e)
|
||||
}
|
||||
|
||||
// selectRoot selects the element's root node.
|
||||
type selectRoot struct{}
|
||||
|
||||
func (s *selectRoot) apply(e *Element, p *pather) {
|
||||
root := e
|
||||
for root.parent != nil {
|
||||
root = root.parent
|
||||
}
|
||||
p.candidates = append(p.candidates, root)
|
||||
}
|
||||
|
||||
// selectParent selects the element's parent into the candidate list.
|
||||
type selectParent struct{}
|
||||
|
||||
func (s *selectParent) apply(e *Element, p *pather) {
|
||||
if e.parent != nil {
|
||||
p.candidates = append(p.candidates, e.parent)
|
||||
}
|
||||
}
|
||||
|
||||
// selectChildren selects the element's child elements into the
|
||||
// candidate list.
|
||||
type selectChildren struct{}
|
||||
|
||||
func (s *selectChildren) apply(e *Element, p *pather) {
|
||||
for _, c := range e.Child {
|
||||
if c, ok := c.(*Element); ok {
|
||||
p.candidates = append(p.candidates, c)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// selectDescendants selects all descendant child elements
|
||||
// of the element into the candidate list.
|
||||
type selectDescendants struct{}
|
||||
|
||||
func (s *selectDescendants) apply(e *Element, p *pather) {
|
||||
var queue fifo
|
||||
for queue.add(e); queue.len() > 0; {
|
||||
e := queue.remove().(*Element)
|
||||
p.candidates = append(p.candidates, e)
|
||||
for _, c := range e.Child {
|
||||
if c, ok := c.(*Element); ok {
|
||||
queue.add(c)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// selectChildrenByTag selects into the candidate list all child
|
||||
// elements of the element having the specified tag.
|
||||
type selectChildrenByTag struct {
|
||||
space, tag string
|
||||
}
|
||||
|
||||
func newSelectChildrenByTag(path string) *selectChildrenByTag {
|
||||
s, l := spaceDecompose(path)
|
||||
return &selectChildrenByTag{s, l}
|
||||
}
|
||||
|
||||
func (s *selectChildrenByTag) apply(e *Element, p *pather) {
|
||||
for _, c := range e.Child {
|
||||
if c, ok := c.(*Element); ok && spaceMatch(s.space, c.Space) && s.tag == c.Tag {
|
||||
p.candidates = append(p.candidates, c)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// filterPos filters the candidate list, keeping only the
|
||||
// candidate at the specified index.
|
||||
type filterPos struct {
|
||||
index int
|
||||
}
|
||||
|
||||
func newFilterPos(pos int) *filterPos {
|
||||
return &filterPos{pos}
|
||||
}
|
||||
|
||||
func (f *filterPos) apply(p *pather) {
|
||||
if f.index >= 0 {
|
||||
if f.index < len(p.candidates) {
|
||||
p.scratch = append(p.scratch, p.candidates[f.index])
|
||||
}
|
||||
} else {
|
||||
if -f.index <= len(p.candidates) {
|
||||
p.scratch = append(p.scratch, p.candidates[len(p.candidates)+f.index])
|
||||
}
|
||||
}
|
||||
p.candidates, p.scratch = p.scratch, p.candidates[0:0]
|
||||
}
|
||||
|
||||
// filterAttr filters the candidate list for elements having
|
||||
// the specified attribute.
|
||||
type filterAttr struct {
|
||||
space, key string
|
||||
}
|
||||
|
||||
func newFilterAttr(str string) *filterAttr {
|
||||
s, l := spaceDecompose(str)
|
||||
return &filterAttr{s, l}
|
||||
}
|
||||
|
||||
func (f *filterAttr) apply(p *pather) {
|
||||
for _, c := range p.candidates {
|
||||
for _, a := range c.Attr {
|
||||
if spaceMatch(f.space, a.Space) && f.key == a.Key {
|
||||
p.scratch = append(p.scratch, c)
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
p.candidates, p.scratch = p.scratch, p.candidates[0:0]
|
||||
}
|
||||
|
||||
// filterAttrVal filters the candidate list for elements having
|
||||
// the specified attribute with the specified value.
|
||||
type filterAttrVal struct {
|
||||
space, key, val string
|
||||
}
|
||||
|
||||
func newFilterAttrVal(str, value string) *filterAttrVal {
|
||||
s, l := spaceDecompose(str)
|
||||
return &filterAttrVal{s, l, value}
|
||||
}
|
||||
|
||||
func (f *filterAttrVal) apply(p *pather) {
|
||||
for _, c := range p.candidates {
|
||||
for _, a := range c.Attr {
|
||||
if spaceMatch(f.space, a.Space) && f.key == a.Key && f.val == a.Value {
|
||||
p.scratch = append(p.scratch, c)
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
p.candidates, p.scratch = p.scratch, p.candidates[0:0]
|
||||
}
|
||||
|
||||
// filterFunc filters the candidate list for elements satisfying a custom
|
||||
// boolean function.
|
||||
type filterFunc struct {
|
||||
fn func(e *Element) bool
|
||||
}
|
||||
|
||||
func newFilterFunc(fn func(e *Element) bool) *filterFunc {
|
||||
return &filterFunc{fn}
|
||||
}
|
||||
|
||||
func (f *filterFunc) apply(p *pather) {
|
||||
for _, c := range p.candidates {
|
||||
if f.fn(c) {
|
||||
p.scratch = append(p.scratch, c)
|
||||
}
|
||||
}
|
||||
p.candidates, p.scratch = p.scratch, p.candidates[0:0]
|
||||
}
|
||||
|
||||
// filterFuncVal filters the candidate list for elements containing a value
|
||||
// matching the result of a custom function.
|
||||
type filterFuncVal struct {
|
||||
fn func(e *Element) string
|
||||
val string
|
||||
}
|
||||
|
||||
func newFilterFuncVal(fn func(e *Element) string, value string) *filterFuncVal {
|
||||
return &filterFuncVal{fn, value}
|
||||
}
|
||||
|
||||
func (f *filterFuncVal) apply(p *pather) {
|
||||
for _, c := range p.candidates {
|
||||
if f.fn(c) == f.val {
|
||||
p.scratch = append(p.scratch, c)
|
||||
}
|
||||
}
|
||||
p.candidates, p.scratch = p.scratch, p.candidates[0:0]
|
||||
}
|
||||
|
||||
// filterChild filters the candidate list for elements having
|
||||
// a child element with the specified tag.
|
||||
type filterChild struct {
|
||||
space, tag string
|
||||
}
|
||||
|
||||
func newFilterChild(str string) *filterChild {
|
||||
s, l := spaceDecompose(str)
|
||||
return &filterChild{s, l}
|
||||
}
|
||||
|
||||
func (f *filterChild) apply(p *pather) {
|
||||
for _, c := range p.candidates {
|
||||
for _, cc := range c.Child {
|
||||
if cc, ok := cc.(*Element); ok &&
|
||||
spaceMatch(f.space, cc.Space) &&
|
||||
f.tag == cc.Tag {
|
||||
p.scratch = append(p.scratch, c)
|
||||
}
|
||||
}
|
||||
}
|
||||
p.candidates, p.scratch = p.scratch, p.candidates[0:0]
|
||||
}
|
||||
|
||||
// filterChildText filters the candidate list for elements having
|
||||
// a child element with the specified tag and text.
|
||||
type filterChildText struct {
|
||||
space, tag, text string
|
||||
}
|
||||
|
||||
func newFilterChildText(str, text string) *filterChildText {
|
||||
s, l := spaceDecompose(str)
|
||||
return &filterChildText{s, l, text}
|
||||
}
|
||||
|
||||
func (f *filterChildText) apply(p *pather) {
|
||||
for _, c := range p.candidates {
|
||||
for _, cc := range c.Child {
|
||||
if cc, ok := cc.(*Element); ok &&
|
||||
spaceMatch(f.space, cc.Space) &&
|
||||
f.tag == cc.Tag &&
|
||||
f.text == cc.Text() {
|
||||
p.scratch = append(p.scratch, c)
|
||||
}
|
||||
}
|
||||
}
|
||||
p.candidates, p.scratch = p.scratch, p.candidates[0:0]
|
||||
}
|
Loading…
Reference in new issue