From ee716d3f7ee28de8758ae753002bf2c90ff8a691 Mon Sep 17 00:00:00 2001
From: Adrien Delorme
+
+## License
+
+The [BSD 3-Clause license][bsd], the same as the [Go language][golic]. Cascadia's license is [here][caslic].
+
+[jquery]: http://jquery.com/
+[go]: http://golang.org/
+[cascadia]: https://github.com/andybalholm/cascadia
+[cascadiacli]: https://github.com/suntong/cascadia
+[bsd]: http://opensource.org/licenses/BSD-3-Clause
+[golic]: http://golang.org/LICENSE
+[caslic]: https://github.com/andybalholm/cascadia/blob/master/LICENSE
+[doc]: http://godoc.org/github.com/PuerkitoBio/goquery
+[index]: http://api.jquery.com/index/
+[gonet]: https://github.com/golang/net/
+[html]: http://godoc.org/golang.org/x/net/html
+[wiki]: https://github.com/PuerkitoBio/goquery/wiki/Tips-and-tricks
+[thatguystone]: https://github.com/thatguystone
+[piotr]: https://github.com/piotrkowalczuk
+[goq]: https://github.com/andrewstuart/goq
diff --git a/vendor/github.com/PuerkitoBio/goquery/array.go b/vendor/github.com/PuerkitoBio/goquery/array.go
new file mode 100644
index 000000000..1b1f6cbe6
--- /dev/null
+++ b/vendor/github.com/PuerkitoBio/goquery/array.go
@@ -0,0 +1,124 @@
+package goquery
+
+import (
+ "golang.org/x/net/html"
+)
+
+const (
+ maxUint = ^uint(0)
+ maxInt = int(maxUint >> 1)
+
+ // ToEnd is a special index value that can be used as end index in a call
+ // to Slice so that all elements are selected until the end of the Selection.
+ // It is equivalent to passing (*Selection).Length().
+ ToEnd = maxInt
+)
+
+// First reduces the set of matched elements to the first in the set.
+// It returns a new Selection object, and an empty Selection object if
+// the selection is empty.
+func (s *Selection) First() *Selection {
+ return s.Eq(0)
+}
+
+// Last reduces the set of matched elements to the last in the set.
+// It returns a new Selection object, and an empty Selection object if
+// the selection is empty.
+func (s *Selection) Last() *Selection {
+ return s.Eq(-1)
+}
+
+// Eq reduces the set of matched elements to the one at the specified index.
+// If a negative index is given, it counts backwards starting at the end of the
+// set. It returns a new Selection object, and an empty Selection object if the
+// index is invalid.
+func (s *Selection) Eq(index int) *Selection {
+ if index < 0 {
+ index += len(s.Nodes)
+ }
+
+ if index >= len(s.Nodes) || index < 0 {
+ return newEmptySelection(s.document)
+ }
+
+ return s.Slice(index, index+1)
+}
+
+// Slice reduces the set of matched elements to a subset specified by a range
+// of indices. The start index is 0-based and indicates the index of the first
+// element to select. The end index is 0-based and indicates the index at which
+// the elements stop being selected (the end index is not selected).
+//
+// The indices may be negative, in which case they represent an offset from the
+// end of the selection.
+//
+// The special value ToEnd may be specified as end index, in which case all elements
+// until the end are selected. This works both for a positive and negative start
+// index.
+func (s *Selection) Slice(start, end int) *Selection {
+ if start < 0 {
+ start += len(s.Nodes)
+ }
+ if end == ToEnd {
+ end = len(s.Nodes)
+ } else if end < 0 {
+ end += len(s.Nodes)
+ }
+ return pushStack(s, s.Nodes[start:end])
+}
+
+// Get retrieves the underlying node at the specified index.
+// Get without parameter is not implemented, since the node array is available
+// on the Selection object.
+func (s *Selection) Get(index int) *html.Node {
+ if index < 0 {
+ index += len(s.Nodes) // Negative index gets from the end
+ }
+ return s.Nodes[index]
+}
+
+// Index returns the position of the first element within the Selection object
+// relative to its sibling elements.
+func (s *Selection) Index() int {
+ if len(s.Nodes) > 0 {
+ return newSingleSelection(s.Nodes[0], s.document).PrevAll().Length()
+ }
+ return -1
+}
+
+// IndexSelector returns the position of the first element within the
+// Selection object relative to the elements matched by the selector, or -1 if
+// not found.
+func (s *Selection) IndexSelector(selector string) int {
+ if len(s.Nodes) > 0 {
+ sel := s.document.Find(selector)
+ return indexInSlice(sel.Nodes, s.Nodes[0])
+ }
+ return -1
+}
+
+// IndexMatcher returns the position of the first element within the
+// Selection object relative to the elements matched by the matcher, or -1 if
+// not found.
+func (s *Selection) IndexMatcher(m Matcher) int {
+ if len(s.Nodes) > 0 {
+ sel := s.document.FindMatcher(m)
+ return indexInSlice(sel.Nodes, s.Nodes[0])
+ }
+ return -1
+}
+
+// IndexOfNode returns the position of the specified node within the Selection
+// object, or -1 if not found.
+func (s *Selection) IndexOfNode(node *html.Node) int {
+ return indexInSlice(s.Nodes, node)
+}
+
+// IndexOfSelection returns the position of the first node in the specified
+// Selection object within this Selection object, or -1 if not found.
+func (s *Selection) IndexOfSelection(sel *Selection) int {
+ if sel != nil && len(sel.Nodes) > 0 {
+ return indexInSlice(s.Nodes, sel.Nodes[0])
+ }
+ return -1
+}
diff --git a/vendor/github.com/PuerkitoBio/goquery/doc.go b/vendor/github.com/PuerkitoBio/goquery/doc.go
new file mode 100644
index 000000000..71146a780
--- /dev/null
+++ b/vendor/github.com/PuerkitoBio/goquery/doc.go
@@ -0,0 +1,123 @@
+// Copyright (c) 2012-2016, Martin Angers & Contributors
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation and/or
+// other materials provided with the distribution.
+// * Neither the name of the author nor the names of its contributors may be used to
+// endorse or promote products derived from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
+// OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
+// AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
+// WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+/*
+Package goquery implements features similar to jQuery, including the chainable
+syntax, to manipulate and query an HTML document.
+
+It brings a syntax and a set of features similar to jQuery to the Go language.
+It is based on Go's net/html package and the CSS Selector library cascadia.
+Since the net/html parser returns nodes, and not a full-featured DOM
+tree, jQuery's stateful manipulation functions (like height(), css(), detach())
+have been left off.
+
+Also, because the net/html parser requires UTF-8 encoding, so does goquery: it is
+the caller's responsibility to ensure that the source document provides UTF-8 encoded HTML.
+See the repository's wiki for various options on how to do this.
+
+Syntax-wise, it is as close as possible to jQuery, with the same method names when
+possible, and that warm and fuzzy chainable interface. jQuery being the
+ultra-popular library that it is, it was better for a similar HTML-manipulating
+library to follow its API than to start anew (in the same spirit as
+Go's fmt package), even though some of its methods are less than intuitive (looking
+at you, index()...).
+
+It is hosted on GitHub, along with additional documentation in the README.md
+file: https://github.com/puerkitobio/goquery
+
+Please note that because of the net/html dependency, goquery requires Go1.1+.
+
+The various methods are split into files based on the category of behavior.
+The three dots (...) indicate that various "overloads" are available.
+
+* array.go : array-like positional manipulation of the selection.
+ - Eq()
+ - First()
+ - Get()
+ - Index...()
+ - Last()
+ - Slice()
+
+* expand.go : methods that expand or augment the selection's set.
+ - Add...()
+ - AndSelf()
+ - Union(), which is an alias for AddSelection()
+
+* filter.go : filtering methods, that reduce the selection's set.
+ - End()
+ - Filter...()
+ - Has...()
+ - Intersection(), which is an alias of FilterSelection()
+ - Not...()
+
+* iteration.go : methods to loop over the selection's nodes.
+ - Each()
+ - EachWithBreak()
+ - Map()
+
+* manipulation.go : methods for modifying the document
+ - After...()
+ - Append...()
+ - Before...()
+ - Clone()
+ - Empty()
+ - Prepend...()
+ - Remove...()
+ - ReplaceWith...()
+ - Unwrap()
+ - Wrap...()
+ - WrapAll...()
+ - WrapInner...()
+
+* property.go : methods that inspect and get the node's property values.
+ - Attr*(), RemoveAttr(), SetAttr()
+ - AddClass(), HasClass(), RemoveClass(), ToggleClass()
+ - Html()
+ - Length()
+ - Size(), which is an alias for Length()
+ - Text()
+
+* query.go : methods that query, or reflect, a node's identity.
+ - Contains()
+ - Is...()
+
+* traversal.go : methods to traverse the HTML document tree.
+ - Children...()
+ - Contents()
+ - Find...()
+ - Next...()
+ - Parent[s]...()
+ - Prev...()
+ - Siblings...()
+
+* type.go : definition of the types exposed by goquery.
+ - Document
+ - Selection
+ - Matcher
+
+* utilities.go : definition of helper functions (and not methods on a *Selection)
+that are not part of jQuery, but are useful to goquery.
+ - NodeName
+ - OuterHtml
+*/
+package goquery
diff --git a/vendor/github.com/PuerkitoBio/goquery/expand.go b/vendor/github.com/PuerkitoBio/goquery/expand.go
new file mode 100644
index 000000000..7caade531
--- /dev/null
+++ b/vendor/github.com/PuerkitoBio/goquery/expand.go
@@ -0,0 +1,70 @@
+package goquery
+
+import "golang.org/x/net/html"
+
+// Add adds the selector string's matching nodes to those in the current
+// selection and returns a new Selection object.
+// The selector string is run in the context of the document of the current
+// Selection object.
+func (s *Selection) Add(selector string) *Selection {
+ return s.AddNodes(findWithMatcher([]*html.Node{s.document.rootNode}, compileMatcher(selector))...)
+}
+
+// AddMatcher adds the matcher's matching nodes to those in the current
+// selection and returns a new Selection object.
+// The matcher is run in the context of the document of the current
+// Selection object.
+func (s *Selection) AddMatcher(m Matcher) *Selection {
+ return s.AddNodes(findWithMatcher([]*html.Node{s.document.rootNode}, m)...)
+}
+
+// AddSelection adds the specified Selection object's nodes to those in the
+// current selection and returns a new Selection object.
+func (s *Selection) AddSelection(sel *Selection) *Selection {
+ if sel == nil {
+ return s.AddNodes()
+ }
+ return s.AddNodes(sel.Nodes...)
+}
+
+// Union is an alias for AddSelection.
+func (s *Selection) Union(sel *Selection) *Selection {
+ return s.AddSelection(sel)
+}
+
+// AddNodes adds the specified nodes to those in the
+// current selection and returns a new Selection object.
+func (s *Selection) AddNodes(nodes ...*html.Node) *Selection {
+ return pushStack(s, appendWithoutDuplicates(s.Nodes, nodes, nil))
+}
+
+// AndSelf adds the previous set of elements on the stack to the current set and
+// returns a new Selection combining the current Selection with the previous one.
+//
+// Deprecated: This function has been deprecated and is now an alias for AddBack().
+func (s *Selection) AndSelf() *Selection {
+ return s.AddBack()
+}
+
+// AddBack adds the previous set of elements on the stack to the current set.
+// It returns a new Selection object containing the current Selection combined
+// with the previous one.
+func (s *Selection) AddBack() *Selection {
+ return s.AddSelection(s.prevSel)
+}
+
+// AddBackFiltered reduces the previous set of elements on the stack to those that
+// match the selector string, and adds them to the current set.
+// It returns a new Selection object containing the current Selection combined
+// with the filtered previous one.
+func (s *Selection) AddBackFiltered(selector string) *Selection {
+ return s.AddSelection(s.prevSel.Filter(selector))
+}
+
+// AddBackMatcher reduces the previous set of elements on the stack to those that match
+// the matcher, and adds them to the current set.
+// It returns a new Selection object containing the current Selection combined
+// with the filtered previous one.
+func (s *Selection) AddBackMatcher(m Matcher) *Selection {
+ return s.AddSelection(s.prevSel.FilterMatcher(m))
+}
diff --git a/vendor/github.com/PuerkitoBio/goquery/filter.go b/vendor/github.com/PuerkitoBio/goquery/filter.go
new file mode 100644
index 000000000..9138ffb33
--- /dev/null
+++ b/vendor/github.com/PuerkitoBio/goquery/filter.go
@@ -0,0 +1,163 @@
+package goquery
+
+import "golang.org/x/net/html"
+
+// Filter reduces the set of matched elements to those that match the selector string.
+// It returns a new Selection object for this subset of matching elements.
+func (s *Selection) Filter(selector string) *Selection {
+ return s.FilterMatcher(compileMatcher(selector))
+}
+
+// FilterMatcher reduces the set of matched elements to those that match
+// the given matcher. It returns a new Selection object for this subset
+// of matching elements.
+func (s *Selection) FilterMatcher(m Matcher) *Selection {
+ return pushStack(s, winnow(s, m, true))
+}
+
+// Not removes elements from the Selection that match the selector string.
+// It returns a new Selection object with the matching elements removed.
+func (s *Selection) Not(selector string) *Selection {
+ return s.NotMatcher(compileMatcher(selector))
+}
+
+// NotMatcher removes elements from the Selection that match the given matcher.
+// It returns a new Selection object with the matching elements removed.
+func (s *Selection) NotMatcher(m Matcher) *Selection {
+ return pushStack(s, winnow(s, m, false))
+}
+
+// FilterFunction reduces the set of matched elements to those that pass the function's test.
+// It returns a new Selection object for this subset of elements.
+func (s *Selection) FilterFunction(f func(int, *Selection) bool) *Selection {
+ return pushStack(s, winnowFunction(s, f, true))
+}
+
+// NotFunction removes elements from the Selection that pass the function's test.
+// It returns a new Selection object with the matching elements removed.
+func (s *Selection) NotFunction(f func(int, *Selection) bool) *Selection {
+ return pushStack(s, winnowFunction(s, f, false))
+}
+
+// FilterNodes reduces the set of matched elements to those that match the specified nodes.
+// It returns a new Selection object for this subset of elements.
+func (s *Selection) FilterNodes(nodes ...*html.Node) *Selection {
+ return pushStack(s, winnowNodes(s, nodes, true))
+}
+
+// NotNodes removes elements from the Selection that match the specified nodes.
+// It returns a new Selection object with the matching elements removed.
+func (s *Selection) NotNodes(nodes ...*html.Node) *Selection {
+ return pushStack(s, winnowNodes(s, nodes, false))
+}
+
+// FilterSelection reduces the set of matched elements to those that match a
+// node in the specified Selection object.
+// It returns a new Selection object for this subset of elements.
+func (s *Selection) FilterSelection(sel *Selection) *Selection {
+ if sel == nil {
+ return pushStack(s, winnowNodes(s, nil, true))
+ }
+ return pushStack(s, winnowNodes(s, sel.Nodes, true))
+}
+
+// NotSelection removes elements from the Selection that match a node in the specified
+// Selection object. It returns a new Selection object with the matching elements removed.
+func (s *Selection) NotSelection(sel *Selection) *Selection {
+ if sel == nil {
+ return pushStack(s, winnowNodes(s, nil, false))
+ }
+ return pushStack(s, winnowNodes(s, sel.Nodes, false))
+}
+
+// Intersection is an alias for FilterSelection.
+func (s *Selection) Intersection(sel *Selection) *Selection {
+ return s.FilterSelection(sel)
+}
+
+// Has reduces the set of matched elements to those that have a descendant
+// that matches the selector.
+// It returns a new Selection object with the matching elements.
+func (s *Selection) Has(selector string) *Selection {
+ return s.HasSelection(s.document.Find(selector))
+}
+
+// HasMatcher reduces the set of matched elements to those that have a descendant
+// that matches the matcher.
+// It returns a new Selection object with the matching elements.
+func (s *Selection) HasMatcher(m Matcher) *Selection {
+ return s.HasSelection(s.document.FindMatcher(m))
+}
+
+// HasNodes reduces the set of matched elements to those that have a
+// descendant that matches one of the nodes.
+// It returns a new Selection object with the matching elements.
+func (s *Selection) HasNodes(nodes ...*html.Node) *Selection {
+ return s.FilterFunction(func(_ int, sel *Selection) bool {
+ // Add all nodes that contain one of the specified nodes
+ for _, n := range nodes {
+ if sel.Contains(n) {
+ return true
+ }
+ }
+ return false
+ })
+}
+
+// HasSelection reduces the set of matched elements to those that have a
+// descendant that matches one of the nodes of the specified Selection object.
+// It returns a new Selection object with the matching elements.
+func (s *Selection) HasSelection(sel *Selection) *Selection {
+ if sel == nil {
+ return s.HasNodes()
+ }
+ return s.HasNodes(sel.Nodes...)
+}
+
+// End ends the most recent filtering operation in the current chain and
+// returns the set of matched elements to its previous state.
+func (s *Selection) End() *Selection {
+ if s.prevSel != nil {
+ return s.prevSel
+ }
+ return newEmptySelection(s.document)
+}
+
+// Filter based on the matcher, and the indicator to keep (Filter) or
+// to get rid of (Not) the matching elements.
+func winnow(sel *Selection, m Matcher, keep bool) []*html.Node {
+ // Optimize if keep is requested
+ if keep {
+ return m.Filter(sel.Nodes)
+ }
+ // Use grep
+ return grep(sel, func(i int, s *Selection) bool {
+ return !m.Match(s.Get(0))
+ })
+}
+
+// Filter based on an array of nodes, and the indicator to keep (Filter) or
+// to get rid of (Not) the matching elements.
+func winnowNodes(sel *Selection, nodes []*html.Node, keep bool) []*html.Node {
+ if len(nodes)+len(sel.Nodes) < minNodesForSet {
+ return grep(sel, func(i int, s *Selection) bool {
+ return isInSlice(nodes, s.Get(0)) == keep
+ })
+ }
+
+ set := make(map[*html.Node]bool)
+ for _, n := range nodes {
+ set[n] = true
+ }
+ return grep(sel, func(i int, s *Selection) bool {
+ return set[s.Get(0)] == keep
+ })
+}
+
+// Filter based on a function test, and the indicator to keep (Filter) or
+// to get rid of (Not) the matching elements.
+func winnowFunction(sel *Selection, f func(int, *Selection) bool, keep bool) []*html.Node {
+ return grep(sel, func(i int, s *Selection) bool {
+ return f(i, s) == keep
+ })
+}
diff --git a/vendor/github.com/PuerkitoBio/goquery/go.mod b/vendor/github.com/PuerkitoBio/goquery/go.mod
new file mode 100644
index 000000000..2fa1332a5
--- /dev/null
+++ b/vendor/github.com/PuerkitoBio/goquery/go.mod
@@ -0,0 +1,6 @@
+module github.com/PuerkitoBio/goquery
+
+require (
+ github.com/andybalholm/cascadia v1.0.0
+ golang.org/x/net v0.0.0-20181114220301-adae6a3d119a
+)
diff --git a/vendor/github.com/PuerkitoBio/goquery/go.sum b/vendor/github.com/PuerkitoBio/goquery/go.sum
new file mode 100644
index 000000000..11c575754
--- /dev/null
+++ b/vendor/github.com/PuerkitoBio/goquery/go.sum
@@ -0,0 +1,5 @@
+github.com/andybalholm/cascadia v1.0.0 h1:hOCXnnZ5A+3eVDX8pvgl4kofXv2ELss0bKcqRySc45o=
+github.com/andybalholm/cascadia v1.0.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y=
+golang.org/x/net v0.0.0-20180218175443-cbe0f9307d01/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
+golang.org/x/net v0.0.0-20181114220301-adae6a3d119a h1:gOpx8G595UYyvj8UK4+OFyY4rx037g3fmfhe5SasG3U=
+golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
diff --git a/vendor/github.com/PuerkitoBio/goquery/iteration.go b/vendor/github.com/PuerkitoBio/goquery/iteration.go
new file mode 100644
index 000000000..e246f2e0e
--- /dev/null
+++ b/vendor/github.com/PuerkitoBio/goquery/iteration.go
@@ -0,0 +1,39 @@
+package goquery
+
+// Each iterates over a Selection object, executing a function for each
+// matched element. It returns the current Selection object. The function
+// f is called for each element in the selection with the index of the
+// element in that selection starting at 0, and a *Selection that contains
+// only that element.
+func (s *Selection) Each(f func(int, *Selection)) *Selection {
+ for i, n := range s.Nodes {
+ f(i, newSingleSelection(n, s.document))
+ }
+ return s
+}
+
+// EachWithBreak iterates over a Selection object, executing a function for each
+// matched element. It is identical to Each except that it is possible to break
+// out of the loop by returning false in the callback function. It returns the
+// current Selection object.
+func (s *Selection) EachWithBreak(f func(int, *Selection) bool) *Selection {
+ for i, n := range s.Nodes {
+ if !f(i, newSingleSelection(n, s.document)) {
+ return s
+ }
+ }
+ return s
+}
+
+// Map passes each element in the current matched set through a function,
+// producing a slice of string holding the returned values. The function
+// f is called for each element in the selection with the index of the
+// element in that selection starting at 0, and a *Selection that contains
+// only that element.
+func (s *Selection) Map(f func(int, *Selection) string) (result []string) {
+ for i, n := range s.Nodes {
+ result = append(result, f(i, newSingleSelection(n, s.document)))
+ }
+
+ return result
+}
diff --git a/vendor/github.com/PuerkitoBio/goquery/manipulation.go b/vendor/github.com/PuerkitoBio/goquery/manipulation.go
new file mode 100644
index 000000000..34eb7570f
--- /dev/null
+++ b/vendor/github.com/PuerkitoBio/goquery/manipulation.go
@@ -0,0 +1,574 @@
+package goquery
+
+import (
+ "strings"
+
+ "golang.org/x/net/html"
+)
+
+// After applies the selector from the root document and inserts the matched elements
+// after the elements in the set of matched elements.
+//
+// If one of the matched elements in the selection is not currently in the
+// document, it's impossible to insert nodes after it, so it will be ignored.
+//
+// This follows the same rules as Selection.Append.
+func (s *Selection) After(selector string) *Selection {
+ return s.AfterMatcher(compileMatcher(selector))
+}
+
+// AfterMatcher applies the matcher from the root document and inserts the matched elements
+// after the elements in the set of matched elements.
+//
+// If one of the matched elements in the selection is not currently in the
+// document, it's impossible to insert nodes after it, so it will be ignored.
+//
+// This follows the same rules as Selection.Append.
+func (s *Selection) AfterMatcher(m Matcher) *Selection {
+ return s.AfterNodes(m.MatchAll(s.document.rootNode)...)
+}
+
+// AfterSelection inserts the elements in the selection after each element in the set of matched
+// elements.
+//
+// This follows the same rules as Selection.Append.
+func (s *Selection) AfterSelection(sel *Selection) *Selection {
+ return s.AfterNodes(sel.Nodes...)
+}
+
+// AfterHtml parses the html and inserts it after the set of matched elements.
+//
+// This follows the same rules as Selection.Append.
+func (s *Selection) AfterHtml(html string) *Selection {
+ return s.AfterNodes(parseHtml(html)...)
+}
+
+// AfterNodes inserts the nodes after each element in the set of matched elements.
+//
+// This follows the same rules as Selection.Append.
+func (s *Selection) AfterNodes(ns ...*html.Node) *Selection {
+ return s.manipulateNodes(ns, true, func(sn *html.Node, n *html.Node) {
+ if sn.Parent != nil {
+ sn.Parent.InsertBefore(n, sn.NextSibling)
+ }
+ })
+}
+
+// Append appends the elements specified by the selector to the end of each element
+// in the set of matched elements, following these rules:
+//
+// 1) The selector is applied to the root document.
+//
+// 2) Elements that are part of the document will be moved to the new location.
+//
+// 3) If there are multiple locations to append to, cloned nodes will be
+// appended to all target locations except the last one, which will be moved
+// as noted in (2).
+func (s *Selection) Append(selector string) *Selection {
+ return s.AppendMatcher(compileMatcher(selector))
+}
+
+// AppendMatcher appends the elements specified by the matcher to the end of each element
+// in the set of matched elements.
+//
+// This follows the same rules as Selection.Append.
+func (s *Selection) AppendMatcher(m Matcher) *Selection {
+ return s.AppendNodes(m.MatchAll(s.document.rootNode)...)
+}
+
+// AppendSelection appends the elements in the selection to the end of each element
+// in the set of matched elements.
+//
+// This follows the same rules as Selection.Append.
+func (s *Selection) AppendSelection(sel *Selection) *Selection {
+ return s.AppendNodes(sel.Nodes...)
+}
+
+// AppendHtml parses the html and appends it to the set of matched elements.
+func (s *Selection) AppendHtml(html string) *Selection {
+ return s.AppendNodes(parseHtml(html)...)
+}
+
+// AppendNodes appends the specified nodes to each node in the set of matched elements.
+//
+// This follows the same rules as Selection.Append.
+func (s *Selection) AppendNodes(ns ...*html.Node) *Selection {
+ return s.manipulateNodes(ns, false, func(sn *html.Node, n *html.Node) {
+ sn.AppendChild(n)
+ })
+}
+
+// Before inserts the matched elements before each element in the set of matched elements.
+//
+// This follows the same rules as Selection.Append.
+func (s *Selection) Before(selector string) *Selection {
+ return s.BeforeMatcher(compileMatcher(selector))
+}
+
+// BeforeMatcher inserts the matched elements before each element in the set of matched elements.
+//
+// This follows the same rules as Selection.Append.
+func (s *Selection) BeforeMatcher(m Matcher) *Selection {
+ return s.BeforeNodes(m.MatchAll(s.document.rootNode)...)
+}
+
+// BeforeSelection inserts the elements in the selection before each element in the set of matched
+// elements.
+//
+// This follows the same rules as Selection.Append.
+func (s *Selection) BeforeSelection(sel *Selection) *Selection {
+ return s.BeforeNodes(sel.Nodes...)
+}
+
+// BeforeHtml parses the html and inserts it before the set of matched elements.
+//
+// This follows the same rules as Selection.Append.
+func (s *Selection) BeforeHtml(html string) *Selection {
+ return s.BeforeNodes(parseHtml(html)...)
+}
+
+// BeforeNodes inserts the nodes before each element in the set of matched elements.
+//
+// This follows the same rules as Selection.Append.
+func (s *Selection) BeforeNodes(ns ...*html.Node) *Selection {
+ return s.manipulateNodes(ns, false, func(sn *html.Node, n *html.Node) {
+ if sn.Parent != nil {
+ sn.Parent.InsertBefore(n, sn)
+ }
+ })
+}
+
+// Clone creates a deep copy of the set of matched nodes. The new nodes will not be
+// attached to the document.
+func (s *Selection) Clone() *Selection {
+ ns := newEmptySelection(s.document)
+ ns.Nodes = cloneNodes(s.Nodes)
+ return ns
+}
+
+// Empty removes all children nodes from the set of matched elements.
+// It returns the children nodes in a new Selection.
+func (s *Selection) Empty() *Selection {
+ var nodes []*html.Node
+
+ for _, n := range s.Nodes {
+ for c := n.FirstChild; c != nil; c = n.FirstChild {
+ n.RemoveChild(c)
+ nodes = append(nodes, c)
+ }
+ }
+
+ return pushStack(s, nodes)
+}
+
+// Prepend prepends the elements specified by the selector to each element in
+// the set of matched elements, following the same rules as Append.
+func (s *Selection) Prepend(selector string) *Selection {
+ return s.PrependMatcher(compileMatcher(selector))
+}
+
+// PrependMatcher prepends the elements specified by the matcher to each
+// element in the set of matched elements.
+//
+// This follows the same rules as Selection.Append.
+func (s *Selection) PrependMatcher(m Matcher) *Selection {
+ return s.PrependNodes(m.MatchAll(s.document.rootNode)...)
+}
+
+// PrependSelection prepends the elements in the selection to each element in
+// the set of matched elements.
+//
+// This follows the same rules as Selection.Append.
+func (s *Selection) PrependSelection(sel *Selection) *Selection {
+ return s.PrependNodes(sel.Nodes...)
+}
+
+// PrependHtml parses the html and prepends it to the set of matched elements.
+func (s *Selection) PrependHtml(html string) *Selection {
+ return s.PrependNodes(parseHtml(html)...)
+}
+
+// PrependNodes prepends the specified nodes to each node in the set of
+// matched elements.
+//
+// This follows the same rules as Selection.Append.
+func (s *Selection) PrependNodes(ns ...*html.Node) *Selection {
+ return s.manipulateNodes(ns, true, func(sn *html.Node, n *html.Node) {
+ // sn.FirstChild may be nil, in which case this functions like
+ // sn.AppendChild()
+ sn.InsertBefore(n, sn.FirstChild)
+ })
+}
+
+// Remove removes the set of matched elements from the document.
+// It returns the same selection, now consisting of nodes not in the document.
+func (s *Selection) Remove() *Selection {
+ for _, n := range s.Nodes {
+ if n.Parent != nil {
+ n.Parent.RemoveChild(n)
+ }
+ }
+
+ return s
+}
+
+// RemoveFiltered removes the set of matched elements by selector.
+// It returns the Selection of removed nodes.
+func (s *Selection) RemoveFiltered(selector string) *Selection {
+ return s.RemoveMatcher(compileMatcher(selector))
+}
+
+// RemoveMatcher removes the set of matched elements.
+// It returns the Selection of removed nodes.
+func (s *Selection) RemoveMatcher(m Matcher) *Selection {
+ return s.FilterMatcher(m).Remove()
+}
+
+// ReplaceWith replaces each element in the set of matched elements with the
+// nodes matched by the given selector.
+// It returns the removed elements.
+//
+// This follows the same rules as Selection.Append.
+func (s *Selection) ReplaceWith(selector string) *Selection {
+ return s.ReplaceWithMatcher(compileMatcher(selector))
+}
+
+// ReplaceWithMatcher replaces each element in the set of matched elements with
+// the nodes matched by the given Matcher.
+// It returns the removed elements.
+//
+// This follows the same rules as Selection.Append.
+func (s *Selection) ReplaceWithMatcher(m Matcher) *Selection {
+ return s.ReplaceWithNodes(m.MatchAll(s.document.rootNode)...)
+}
+
+// ReplaceWithSelection replaces each element in the set of matched elements with
+// the nodes from the given Selection.
+// It returns the removed elements.
+//
+// This follows the same rules as Selection.Append.
+func (s *Selection) ReplaceWithSelection(sel *Selection) *Selection {
+ return s.ReplaceWithNodes(sel.Nodes...)
+}
+
+// ReplaceWithHtml replaces each element in the set of matched elements with
+// the parsed HTML.
+// It returns the removed elements.
+//
+// This follows the same rules as Selection.Append.
+func (s *Selection) ReplaceWithHtml(html string) *Selection {
+ return s.ReplaceWithNodes(parseHtml(html)...)
+}
+
+// ReplaceWithNodes replaces each element in the set of matched elements with
+// the given nodes.
+// It returns the removed elements.
+//
+// This follows the same rules as Selection.Append.
+func (s *Selection) ReplaceWithNodes(ns ...*html.Node) *Selection {
+ s.AfterNodes(ns...)
+ return s.Remove()
+}
+
+// SetHtml sets the html content of each element in the selection to the
+// specified html string (parsed with parseHtml). It returns the selection.
+func (s *Selection) SetHtml(html string) *Selection {
+ return setHtmlNodes(s, parseHtml(html)...)
+}
+
+// SetText sets the content of each element in the selection to the specified
+// text, which is escaped with html.EscapeString before being passed to SetHtml.
+func (s *Selection) SetText(text string) *Selection {
+ return s.SetHtml(html.EscapeString(text))
+}
+
+// Unwrap removes the parents of the set of matched elements, leaving the matched
+// elements (and their siblings, if any) in their place.
+// It returns the original selection. A parent whose Data is "body" is kept.
+func (s *Selection) Unwrap() *Selection {
+ s.Parent().Each(func(i int, ss *Selection) {
+ // For some reason, jquery allows unwrap to remove the head element, so
+ // allowing it here too. Same for html. Why it allows those elements to
+ // be unwrapped while not allowing body is a mystery to me.
+ if ss.Nodes[0].Data != "body" {
+ ss.ReplaceWithSelection(ss.Contents())
+ }
+ })
+
+ return s
+}
+
+// Wrap wraps each element in the set of matched elements inside the first
+// element matched by the given selector, which is compiled and passed to
+// WrapMatcher. The matched element is cloned before being inserted.
+//
+// It returns the original set of elements.
+func (s *Selection) Wrap(selector string) *Selection {
+ return s.WrapMatcher(compileMatcher(selector))
+}
+
+// WrapMatcher wraps each element in the set of matched elements inside the
+// first node returned by m.MatchAll for the document's root node. That node
+// is cloned before being inserted into the document.
+//
+// It returns the original set of elements.
+func (s *Selection) WrapMatcher(m Matcher) *Selection {
+ return s.wrapNodes(m.MatchAll(s.document.rootNode)...)
+}
+
+// WrapSelection wraps each element in the set of matched elements inside the
+// first element in the given Selection. The element is cloned before being
+// inserted into the document. sel must not be nil: it is dereferenced.
+//
+// It returns the original set of elements.
+func (s *Selection) WrapSelection(sel *Selection) *Selection {
+ return s.wrapNodes(sel.Nodes...)
+}
+
+// WrapHtml wraps each element in the set of matched elements inside the inner-
+// most child of the first node parsed from the given HTML.
+//
+// It returns the original set of elements.
+func (s *Selection) WrapHtml(html string) *Selection {
+ return s.wrapNodes(parseHtml(html)...)
+}
+
+// WrapNode wraps each element in the set of matched elements inside the inner-
+// most child of the given node. The given node is cloned for every wrapped
+// element, so n itself is never inserted into the document.
+//
+// It returns the original set of elements.
+func (s *Selection) WrapNode(n *html.Node) *Selection {
+ return s.wrapNodes(n)
+}
+// wrapNodes wraps every element of s on its own by calling wrapAllNodes once per element.
+func (s *Selection) wrapNodes(ns ...*html.Node) *Selection {
+ s.Each(func(i int, ss *Selection) {
+ ss.wrapAllNodes(ns...)
+ })
+
+ return s
+}
+
+// WrapAll wraps a single HTML structure, matched by the given selector, around
+// all elements in the set of matched elements. The selector is compiled and
+// passed to WrapAllMatcher; the matched node is cloned before insertion.
+//
+// It returns the original set of elements.
+func (s *Selection) WrapAll(selector string) *Selection {
+ return s.WrapAllMatcher(compileMatcher(selector))
+}
+
+// WrapAllMatcher wraps a single HTML structure, the first node returned by
+// m.MatchAll for the document's root node, around all elements in the set of
+// matched elements. The node is cloned before being inserted.
+//
+// It returns the original set of elements.
+func (s *Selection) WrapAllMatcher(m Matcher) *Selection {
+ return s.wrapAllNodes(m.MatchAll(s.document.rootNode)...)
+}
+
+// WrapAllSelection wraps a single HTML structure, the first node of the given
+// Selection, around all elements in the set of matched elements. The node is
+// cloned before being inserted into the document. sel must not be nil.
+//
+// It returns the original set of elements.
+func (s *Selection) WrapAllSelection(sel *Selection) *Selection {
+ return s.wrapAllNodes(sel.Nodes...)
+}
+
+// WrapAllHtml wraps the first node parsed from the given HTML around all
+// elements in the set of matched elements. That node is cloned before being
+// inserted into the document.
+//
+// It returns the original set of elements.
+func (s *Selection) WrapAllHtml(html string) *Selection {
+ return s.wrapAllNodes(parseHtml(html)...)
+}
+// wrapAllNodes wraps s with the first node of ns (WrapAllNode); with no nodes, s is returned as is.
+func (s *Selection) wrapAllNodes(ns ...*html.Node) *Selection {
+ if len(ns) > 0 {
+ return s.WrapAllNode(ns[0])
+ }
+ return s
+}
+
+// WrapAllNode wraps a clone of the given node around the first element in the
+// Selection: the clone takes that element's place (if it has a parent), and all
+// nodes of the Selection are appended to the clone's innermost first element.
+//
+// It returns the original set of elements; an empty Selection is a no-op.
+func (s *Selection) WrapAllNode(n *html.Node) *Selection {
+ if s.Size() == 0 {
+ return s
+ }
+
+ wrap := cloneNode(n)
+
+ first := s.Nodes[0]
+ if first.Parent != nil {
+ first.Parent.InsertBefore(wrap, first)
+ first.Parent.RemoveChild(first)
+ }
+
+ for c := getFirstChildEl(wrap); c != nil; c = getFirstChildEl(wrap) {
+ wrap = c
+ }
+
+ newSingleSelection(wrap, s.document).AppendSelection(s)
+
+ return s
+}
+
+// WrapInner wraps an HTML structure, matched by the given selector, around the
+// content of each element in the set of matched elements. The matched node is
+// cloned before being inserted into the document.
+//
+// It returns the original set of elements.
+func (s *Selection) WrapInner(selector string) *Selection {
+ return s.WrapInnerMatcher(compileMatcher(selector))
+}
+
+// WrapInnerMatcher wraps an HTML structure, matched by the given matcher,
+// around the content of each element in the set of matched elements. The
+// matched node is cloned before being inserted into the document.
+//
+// It returns the original set of elements.
+func (s *Selection) WrapInnerMatcher(m Matcher) *Selection {
+ return s.wrapInnerNodes(m.MatchAll(s.document.rootNode)...)
+}
+
+// WrapInnerSelection wraps an HTML structure, the first node of the given
+// Selection, around the content of each element in the set of matched
+// elements. The node is cloned before being inserted into the document.
+//
+// It returns the original set of elements.
+func (s *Selection) WrapInnerSelection(sel *Selection) *Selection {
+ return s.wrapInnerNodes(sel.Nodes...)
+}
+
+// WrapInnerHtml wraps an HTML structure, parsed from the given HTML string,
+// around the content of each element in the set of matched elements. The
+// parsed node is cloned before being inserted into the document.
+//
+// It returns the original set of elements.
+func (s *Selection) WrapInnerHtml(html string) *Selection {
+ return s.wrapInnerNodes(parseHtml(html)...)
+}
+
+// WrapInnerNode wraps an HTML structure, the given node, around the content
+// of each element in the set of matched elements. The node is cloned before
+// being inserted into the document.
+//
+// It returns the original set of elements.
+func (s *Selection) WrapInnerNode(n *html.Node) *Selection {
+ return s.wrapInnerNodes(n)
+}
+// wrapInnerNodes wraps each element's contents via wrapAllNodes; a childless element gets a clone of ns[0] appended.
+func (s *Selection) wrapInnerNodes(ns ...*html.Node) *Selection {
+ if len(ns) == 0 {
+ return s
+ }
+
+ s.Each(func(i int, s *Selection) {
+ contents := s.Contents()
+
+ if contents.Size() > 0 {
+ contents.wrapAllNodes(ns...)
+ } else {
+ s.AppendNodes(cloneNode(ns[0]))
+ }
+ })
+
+ return s
+}
+// parseHtml parses h as an HTML fragment and returns the resulting nodes; it panics if parsing reports an error.
+func parseHtml(h string) []*html.Node {
+ // Errors are only returned when the io.Reader returns any error besides
+ // EOF, but strings.Reader never will
+ nodes, err := html.ParseFragment(strings.NewReader(h), &html.Node{Type: html.ElementNode})
+ if err != nil {
+ panic("goquery: failed to parse HTML: " + err.Error())
+ }
+ return nodes
+}
+// setHtmlNodes replaces the children of every node in s with deep copies (cloneNode) of ns and returns s.
+func setHtmlNodes(s *Selection, ns ...*html.Node) *Selection {
+ for _, n := range s.Nodes {
+ for c := n.FirstChild; c != nil; c = n.FirstChild {
+ n.RemoveChild(c)
+ }
+ for _, c := range ns {
+ n.AppendChild(cloneNode(c))
+ }
+ }
+ return s
+}
+
+// getFirstChildEl returns the first child of n that is an ElementNode, or nil if n has none.
+func getFirstChildEl(n *html.Node) *html.Node {
+ c := n.FirstChild
+ for c != nil && c.Type != html.ElementNode {
+ c = c.NextSibling
+ }
+ return c
+}
+
+// cloneNodes returns a new slice holding a deep copy (cloneNode) of each node in ns, in the same order.
+func cloneNodes(ns []*html.Node) []*html.Node {
+ cns := make([]*html.Node, 0, len(ns))
+
+ for _, n := range ns {
+ cns = append(cns, cloneNode(n))
+ }
+
+ return cns
+}
+
+// cloneNode deep-copies n (Type, DataAtom, Data, Namespace, Attr and, recursively,
+// all children). The copy has none of the original's parents or siblings.
+func cloneNode(n *html.Node) *html.Node {
+	nn := &html.Node{
+		Type:      n.Type,
+		DataAtom:  n.DataAtom,
+		Data:      n.Data,
+		Namespace: n.Namespace, // keep foreign (e.g. SVG/MathML) elements in their namespace
+		Attr:      make([]html.Attribute, len(n.Attr)),
+	}
+	copy(nn.Attr, n.Attr)
+	for c := n.FirstChild; c != nil; c = c.NextSibling {
+		nn.AppendChild(cloneNode(c))
+	}
+
+	return nn
+}
+// manipulateNodes calls f(sn, n) for each node sn of s and each n in ns: all
+// targets but the last get a clone of n, the last gets n itself (detached first).
+func (s *Selection) manipulateNodes(ns []*html.Node, reverse bool,
+	f func(sn *html.Node, n *html.Node)) *Selection {
+	lasti := s.Size() - 1
+
+	// net/html doesn't provide document fragments for insertion, so to get
+	// things in the correct order with After() and Prepend(), f must see the
+	// nodes reversed. Reverse a copy: ns may alias a slice the caller owns.
+	if reverse {
+		rev := make([]*html.Node, len(ns))
+		for i, n := range ns {
+			rev[len(ns)-1-i] = n
+		}
+		ns = rev
+	}
+	for i, sn := range s.Nodes {
+		for _, n := range ns {
+			if i != lasti {
+				f(sn, cloneNode(n))
+			} else {
+				if n.Parent != nil {
+					n.Parent.RemoveChild(n)
+				}
+				f(sn, n)
+			}
+		}
+	}
+	return s
+}
diff --git a/vendor/github.com/PuerkitoBio/goquery/property.go b/vendor/github.com/PuerkitoBio/goquery/property.go
new file mode 100644
index 000000000..411126db2
--- /dev/null
+++ b/vendor/github.com/PuerkitoBio/goquery/property.go
@@ -0,0 +1,275 @@
+package goquery
+
+import (
+ "bytes"
+ "regexp"
+ "strings"
+
+ "golang.org/x/net/html"
+)
+
+var rxClassTrim = regexp.MustCompile("[\t\r\n]") // tab, CR and LF; replaced by spaces when class lists are normalized
+
+// Attr gets the specified attribute's value for the first element in the
+// Selection; for an empty Selection it returns ("", false). To get the value
+// for each element individually, use a looping construct such as Each or Map.
+func (s *Selection) Attr(attrName string) (val string, exists bool) {
+ if len(s.Nodes) == 0 {
+ return
+ }
+ return getAttributeValue(attrName, s.Nodes[0])
+}
+
+// AttrOr works like Attr but returns defaultValue if the Selection is empty or the attribute is not present.
+func (s *Selection) AttrOr(attrName, defaultValue string) string {
+ if len(s.Nodes) == 0 {
+ return defaultValue
+ }
+
+ val, exists := getAttributeValue(attrName, s.Nodes[0])
+ if !exists {
+ return defaultValue
+ }
+
+ return val
+}
+
+// RemoveAttr removes the named attribute from each element in the set of matched elements and returns s.
+func (s *Selection) RemoveAttr(attrName string) *Selection {
+ for _, n := range s.Nodes {
+ removeAttr(n, attrName)
+ }
+
+ return s
+}
+
+// SetAttr sets the given attribute on each matched element, appending it when absent, and returns s.
+func (s *Selection) SetAttr(attrName, val string) *Selection {
+ for _, n := range s.Nodes {
+ attr := getAttributePtr(attrName, n)
+ if attr == nil {
+ n.Attr = append(n.Attr, html.Attribute{Key: attrName, Val: val})
+ } else {
+ attr.Val = val
+ }
+ }
+
+ return s
+}
+
+// Text gets the combined text contents of each element in the set of matched
+// elements, including their descendants (text nodes are visited depth-first).
+func (s *Selection) Text() string {
+ var buf bytes.Buffer
+
+ // Slightly optimized vs calling Each: no single selection object created
+ var f func(*html.Node)
+ f = func(n *html.Node) {
+ if n.Type == html.TextNode {
+ // Keep newlines and spaces, like jQuery
+ buf.WriteString(n.Data)
+ }
+ if n.FirstChild != nil {
+ for c := n.FirstChild; c != nil; c = c.NextSibling {
+ f(c)
+ }
+ }
+ }
+ for _, n := range s.Nodes {
+ f(n)
+ }
+
+ return buf.String()
+}
+
+// Size is an alias for Length: it returns the number of elements in the Selection.
+func (s *Selection) Size() int {
+ return s.Length()
+}
+
+// Length returns the number of elements in the Selection object, i.e. len(s.Nodes).
+func (s *Selection) Length() int {
+ return len(s.Nodes)
+}
+
+// Html renders the children of the first matched element (text and comment
+// nodes included) and returns the markup; an empty Selection yields "", nil.
+func (s *Selection) Html() (ret string, e error) {
+ // Since there is no .innerHtml, the HTML content must be re-created from
+ // the nodes using html.Render.
+ var buf bytes.Buffer
+
+ if len(s.Nodes) > 0 {
+ for c := s.Nodes[0].FirstChild; c != nil; c = c.NextSibling {
+ e = html.Render(&buf, c)
+ if e != nil {
+ return
+ }
+ }
+ ret = buf.String()
+ }
+
+ return
+}
+
+// AddClass adds the given class(es), separated by a space or passed as multiple
+// arguments, to each matched element, skipping classes that are already present.
+func (s *Selection) AddClass(class ...string) *Selection {
+ classStr := strings.TrimSpace(strings.Join(class, " "))
+
+ if classStr == "" {
+ return s
+ }
+
+ tcls := getClassesSlice(classStr)
+ for _, n := range s.Nodes {
+ curClasses, attr := getClassesAndAttr(n, true)
+ for _, newClass := range tcls {
+ if !strings.Contains(curClasses, " "+newClass+" ") {
+ curClasses += newClass + " "
+ }
+ }
+
+ setClasses(n, attr, curClasses)
+ }
+
+ return s
+}
+
+// HasClass determines whether any of the matched elements are assigned the
+// given class (compared as a whole, space-delimited name).
+func (s *Selection) HasClass(class string) bool {
+ class = " " + class + " "
+ for _, n := range s.Nodes {
+ classes, _ := getClassesAndAttr(n, false)
+ if strings.Contains(classes, class) {
+ return true
+ }
+ }
+ return false
+}
+
+// RemoveClass removes the given class(es) from each element in the set of matched elements.
+// Multiple class names can be specified, separated by a space or via multiple arguments.
+// If no class name is provided, the whole class attribute is removed.
+func (s *Selection) RemoveClass(class ...string) *Selection {
+ var rclasses []string
+
+ classStr := strings.TrimSpace(strings.Join(class, " "))
+ remove := classStr == ""
+
+ if !remove {
+ rclasses = getClassesSlice(classStr)
+ }
+
+ for _, n := range s.Nodes {
+ if remove {
+ removeAttr(n, "class")
+ } else {
+ classes, attr := getClassesAndAttr(n, true)
+ for _, rcl := range rclasses {
+ classes = strings.Replace(classes, " "+rcl+" ", " ", -1)
+ }
+
+ setClasses(n, attr, classes)
+ }
+ }
+
+ return s
+}
+
+// ToggleClass removes each given class from the matched elements that have it and adds it to those that don't.
+// Multiple class names can be specified, separated by a space or via multiple arguments.
+func (s *Selection) ToggleClass(class ...string) *Selection {
+ classStr := strings.TrimSpace(strings.Join(class, " "))
+
+ if classStr == "" {
+ return s
+ }
+
+ tcls := getClassesSlice(classStr)
+
+ for _, n := range s.Nodes {
+ classes, attr := getClassesAndAttr(n, true)
+ for _, tcl := range tcls {
+ if strings.Contains(classes, " "+tcl+" ") {
+ classes = strings.Replace(classes, " "+tcl+" ", " ", -1)
+ } else {
+ classes += tcl + " "
+ }
+ }
+
+ setClasses(n, attr, classes)
+ }
+
+ return s
+}
+// getAttributePtr returns a pointer into n.Attr for the first attribute keyed attrName, or nil (also when n is nil).
+func getAttributePtr(attrName string, n *html.Node) *html.Attribute {
+ if n == nil {
+ return nil
+ }
+
+ for i, a := range n.Attr {
+ if a.Key == attrName {
+ return &n.Attr[i]
+ }
+ }
+ return nil
+}
+
+// getAttributeValue returns the value of attribute attrName on n and whether that attribute exists.
+func getAttributeValue(attrName string, n *html.Node) (val string, exists bool) {
+ if a := getAttributePtr(attrName, n); a != nil {
+ val = a.Val
+ exists = true
+ }
+ return
+}
+
+// getClassesAndAttr returns n's class list padded with spaces and its "class" attribute (appended if create is set).
+func getClassesAndAttr(n *html.Node, create bool) (classes string, attr *html.Attribute) {
+ // Applies only to element nodes
+ if n.Type == html.ElementNode {
+ attr = getAttributePtr("class", n)
+ if attr == nil && create {
+ n.Attr = append(n.Attr, html.Attribute{
+ Key: "class",
+ Val: "",
+ })
+ attr = &n.Attr[len(n.Attr)-1]
+ }
+ }
+
+ if attr == nil {
+ classes = " "
+ } else {
+ classes = rxClassTrim.ReplaceAllString(" "+attr.Val+" ", " ")
+ }
+
+ return
+}
+// getClassesSlice splits classes on spaces, tabs, CRs and LFs, dropping empty names.
+func getClassesSlice(classes string) []string {
+	return strings.FieldsFunc(classes, func(r rune) bool { return strings.ContainsRune(" \t\r\n", r) })
+}
+// removeAttr deletes the first attribute keyed attrName from n by moving the last attribute into its slot.
+func removeAttr(n *html.Node, attrName string) {
+ for i, a := range n.Attr {
+ if a.Key == attrName {
+ n.Attr[i], n.Attr[len(n.Attr)-1], n.Attr =
+ n.Attr[len(n.Attr)-1], html.Attribute{}, n.Attr[:len(n.Attr)-1]
+ return
+ }
+ }
+}
+// setClasses writes the trimmed class list to attr, or removes the "class" attribute from n
+// when the list is empty. A nil attr (non-element node, see getClassesAndAttr) is left alone.
+func setClasses(n *html.Node, attr *html.Attribute, classes string) {
+	classes = strings.TrimSpace(classes)
+	if classes == "" {
+		removeAttr(n, "class")
+	} else if attr != nil {
+		attr.Val = classes
+	}
+}
diff --git a/vendor/github.com/PuerkitoBio/goquery/query.go b/vendor/github.com/PuerkitoBio/goquery/query.go
new file mode 100644
index 000000000..fe86bf0bf
--- /dev/null
+++ b/vendor/github.com/PuerkitoBio/goquery/query.go
@@ -0,0 +1,49 @@
+package goquery
+
+import "golang.org/x/net/html"
+
+// Is checks the current matched set of elements against a selector and
+// returns true if at least one of these elements matches (see IsMatcher).
+func (s *Selection) Is(selector string) bool {
+ return s.IsMatcher(compileMatcher(selector))
+}
+
+// IsMatcher checks the current matched set of elements against a matcher and
+// returns true if at least one of them matches; an empty set yields false.
+func (s *Selection) IsMatcher(m Matcher) bool {
+ if len(s.Nodes) > 0 {
+ if len(s.Nodes) == 1 {
+ return m.Match(s.Nodes[0])
+ }
+ return len(m.Filter(s.Nodes)) > 0
+ }
+
+ return false
+}
+
+// IsFunction checks the current matched set of elements against a predicate and
+// returns true if FilterFunction keeps at least one of these elements.
+func (s *Selection) IsFunction(f func(int, *Selection) bool) bool {
+ return s.FilterFunction(f).Length() > 0
+}
+
+// IsSelection checks the current matched set of elements against a Selection object
+// and returns true if FilterSelection keeps at least one of these elements.
+func (s *Selection) IsSelection(sel *Selection) bool {
+ return s.FilterSelection(sel).Length() > 0
+}
+
+// IsNodes checks the current matched set of elements against the specified nodes
+// and returns true if FilterNodes keeps at least one of these elements.
+func (s *Selection) IsNodes(nodes ...*html.Node) bool {
+ return s.FilterNodes(nodes...).Length() > 0
+}
+
+// Contains returns true if the specified Node is within,
+// at any depth, one of the nodes in the Selection object.
+// It is NOT inclusive, to behave like jQuery's implementation, and
+// unlike JavaScript's .contains, so if the contained
+// node is itself in the selection, it returns false.
+func (s *Selection) Contains(n *html.Node) bool {
+ return sliceContains(s.Nodes, n)
+}
diff --git a/vendor/github.com/PuerkitoBio/goquery/traversal.go b/vendor/github.com/PuerkitoBio/goquery/traversal.go
new file mode 100644
index 000000000..5fa5315ac
--- /dev/null
+++ b/vendor/github.com/PuerkitoBio/goquery/traversal.go
@@ -0,0 +1,698 @@
+package goquery
+
+import "golang.org/x/net/html"
+
+type siblingType int
+
+// Sibling types tell the internal sibling/children iteration which nodes are requested.
+// iota-3 numbers them -3..4: the Prev* kinds are negative, siblingAll is 0, the Next* kinds positive.
+const (
+ siblingPrevUntil siblingType = iota - 3
+ siblingPrevAll
+ siblingPrev
+ siblingAll
+ siblingNext
+ siblingNextAll
+ siblingNextUntil
+ siblingAllIncludingNonElements
+)
+
+// Find gets the descendants of each element in the current set of matched
+// elements, filtered by a selector (compiled with compileMatcher). It returns
+// a new Selection object containing these matched elements.
+func (s *Selection) Find(selector string) *Selection {
+ return pushStack(s, findWithMatcher(s.Nodes, compileMatcher(selector)))
+}
+
+// FindMatcher gets the descendants of each element in the current set of matched
+// elements, filtered by the matcher. It is Find for an already compiled Matcher
+// and returns a new Selection object containing these matched elements.
+func (s *Selection) FindMatcher(m Matcher) *Selection {
+ return pushStack(s, findWithMatcher(s.Nodes, m))
+}
+
+// FindSelection gets the descendants of each element in the current
+// Selection, filtered by a Selection. It returns a new Selection object
+// containing these matched elements; for a nil sel, no nodes are pushed.
+func (s *Selection) FindSelection(sel *Selection) *Selection {
+ if sel == nil {
+ return pushStack(s, nil)
+ }
+ return s.FindNodes(sel.Nodes...)
+}
+
+// FindNodes gets the descendants of each element in the current
+// Selection, filtered by some nodes: a node is kept when sliceContains reports
+// it for s.Nodes. It returns a new Selection object with the kept nodes.
+func (s *Selection) FindNodes(nodes ...*html.Node) *Selection {
+ return pushStack(s, mapNodes(nodes, func(i int, n *html.Node) []*html.Node {
+ if sliceContains(s.Nodes, n) {
+ return []*html.Node{n}
+ }
+ return nil
+ }))
+}
+
+// Contents gets the children of each element in the Selection,
+// including text and comment nodes (siblingAllIncludingNonElements). It
+// returns a new Selection object containing these nodes.
+func (s *Selection) Contents() *Selection {
+ return pushStack(s, getChildrenNodes(s.Nodes, siblingAllIncludingNonElements))
+}
+
+// ContentsFiltered gets the children of each element in the Selection,
+// filtered by the specified selector. It returns a new Selection
+// object containing these elements. Since selectors only act on Element nodes,
+// this function is an alias to ChildrenFiltered unless the selector is the
+// empty string, in which case it is an alias to Contents.
+func (s *Selection) ContentsFiltered(selector string) *Selection {
+ if selector != "" {
+ return s.ChildrenFiltered(selector)
+ }
+ return s.Contents()
+}
+
+// ContentsMatcher gets the children of each element in the Selection,
+// filtered by the specified matcher. It returns a new Selection
+// object containing these elements. Since matchers only act on Element nodes,
+// this function simply calls ChildrenMatcher.
+func (s *Selection) ContentsMatcher(m Matcher) *Selection {
+ return s.ChildrenMatcher(m)
+}
+
+// Children gets the child elements of each element in the Selection
+// (siblingAll). It returns a new Selection object containing these elements.
+func (s *Selection) Children() *Selection {
+ return pushStack(s, getChildrenNodes(s.Nodes, siblingAll))
+}
+
+// ChildrenFiltered gets the child elements of each element in the Selection,
+// filtered by the specified selector (compiled with compileMatcher). It
+// returns a new Selection object containing these elements.
+func (s *Selection) ChildrenFiltered(selector string) *Selection {
+ return filterAndPush(s, getChildrenNodes(s.Nodes, siblingAll), compileMatcher(selector))
+}
+
+// ChildrenMatcher gets the child elements of each element in the Selection,
+// filtered by the specified matcher. It is ChildrenFiltered for an already
+// compiled Matcher and returns a new Selection object with these elements.
+func (s *Selection) ChildrenMatcher(m Matcher) *Selection {
+ return filterAndPush(s, getChildrenNodes(s.Nodes, siblingAll), m)
+}
+
+// Parent gets the parent of each element in the Selection (getParentNodes).
+// It returns a new Selection object containing the matched elements.
+func (s *Selection) Parent() *Selection {
+ return pushStack(s, getParentNodes(s.Nodes))
+}
+
+// ParentFiltered gets the parent of each element in the Selection, filtered by a selector
+// (compiled with compileMatcher). It returns a new Selection object containing the matched elements.
+func (s *Selection) ParentFiltered(selector string) *Selection {
+ return filterAndPush(s, getParentNodes(s.Nodes), compileMatcher(selector))
+}
+
+// ParentMatcher gets the parent of each element in the Selection filtered by an already
+// compiled matcher. It returns a new Selection object containing the matched elements.
+func (s *Selection) ParentMatcher(m Matcher) *Selection {
+ return filterAndPush(s, getParentNodes(s.Nodes), m)
+}
+
+// Closest gets, for each element, the first node matching the selector, testing
+// the element itself and then its ancestors up the DOM tree (see ClosestMatcher).
+func (s *Selection) Closest(selector string) *Selection {
+ cs := compileMatcher(selector)
+ return s.ClosestMatcher(cs)
+}
+
+// ClosestMatcher gets, for each element, the first node that matches the matcher,
+// testing the element itself and then its ancestors; an element without a match yields no node.
+func (s *Selection) ClosestMatcher(m Matcher) *Selection {
+ return pushStack(s, mapNodes(s.Nodes, func(i int, n *html.Node) []*html.Node {
+ // For each node in the selection, test the node itself, then each parent
+ // until a match is found.
+ for ; n != nil; n = n.Parent {
+ if m.Match(n) {
+ return []*html.Node{n}
+ }
+ }
+ return nil
+ }))
+}
+
+// ClosestNodes gets, for each element, the first node that is one of the given nodes,
+// testing the element itself and then its ancestors; an element without a match yields no node.
+func (s *Selection) ClosestNodes(nodes ...*html.Node) *Selection {
+ set := make(map[*html.Node]bool)
+ for _, n := range nodes {
+ set[n] = true
+ }
+ return pushStack(s, mapNodes(s.Nodes, func(i int, n *html.Node) []*html.Node {
+ // For each node in the selection, test the node itself, then each parent
+ // until a match is found.
+ for ; n != nil; n = n.Parent {
+ if set[n] {
+ return []*html.Node{n}
+ }
+ }
+ return nil
+ }))
+}
+
+// ClosestSelection gets the first element that matches one of the nodes in the
+// Selection by testing the element itself and traversing up through its ancestors
+// in the DOM tree. For a nil sel, no nodes are pushed.
+func (s *Selection) ClosestSelection(sel *Selection) *Selection {
+ if sel == nil {
+ return pushStack(s, nil)
+ }
+ return s.ClosestNodes(sel.Nodes...)
+}
+
+// Parents gets the ancestors of each element in the current Selection, with no
+// "until" matcher or nodes. It returns a new Selection object with the matched elements.
+func (s *Selection) Parents() *Selection {
+ return pushStack(s, getParentsNodes(s.Nodes, nil, nil))
+}
+
+// ParentsFiltered gets the ancestors of each element in the current Selection,
+// filtered by a selector. It returns a new Selection object with the matched elements.
+func (s *Selection) ParentsFiltered(selector string) *Selection {
+ return filterAndPush(s, getParentsNodes(s.Nodes, nil, nil), compileMatcher(selector))
+}
+
+// ParentsMatcher gets the ancestors of each element in the current Selection,
+// filtered by a matcher. It returns a new Selection object with the matched elements.
+func (s *Selection) ParentsMatcher(m Matcher) *Selection {
+ return filterAndPush(s, getParentsNodes(s.Nodes, nil, nil), m)
+}
+
+// ParentsUntil gets the ancestors of each element in the Selection, up to but
+// not including the element matched by the selector (compiled with
+// compileMatcher). It returns a new Selection with the matched elements.
+func (s *Selection) ParentsUntil(selector string) *Selection {
+ return pushStack(s, getParentsNodes(s.Nodes, compileMatcher(selector), nil))
+}
+
+// ParentsUntilMatcher gets the ancestors of each element in the Selection, up to but
+// not including the element matched by the matcher. It is ParentsUntil for an already
+// compiled Matcher and returns a new Selection object with the matched elements.
+func (s *Selection) ParentsUntilMatcher(m Matcher) *Selection {
+ return pushStack(s, getParentsNodes(s.Nodes, m, nil))
+}
+
+// ParentsUntilSelection gets the ancestors of each element in the Selection,
+// up to but not including the elements in the specified Selection. A nil sel
+// behaves like Parents. It returns a new Selection with the matched elements.
+func (s *Selection) ParentsUntilSelection(sel *Selection) *Selection {
+ if sel == nil {
+ return s.Parents()
+ }
+ return s.ParentsUntilNodes(sel.Nodes...)
+}
+
+// ParentsUntilNodes gets the ancestors of each element in the Selection,
+// up to but not including the specified nodes (no matcher is involved). It
+// returns a new Selection object containing the matched elements.
+func (s *Selection) ParentsUntilNodes(nodes ...*html.Node) *Selection {
+ return pushStack(s, getParentsNodes(s.Nodes, nil, nodes))
+}
+
+// ParentsFilteredUntil is like ParentsUntil, with the option to filter the
+// results based on a selector string; both selectors are compiled with
+// compileMatcher. It returns a new Selection with the matched elements.
+func (s *Selection) ParentsFilteredUntil(filterSelector, untilSelector string) *Selection {
+ return filterAndPush(s, getParentsNodes(s.Nodes, compileMatcher(untilSelector), nil), compileMatcher(filterSelector))
+}
+
+// ParentsFilteredUntilMatcher is like ParentsUntilMatcher (stopping at until), with the results
+// filtered by the filter matcher. It returns a new Selection object containing the matched elements.
+func (s *Selection) ParentsFilteredUntilMatcher(filter, until Matcher) *Selection {
+ return filterAndPush(s, getParentsNodes(s.Nodes, until, nil), filter)
+}
+
+// ParentsFilteredUntilSelection is like ParentsUntilSelection, with the
+// option to filter the results based on a selector string, which is compiled
+// and passed to ParentsMatcherUntilSelection. It returns a new Selection.
+func (s *Selection) ParentsFilteredUntilSelection(filterSelector string, sel *Selection) *Selection {
+ return s.ParentsMatcherUntilSelection(compileMatcher(filterSelector), sel)
+}
+
+// ParentsMatcherUntilSelection is like ParentsUntilSelection, with the
+// option to filter the results based on a matcher. A nil sel behaves like
+// ParentsMatcher(filter). It returns a new Selection with the matched elements.
+func (s *Selection) ParentsMatcherUntilSelection(filter Matcher, sel *Selection) *Selection {
+ if sel == nil {
+ return s.ParentsMatcher(filter)
+ }
+ return s.ParentsMatcherUntilNodes(filter, sel.Nodes...)
+}
+
+// ParentsFilteredUntilNodes is like ParentsUntilNodes, with the
+// option to filter the results based on a selector string (compiled with
+// compileMatcher). It returns a new Selection with the matched elements.
+func (s *Selection) ParentsFilteredUntilNodes(filterSelector string, nodes ...*html.Node) *Selection {
+ return filterAndPush(s, getParentsNodes(s.Nodes, nil, nodes), compileMatcher(filterSelector))
+}
+
+// ParentsMatcherUntilNodes is like ParentsUntilNodes, with the
+// option to filter the results based on an already compiled matcher. It
+// returns a new Selection object containing the matched elements.
+func (s *Selection) ParentsMatcherUntilNodes(filter Matcher, nodes ...*html.Node) *Selection {
+ return filterAndPush(s, getParentsNodes(s.Nodes, nil, nodes), filter)
+}
+
+// Siblings gets the siblings of each element in the Selection (siblingAll).
+// It returns a new Selection object containing the matched elements.
+func (s *Selection) Siblings() *Selection {
+ return pushStack(s, getSiblingNodes(s.Nodes, siblingAll, nil, nil))
+}
+
+// SiblingsFiltered gets the siblings of each element in the Selection
+// filtered by a selector (compiled with compileMatcher). It returns a new
+// Selection object containing the matched elements.
+func (s *Selection) SiblingsFiltered(selector string) *Selection {
+ return filterAndPush(s, getSiblingNodes(s.Nodes, siblingAll, nil, nil), compileMatcher(selector))
+}
+
+// SiblingsMatcher gets the siblings of each element in the Selection
+// filtered by an already compiled matcher. It returns a new Selection object
+// containing the matched elements.
+func (s *Selection) SiblingsMatcher(m Matcher) *Selection {
+ return filterAndPush(s, getSiblingNodes(s.Nodes, siblingAll, nil, nil), m)
+}
+
+// Next gets the immediately following sibling of each element in the Selection
+// (siblingNext). It returns a new Selection object containing the matched elements.
+func (s *Selection) Next() *Selection {
+ return pushStack(s, getSiblingNodes(s.Nodes, siblingNext, nil, nil))
+}
+
+// NextFiltered gets the immediately following sibling of each element in the
+// Selection filtered by a selector (compiled with compileMatcher). It returns
+// a new Selection object containing the matched elements.
+func (s *Selection) NextFiltered(selector string) *Selection {
+ return filterAndPush(s, getSiblingNodes(s.Nodes, siblingNext, nil, nil), compileMatcher(selector))
+}
+
+// NextMatcher gets the immediately following sibling of each element in the
+// Selection filtered by an already compiled matcher. It returns a new
+// Selection object containing the matched elements.
+func (s *Selection) NextMatcher(m Matcher) *Selection {
+ return filterAndPush(s, getSiblingNodes(s.Nodes, siblingNext, nil, nil), m)
+}
+
+// NextAll gets all the following siblings of each element in the Selection
+// (siblingNextAll). It returns a new Selection object containing the matched elements.
+func (s *Selection) NextAll() *Selection {
+ return pushStack(s, getSiblingNodes(s.Nodes, siblingNextAll, nil, nil))
+}
+
+// NextAllFiltered gets all the following siblings of each element in the
+// Selection filtered by a selector (compiled with compileMatcher). It returns
+// a new Selection object containing the matched elements.
+func (s *Selection) NextAllFiltered(selector string) *Selection {
+ return filterAndPush(s, getSiblingNodes(s.Nodes, siblingNextAll, nil, nil), compileMatcher(selector))
+}
+
+// NextAllMatcher gets all the following siblings of each element in the
+// Selection filtered by an already compiled matcher. It returns a new
+// Selection object containing the matched elements.
+func (s *Selection) NextAllMatcher(m Matcher) *Selection {
+ return filterAndPush(s, getSiblingNodes(s.Nodes, siblingNextAll, nil, nil), m)
+}
+
+// Prev returns a new Selection with the sibling element right before each element.
+func (s *Selection) Prev() *Selection {
+	preceding := getSiblingNodes(s.Nodes, siblingPrev, nil, nil)
+	return pushStack(s, preceding)
+}
+
+// PrevFiltered returns a new Selection with the sibling element right before
+// each element of the Selection, kept only if it matches the selector.
+func (s *Selection) PrevFiltered(selector string) *Selection {
+	preceding := getSiblingNodes(s.Nodes, siblingPrev, nil, nil)
+	return filterAndPush(s, preceding, compileMatcher(selector))
+}
+
+// PrevMatcher returns a new Selection with the sibling element right before
+// each element of the Selection, kept only if the matcher m accepts it.
+func (s *Selection) PrevMatcher(m Matcher) *Selection {
+	preceding := getSiblingNodes(s.Nodes, siblingPrev, nil, nil)
+	return filterAndPush(s, preceding, m)
+}
+
+// PrevAll returns a new Selection with all sibling elements preceding each element.
+func (s *Selection) PrevAll() *Selection {
+	preceding := getSiblingNodes(s.Nodes, siblingPrevAll, nil, nil)
+	return pushStack(s, preceding)
+}
+
+// PrevAllFiltered returns a new Selection with all the sibling elements that
+// precede each element of the Selection and match the selector.
+func (s *Selection) PrevAllFiltered(selector string) *Selection {
+	preceding := getSiblingNodes(s.Nodes, siblingPrevAll, nil, nil)
+	return filterAndPush(s, preceding, compileMatcher(selector))
+}
+
+// PrevAllMatcher returns a new Selection with all the sibling elements that
+// precede each element of the Selection and are accepted by the matcher m.
+func (s *Selection) PrevAllMatcher(m Matcher) *Selection {
+	preceding := getSiblingNodes(s.Nodes, siblingPrevAll, nil, nil)
+	return filterAndPush(s, preceding, m)
+}
+
+// NextUntil returns a new Selection with the siblings that follow each
+// element, stopping before (and excluding) the first sibling that matches
+// the selector.
+func (s *Selection) NextUntil(selector string) *Selection {
+	until := compileMatcher(selector)
+	return pushStack(s, getSiblingNodes(s.Nodes, siblingNextUntil, until, nil))
+}
+
+// NextUntilMatcher returns a new Selection with the siblings that follow each
+// element, stopping before (and excluding) the first sibling accepted by
+// the matcher m.
+func (s *Selection) NextUntilMatcher(m Matcher) *Selection {
+	following := getSiblingNodes(s.Nodes, siblingNextUntil, m, nil)
+	return pushStack(s, following)
+}
+
+// NextUntilSelection returns a new Selection with the siblings that follow
+// each element, stopping before (and excluding) the first sibling that is part
+// of sel. A nil sel means no stop element: all following siblings are returned.
+func (s *Selection) NextUntilSelection(sel *Selection) *Selection {
+	if sel != nil {
+		return s.NextUntilNodes(sel.Nodes...)
+	}
+	return s.NextAll()
+}
+
+// NextUntilNodes returns a new Selection with the siblings that follow each
+// element, stopping before (and excluding) the first sibling that is one of
+// the given nodes.
+func (s *Selection) NextUntilNodes(nodes ...*html.Node) *Selection {
+	following := getSiblingNodes(s.Nodes, siblingNextUntil, nil, nodes)
+	return pushStack(s, following)
+}
+
+// PrevUntil returns a new Selection with the siblings that precede each
+// element, stopping before (and excluding) the first sibling that matches
+// the selector.
+func (s *Selection) PrevUntil(selector string) *Selection {
+	until := compileMatcher(selector)
+	return pushStack(s, getSiblingNodes(s.Nodes, siblingPrevUntil, until, nil))
+}
+
+// PrevUntilMatcher returns a new Selection with the siblings that precede each
+// element, stopping before (and excluding) the first sibling accepted by
+// the matcher m.
+func (s *Selection) PrevUntilMatcher(m Matcher) *Selection {
+	preceding := getSiblingNodes(s.Nodes, siblingPrevUntil, m, nil)
+	return pushStack(s, preceding)
+}
+
+// PrevUntilSelection returns a new Selection with the siblings that precede
+// each element, stopping before (and excluding) the first sibling that is part
+// of sel. A nil sel means no stop element: all preceding siblings are returned.
+func (s *Selection) PrevUntilSelection(sel *Selection) *Selection {
+	if sel != nil {
+		return s.PrevUntilNodes(sel.Nodes...)
+	}
+	return s.PrevAll()
+}
+
+// PrevUntilNodes returns a new Selection with the siblings that precede each
+// element, stopping before (and excluding) the first sibling that is one of
+// the given nodes.
+func (s *Selection) PrevUntilNodes(nodes ...*html.Node) *Selection {
+	preceding := getSiblingNodes(s.Nodes, siblingPrevUntil, nil, nodes)
+	return pushStack(s, preceding)
+}
+
+// NextFilteredUntil is like NextUntil, except that the siblings collected up
+// to the until selector are additionally filtered by a selector string.
+// It returns a new Selection object containing the matched elements.
+func (s *Selection) NextFilteredUntil(filterSelector, untilSelector string) *Selection {
+	following := getSiblingNodes(s.Nodes, siblingNextUntil, compileMatcher(untilSelector), nil)
+	return filterAndPush(s, following, compileMatcher(filterSelector))
+}
+
+// NextFilteredUntilMatcher is like NextUntilMatcher, except that the siblings
+// collected up to the until matcher are additionally filtered by a matcher.
+// It returns a new Selection object containing the matched elements.
+func (s *Selection) NextFilteredUntilMatcher(filter, until Matcher) *Selection {
+	following := getSiblingNodes(s.Nodes, siblingNextUntil, until, nil)
+	return filterAndPush(s, following, filter)
+}
+
+// NextFilteredUntilSelection is like NextUntilSelection, except that the
+// result is additionally filtered by a selector string.
+func (s *Selection) NextFilteredUntilSelection(filterSelector string, sel *Selection) *Selection {
+	filter := compileMatcher(filterSelector)
+	return s.NextMatcherUntilSelection(filter, sel)
+}
+
+// NextMatcherUntilSelection is like NextUntilSelection, with the option to
+// filter the results based on a matcher. A nil sel means there is no stop
+// element, so every following sibling accepted by the filter is returned.
+func (s *Selection) NextMatcherUntilSelection(filter Matcher, sel *Selection) *Selection {
+	if sel == nil {
+		return s.NextAllMatcher(filter)
+	}
+	return s.NextMatcherUntilNodes(filter, sel.Nodes...)
+}
+
+// NextFilteredUntilNodes is like NextUntilNodes, except that the siblings
+// collected up to the stop nodes are additionally filtered by a selector
+// string. It returns a new Selection object containing the matched elements.
+func (s *Selection) NextFilteredUntilNodes(filterSelector string, nodes ...*html.Node) *Selection {
+	following := getSiblingNodes(s.Nodes, siblingNextUntil, nil, nodes)
+	return filterAndPush(s, following, compileMatcher(filterSelector))
+}
+
+// NextMatcherUntilNodes is like NextUntilNodes, except that the siblings
+// collected up to the stop nodes are additionally filtered by a matcher.
+// It returns a new Selection object containing the matched elements.
+func (s *Selection) NextMatcherUntilNodes(filter Matcher, nodes ...*html.Node) *Selection {
+	following := getSiblingNodes(s.Nodes, siblingNextUntil, nil, nodes)
+	return filterAndPush(s, following, filter)
+}
+
+// PrevFilteredUntil is like PrevUntil, except that the siblings collected up
+// to the until selector are additionally filtered by a selector string.
+// It returns a new Selection object containing the matched elements.
+func (s *Selection) PrevFilteredUntil(filterSelector, untilSelector string) *Selection {
+	preceding := getSiblingNodes(s.Nodes, siblingPrevUntil, compileMatcher(untilSelector), nil)
+	return filterAndPush(s, preceding, compileMatcher(filterSelector))
+}
+
+// PrevFilteredUntilMatcher is like PrevUntilMatcher, except that the siblings
+// collected up to the until matcher are additionally filtered by a matcher.
+// It returns a new Selection object containing the matched elements.
+func (s *Selection) PrevFilteredUntilMatcher(filter, until Matcher) *Selection {
+	preceding := getSiblingNodes(s.Nodes, siblingPrevUntil, until, nil)
+	return filterAndPush(s, preceding, filter)
+}
+
+// PrevFilteredUntilSelection is like PrevUntilSelection, except that the
+// result is additionally filtered by a selector string.
+func (s *Selection) PrevFilteredUntilSelection(filterSelector string, sel *Selection) *Selection {
+	filter := compileMatcher(filterSelector)
+	return s.PrevMatcherUntilSelection(filter, sel)
+}
+
+// PrevMatcherUntilSelection is like PrevUntilSelection, with the option to
+// filter the results based on a matcher. A nil sel means there is no stop
+// element, so every preceding sibling accepted by the filter is returned.
+func (s *Selection) PrevMatcherUntilSelection(filter Matcher, sel *Selection) *Selection {
+	if sel == nil {
+		return s.PrevAllMatcher(filter)
+	}
+	return s.PrevMatcherUntilNodes(filter, sel.Nodes...)
+}
+
+// PrevFilteredUntilNodes is like PrevUntilNodes, except that the siblings
+// collected up to the stop nodes are additionally filtered by a selector
+// string. It returns a new Selection object containing the matched elements.
+func (s *Selection) PrevFilteredUntilNodes(filterSelector string, nodes ...*html.Node) *Selection {
+	preceding := getSiblingNodes(s.Nodes, siblingPrevUntil, nil, nodes)
+	return filterAndPush(s, preceding, compileMatcher(filterSelector))
+}
+
+// PrevMatcherUntilNodes is like PrevUntilNodes, except that the siblings
+// collected up to the stop nodes are additionally filtered by a matcher.
+// It returns a new Selection object containing the matched elements.
+func (s *Selection) PrevMatcherUntilNodes(filter Matcher, nodes ...*html.Node) *Selection {
+	preceding := getSiblingNodes(s.Nodes, siblingPrevUntil, nil, nodes)
+	return filterAndPush(s, preceding, filter)
+}
+
+// filterAndPush filters the nodes with the matcher m and pushes the result
+// on the stack, with srcSel as the previous selection.
+func filterAndPush(srcSel *Selection, nodes []*html.Node, m Matcher) *Selection {
+	// winnow works on a Selection, so wrap the raw nodes in a temporary one.
+	tmp := &Selection{Nodes: nodes, document: srcSel.document}
+	kept := winnow(tmp, m, true)
+	return pushStack(srcSel, kept)
+}
+
+// findWithMatcher is the internal implementation of Find; it returns raw nodes.
+func findWithMatcher(nodes []*html.Node, m Matcher) []*html.Node {
+	return mapNodes(nodes, func(_ int, n *html.Node) (matches []*html.Node) {
+		// Start one level down, because jQuery's Find selects only within descendants.
+		for child := n.FirstChild; child != nil; child = child.NextSibling {
+			if child.Type != html.ElementNode {
+				continue
+			}
+			matches = append(matches, m.MatchAll(child)...)
+		}
+		return matches
+	})
+}
+
+// getParentsNodes is the internal implementation that collects the element
+// ancestors of every node, walking upward from each node and stopping at the
+// first ancestor that satisfies the stop condition (no stop if there is none).
+func getParentsNodes(nodes []*html.Node, stopm Matcher, stopNodes []*html.Node) []*html.Node {
+	return mapNodes(nodes, func(_ int, n *html.Node) (ancestors []*html.Node) {
+		for anc := n.Parent; anc != nil; anc = anc.Parent {
+			cur := newSingleSelection(anc, nil)
+			// The matcher, when provided, takes precedence over the stop nodes.
+			if stopm != nil && cur.IsMatcher(stopm) {
+				break
+			}
+			if stopm == nil && len(stopNodes) > 0 && cur.IsNodes(stopNodes...) {
+				break
+			}
+			// Only element nodes are reported.
+			if anc.Type == html.ElementNode {
+				ancestors = append(ancestors, anc)
+			}
+		}
+		return ancestors
+	})
+}
+
+// getSiblingNodes is the internal sibling lookup; it returns the raw matches.
+func getSiblingNodes(nodes []*html.Node, st siblingType, untilm Matcher, untilNodes []*html.Node) []*html.Node {
+	// reached tests the until condition; it is only set for the ...Until types.
+	var reached func(*html.Node) bool
+
+	switch st {
+	case siblingNextUntil, siblingPrevUntil:
+		reached = func(n *html.Node) bool {
+			switch {
+			case untilm != nil:
+				// Matcher-based condition
+				return newSingleSelection(n, nil).IsMatcher(untilm)
+			case len(untilNodes) > 0:
+				// Nodes-based condition
+				return newSingleSelection(n, nil).IsNodes(untilNodes...)
+			default:
+				return false
+			}
+		}
+	}
+
+	return mapNodes(nodes, func(_ int, n *html.Node) []*html.Node {
+		return getChildrenWithSiblingType(n.Parent, st, n, reached)
+	})
+}
+
+// getChildrenNodes gets the children of each given node, per the sibling type.
+func getChildrenNodes(nodes []*html.Node, st siblingType) []*html.Node {
+	childrenOf := func(_ int, parent *html.Node) []*html.Node {
+		return getChildrenWithSiblingType(parent, st, nil, nil)
+	}
+	return mapNodes(nodes, childrenOf)
+}
+
+// getChildrenWithSiblingType gets the children of the specified parent, based
+// on the requested sibling type, skipping a specified node if required.
+func getChildrenWithSiblingType(parent *html.Node, st siblingType, skipNode *html.Node,
+	untilFunc func(*html.Node) bool) (result []*html.Node) {
+	if parent == nil && (st == siblingAll || st == siblingAllIncludingNonElements) {
+		return nil // no parent (root or detached node), hence no children to walk
+	}
+
+	// iter returns the node that follows cur in the direction st asks for.
+	var iter = func(cur *html.Node) (ret *html.Node) {
+		for {
+			switch st {
+			case siblingAll, siblingAllIncludingNonElements:
+				if cur == nil {
+					// First iteration: start with the first child, skipping skipNode
+					if ret = parent.FirstChild; ret == skipNode && skipNode != nil {
+						ret = skipNode.NextSibling
+					}
+				} else {
+					// Skip node if required
+					if ret = cur.NextSibling; ret == skipNode && skipNode != nil {
+						ret = skipNode.NextSibling
+					}
+				}
+			case siblingPrev, siblingPrevAll, siblingPrevUntil:
+				if cur == nil {
+					// Start with previous sibling of the skip node
+					ret = skipNode.PrevSibling
+				} else {
+					ret = cur.PrevSibling
+				}
+			case siblingNext, siblingNextAll, siblingNextUntil:
+				if cur == nil {
+					// Start with next sibling of the skip node
+					ret = skipNode.NextSibling
+				} else {
+					ret = cur.NextSibling
+				}
+			default:
+				panic("Invalid sibling type.")
+			}
+			if ret == nil || ret.Type == html.ElementNode || st == siblingAllIncludingNonElements {
+				return
+			}
+			// Not a valid node, try again from this one
+			cur = ret
+		}
+	}
+
+	for c := iter(nil); c != nil; c = iter(c) {
+		// ...Until case: stop, without appending, once the until condition is reached
+		if st == siblingNextUntil || st == siblingPrevUntil {
+			if untilFunc(c) {
+				return
+			}
+		}
+		result = append(result, c)
+		if st == siblingNext || st == siblingPrev {
+			// Only one node was requested (immediate next or previous), so exit
+			return
+		}
+	}
+	return
+}
+
+// getParentNodes returns, as a raw slice, the element parent of each node.
+func getParentNodes(nodes []*html.Node) []*html.Node {
+	return mapNodes(nodes, func(_ int, n *html.Node) []*html.Node {
+		if n.Parent == nil || n.Parent.Type != html.ElementNode {
+			return nil
+		}
+		return []*html.Node{n.Parent}
+	})
+}
+
+// mapNodes is the internal map function used by many traversing methods. It
+// calls f once per source node and gathers the returned nodes, without duplicates.
+func mapNodes(nodes []*html.Node, f func(int, *html.Node) []*html.Node) (result []*html.Node) {
+	seen := make(map[*html.Node]bool)
+	for idx, node := range nodes {
+		mapped := f(idx, node)
+		if len(mapped) == 0 {
+			continue
+		}
+		result = appendWithoutDuplicates(result, mapped, seen)
+	}
+	return result
+}
diff --git a/vendor/github.com/PuerkitoBio/goquery/type.go b/vendor/github.com/PuerkitoBio/goquery/type.go
new file mode 100644
index 000000000..6ad51dbc5
--- /dev/null
+++ b/vendor/github.com/PuerkitoBio/goquery/type.go
@@ -0,0 +1,141 @@
+package goquery
+
+import (
+ "errors"
+ "io"
+ "net/http"
+ "net/url"
+
+ "github.com/andybalholm/cascadia"
+
+ "golang.org/x/net/html"
+)
+
+// Document represents an HTML document to be manipulated. Unlike jQuery, which
+// is loaded as part of a DOM document and thus acts upon its containing
+// document, goquery doesn't know which HTML document to act upon. It needs
+// to be told, and that's what the Document type is for. It holds the root
+// document node to manipulate, and can make selections on this document.
+type Document struct {
+ *Selection // selection holding only the root node (set by newDocument)
+ Url *url.URL // request URL when built from an http response, nil otherwise
+ rootNode *html.Node // root node of the parsed tree
+}
+
+// NewDocumentFromNode is a Document constructor that wraps an already parsed
+// root html Node. The resulting Document has a nil Url.
+func NewDocumentFromNode(root *html.Node) *Document {
+ return newDocument(root, nil)
+}
+
+// NewDocument is a Document constructor that takes a string URL as argument.
+// It issues an HTTP GET for that URL and hands the response over to
+// NewDocumentFromResponse, which parses it and builds the Document.
+//
+// Deprecated: Use the net/http standard library package to make the request
+// and validate the response before calling goquery.NewDocumentFromReader
+// with the response's body.
+func NewDocument(url string) (*Document, error) {
+	// Fetch the page; NewDocumentFromResponse takes care of closing the body.
+	resp, err := http.Get(url)
+	if err != nil {
+		return nil, err
+	}
+	return NewDocumentFromResponse(resp)
+}
+
+// NewDocumentFromReader returns a Document built from the HTML read from r.
+// The second return value is the error reported by html.Parse, if any.
+// The reader is only read from: it is never closed by this call, even when
+// it also implements io.Closer, so closing it remains the responsibility
+// of the caller.
+func NewDocumentFromReader(r io.Reader) (*Document, error) {
+	root, err := html.Parse(r)
+	if err != nil {
+		return nil, err
+	}
+	return newDocument(root, nil), nil
+}
+
+// NewDocumentFromResponse is another Document constructor that takes an http response as argument.
+// It parses the response's body and stores the root node together with the
+// request URL. It fails on a nil response or nil Request; the body is closed on return.
+//
+// Deprecated: Use goquery.NewDocumentFromReader with the response's body.
+func NewDocumentFromResponse(res *http.Response) (*Document, error) {
+	if res == nil {
+		return nil, errors.New("Response is nil")
+	}
+	// From here on the body is closed on every return path.
+	defer res.Body.Close()
+	if res.Request == nil {
+		return nil, errors.New("Response.Request is nil")
+	}
+
+	root, err := html.Parse(res.Body)
+	if err != nil {
+		return nil, err
+	}
+
+	// The request URL is recorded as the document's Url.
+	return newDocument(root, res.Request.URL), nil
+}
+
+// CloneDocument returns a new Document built from a clone of doc's root node (via cloneNode), keeping doc's Url.
+func CloneDocument(doc *Document) *Document {
+ return newDocument(cloneNode(doc.rootNode), doc.Url)
+}
+
+// newDocument is the private constructor; it fills in every Document field.
+func newDocument(root *html.Node, url *url.URL) *Document {
+	doc := &Document{Url: url, rootNode: root}
+	// The root selection points back at the document that owns it.
+	doc.Selection = newSingleSelection(root, doc)
+	return doc
+}
+
+// Selection represents a collection of nodes matching some criteria. The
+// initial Selection can be created by using Document.Find, and then
+// manipulated using the jQuery-like chainable syntax and methods.
+type Selection struct {
+ Nodes []*html.Node // the nodes held by this selection
+ document *Document // owning document; nil for internal temporary selections
+ prevSel *Selection // selection this one was derived from (set by pushStack)
+}
+
+// newEmptySelection is a helper constructor for a Selection of doc with no nodes.
+func newEmptySelection(doc *Document) *Selection {
+	return &Selection{document: doc}
+}
+
+// newSingleSelection is a helper constructor for a Selection of exactly one node.
+func newSingleSelection(node *html.Node, doc *Document) *Selection {
+	return &Selection{Nodes: []*html.Node{node}, document: doc}
+}
+
+// Matcher is the interface used to match HTML nodes against a compiled
+// selector string. Cascadia's Selector implements this interface, and so
+// does invalidMatcher, which never matches anything.
+type Matcher interface {
+ Match(*html.Node) bool
+ MatchAll(*html.Node) []*html.Node
+ Filter([]*html.Node) []*html.Node
+}
+
+// compileMatcher turns the selector string s into a Matcher by compiling it
+// with cascadia. A selector that fails to compile does not produce an error:
+// the returned Matcher simply rejects every node.
+func compileMatcher(s string) Matcher {
+	if sel, err := cascadia.Compile(s); err == nil {
+		return sel
+	}
+	// Invalid selector: fall back to the never-matching Matcher.
+	return invalidMatcher{}
+}
+
+// invalidMatcher is the Matcher that compileMatcher returns for selectors that do not compile; it never matches.
+type invalidMatcher struct{}
+
+func (invalidMatcher) Match(n *html.Node) bool { return false }
+func (invalidMatcher) MatchAll(n *html.Node) []*html.Node { return nil }
+func (invalidMatcher) Filter(ns []*html.Node) []*html.Node { return nil }
diff --git a/vendor/github.com/PuerkitoBio/goquery/utilities.go b/vendor/github.com/PuerkitoBio/goquery/utilities.go
new file mode 100644
index 000000000..b4c061a4d
--- /dev/null
+++ b/vendor/github.com/PuerkitoBio/goquery/utilities.go
@@ -0,0 +1,161 @@
+package goquery
+
+import (
+ "bytes"
+
+ "golang.org/x/net/html"
+)
+
+// minNodesForSet is the node count from which appendWithoutDuplicates builds a
+// set (map[*html.Node]bool) instead of iterating over a slice. The set uses
+// more memory and is slower than slice iteration for small N.
+const minNodesForSet = 1000
+
+var nodeNames = []string{ // indexed by html.NodeType; read by NodeName for non-element, non-doctype nodes
+ html.ErrorNode: "#error",
+ html.TextNode: "#text",
+ html.DocumentNode: "#document",
+ html.CommentNode: "#comment",
+}
+
+// NodeName returns the node name of the first element in the selection,
+// or the empty string when the selection is empty. It tries to behave like
+// the DOM's nodeName property
+// (https://developer.mozilla.org/en-US/docs/Web/API/Node/nodeName).
+//
+// Go's net/html package defines the following node types, listed with
+// the corresponding returned value from this function:
+//
+//     ErrorNode    : #error
+//     TextNode     : #text
+//     DocumentNode : #document
+//     ElementNode  : the element's tag name
+//     CommentNode  : #comment
+//     DoctypeNode  : the name of the document type
+func NodeName(s *Selection) string {
+	if s.Length() == 0 {
+		return ""
+	}
+	first := s.Get(0)
+	if first.Type == html.ElementNode || first.Type == html.DoctypeNode {
+		return first.Data
+	}
+	// Every other known type has a fixed "#..." name in the nodeNames table.
+	if first.Type >= 0 && int(first.Type) < len(nodeNames) {
+		return nodeNames[first.Type]
+	}
+	return ""
+}
+
+// OuterHtml returns the outer HTML rendering of the first item in
+// the selection - that is, the HTML including the first element's
+// tag and attributes. An empty selection yields an empty string.
+//
+// Unlike InnerHtml, this is a function and not a method on the Selection,
+// because this is not a jQuery method (in javascript-land, this is
+// a property provided by the DOM).
+func OuterHtml(s *Selection) (string, error) {
+	if s.Length() == 0 {
+		return "", nil
+	}
+
+	var out bytes.Buffer
+	err := html.Render(&out, s.Get(0))
+	if err != nil {
+		return "", err
+	}
+	return out.String(), nil
+}
+
+// sliceContains reports whether at least one of the container nodes contains
+// the target node, as defined by nodeContains.
+func sliceContains(container []*html.Node, contained *html.Node) bool {
+	for i := range container {
+		if nodeContains(container[i], contained) {
+			return true
+		}
+	}
+	return false
+}
+
+// nodeContains reports whether the contained node is within the container
+// node, that is, whether container is one of its ancestors.
+func nodeContains(container *html.Node, contained *html.Node) bool {
+	// Climb from the parent of contained up to the top, looking for the
+	// container on the way; a node is never considered to contain itself.
+	ancestor := contained.Parent
+	for ancestor != nil && ancestor != container {
+		ancestor = ancestor.Parent
+	}
+	return ancestor != nil
+}
+
+// isInSlice reports whether the target node is one of the nodes of the slice.
+func isInSlice(slice []*html.Node, node *html.Node) bool {
+	return indexInSlice(slice, node) >= 0
+}
+
+// indexInSlice returns the index of the first occurrence of the target node
+// in the slice, or -1 when it is not there. A nil node is never looked up
+// and always yields -1.
+func indexInSlice(slice []*html.Node, node *html.Node) int {
+	for i := 0; node != nil && i < len(slice); i++ {
+		if slice[i] == node {
+			return i
+		}
+	}
+	return -1
+}
+
+// appendWithoutDuplicates appends the new nodes to the target slice, making
+// sure no duplicate is added. The original state of the target slice is not
+// checked, so it may still contain duplicates. The target slice is returned
+// because append() may create a new underlying array. If targetSet is nil, a
+// local set is built when len(target) + len(nodes) reaches minNodesForSet.
+func appendWithoutDuplicates(target []*html.Node, nodes []*html.Node, targetSet map[*html.Node]bool) []*html.Node {
+	if targetSet == nil {
+		// Few nodes and no caller-provided set: nested loops over the slice
+		// are faster than building a map.
+		if len(target)+len(nodes) < minNodesForSet {
+			for _, candidate := range nodes {
+				if isInSlice(target, candidate) {
+					continue
+				}
+				target = append(target, candidate)
+			}
+			return target
+		}
+		// Otherwise build a local set seeded with the current target contents.
+		targetSet = make(map[*html.Node]bool, len(target))
+		for _, existing := range target {
+			targetSet[existing] = true
+		}
+	}
+	// A set passed in by the caller is assumed to be reliable.
+	for _, candidate := range nodes {
+		if targetSet[candidate] {
+			continue
+		}
+		target = append(target, candidate)
+		targetSet[candidate] = true
+	}
+	return target
+}
+
+// grep returns the nodes of the selection for which the predicate, called
+// with the node's index and a single-node Selection, returns true.
+func grep(sel *Selection, predicate func(i int, s *Selection) bool) (result []*html.Node) {
+	for idx, node := range sel.Nodes {
+		if single := newSingleSelection(node, sel.document); predicate(idx, single) {
+			result = append(result, node)
+		}
+	}
+	return result
+}
+
+// pushStack creates a new Selection holding the specified nodes, with the
+// same document as fromSel, and keeps fromSel as its previous selection
+// (the stack is a linked list).
+func pushStack(fromSel *Selection, nodes []*html.Node) *Selection {
+	return &Selection{Nodes: nodes, document: fromSel.document, prevSel: fromSel}
+}
diff --git a/vendor/github.com/andybalholm/cascadia/.travis.yml b/vendor/github.com/andybalholm/cascadia/.travis.yml
new file mode 100644
index 000000000..6f227517d
--- /dev/null
+++ b/vendor/github.com/andybalholm/cascadia/.travis.yml
@@ -0,0 +1,14 @@
+language: go
+
+go:
+ - 1.3
+ - 1.4
+
+install:
+ - go get github.com/andybalholm/cascadia
+
+script:
+ - go test -v
+
+notifications:
+ email: false
diff --git a/vendor/github.com/andybalholm/cascadia/LICENSE b/vendor/github.com/andybalholm/cascadia/LICENSE
new file mode 100644
index 000000000..ee5ad35ac
--- /dev/null
+++ b/vendor/github.com/andybalholm/cascadia/LICENSE
@@ -0,0 +1,24 @@
+Copyright (c) 2011 Andy Balholm. All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+ * Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+copyright notice, this list of conditions and the following disclaimer
+in the documentation and/or other materials provided with the
+distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/vendor/github.com/andybalholm/cascadia/README.md b/vendor/github.com/andybalholm/cascadia/README.md
new file mode 100644
index 000000000..9021cb92a
--- /dev/null
+++ b/vendor/github.com/andybalholm/cascadia/README.md
@@ -0,0 +1,7 @@
+# cascadia
+
+[![Build Status](https://travis-ci.org/andybalholm/cascadia.svg)](https://travis-ci.org/andybalholm/cascadia)
+
+The Cascadia package implements CSS selectors for use with the parse trees produced by the html package.
+
+To test CSS selectors without writing Go code, check out [cascadia](https://github.com/suntong/cascadia) the command line tool, a thin wrapper around this package.
diff --git a/vendor/github.com/andybalholm/cascadia/go.mod b/vendor/github.com/andybalholm/cascadia/go.mod
new file mode 100644
index 000000000..e6febbbfe
--- /dev/null
+++ b/vendor/github.com/andybalholm/cascadia/go.mod
@@ -0,0 +1,3 @@
+module "github.com/andybalholm/cascadia"
+
+require "golang.org/x/net" v0.0.0-20180218175443-cbe0f9307d01
diff --git a/vendor/github.com/andybalholm/cascadia/parser.go b/vendor/github.com/andybalholm/cascadia/parser.go
new file mode 100644
index 000000000..495db9ccf
--- /dev/null
+++ b/vendor/github.com/andybalholm/cascadia/parser.go
@@ -0,0 +1,835 @@
+// Package cascadia is an implementation of CSS selectors.
+package cascadia
+
+import (
+ "errors"
+ "fmt"
+ "regexp"
+ "strconv"
+ "strings"
+
+ "golang.org/x/net/html"
+)
+
+// parser holds the state of a parser for CSS selectors.
+type parser struct {
+ s string // the source text (the selector being parsed)
+ i int // the current position, a byte offset into s
+}
+
+// parseEscape parses a backslash escape.
+func (p *parser) parseEscape() (result string, err error) {
+ if len(p.s) < p.i+2 || p.s[p.i] != '\\' {
+ return "", errors.New("invalid escape sequence")
+ }
+
+ start := p.i + 1
+ c := p.s[start]
+ switch {
+ case c == '\r' || c == '\n' || c == '\f':
+ return "", errors.New("escaped line ending outside string")
+ case hexDigit(c):
+ // unicode escape (hex)
+ var i int
+ for i = start; i < p.i+6 && i < len(p.s) && hexDigit(p.s[i]); i++ {
+ // empty
+ }
+ v, _ := strconv.ParseUint(p.s[start:i], 16, 21)
+ if len(p.s) > i {
+ switch p.s[i] {
+ case '\r':
+ i++
+ if len(p.s) > i && p.s[i] == '\n' {
+ i++
+ }
+ case ' ', '\t', '\n', '\f':
+ i++
+ }
+ }
+ p.i = i
+ return string(rune(v)), nil
+ }
+
+ // Return the literal character after the backslash.
+ result = p.s[start : start+1]
+ p.i += 2
+ return result, nil
+}
+
+func hexDigit(c byte) bool { // reports whether c is an ASCII hexadecimal digit
+	return ('0' <= c && c <= '9') || ('a' <= c && c <= 'f') || ('A' <= c && c <= 'F')
+}
+
+// nameStart reports whether c can be the first character of an identifier
+// (not counting an initial hyphen, or an escape sequence).
+func nameStart(c byte) bool {
+	return c == '_' || c > 127 || ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z')
+}
+
+// nameChar reports whether c can appear within an identifier (escapes aside).
+func nameChar(c byte) bool {
+	isLetter := 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z'
+	isDigit := '0' <= c && c <= '9'
+	return isLetter || isDigit || c == '_' || c == '-' || c > 127
+}
+
+// parseIdentifier parses an identifier, optionally starting with a hyphen, at p.i.
+func (p *parser) parseIdentifier() (result string, err error) {
+	prefix := ""
+	if p.i < len(p.s) && p.s[p.i] == '-' {
+		// A leading hyphen is consumed here and put back in front of the name.
+		prefix = "-"
+		p.i++
+	}
+
+	if p.i >= len(p.s) {
+		return "", errors.New("expected identifier, found EOF instead")
+	}
+	if c := p.s[p.i]; !nameStart(c) && c != '\\' {
+		return "", fmt.Errorf("expected identifier, found %c instead", c)
+	}
+
+	name, err := p.parseName()
+	if err != nil {
+		return name, err
+	}
+	return prefix + name, nil
+}
+
+// parseName parses a name (which is like an identifier, but doesn't have
+// extra restrictions on the first character). Escapes are decoded on the way.
+func (p *parser) parseName() (result string, err error) {
+	pos := p.i
+scan:
+	for pos < len(p.s) {
+		switch c := p.s[pos]; {
+		case c == '\\':
+			// parseEscape reads from p.i, so sync the cursor before and after.
+			p.i = pos
+			unescaped, err := p.parseEscape()
+			if err != nil {
+				return "", err
+			}
+			pos = p.i
+			result += unescaped
+		case nameChar(c):
+			begin := pos
+			for pos < len(p.s) && nameChar(p.s[pos]) {
+				pos++
+			}
+			result += p.s[begin:pos]
+		default:
+			break scan
+		}
+	}
+
+	if result == "" {
+		return "", errors.New("expected name, found EOF instead")
+	}
+
+	p.i = pos
+	return result, nil
+}
+
+// parseString parses a single- or double-quoted string starting at p.i and returns its unescaped content.
+func (p *parser) parseString() (result string, err error) {
+ i := p.i
+ if len(p.s) < i+2 {
+ return "", errors.New("expected string, found EOF instead")
+ }
+
+ quote := p.s[i] // the opening character; the same character must close the string
+ i++
+
+loop:
+ for i < len(p.s) {
+ switch p.s[i] {
+ case '\\':
+ if len(p.s) > i+1 {
+ switch c := p.s[i+1]; c {
+ case '\r':
+ if len(p.s) > i+2 && p.s[i+2] == '\n' {
+ i += 3 // backslash + CR LF: skipped, contributes nothing to the result
+ continue loop
+ }
+ fallthrough // lone CR: handled like the other escaped line endings
+ case '\n', '\f':
+ i += 2 // backslash + line ending: skipped, contributes nothing to the result
+ continue loop
+ }
+ }
+ p.i = i
+ val, err := p.parseEscape()
+ if err != nil {
+ return "", err
+ }
+ i = p.i
+ result += val
+ case quote:
+ break loop
+ case '\r', '\n', '\f':
+ return "", errors.New("unexpected end of line in string")
+ default:
+ start := i
+ for i < len(p.s) {
+ if c := p.s[i]; c == quote || c == '\\' || c == '\r' || c == '\n' || c == '\f' {
+ break
+ }
+ i++
+ }
+ result += p.s[start:i]
+ }
+ }
+
+ if i >= len(p.s) { // input ended before the closing quote was found
+ return "", errors.New("EOF in string")
+ }
+
+ // Consume the final quote.
+ i++
+
+ p.i = i
+ return result, nil
+}
+
+// parseRegex parses a regular expression; the end is defined by encountering an
+// unmatched closing ')' or ']' which is not consumed
+func (p *parser) parseRegex() (rx *regexp.Regexp, err error) {
+ i := p.i
+ if len(p.s) < i+2 {
+ return nil, errors.New("expected regular expression, found EOF instead")
+ }
+
+ // number of open parens or brackets;
+ // when it becomes negative, finished parsing regex
+ open := 0
+
+loop:
+ for i < len(p.s) {
+ switch p.s[i] {
+ case '(', '[':
+ open++
+ case ')', ']':
+ open--
+ if open < 0 {
+ break loop
+ }
+ }
+ i++
+ }
+
+ if i >= len(p.s) {
+ return nil, errors.New("EOF in regular expression")
+ }
+ rx, err = regexp.Compile(p.s[p.i:i])
+ p.i = i
+ return rx, err
+}
+
+// skipWhitespace consumes whitespace characters and comments.
+// It returns true if there was actually anything to skip.
+func (p *parser) skipWhitespace() bool {
+ i := p.i
+ for i < len(p.s) {
+ switch p.s[i] {
+ case ' ', '\t', '\r', '\n', '\f':
+ i++
+ continue
+ case '/':
+ if strings.HasPrefix(p.s[i:], "/*") {
+ end := strings.Index(p.s[i+len("/*"):], "*/")
+ if end != -1 {
+ i += end + len("/**/")
+ continue
+ }
+ }
+ }
+ break
+ }
+
+ if i > p.i {
+ p.i = i
+ return true
+ }
+
+ return false
+}
+
+// consumeParenthesis skips a '(' at the current position together with any
+// whitespace after it. It reports whether a '(' was present.
+func (p *parser) consumeParenthesis() bool {
+	if p.i >= len(p.s) || p.s[p.i] != '(' {
+		return false
+	}
+	p.i++
+	p.skipWhitespace()
+	return true
+}
+
+// consumeClosingParenthesis skips optional whitespace followed by a ')'.
+// It reports whether a ')' was found; if not, the position is restored.
+func (p *parser) consumeClosingParenthesis() bool {
+	saved := p.i
+	p.skipWhitespace()
+	if p.i >= len(p.s) || p.s[p.i] != ')' {
+		p.i = saved
+		return false
+	}
+	p.i++
+	return true
+}
+
+// parseTypeSelector parses a type selector (one that matches by tag name).
+func (p *parser) parseTypeSelector() (result Selector, err error) {
+ tag, err := p.parseIdentifier()
+ if err != nil {
+ return nil, err
+ }
+
+ return typeSelector(tag), nil
+}
+
+// parseIDSelector parses a selector that matches by id attribute.
+func (p *parser) parseIDSelector() (Selector, error) {
+ if p.i >= len(p.s) {
+ return nil, fmt.Errorf("expected id selector (#id), found EOF instead")
+ }
+ if p.s[p.i] != '#' {
+ return nil, fmt.Errorf("expected id selector (#id), found '%c' instead", p.s[p.i])
+ }
+
+ p.i++
+ id, err := p.parseName()
+ if err != nil {
+ return nil, err
+ }
+
+ return attributeEqualsSelector("id", id), nil
+}
+
+// parseClassSelector parses a selector that matches by class attribute.
+func (p *parser) parseClassSelector() (Selector, error) {
+ if p.i >= len(p.s) {
+ return nil, fmt.Errorf("expected class selector (.class), found EOF instead")
+ }
+ if p.s[p.i] != '.' {
+ return nil, fmt.Errorf("expected class selector (.class), found '%c' instead", p.s[p.i])
+ }
+
+ p.i++
+ class, err := p.parseIdentifier()
+ if err != nil {
+ return nil, err
+ }
+
+ return attributeIncludesSelector("class", class), nil
+}
+
+// parseAttributeSelector parses a selector that matches by attribute value.
+func (p *parser) parseAttributeSelector() (Selector, error) {
+ if p.i >= len(p.s) {
+ return nil, fmt.Errorf("expected attribute selector ([attribute]), found EOF instead")
+ }
+ if p.s[p.i] != '[' {
+ return nil, fmt.Errorf("expected attribute selector ([attribute]), found '%c' instead", p.s[p.i])
+ }
+
+ p.i++
+ p.skipWhitespace()
+ key, err := p.parseIdentifier()
+ if err != nil {
+ return nil, err
+ }
+
+ p.skipWhitespace()
+ if p.i >= len(p.s) {
+ return nil, errors.New("unexpected EOF in attribute selector")
+ }
+
+ if p.s[p.i] == ']' {
+ p.i++
+ return attributeExistsSelector(key), nil
+ }
+
+ if p.i+2 >= len(p.s) {
+ return nil, errors.New("unexpected EOF in attribute selector")
+ }
+
+ op := p.s[p.i : p.i+2]
+ if op[0] == '=' {
+ op = "="
+ } else if op[1] != '=' {
+ return nil, fmt.Errorf(`expected equality operator, found "%s" instead`, op)
+ }
+ p.i += len(op)
+
+ p.skipWhitespace()
+ if p.i >= len(p.s) {
+ return nil, errors.New("unexpected EOF in attribute selector")
+ }
+ var val string
+ var rx *regexp.Regexp
+ if op == "#=" {
+ rx, err = p.parseRegex()
+ } else {
+ switch p.s[p.i] {
+ case '\'', '"':
+ val, err = p.parseString()
+ default:
+ val, err = p.parseIdentifier()
+ }
+ }
+ if err != nil {
+ return nil, err
+ }
+
+ p.skipWhitespace()
+ if p.i >= len(p.s) {
+ return nil, errors.New("unexpected EOF in attribute selector")
+ }
+ if p.s[p.i] != ']' {
+ return nil, fmt.Errorf("expected ']', found '%c' instead", p.s[p.i])
+ }
+ p.i++
+
+ switch op {
+ case "=":
+ return attributeEqualsSelector(key, val), nil
+ case "!=":
+ return attributeNotEqualSelector(key, val), nil
+ case "~=":
+ return attributeIncludesSelector(key, val), nil
+ case "|=":
+ return attributeDashmatchSelector(key, val), nil
+ case "^=":
+ return attributePrefixSelector(key, val), nil
+ case "$=":
+ return attributeSuffixSelector(key, val), nil
+ case "*=":
+ return attributeSubstringSelector(key, val), nil
+ case "#=":
+ return attributeRegexSelector(key, rx), nil
+ }
+
+ return nil, fmt.Errorf("attribute operator %q is not supported", op)
+}
+
+var errExpectedParenthesis = errors.New("expected '(' but didn't find it")
+var errExpectedClosingParenthesis = errors.New("expected ')' but didn't find it")
+var errUnmatchedParenthesis = errors.New("unmatched '('")
+
+// parsePseudoclassSelector parses a pseudoclass selector such as :not(p) or :nth-child(2n+1).
+func (p *parser) parsePseudoclassSelector() (Selector, error) {
+	if p.i >= len(p.s) {
+		return nil, fmt.Errorf("expected pseudoclass selector (:pseudoclass), found EOF instead")
+	}
+	if p.s[p.i] != ':' {
+		return nil, fmt.Errorf("expected pseudoclass selector (:pseudoclass), found '%c' instead", p.s[p.i])
+	}
+	// Skip the ':'; the name after it is compared case-insensitively (ASCII only).
+	p.i++
+	name, err := p.parseIdentifier()
+	if err != nil {
+		return nil, err
+	}
+	name = toLowerASCII(name)
+
+	switch name {
+	case "not", "has", "haschild":
+		if !p.consumeParenthesis() {
+			return nil, errExpectedParenthesis
+		}
+		sel, parseErr := p.parseSelectorGroup()
+		if parseErr != nil {
+			return nil, parseErr
+		}
+		if !p.consumeClosingParenthesis() {
+			return nil, errExpectedClosingParenthesis
+		}
+
+		switch name {
+		case "not":
+			return negatedSelector(sel), nil
+		case "has":
+			return hasDescendantSelector(sel), nil
+		case "haschild":
+			return hasChildSelector(sel), nil
+		}
+	// The argument is a quoted string or an identifier; it is lowercased before use.
+	case "contains", "containsown":
+		if !p.consumeParenthesis() {
+			return nil, errExpectedParenthesis
+		}
+		if p.i == len(p.s) {
+			return nil, errUnmatchedParenthesis
+		}
+		var val string
+		switch p.s[p.i] {
+		case '\'', '"':
+			val, err = p.parseString()
+		default:
+			val, err = p.parseIdentifier()
+		}
+		if err != nil {
+			return nil, err
+		}
+		val = strings.ToLower(val)
+		p.skipWhitespace()
+		if p.i >= len(p.s) {
+			return nil, errors.New("unexpected EOF in pseudo selector")
+		}
+		if !p.consumeClosingParenthesis() {
+			return nil, errExpectedClosingParenthesis
+		}
+
+		switch name {
+		case "contains":
+			return textSubstrSelector(val), nil
+		case "containsown":
+			return ownTextSubstrSelector(val), nil
+		}
+	// The argument is a regular expression that extends to the first unmatched ')' or ']'.
+	case "matches", "matchesown":
+		if !p.consumeParenthesis() {
+			return nil, errExpectedParenthesis
+		}
+		rx, err := p.parseRegex()
+		if err != nil {
+			return nil, err
+		}
+		if p.i >= len(p.s) {
+			return nil, errors.New("unexpected EOF in pseudo selector")
+		}
+		if !p.consumeClosingParenthesis() {
+			return nil, errExpectedClosingParenthesis
+		}
+
+		switch name {
+		case "matches":
+			return textRegexSelector(rx), nil
+		case "matchesown":
+			return ownTextRegexSelector(rx), nil
+		}
+	// The argument has the form an+b (or odd/even); a == 0 uses the simple fixed-index selectors.
+	case "nth-child", "nth-last-child", "nth-of-type", "nth-last-of-type":
+		if !p.consumeParenthesis() {
+			return nil, errExpectedParenthesis
+		}
+		a, b, err := p.parseNth()
+		if err != nil {
+			return nil, err
+		}
+		if !p.consumeClosingParenthesis() {
+			return nil, errExpectedClosingParenthesis
+		}
+		if a == 0 {
+			switch name {
+			case "nth-child":
+				return simpleNthChildSelector(b, false), nil
+			case "nth-of-type":
+				return simpleNthChildSelector(b, true), nil
+			case "nth-last-child":
+				return simpleNthLastChildSelector(b, false), nil
+			case "nth-last-of-type":
+				return simpleNthLastChildSelector(b, true), nil
+			}
+		}
+		return nthChildSelector(a, b,
+				name == "nth-last-child" || name == "nth-last-of-type",
+				name == "nth-of-type" || name == "nth-last-of-type"),
+			nil
+	// The remaining pseudoclasses take no argument.
+	case "first-child":
+		return simpleNthChildSelector(1, false), nil
+	case "last-child":
+		return simpleNthLastChildSelector(1, false), nil
+	case "first-of-type":
+		return simpleNthChildSelector(1, true), nil
+	case "last-of-type":
+		return simpleNthLastChildSelector(1, true), nil
+	case "only-child":
+		return onlyChildSelector(false), nil
+	case "only-of-type":
+		return onlyChildSelector(true), nil
+	case "input":
+		return inputSelector, nil
+	case "empty":
+		return emptyElementSelector, nil
+	case "root":
+		return rootSelector, nil
+	}
+
+	return nil, fmt.Errorf("unknown pseudoclass :%s", name)
+}
+
+// parseInteger reads a run of ASCII digits at the current position and returns
+// its decimal value. The position moves past the digits before conversion.
+func (p *parser) parseInteger() (int, error) {
+	begin, end := p.i, p.i
+	for ; end < len(p.s); end++ {
+		if d := p.s[end]; d < '0' || d > '9' {
+			break
+		}
+	}
+	if end == begin {
+		return 0, errors.New("expected integer, but didn't find it")
+	}
+	p.i = end
+	n, err := strconv.Atoi(p.s[begin:end])
+	if err != nil {
+		return 0, err
+	}
+	return n, nil
+}
+
+// parseNth parses the argument for :nth-child (normally of the form an+b).
+func (p *parser) parseNth() (a, b int, err error) {
+ // initial state
+ if p.i >= len(p.s) {
+ goto eof
+ }
+ switch p.s[p.i] {
+ case '-':
+ p.i++
+ goto negativeA
+ case '+':
+ p.i++
+ goto positiveA
+ case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
+ goto positiveA
+ case 'n', 'N':
+ a = 1
+ p.i++
+ goto readN
+ case 'o', 'O', 'e', 'E':
+ id, nameErr := p.parseName()
+ if nameErr != nil {
+ return 0, 0, nameErr
+ }
+ id = toLowerASCII(id)
+ if id == "odd" {
+ return 2, 1, nil
+ }
+ if id == "even" {
+ return 2, 0, nil
+ }
+ return 0, 0, fmt.Errorf("expected 'odd' or 'even', but found '%s' instead", id)
+ default:
+ goto invalid
+ }
+
+positiveA:
+ if p.i >= len(p.s) {
+ goto eof
+ }
+ switch p.s[p.i] {
+ case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
+ a, err = p.parseInteger()
+ if err != nil {
+ return 0, 0, err
+ }
+ goto readA
+ case 'n', 'N':
+ a = 1
+ p.i++
+ goto readN
+ default:
+ goto invalid
+ }
+
+negativeA:
+ if p.i >= len(p.s) {
+ goto eof
+ }
+ switch p.s[p.i] {
+ case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
+ a, err = p.parseInteger()
+ if err != nil {
+ return 0, 0, err
+ }
+ a = -a
+ goto readA
+ case 'n', 'N':
+ a = -1
+ p.i++
+ goto readN
+ default:
+ goto invalid
+ }
+
+readA:
+ if p.i >= len(p.s) {
+ goto eof
+ }
+ switch p.s[p.i] {
+ case 'n', 'N':
+ p.i++
+ goto readN
+ default:
+ // The number we read as a is actually b.
+ return 0, a, nil
+ }
+
+readN:
+ p.skipWhitespace()
+ if p.i >= len(p.s) {
+ goto eof
+ }
+ switch p.s[p.i] {
+ case '+':
+ p.i++
+ p.skipWhitespace()
+ b, err = p.parseInteger()
+ if err != nil {
+ return 0, 0, err
+ }
+ return a, b, nil
+ case '-':
+ p.i++
+ p.skipWhitespace()
+ b, err = p.parseInteger()
+ if err != nil {
+ return 0, 0, err
+ }
+ return a, -b, nil
+ default:
+ return a, 0, nil
+ }
+
+eof:
+ return 0, 0, errors.New("unexpected EOF while attempting to parse expression of form an+b")
+
+invalid:
+ return 0, 0, errors.New("unexpected character while attempting to parse expression of form an+b")
+}
+
+// parseSimpleSelectorSequence parses a selector sequence that applies to
+// a single element.
+func (p *parser) parseSimpleSelectorSequence() (Selector, error) {
+ var result Selector
+
+ if p.i >= len(p.s) {
+ return nil, errors.New("expected selector, found EOF instead")
+ }
+
+ switch p.s[p.i] {
+ case '*':
+ // It's the universal selector. Just skip over it, since it doesn't affect the meaning.
+ p.i++
+ case '#', '.', '[', ':':
+ // There's no type selector. Wait to process the other till the main loop.
+ default:
+ r, err := p.parseTypeSelector()
+ if err != nil {
+ return nil, err
+ }
+ result = r
+ }
+
+loop:
+ for p.i < len(p.s) {
+ var ns Selector
+ var err error
+ switch p.s[p.i] {
+ case '#':
+ ns, err = p.parseIDSelector()
+ case '.':
+ ns, err = p.parseClassSelector()
+ case '[':
+ ns, err = p.parseAttributeSelector()
+ case ':':
+ ns, err = p.parsePseudoclassSelector()
+ default:
+ break loop
+ }
+ if err != nil {
+ return nil, err
+ }
+ if result == nil {
+ result = ns
+ } else {
+ result = intersectionSelector(result, ns)
+ }
+ }
+
+ if result == nil {
+ result = func(n *html.Node) bool {
+ return n.Type == html.ElementNode
+ }
+ }
+
+ return result, nil
+}
+
+// parseSelector parses a selector that may include combinators.
+// Whitespace between two sequences is the descendant combinator; '>', '+' and
+// '~' are the child, adjacent-sibling and general-sibling combinators.
+func (p *parser) parseSelector() (result Selector, err error) {
+	p.skipWhitespace()
+	result, err = p.parseSimpleSelectorSequence()
+	if err != nil {
+		return nil, err
+	}
+
+	for {
+		var combinator byte
+		if p.skipWhitespace() {
+			combinator = ' '
+		}
+		if p.i >= len(p.s) {
+			return result, nil
+		}
+
+		switch p.s[p.i] {
+		case '+', '>', '~':
+			combinator = p.s[p.i]
+			p.i++
+			p.skipWhitespace()
+		case ',', ')':
+			// These characters can't begin a selector, but they can legally occur after one.
+			return result, nil
+		}
+
+		if combinator == 0 {
+			return result, nil
+		}
+
+		c, seqErr := p.parseSimpleSelectorSequence()
+		if seqErr != nil {
+			return nil, seqErr
+		}
+
+		switch combinator {
+		case ' ':
+			result = descendantSelector(result, c)
+		case '>':
+			result = childSelector(result, c)
+		case '+':
+			result = siblingSelector(result, c, true)
+		case '~':
+			result = siblingSelector(result, c, false)
+		}
+	}
+}
+
+// parseSelectorGroup parses one or more selectors separated by commas and
+// joins them with unionSelector, so the result matches a node when any
+// member of the group matches it. Parsing stops, without error, at the first
+// character that is not a comma.
+func (p *parser) parseSelectorGroup() (result Selector, err error) {
+	if result, err = p.parseSelector(); err != nil {
+		return result, err
+	}
+
+	// Each ',' introduces another alternative.
+	for p.i < len(p.s) && p.s[p.i] == ',' {
+		p.i++
+		next, nextErr := p.parseSelector()
+		if nextErr != nil {
+			return nil, nextErr
+		}
+		result = unionSelector(result, next)
+	}
+
+	return result, nil
+}
diff --git a/vendor/github.com/andybalholm/cascadia/selector.go b/vendor/github.com/andybalholm/cascadia/selector.go
new file mode 100644
index 000000000..9fb05ccb7
--- /dev/null
+++ b/vendor/github.com/andybalholm/cascadia/selector.go
@@ -0,0 +1,622 @@
+package cascadia
+
+import (
+ "bytes"
+ "fmt"
+ "regexp"
+ "strings"
+
+ "golang.org/x/net/html"
+)
+
+// the Selector type, and functions for creating them
+
+// A Selector is a function which tells whether a node matches or not.
+type Selector func(*html.Node) bool
+
+// hasChildMatch reports whether a accepts at least one direct child of n.
+func hasChildMatch(n *html.Node, a Selector) bool {
+	child := n.FirstChild
+	for child != nil && !a(child) {
+		child = child.NextSibling
+	}
+	// A non-nil child here is the first one a accepted.
+	return child != nil
+}
+
+// hasDescendantMatch performs a depth-first search of n's descendants,
+// testing whether any of them match a. It returns true as soon as a match is
+// found, or false if no match is found.
+func hasDescendantMatch(n *html.Node, a Selector) bool {
+ for c := n.FirstChild; c != nil; c = c.NextSibling {
+ if a(c) || (c.Type == html.ElementNode && hasDescendantMatch(c, a)) {
+ return true
+ }
+ }
+ return false
+}
+
+// Compile parses a selector and returns, if successful, a Selector object
+// that can be used to match against html.Node objects.
+func Compile(sel string) (Selector, error) {
+ p := &parser{s: sel}
+ compiled, err := p.parseSelectorGroup()
+ if err != nil {
+ return nil, err
+ }
+
+ if p.i < len(sel) {
+ return nil, fmt.Errorf("parsing %q: %d bytes left over", sel, len(sel)-p.i)
+ }
+
+ return compiled, nil
+}
+
+// MustCompile is like Compile, but panics instead of returning an error.
+func MustCompile(sel string) Selector {
+ compiled, err := Compile(sel)
+ if err != nil {
+ panic(err)
+ }
+ return compiled
+}
+
+// MatchAll returns a slice of the nodes that match the selector,
+// from n and its children.
+func (s Selector) MatchAll(n *html.Node) []*html.Node {
+ return s.matchAllInto(n, nil)
+}
+
+func (s Selector) matchAllInto(n *html.Node, storage []*html.Node) []*html.Node {
+ if s(n) {
+ storage = append(storage, n)
+ }
+
+ for child := n.FirstChild; child != nil; child = child.NextSibling {
+ storage = s.matchAllInto(child, storage)
+ }
+
+ return storage
+}
+
+// Match returns true if the node matches the selector.
+func (s Selector) Match(n *html.Node) bool {
+ return s(n)
+}
+
+// MatchFirst performs a pre-order, depth-first walk of the tree rooted at n
+// and returns the first node s accepts, or nil when no node matches.
+func (s Selector) MatchFirst(n *html.Node) *html.Node {
+	if s(n) {
+		return n
+	}
+
+	for child := n.FirstChild; child != nil; child = child.NextSibling {
+		if found := s.MatchFirst(child); found != nil {
+			return found
+		}
+	}
+
+	return nil
+}
+
+// Filter returns the nodes in nodes that s accepts, in their original order.
+func (s Selector) Filter(nodes []*html.Node) (result []*html.Node) {
+	for i := range nodes {
+		if !s(nodes[i]) {
+			continue
+		}
+		result = append(result, nodes[i])
+	}
+	return result
+}
+
+// typeSelector returns a Selector that matches elements with a given tag name.
+func typeSelector(tag string) Selector {
+ tag = toLowerASCII(tag)
+ return func(n *html.Node) bool {
+ return n.Type == html.ElementNode && n.Data == tag
+ }
+}
+
+// toLowerASCII returns s with all ASCII capital letters lowercased.
+func toLowerASCII(s string) string {
+ var b []byte
+ for i := 0; i < len(s); i++ {
+ if c := s[i]; 'A' <= c && c <= 'Z' {
+ if b == nil {
+ b = make([]byte, len(s))
+ copy(b, s)
+ }
+ b[i] = s[i] + ('a' - 'A')
+ }
+ }
+
+ if b == nil {
+ return s
+ }
+
+ return string(b)
+}
+
+// attributeSelector returns a Selector that matches elements
+// where the attribute named key satisfies the function f.
+func attributeSelector(key string, f func(string) bool) Selector {
+ key = toLowerASCII(key)
+ return func(n *html.Node) bool {
+ if n.Type != html.ElementNode {
+ return false
+ }
+ for _, a := range n.Attr {
+ if a.Key == key && f(a.Val) {
+ return true
+ }
+ }
+ return false
+ }
+}
+
+// attributeExistsSelector returns a Selector that matches elements that have
+// an attribute named key.
+func attributeExistsSelector(key string) Selector {
+ return attributeSelector(key, func(string) bool { return true })
+}
+
+// attributeEqualsSelector returns a Selector that matches elements where
+// the attribute named key has the value val.
+func attributeEqualsSelector(key, val string) Selector {
+ return attributeSelector(key,
+ func(s string) bool {
+ return s == val
+ })
+}
+
+// attributeNotEqualSelector returns a Selector that matches elements where
+// the attribute named key does not have the value val.
+func attributeNotEqualSelector(key, val string) Selector {
+ key = toLowerASCII(key)
+ return func(n *html.Node) bool {
+ if n.Type != html.ElementNode {
+ return false
+ }
+ for _, a := range n.Attr {
+ if a.Key == key && a.Val == val {
+ return false
+ }
+ }
+ return true
+ }
+}
+
+// attributeIncludesSelector returns a Selector that matches elements where
+// the attribute named key is a whitespace-separated list that includes val.
+func attributeIncludesSelector(key, val string) Selector {
+ return attributeSelector(key,
+ func(s string) bool {
+ for s != "" {
+ i := strings.IndexAny(s, " \t\r\n\f")
+ if i == -1 {
+ return s == val
+ }
+ if s[:i] == val {
+ return true
+ }
+ s = s[i+1:]
+ }
+ return false
+ })
+}
+
+// attributeDashmatchSelector returns a Selector that matches elements where
+// the attribute named key equals val or starts with val plus a hyphen.
+func attributeDashmatchSelector(key, val string) Selector {
+ return attributeSelector(key,
+ func(s string) bool {
+ if s == val {
+ return true
+ }
+ if len(s) <= len(val) {
+ return false
+ }
+ if s[:len(val)] == val && s[len(val)] == '-' {
+ return true
+ }
+ return false
+ })
+}
+
+// attributePrefixSelector returns a Selector that matches elements where
+// the attribute named key starts with val.
+func attributePrefixSelector(key, val string) Selector {
+ return attributeSelector(key,
+ func(s string) bool {
+ if strings.TrimSpace(s) == "" {
+ return false
+ }
+ return strings.HasPrefix(s, val)
+ })
+}
+
+// attributeSuffixSelector returns a Selector that matches elements where
+// the attribute named key ends with val.
+func attributeSuffixSelector(key, val string) Selector {
+ return attributeSelector(key,
+ func(s string) bool {
+ if strings.TrimSpace(s) == "" {
+ return false
+ }
+ return strings.HasSuffix(s, val)
+ })
+}
+
+// attributeSubstringSelector returns a Selector that matches nodes where
+// the attribute named key contains val.
+func attributeSubstringSelector(key, val string) Selector {
+ return attributeSelector(key,
+ func(s string) bool {
+ if strings.TrimSpace(s) == "" {
+ return false
+ }
+ return strings.Contains(s, val)
+ })
+}
+
+// attributeRegexSelector returns a Selector that matches nodes where
+// the attribute named key matches the regular expression rx
+func attributeRegexSelector(key string, rx *regexp.Regexp) Selector {
+ return attributeSelector(key,
+ func(s string) bool {
+ return rx.MatchString(s)
+ })
+}
+
+// intersectionSelector returns a selector that matches nodes that match
+// both a and b.
+func intersectionSelector(a, b Selector) Selector {
+ return func(n *html.Node) bool {
+ return a(n) && b(n)
+ }
+}
+
+// unionSelector returns a selector that matches elements that match
+// either a or b.
+func unionSelector(a, b Selector) Selector {
+ return func(n *html.Node) bool {
+ return a(n) || b(n)
+ }
+}
+
+// negatedSelector returns a selector that matches elements that do not match a.
+func negatedSelector(a Selector) Selector {
+ return func(n *html.Node) bool {
+ if n.Type != html.ElementNode {
+ return false
+ }
+ return !a(n)
+ }
+}
+
+// writeNodeText writes the text contained in n and its descendants to b.
+func writeNodeText(n *html.Node, b *bytes.Buffer) {
+ switch n.Type {
+ case html.TextNode:
+ b.WriteString(n.Data)
+ case html.ElementNode:
+ for c := n.FirstChild; c != nil; c = c.NextSibling {
+ writeNodeText(c, b)
+ }
+ }
+}
+
+// nodeText returns the text contained in n and its descendants.
+func nodeText(n *html.Node) string {
+ var b bytes.Buffer
+ writeNodeText(n, &b)
+ return b.String()
+}
+
+// nodeOwnText returns the contents of the text nodes that are direct
+// children of n.
+func nodeOwnText(n *html.Node) string {
+ var b bytes.Buffer
+ for c := n.FirstChild; c != nil; c = c.NextSibling {
+ if c.Type == html.TextNode {
+ b.WriteString(c.Data)
+ }
+ }
+ return b.String()
+}
+
+// textSubstrSelector returns a selector that matches nodes that
+// contain the given text.
+func textSubstrSelector(val string) Selector {
+ return func(n *html.Node) bool {
+ text := strings.ToLower(nodeText(n))
+ return strings.Contains(text, val)
+ }
+}
+
+// ownTextSubstrSelector returns a selector that matches nodes that
+// directly contain the given text
+func ownTextSubstrSelector(val string) Selector {
+ return func(n *html.Node) bool {
+ text := strings.ToLower(nodeOwnText(n))
+ return strings.Contains(text, val)
+ }
+}
+
+// textRegexSelector returns a selector that matches nodes whose text matches
+// the specified regular expression
+func textRegexSelector(rx *regexp.Regexp) Selector {
+ return func(n *html.Node) bool {
+ return rx.MatchString(nodeText(n))
+ }
+}
+
+// ownTextRegexSelector returns a selector that matches nodes whose text
+// directly matches the specified regular expression
+func ownTextRegexSelector(rx *regexp.Regexp) Selector {
+ return func(n *html.Node) bool {
+ return rx.MatchString(nodeOwnText(n))
+ }
+}
+
+// hasChildSelector returns a selector that matches elements
+// with a child that matches a.
+func hasChildSelector(a Selector) Selector {
+ return func(n *html.Node) bool {
+ if n.Type != html.ElementNode {
+ return false
+ }
+ return hasChildMatch(n, a)
+ }
+}
+
+// hasDescendantSelector returns a selector that matches elements
+// with any descendant that matches a.
+func hasDescendantSelector(a Selector) Selector {
+ return func(n *html.Node) bool {
+ if n.Type != html.ElementNode {
+ return false
+ }
+ return hasDescendantMatch(n, a)
+ }
+}
+
+// nthChildSelector returns a selector that implements :nth-child(an+b).
+// If last is true, implements :nth-last-child instead.
+// If ofType is true, implements :nth-of-type instead.
+func nthChildSelector(a, b int, last, ofType bool) Selector {
+ return func(n *html.Node) bool {
+ if n.Type != html.ElementNode {
+ return false
+ }
+
+ parent := n.Parent
+ if parent == nil {
+ return false
+ }
+
+ if parent.Type == html.DocumentNode {
+ return false
+ }
+
+ i := -1
+ count := 0
+ for c := parent.FirstChild; c != nil; c = c.NextSibling {
+ if (c.Type != html.ElementNode) || (ofType && c.Data != n.Data) {
+ continue
+ }
+ count++
+ if c == n {
+ i = count
+ if !last {
+ break
+ }
+ }
+ }
+
+ if i == -1 {
+ // This shouldn't happen, since n should always be one of its parent's children.
+ return false
+ }
+
+ if last {
+ i = count - i + 1
+ }
+
+ i -= b
+ if a == 0 {
+ return i == 0
+ }
+
+ return i%a == 0 && i/a >= 0
+ }
+}
+
+// simpleNthChildSelector returns a selector that implements :nth-child(b).
+// If ofType is true, implements :nth-of-type instead.
+func simpleNthChildSelector(b int, ofType bool) Selector {
+ return func(n *html.Node) bool {
+ if n.Type != html.ElementNode {
+ return false
+ }
+
+ parent := n.Parent
+ if parent == nil {
+ return false
+ }
+
+ if parent.Type == html.DocumentNode {
+ return false
+ }
+
+ count := 0
+ for c := parent.FirstChild; c != nil; c = c.NextSibling {
+ if c.Type != html.ElementNode || (ofType && c.Data != n.Data) {
+ continue
+ }
+ count++
+ if c == n {
+ return count == b
+ }
+ if count >= b {
+ return false
+ }
+ }
+ return false
+ }
+}
+
+// simpleNthLastChildSelector returns a selector that implements
+// :nth-last-child(b). If ofType is true, implements :nth-last-of-type
+// instead.
+func simpleNthLastChildSelector(b int, ofType bool) Selector {
+ return func(n *html.Node) bool {
+ if n.Type != html.ElementNode {
+ return false
+ }
+
+ parent := n.Parent
+ if parent == nil {
+ return false
+ }
+
+ if parent.Type == html.DocumentNode {
+ return false
+ }
+
+ count := 0
+ for c := parent.LastChild; c != nil; c = c.PrevSibling {
+ if c.Type != html.ElementNode || (ofType && c.Data != n.Data) {
+ continue
+ }
+ count++
+ if c == n {
+ return count == b
+ }
+ if count >= b {
+ return false
+ }
+ }
+ return false
+ }
+}
+
+// onlyChildSelector returns a selector that implements :only-child.
+// If ofType is true, it implements :only-of-type instead.
+func onlyChildSelector(ofType bool) Selector {
+ return func(n *html.Node) bool {
+ if n.Type != html.ElementNode {
+ return false
+ }
+
+ parent := n.Parent
+ if parent == nil {
+ return false
+ }
+
+ if parent.Type == html.DocumentNode {
+ return false
+ }
+
+ count := 0
+ for c := parent.FirstChild; c != nil; c = c.NextSibling {
+ if (c.Type != html.ElementNode) || (ofType && c.Data != n.Data) {
+ continue
+ }
+ count++
+ if count > 1 {
+ return false
+ }
+ }
+
+ return count == 1
+ }
+}
+
+// inputSelector is a Selector that matches input, select, textarea and button elements.
+func inputSelector(n *html.Node) bool {
+ return n.Type == html.ElementNode && (n.Data == "input" || n.Data == "select" || n.Data == "textarea" || n.Data == "button")
+}
+
+// emptyElementSelector is a Selector that implements :empty. An element
+// matches when none of its children is an element or text node; other child
+// node types (comments, for instance) are ignored.
+func emptyElementSelector(n *html.Node) bool {
+	if n.Type != html.ElementNode {
+		return false
+	}
+
+	for child := n.FirstChild; child != nil; child = child.NextSibling {
+		if t := child.Type; t == html.ElementNode || t == html.TextNode {
+			return false
+		}
+	}
+
+	return true
+}
+
+// descendantSelector returns a Selector that matches an element if
+// it matches d and has an ancestor that matches a.
+func descendantSelector(a, d Selector) Selector {
+ return func(n *html.Node) bool {
+ if !d(n) {
+ return false
+ }
+
+ for p := n.Parent; p != nil; p = p.Parent {
+ if a(p) {
+ return true
+ }
+ }
+
+ return false
+ }
+}
+
+// childSelector returns a Selector that matches an element if
+// it matches d and its parent matches a.
+func childSelector(a, d Selector) Selector {
+ return func(n *html.Node) bool {
+ return d(n) && n.Parent != nil && a(n.Parent)
+ }
+}
+
+// siblingSelector returns a Selector that matches an element
+// if it matches s2 and is preceded by an element that matches s1.
+// If adjacent is true, only the nearest preceding non-text, non-comment sibling is tested.
+func siblingSelector(s1, s2 Selector, adjacent bool) Selector {
+ return func(n *html.Node) bool {
+ if !s2(n) {
+ return false
+ }
+
+ if adjacent {
+ for n = n.PrevSibling; n != nil; n = n.PrevSibling {
+ if n.Type == html.TextNode || n.Type == html.CommentNode {
+ continue
+ }
+ return s1(n)
+ }
+ return false
+ }
+
+ // Walk backwards looking for any earlier sibling that matches s1.
+ for c := n.PrevSibling; c != nil; c = c.PrevSibling {
+ if s1(c) {
+ return true
+ }
+ }
+
+ return false
+ }
+}
+
+// rootSelector implements :root. It matches an element node whose parent is
+// the document node. Non-element nodes and nodes without a parent never
+// match.
+func rootSelector(n *html.Node) bool {
+	parent := n.Parent
+	if n.Type != html.ElementNode || parent == nil {
+		return false
+	}
+	return parent.Type == html.DocumentNode
+}
diff --git a/vendor/github.com/antchfx/htmlquery/.gitignore b/vendor/github.com/antchfx/htmlquery/.gitignore
new file mode 100644
index 000000000..4d5d27b1d
--- /dev/null
+++ b/vendor/github.com/antchfx/htmlquery/.gitignore
@@ -0,0 +1,32 @@
+# vscode
+.vscode
+debug
+*.test
+
+./build
+
+# Compiled Object files, Static and Dynamic libs (Shared Objects)
+*.o
+*.a
+*.so
+
+
+# Folders
+_obj
+_test
+
+# Architecture specific extensions/prefixes
+*.[568vq]
+[568vq].out
+
+*.cgo1.go
+*.cgo2.c
+_cgo_defun.c
+_cgo_gotypes.go
+_cgo_export.*
+
+_testmain.go
+
+*.exe
+*.test
+*.prof
\ No newline at end of file
diff --git a/vendor/github.com/antchfx/htmlquery/.travis.yml b/vendor/github.com/antchfx/htmlquery/.travis.yml
new file mode 100644
index 000000000..1f7225628
--- /dev/null
+++ b/vendor/github.com/antchfx/htmlquery/.travis.yml
@@ -0,0 +1,15 @@
+language: go
+
+go:
+ - 1.6
+ - 1.7
+ - 1.8
+
+install:
+ - go get golang.org/x/net/html/charset
+ - go get golang.org/x/net/html
+ - go get github.com/antchfx/xpath
+ - go get github.com/mattn/goveralls
+
+script:
+ - $HOME/gopath/bin/goveralls -service=travis-ci
\ No newline at end of file
diff --git a/vendor/github.com/antchfx/htmlquery/LICENSE b/vendor/github.com/antchfx/htmlquery/LICENSE
new file mode 100644
index 000000000..e14c37141
--- /dev/null
+++ b/vendor/github.com/antchfx/htmlquery/LICENSE
@@ -0,0 +1,17 @@
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
\ No newline at end of file
diff --git a/vendor/github.com/antchfx/htmlquery/README.md b/vendor/github.com/antchfx/htmlquery/README.md
new file mode 100644
index 000000000..0f466cb0f
--- /dev/null
+++ b/vendor/github.com/antchfx/htmlquery/README.md
@@ -0,0 +1,102 @@
+htmlquery
+====
+[Build Status](https://travis-ci.org/antchfx/htmlquery)
+[Coverage Status](https://coveralls.io/github/antchfx/htmlquery?branch=master)
+[GoDoc](https://godoc.org/github.com/antchfx/htmlquery)
+[Go Report Card](https://goreportcard.com/report/github.com/antchfx/htmlquery)
+
+Overview
+====
+
+htmlquery is an XPath query package for HTML that lets you extract data from, or evaluate XPath expressions against, HTML documents.
+
+Changelogs
+===
+
+2019-02-04
+- [#7](https://github.com/antchfx/htmlquery/issues/7) Removed deprecated `FindEach()` and `FindEachWithBreak()` methods.
+
+2018-12-28
+- Avoid adding duplicate elements to list for `Find()` method. [#6](https://github.com/antchfx/htmlquery/issues/6)
+
+Installation
+====
+
+> $ go get github.com/antchfx/htmlquery
+
+Getting Started
+====
+
+#### Load HTML document from URL.
+
+```go
+doc, err := htmlquery.LoadURL("http://example.com/")
+```
+
+#### Load HTML document from string.
+
+```go
+s := `....`
+doc, err := htmlquery.Parse(strings.NewReader(s))
+```
+
+#### Find all A elements.
+
+```go
+list := htmlquery.Find(doc, "//a")
+```
+
+#### Find all A elements that have an `href` attribute.
+
+```go
+list := htmlquery.Find(doc, "//a[@href]")
+```
+
+#### Find all A elements and get only their `href` attributes.
+
+```go
+list := htmlquery.Find(doc, "//a/@href")
+```
+
+#### Find the third A element.
+
+```go
+a := htmlquery.FindOne(doc, "//a[3]")
+```
+
+#### Evaluate the number of all IMG elements.
+
+```go
+expr, _ := xpath.Compile("count(//img)")
+v := expr.Evaluate(htmlquery.CreateXPathNavigator(doc)).(float64)
+fmt.Printf("total count is %f", v)
+```
+
+Quick Tutorial
+===
+
+```go
+func main() {
+ doc, err := htmlquery.LoadURL("https://www.bing.com/search?q=golang")
+ if err != nil {
+ panic(err)
+ }
+ // Find all news item.
+ for i, n := range htmlquery.Find(doc, "//ol/li") {
+ a := htmlquery.FindOne(n, "//a")
+ fmt.Printf("%d %s(%s)\n", i, htmlquery.InnerText(a), htmlquery.SelectAttr(a, "href"))
+ }
+}
+```
+
+List of supported XPath query packages
+===
+|Name |Description |
+|--------------------------|----------------|
+|[htmlquery](https://github.com/antchfx/htmlquery) | XPath query package for the HTML document|
+|[xmlquery](https://github.com/antchfx/xmlquery) | XPath query package for the XML document|
+|[jsonquery](https://github.com/antchfx/jsonquery) | XPath query package for the JSON document|
+
+Questions
+===
+Please let me know if you have any questions.
diff --git a/vendor/github.com/antchfx/htmlquery/query.go b/vendor/github.com/antchfx/htmlquery/query.go
new file mode 100644
index 000000000..37d30b937
--- /dev/null
+++ b/vendor/github.com/antchfx/htmlquery/query.go
@@ -0,0 +1,291 @@
+/*
+Package htmlquery provides functions for extracting data from HTML documents using XPath expressions.
+*/
+package htmlquery
+
+import (
+ "bytes"
+ "fmt"
+ "io"
+ "net/http"
+
+ "github.com/antchfx/xpath"
+ "golang.org/x/net/html"
+ "golang.org/x/net/html/charset"
+)
+
+var _ xpath.NodeNavigator = &NodeNavigator{}
+
+// CreateXPathNavigator returns a NodeNavigator rooted at, and positioned on, top.
+func CreateXPathNavigator(top *html.Node) *NodeNavigator {
+ return &NodeNavigator{curr: top, root: top, attr: -1} // attr -1: on the node itself, not on an attribute
+}
+
+// Find evaluates the XPath expr against top and returns the selected nodes; it panics if expr does not compile.
+func Find(top *html.Node, expr string) []*html.Node {
+ exp, err := xpath.Compile(expr)
+ if err != nil {
+ panic(err)
+ }
+ var elems []*html.Node
+ t := exp.Select(CreateXPathNavigator(top))
+ for t.MoveNext() {
+ nav := t.Current().(*NodeNavigator)
+ n := getCurrentNode(nav) // attribute results come back as synthetic nodes, see getCurrentNode
+ // skip a result that duplicates the first collected element; only elems[0] is compared.
+ if len(elems) > 0 && (elems[0] == n || (nav.NodeType() == xpath.AttributeNode &&
+ nav.LocalName() == elems[0].Data && nav.Value() == InnerText(elems[0]))) {
+ continue
+ }
+ elems = append(elems, n)
+ }
+ return elems
+}
+
+// FindOne evaluates the XPath expr against top and returns the first selected
+// html.Node, or nil when nothing matches. It panics if expr does not compile.
+func FindOne(top *html.Node, expr string) *html.Node {
+ var elem *html.Node
+ exp, err := xpath.Compile(expr)
+ if err != nil {
+ panic(err)
+ }
+ t := exp.Select(CreateXPathNavigator(top))
+ if t.MoveNext() {
+ elem = getCurrentNode(t.Current().(*NodeNavigator))
+ }
+ return elem
+}
+
+// LoadURL fetches url with http.Get and parses the response body as an HTML document.
+func LoadURL(url string) (*html.Node, error) {
+ resp, err := http.Get(url) // NOTE: the response status code is never inspected
+ if err != nil {
+ return nil, err
+ }
+ defer resp.Body.Close()
+
+ r, err := charset.NewReader(resp.Body, resp.Header.Get("Content-Type")) // wraps the body using the declared Content-Type; presumably converts it to UTF-8 — confirm in x/net/html/charset
+ if err != nil {
+ return nil, err
+ }
+ return html.Parse(r)
+}
+
+func getCurrentNode(n *NodeNavigator) *html.Node { // maps the navigator position to the node handed back to callers
+ if n.NodeType() == xpath.AttributeNode {
+ childNode := &html.Node{ // text child carrying the attribute value
+ Type: html.TextNode,
+ Data: n.Value(),
+ }
+ return &html.Node{ // synthetic, parentless element named after the attribute
+ Type: html.ElementNode,
+ Data: n.LocalName(),
+ FirstChild: childNode,
+ LastChild: childNode,
+ }
+
+ }
+ return n.curr
+}
+
+// Parse parses the HTML read from r and returns its parse tree; it simply delegates to html.Parse.
+func Parse(r io.Reader) (*html.Node, error) {
+ return html.Parse(r)
+}
+
+// InnerText returns the concatenated text of n and all its descendants, skipping comment nodes.
+func InnerText(n *html.Node) string {
+ var output func(*bytes.Buffer, *html.Node) // declared first so the closure can call itself
+ output = func(buf *bytes.Buffer, n *html.Node) {
+ switch n.Type { // n is dereferenced unconditionally, so a nil node panics
+ case html.TextNode:
+ buf.WriteString(n.Data)
+ return
+ case html.CommentNode:
+ return
+ }
+ for child := n.FirstChild; child != nil; child = child.NextSibling { // depth-first, in sibling order
+ output(buf, child)
+ }
+ }
+
+ var buf bytes.Buffer
+ output(&buf, n)
+ return buf.String()
+}
+
+// SelectAttr returns the value of n's attribute called name, or "" if n is nil or has no such attribute.
+func SelectAttr(n *html.Node, name string) (val string) {
+ if n == nil {
+ return
+ }
+ if n.Type == html.ElementNode && n.Parent == nil && name == n.Data { // shape of the synthetic node getCurrentNode builds for an attribute result
+ return InnerText(n)
+ }
+ for _, attr := range n.Attr {
+ if attr.Key == name {
+ val = attr.Val // first matching key wins
+ break
+ }
+ }
+ return
+}
+
+// OutputHTML renders n as HTML: n itself when self is true, otherwise only n's children.
+func OutputHTML(n *html.Node, self bool) string {
+ var buf bytes.Buffer
+ if self {
+ html.Render(&buf, n) // the error returned by Render is discarded
+ } else {
+ for n := n.FirstChild; n != nil; n = n.NextSibling { // inner n shadows the parameter
+ html.Render(&buf, n)
+ }
+ }
+ return buf.String()
+}
+
+type NodeNavigator struct { // NodeNavigator is the xpath.NodeNavigator implementation for html.Node trees.
+ root, curr *html.Node // root: node the navigator was created on; curr: current position
+ attr int // index into curr.Attr while positioned on an attribute, -1 otherwise
+}
+
+func (h *NodeNavigator) Current() *html.Node { // Current returns the html.Node the navigator is positioned on.
+ return h.curr // while on an attribute this is still the owning element
+}
+
+func (h *NodeNavigator) NodeType() xpath.NodeType { // NodeType maps the current html node type to its xpath.NodeType.
+ switch h.curr.Type {
+ case html.CommentNode:
+ return xpath.CommentNode
+ case html.TextNode:
+ return xpath.TextNode
+ case html.DocumentNode:
+ return xpath.RootNode
+ case html.ElementNode:
+ if h.attr != -1 { // positioned on one of the element's attributes
+ return xpath.AttributeNode
+ }
+ return xpath.ElementNode
+ case html.DoctypeNode:
+ // a doctype declaration has no node type of its own here; it is reported as a root node.
+ return xpath.RootNode
+ }
+ panic(fmt.Sprintf("unknown HTML node type: %v", h.curr.Type)) // reached for any html node type not listed above
+}
+
+func (h *NodeNavigator) LocalName() string { // LocalName returns the attribute key when on an attribute, else the node's Data.
+ if h.attr != -1 {
+ return h.curr.Attr[h.attr].Key
+ }
+ return h.curr.Data
+}
+
+func (*NodeNavigator) Prefix() string { // Prefix always reports an empty namespace prefix.
+ return ""
+}
+
+func (h *NodeNavigator) Value() string { // Value returns the string value of the current position.
+ switch h.curr.Type {
+ case html.CommentNode:
+ return h.curr.Data
+ case html.ElementNode:
+ if h.attr != -1 {
+ return h.curr.Attr[h.attr].Val // on an attribute: its value
+ }
+ return InnerText(h.curr) // on an element: text of the whole subtree
+ case html.TextNode:
+ return h.curr.Data
+ }
+ return "" // every other node type yields an empty value
+}
+
+func (h *NodeNavigator) Copy() xpath.NodeNavigator { // Copy returns a separate navigator at the same position.
+ n := *h // struct copy: same root/curr pointers and attr index, but later moves of the copy do not affect h
+ return &n
+}
+
+func (h *NodeNavigator) MoveToRoot() { // MoveToRoot repositions the navigator on the node it was created with.
+ h.curr = h.root // NOTE(review): attr is left unchanged — confirm this is never called while on an attribute
+}
+
+func (h *NodeNavigator) MoveToParent() bool { // MoveToParent moves up one level and reports whether it moved.
+ if h.attr != -1 {
+ h.attr = -1 // an attribute's parent is its owning element, which curr already points at
+ return true
+ } else if node := h.curr.Parent; node != nil {
+ h.curr = node
+ return true
+ }
+ return false // curr has no parent
+}
+
+func (h *NodeNavigator) MoveToNextAttribute() bool { // MoveToNextAttribute advances to the next attribute of curr, if any.
+ if h.attr >= len(h.curr.Attr)-1 { // already on the last attribute, or the node has none
+ return false
+ }
+ h.attr++ // from -1 this lands on the first attribute
+ return true
+}
+
+func (h *NodeNavigator) MoveToChild() bool { // MoveToChild moves to the first child of curr and reports whether it moved.
+ if h.attr != -1 { // no descent while positioned on an attribute
+ return false
+ }
+ if node := h.curr.FirstChild; node != nil {
+ h.curr = node
+ return true
+ }
+ return false
+}
+
+func (h *NodeNavigator) MoveToFirst() bool { // MoveToFirst moves to the first sibling of curr and reports whether it moved.
+ if h.attr != -1 || h.curr.PrevSibling == nil { // on an attribute, or already the first sibling
+ return false
+ }
+ for { // walk left until there is no previous sibling
+ node := h.curr.PrevSibling
+ if node == nil {
+ break
+ }
+ h.curr = node
+ }
+ return true
+}
+
+func (h *NodeNavigator) String() string { // String returns the same text as Value.
+ return h.Value()
+}
+
+func (h *NodeNavigator) MoveToNext() bool { // MoveToNext moves to the next sibling of curr and reports whether it moved.
+ if h.attr != -1 { // sibling moves are refused while positioned on an attribute
+ return false
+ }
+ if node := h.curr.NextSibling; node != nil {
+ h.curr = node
+ return true
+ }
+ return false
+}
+
+func (h *NodeNavigator) MoveToPrevious() bool { // MoveToPrevious moves to the previous sibling of curr and reports whether it moved.
+ if h.attr != -1 { // sibling moves are refused while positioned on an attribute
+ return false
+ }
+ if node := h.curr.PrevSibling; node != nil {
+ h.curr = node
+ return true
+ }
+ return false
+}
+
+func (h *NodeNavigator) MoveTo(other xpath.NodeNavigator) bool { // MoveTo adopts other's position and reports whether it did.
+ node, ok := other.(*NodeNavigator)
+ if !ok || node.root != h.root { // refuse foreign navigator types and navigators created on a different root
+ return false
+ }
+
+ h.curr = node.curr
+ h.attr = node.attr
+ return true
+}
diff --git a/vendor/github.com/antchfx/xmlquery/.gitignore b/vendor/github.com/antchfx/xmlquery/.gitignore
new file mode 100644
index 000000000..4d5d27b1d
--- /dev/null
+++ b/vendor/github.com/antchfx/xmlquery/.gitignore
@@ -0,0 +1,32 @@
+# vscode
+.vscode
+debug
+*.test
+
+./build
+
+# Compiled Object files, Static and Dynamic libs (Shared Objects)
+*.o
+*.a
+*.so
+
+
+# Folders
+_obj
+_test
+
+# Architecture specific extensions/prefixes
+*.[568vq]
+[568vq].out
+
+*.cgo1.go
+*.cgo2.c
+_cgo_defun.c
+_cgo_gotypes.go
+_cgo_export.*
+
+_testmain.go
+
+*.exe
+*.test
+*.prof
\ No newline at end of file
diff --git a/vendor/github.com/antchfx/xmlquery/.travis.yml b/vendor/github.com/antchfx/xmlquery/.travis.yml
new file mode 100644
index 000000000..d9a7bb893
--- /dev/null
+++ b/vendor/github.com/antchfx/xmlquery/.travis.yml
@@ -0,0 +1,14 @@
+language: go
+
+go:
+ - 1.6
+ - 1.7
+ - 1.8
+
+install:
+ - go get golang.org/x/net/html/charset
+ - go get github.com/antchfx/xpath
+ - go get github.com/mattn/goveralls
+
+script:
+ - $HOME/gopath/bin/goveralls -service=travis-ci
\ No newline at end of file
diff --git a/vendor/github.com/antchfx/xmlquery/LICENSE b/vendor/github.com/antchfx/xmlquery/LICENSE
new file mode 100644
index 000000000..e14c37141
--- /dev/null
+++ b/vendor/github.com/antchfx/xmlquery/LICENSE
@@ -0,0 +1,17 @@
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
\ No newline at end of file
diff --git a/vendor/github.com/antchfx/xmlquery/README.md b/vendor/github.com/antchfx/xmlquery/README.md
new file mode 100644
index 000000000..6683afd51
--- /dev/null
+++ b/vendor/github.com/antchfx/xmlquery/README.md
@@ -0,0 +1,186 @@
+xmlquery
+====
+[Build Status](https://travis-ci.org/antchfx/xmlquery)
+[Coverage Status](https://coveralls.io/github/antchfx/xmlquery?branch=master)
+[GoDoc](https://godoc.org/github.com/antchfx/xmlquery)
+[Go Report Card](https://goreportcard.com/report/github.com/antchfx/xmlquery)
+
+Overview
+===
+
+xmlquery is an XPath query package for XML that lets you extract data from, or evaluate XPath expressions against, XML documents.
+
+Change Logs
+===
+
+**2018-12-23**
+* XML output now includes comment nodes. [#9](https://github.com/antchfx/xmlquery/issues/9)
+
+**2018-12-03**
+ * added support for attribute names with a namespace prefix and for XML output. [#6](https://github.com/antchfx/xmlquery/issues/6)
+
+Installation
+====
+
+> $ go get github.com/antchfx/xmlquery
+
+Getting Started
+===
+
+#### Parse an XML document from a URL.
+
+```go
+doc, err := xmlquery.LoadURL("http://www.example.com/sitemap.xml")
+```
+
+#### Parse an XML document from a string.
+
+```go
+s := `
+
+
+### Backers
+
+Thank you to all our backers! [[Become a backer](https://opencollective.com/colly#backer)]
+
+
+
+
+### Sponsors
+
+Thank you to all our sponsors! (please ask your company to also support this open source project by [becoming a sponsor](https://opencollective.com/colly#sponsor))
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/vendor/github.com/gocolly/colly/LICENSE.txt b/vendor/github.com/gocolly/colly/LICENSE.txt
new file mode 100644
index 000000000..d64569567
--- /dev/null
+++ b/vendor/github.com/gocolly/colly/LICENSE.txt
@@ -0,0 +1,202 @@
+
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+
+ APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "[]"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+ Copyright [yyyy] [name of copyright owner]
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
diff --git a/vendor/github.com/gocolly/colly/README.md b/vendor/github.com/gocolly/colly/README.md
new file mode 100644
index 000000000..06e73cbea
--- /dev/null
+++ b/vendor/github.com/gocolly/colly/README.md
@@ -0,0 +1,112 @@
+# Colly
+
+Lightning Fast and Elegant Scraping Framework for Gophers
+
+Colly provides a clean interface to write any kind of crawler/scraper/spider.
+
+With Colly you can easily extract structured data from websites, which can be used for a wide range of applications, like data mining, data processing or archiving.
+
+[GoDoc](https://godoc.org/github.com/gocolly/colly)
+[Backers](#backers) [Sponsors](#sponsors) [Build Status](https://travis-ci.org/gocolly/colly)
+[Go Report Card](http://goreportcard.com/report/gocolly/colly)
+[Examples](https://github.com/gocolly/colly/tree/master/_examples)
+[Code Coverage](https://codecov.io/github/gocolly/colly?branch=master)
+[FOSSA Status](https://app.fossa.io/projects/git%2Bgithub.com%2Fgocolly%2Fcolly?ref=badge_shield)
+[Twitter](https://twitter.com/gocolly)
+
+
+## Features
+
+ * Clean API
+ * Fast (>1k request/sec on a single core)
+ * Manages request delays and maximum concurrency per domain
+ * Automatic cookie and session handling
+ * Sync/async/parallel scraping
+ * Caching
+ * Automatic encoding of non-unicode responses
+ * Robots.txt support
+ * Distributed scraping
+ * Configuration via environment variables
+ * Extensions
+
+
+## Example
+
+```go
+func main() {
+ c := colly.NewCollector()
+
+ // Find and visit all links
+ c.OnHTML("a[href]", func(e *colly.HTMLElement) {
+ e.Request.Visit(e.Attr("href"))
+ })
+
+ c.OnRequest(func(r *colly.Request) {
+ fmt.Println("Visiting", r.URL)
+ })
+
+ c.Visit("http://go-colly.org/")
+}
+```
+
+See [examples folder](https://github.com/gocolly/colly/tree/master/_examples) for more detailed examples.
+
+
+## Installation
+
+```
+go get -u github.com/gocolly/colly/...
+```
+
+
+## Bugs
+
+Bugs or suggestions? Visit the [issue tracker](https://github.com/gocolly/colly/issues) or join `#colly` on freenode
+
+
+## Other Projects Using Colly
+
+Below is a list of public, open source projects that use Colly:
+
+ * [greenpeace/check-my-pages](https://github.com/greenpeace/check-my-pages) Scraping script to test the Spanish Greenpeace web archive
+ * [altsab/gowap](https://github.com/altsab/gowap) Wappalyzer implementation in Go
+ * [jesuiscamille/goquotes](https://github.com/jesuiscamille/goquotes) A quotes scraper, making your day a little better!
+ * [jivesearch/jivesearch](https://github.com/jivesearch/jivesearch) A search engine that doesn't track you.
+ * [Leagify/colly-draft-prospects](https://github.com/Leagify/colly-draft-prospects) A scraper for future NFL Draft prospects.
+ * [lucasepe/go-ps4](https://github.com/lucasepe/go-ps4) Search playstation store for your favorite PS4 games using the command line.
+
+If you are using Colly in a project please send a pull request to add it to the list.
+
+## Contributors
+
+This project exists thanks to all the people who contribute. [[Contribute]](CONTRIBUTING.md).
+
+
+
+## Backers
+
+Thank you to all our backers! 🙏 [[Become a backer](https://opencollective.com/colly#backer)]
+
+
+
+
+## Sponsors
+
+Support this project by becoming a sponsor. Your logo will show up here with a link to your website. [[Become a sponsor](https://opencollective.com/colly#sponsor)]
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+## License
+[FOSSA Status](https://app.fossa.io/projects/git%2Bgithub.com%2Fgocolly%2Fcolly?ref=badge_large)
diff --git a/vendor/github.com/gocolly/colly/VERSION b/vendor/github.com/gocolly/colly/VERSION
new file mode 100644
index 000000000..26aaba0e8
--- /dev/null
+++ b/vendor/github.com/gocolly/colly/VERSION
@@ -0,0 +1 @@
+1.2.0
diff --git a/vendor/github.com/gocolly/colly/colly.go b/vendor/github.com/gocolly/colly/colly.go
new file mode 100644
index 000000000..3fb64db78
--- /dev/null
+++ b/vendor/github.com/gocolly/colly/colly.go
@@ -0,0 +1,1293 @@
+// Copyright 2018 Adam Tauber
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package colly implements a HTTP scraping framework
+package colly
+
+import (
+ "bytes"
+ "context"
+ "crypto/rand"
+ "encoding/json"
+ "errors"
+ "fmt"
+ "hash/fnv"
+ "io"
+ "io/ioutil"
+ "log"
+ "net/http"
+ "net/http/cookiejar"
+ "net/url"
+ "os"
+ "path/filepath"
+ "regexp"
+ "strconv"
+ "strings"
+ "sync"
+ "sync/atomic"
+ "time"
+
+ "google.golang.org/appengine/urlfetch"
+
+ "github.com/PuerkitoBio/goquery"
+ "github.com/antchfx/htmlquery"
+ "github.com/antchfx/xmlquery"
+ "github.com/kennygrant/sanitize"
+ "github.com/temoto/robotstxt"
+
+ "github.com/gocolly/colly/debug"
+ "github.com/gocolly/colly/storage"
+)
+
+// Collector provides the scraper instance for a scraping job.
+// Use NewCollector (or call Init on a zero value) so that the unexported
+// fields are populated before the Collector is used.
+type Collector struct {
+ // UserAgent is the User-Agent string used by HTTP requests
+ UserAgent string
+ // MaxDepth limits the recursion depth of visited URLs.
+ // Set it to 0 for infinite recursion (default).
+ MaxDepth int
+ // AllowedDomains is a domain whitelist.
+ // Leave it blank to allow any domains to be visited
+ AllowedDomains []string
+ // DisallowedDomains is a domain blacklist.
+ DisallowedDomains []string
+ // DisallowedURLFilters is a list of regular expressions which restricts
+ // visiting URLs. If any of the rules matches a URL, the request is
+ // rejected with ErrForbiddenURL. DisallowedURLFilters is evaluated
+ // before URLFilters.
+ // Leave it blank to allow any URLs to be visited
+ DisallowedURLFilters []*regexp.Regexp
+ // URLFilters is a list of regular expressions which restricts
+ // visiting URLs. When the list is non-empty, a URL must match at least
+ // one of the rules, otherwise the request is rejected with
+ // ErrNoURLFiltersMatch. DisallowedURLFilters is evaluated before
+ // URLFilters.
+ // Leave it blank to allow any URLs to be visited
+ URLFilters []*regexp.Regexp
+
+ // AllowURLRevisit allows multiple downloads of the same URL
+ AllowURLRevisit bool
+ // MaxBodySize is the limit of the retrieved response body in bytes.
+ // 0 means unlimited.
+ // The default value for MaxBodySize is 10MB (10 * 1024 * 1024 bytes).
+ MaxBodySize int
+ // CacheDir specifies a location where GET requests are cached as files.
+ // When it's not defined, caching is disabled.
+ CacheDir string
+ // IgnoreRobotsTxt allows the Collector to ignore any restrictions set by
+ // the target host's robots.txt file. See http://www.robotstxt.org/ for more
+ // information. Init sets this field to true.
+ IgnoreRobotsTxt bool
+ // Async turns on asynchronous network communication. Use Collector.Wait() to
+ // be sure all requests have been finished.
+ Async bool
+ // ParseHTTPErrorResponse allows parsing HTTP responses with non 2xx status codes.
+ // By default, Colly parses only successful HTTP responses. Set ParseHTTPErrorResponse
+ // to true to enable it.
+ ParseHTTPErrorResponse bool
+ // ID is the unique identifier of a collector
+ ID uint32
+ // DetectCharset can enable character encoding detection for non-utf8 response bodies
+ // without explicit charset declaration. This feature uses https://github.com/saintfish/chardet
+ DetectCharset bool
+ // RedirectHandler allows control on how a redirect will be managed
+ RedirectHandler func(req *http.Request, via []*http.Request) error
+ // CheckHead performs a HEAD request before every GET to pre-validate the response
+ CheckHead bool
+ // store records visited URLs (see requestCheck); SetStorage replaces it.
+ store storage.Storage
+ // debugger, when non-nil, receives an event for every handled callback stage.
+ debugger debug.Debugger
+ // robotsMap caches parsed robots.txt data keyed by URL host; guarded by lock.
+ robotsMap map[string]*robotstxt.RobotsData
+ // Registered callbacks, appended to by the On* methods while holding lock.
+ htmlCallbacks []*htmlCallbackContainer
+ xmlCallbacks []*xmlCallbackContainer
+ requestCallbacks []RequestCallback
+ responseCallbacks []ResponseCallback
+ errorCallbacks []ErrorCallback
+ scrapedCallbacks []ScrapedCallback
+ // requestCount and responseCount are incremented atomically in fetch.
+ requestCount uint32
+ responseCount uint32
+ // backend owns the http.Client used for every request.
+ backend *httpBackend
+ // wg tracks in-flight fetches so that Wait can block until they finish.
+ wg *sync.WaitGroup
+ // lock guards robotsMap and the callback slices.
+ lock *sync.RWMutex
+}
+
+// RequestCallback is the function type accepted by OnRequest.
+type RequestCallback func(*Request)
+
+// ResponseCallback is the function type accepted by OnResponse.
+type ResponseCallback func(*Response)
+
+// HTMLCallback is the function type accepted by OnHTML.
+type HTMLCallback func(*HTMLElement)
+
+// XMLCallback is the function type accepted by OnXML.
+type XMLCallback func(*XMLElement)
+
+// ErrorCallback is the function type accepted by OnError. It receives the
+// response (never nil when invoked by the Collector) and the error.
+type ErrorCallback func(*Response, error)
+
+// ScrapedCallback is the function type accepted by OnScraped.
+type ScrapedCallback func(*Response)
+
+// ProxyFunc is the function type for proxy setter functions; it has the same
+// signature as the Proxy field of http.Transport.
+type ProxyFunc func(*http.Request) (*url.URL, error)
+
+// htmlCallbackContainer pairs a goquery selector with the callback that is
+// invoked for every element it matches.
+type htmlCallbackContainer struct {
+ Selector string
+ Function HTMLCallback
+}
+
+// xmlCallbackContainer pairs an xpath query with the callback that is
+// invoked for every node it matches.
+type xmlCallbackContainer struct {
+ Query string
+ Function XMLCallback
+}
+
+// cookieJarSerializer holds a storage backend and a lock.
+// NOTE(review): presumably the cookie jar built by createJar on top of a
+// storage.Storage; its methods are not visible here, so confirm.
+type cookieJarSerializer struct {
+ store storage.Storage
+ lock *sync.RWMutex
+}
+
+// collectorCounter is the source of Collector IDs; it is incremented
+// atomically by Init and Clone.
+var collectorCounter uint32
+
+// The key type is unexported to prevent collisions with context keys defined in
+// other packages.
+type key int
+
+// ProxyURLKey is the context key for the request proxy address.
+// fetch reads it from the request context and expects a string value.
+const ProxyURLKey key = iota
+
+// Sentinel errors returned by the Collector.
+var (
+ // ErrForbiddenDomain is the error thrown if visiting
+ // a domain which is not allowed in AllowedDomains
+ // (or which is listed in DisallowedDomains)
+ ErrForbiddenDomain = errors.New("Forbidden domain")
+ // ErrMissingURL is the error type for missing URL errors
+ ErrMissingURL = errors.New("Missing URL")
+ // ErrMaxDepth is the error type for exceeding max depth
+ ErrMaxDepth = errors.New("Max depth limit reached")
+ // ErrForbiddenURL is the error thrown if visiting
+ // a URL which is matched by DisallowedURLFilters
+ ErrForbiddenURL = errors.New("ForbiddenURL")
+
+ // ErrNoURLFiltersMatch is the error thrown if visiting
+ // a URL which is not matched by any of the URLFilters
+ ErrNoURLFiltersMatch = errors.New("No URLFilters match")
+ // ErrAlreadyVisited is the error type for already visited URLs
+ ErrAlreadyVisited = errors.New("URL already visited")
+ // ErrRobotsTxtBlocked is the error type for robots.txt errors
+ ErrRobotsTxtBlocked = errors.New("URL blocked by robots.txt")
+ // ErrNoCookieJar is the error type for missing cookie jar
+ ErrNoCookieJar = errors.New("Cookie jar is not available")
+ // ErrNoPattern is the error type for LimitRules without patterns
+ ErrNoPattern = errors.New("No pattern defined in LimitRule")
+)
+
+// envMap maps the name of a COLLY_* environment variable (without the
+// "COLLY_" prefix) to the function that applies its value to a Collector.
+// It is consulted by parseSettingsFromEnv. Numeric settings are applied only
+// when the value parses as an integer; unparsable values are ignored.
+var envMap = map[string]func(*Collector, string){
+	"ALLOWED_DOMAINS": func(c *Collector, val string) {
+		c.AllowedDomains = strings.Split(val, ",")
+	},
+	"CACHE_DIR": func(c *Collector, val string) {
+		c.CacheDir = val
+	},
+	"DETECT_CHARSET": func(c *Collector, val string) {
+		c.DetectCharset = isYesString(val)
+	},
+	// The mere presence of the variable disables cookies; its value is ignored.
+	"DISABLE_COOKIES": func(c *Collector, _ string) {
+		c.backend.Client.Jar = nil
+	},
+	"DISALLOWED_DOMAINS": func(c *Collector, val string) {
+		c.DisallowedDomains = strings.Split(val, ",")
+	},
+	"IGNORE_ROBOTSTXT": func(c *Collector, val string) {
+		c.IgnoreRobotsTxt = isYesString(val)
+	},
+	// Redirects are followed by default; a non-"yes" value installs a
+	// handler that stops at the first redirect response.
+	"FOLLOW_REDIRECTS": func(c *Collector, val string) {
+		if !isYesString(val) {
+			c.RedirectHandler = func(req *http.Request, via []*http.Request) error {
+				return http.ErrUseLastResponse
+			}
+		}
+	},
+	"MAX_BODY_SIZE": func(c *Collector, val string) {
+		size, err := strconv.Atoi(val)
+		if err == nil {
+			c.MaxBodySize = size
+		}
+	},
+	"MAX_DEPTH": func(c *Collector, val string) {
+		// Apply the value only when it parsed successfully. The previous
+		// check was inverted, so valid values were never applied.
+		maxDepth, err := strconv.Atoi(val)
+		if err == nil {
+			c.MaxDepth = maxDepth
+		}
+	},
+	"PARSE_HTTP_ERROR_RESPONSE": func(c *Collector, val string) {
+		c.ParseHTTPErrorResponse = isYesString(val)
+	},
+	"USER_AGENT": func(c *Collector, val string) {
+		c.UserAgent = val
+	},
+}
+
+// NewCollector builds a Collector. It applies the defaults from Init, then
+// every supplied option in order, and finally the COLLY_* environment
+// settings, which therefore override the options.
+func NewCollector(options ...func(*Collector)) *Collector {
+	c := new(Collector)
+	c.Init()
+	for i := range options {
+		options[i](c)
+	}
+	c.parseSettingsFromEnv()
+	return c
+}
+
+// UserAgent returns an option that sets the Collector's User-Agent string.
+func UserAgent(ua string) func(*Collector) {
+	option := func(col *Collector) { col.UserAgent = ua }
+	return option
+}
+
+// MaxDepth returns an option that limits the recursion depth of visited
+// URLs; 0 means unlimited.
+func MaxDepth(depth int) func(*Collector) {
+	option := func(col *Collector) { col.MaxDepth = depth }
+	return option
+}
+
+// AllowedDomains returns an option that sets the Collector's domain
+// whitelist.
+func AllowedDomains(domains ...string) func(*Collector) {
+	option := func(col *Collector) { col.AllowedDomains = domains }
+	return option
+}
+
+// ParseHTTPErrorResponse returns an option that makes the Collector parse
+// responses even when they carry an HTTP error status.
+func ParseHTTPErrorResponse() func(*Collector) {
+	option := func(col *Collector) { col.ParseHTTPErrorResponse = true }
+	return option
+}
+
+// DisallowedDomains returns an option that sets the Collector's domain
+// blacklist.
+func DisallowedDomains(domains ...string) func(*Collector) {
+	option := func(col *Collector) { col.DisallowedDomains = domains }
+	return option
+}
+
+// DisallowedURLFilters returns an option that sets the regular expressions
+// used to reject URLs: a request is stopped when any of them matches its URL.
+func DisallowedURLFilters(filters ...*regexp.Regexp) func(*Collector) {
+	option := func(col *Collector) { col.DisallowedURLFilters = filters }
+	return option
+}
+
+// URLFilters returns an option that sets the regular expressions used to
+// admit URLs: when the list is non-empty, a request proceeds only if at
+// least one of them matches its URL.
+func URLFilters(filters ...*regexp.Regexp) func(*Collector) {
+	option := func(col *Collector) { col.URLFilters = filters }
+	return option
+}
+
+// AllowURLRevisit returns an option that lets the Collector download the
+// same URL more than once.
+func AllowURLRevisit() func(*Collector) {
+	option := func(col *Collector) { col.AllowURLRevisit = true }
+	return option
+}
+
+// MaxBodySize returns an option that caps the size, in bytes, of a
+// retrieved response body.
+func MaxBodySize(sizeInBytes int) func(*Collector) {
+	option := func(col *Collector) { col.MaxBodySize = sizeInBytes }
+	return option
+}
+
+// CacheDir returns an option that sets the directory in which GET requests
+// are cached as files.
+func CacheDir(path string) func(*Collector) {
+	option := func(col *Collector) { col.CacheDir = path }
+	return option
+}
+
+// IgnoreRobotsTxt returns an option that makes the Collector disregard the
+// restrictions of the target host's robots.txt file.
+func IgnoreRobotsTxt() func(*Collector) {
+	option := func(col *Collector) { col.IgnoreRobotsTxt = true }
+	return option
+}
+
+// ID returns an option that overrides the Collector's unique identifier.
+func ID(id uint32) func(*Collector) {
+	option := func(col *Collector) { col.ID = id }
+	return option
+}
+
+// Async returns an option that switches asynchronous network requests on
+// (true) or off (false).
+func Async(a bool) func(*Collector) {
+	option := func(col *Collector) { col.Async = a }
+	return option
+}
+
+// DetectCharset returns an option that enables character encoding detection
+// for non-utf8 response bodies without an explicit charset declaration.
+// This feature uses https://github.com/saintfish/chardet
+func DetectCharset() func(*Collector) {
+	option := func(col *Collector) { col.DetectCharset = true }
+	return option
+}
+
+// Debugger returns an option that initializes d and attaches it to the
+// Collector. d.Init runs when the option is applied, not when it is created.
+func Debugger(d debug.Debugger) func(*Collector) {
+	option := func(col *Collector) {
+		d.Init()
+		col.debugger = d
+	}
+	return option
+}
+
+// Init resets the Collector to its default configuration and allocates its
+// private state: in-memory storage, an HTTP backend with a fresh cookie jar,
+// the wait group, the lock and the robots.txt cache. It also assigns a new
+// collector ID.
+func (c *Collector) Init() {
+	// Public defaults.
+	c.UserAgent = "colly - https://github.com/gocolly/colly"
+	c.MaxDepth = 0
+	c.MaxBodySize = 10 * 1024 * 1024
+	c.IgnoreRobotsTxt = true
+	c.ID = atomic.AddUint32(&collectorCounter, 1)
+
+	// Private bookkeeping.
+	c.wg = &sync.WaitGroup{}
+	c.lock = &sync.RWMutex{}
+	c.robotsMap = make(map[string]*robotstxt.RobotsData)
+	c.store = &storage.InMemoryStorage{}
+	c.store.Init()
+
+	// HTTP backend; the redirect check is installed after the backend has
+	// been initialized.
+	jar, _ := cookiejar.New(nil)
+	c.backend = &httpBackend{}
+	c.backend.Init(jar)
+	c.backend.Client.CheckRedirect = c.checkRedirectFunc()
+}
+
+// Appengine replaces the Collector's backend http.Client with one provided
+// by appengine/urlfetch, carrying over the current cookie jar, redirect
+// check and timeout. Use it when the scraper runs on Google App Engine.
+// Example:
+//
+//	func startScraper(w http.ResponseWriter, r *http.Request) {
+//		ctx := appengine.NewContext(r)
+//		c := colly.NewCollector()
+//		c.Appengine(ctx)
+//		...
+//		c.Visit("https://google.ca")
+//	}
+func (c *Collector) Appengine(ctx context.Context) {
+	previous := c.backend.Client
+	replacement := urlfetch.Client(ctx)
+	replacement.Jar = previous.Jar
+	replacement.CheckRedirect = previous.CheckRedirect
+	replacement.Timeout = previous.Timeout
+	c.backend.Client = replacement
+}
+
+// Visit starts a collecting job with a GET request to URL and runs the
+// registered callbacks. When CheckHead is set, a HEAD request is issued
+// first and its error, if any, is returned without performing the GET.
+func (c *Collector) Visit(URL string) error {
+	if !c.CheckHead {
+		return c.scrape(URL, "GET", 1, nil, nil, nil, true)
+	}
+	if err := c.scrape(URL, "HEAD", 1, nil, nil, nil, true); err != nil {
+		return err
+	}
+	return c.scrape(URL, "GET", 1, nil, nil, nil, true)
+}
+
+// Head starts a collector job by creating a HEAD request.
+func (c *Collector) Head(URL string) error {
+ return c.scrape(URL, "HEAD", 1, nil, nil, nil, false)
+}
+
+// Post starts a collector job with a POST request whose body is built from
+// requestData by createFormReader. The registered callbacks are called too.
+func (c *Collector) Post(URL string, requestData map[string]string) error {
+	body := createFormReader(requestData)
+	return c.scrape(URL, "POST", 1, body, nil, nil, true)
+}
+
+// PostRaw starts a collector job with a POST request that sends requestData
+// verbatim as the body. The registered callbacks are called too.
+func (c *Collector) PostRaw(URL string, requestData []byte) error {
+	body := bytes.NewReader(requestData)
+	return c.scrape(URL, "POST", 1, body, nil, nil, true)
+}
+
+// PostMultipart starts a collector job with a multipart/form-data POST
+// request built from requestData, using a freshly generated boundary. The
+// registered callbacks are called too.
+func (c *Collector) PostMultipart(URL string, requestData map[string][]byte) error {
+	boundary := randomBoundary()
+	headers := http.Header{}
+	headers.Set("Content-Type", "multipart/form-data; boundary="+boundary)
+	headers.Set("User-Agent", c.UserAgent)
+	body := createMultipartReader(boundary, requestData)
+	return c.scrape(URL, "POST", 1, body, nil, headers, true)
+}
+
+// Request starts a collector job by creating a custom HTTP request
+// where method, context, headers and request data can be specified.
+// Set requestData, ctx, hdr parameters to nil if you don't want to use them.
+// Valid methods:
+// - "GET"
+// - "HEAD"
+// - "POST"
+// - "PUT"
+// - "DELETE"
+// - "PATCH"
+// - "OPTIONS"
+func (c *Collector) Request(method, URL string, requestData io.Reader, ctx *Context, hdr http.Header) error {
+ return c.scrape(URL, method, 1, requestData, ctx, hdr, true)
+}
+
+// SetDebugger initializes d and attaches it to the collector, replacing any
+// debugger set before.
+func (c *Collector) SetDebugger(d debug.Debugger) {
+	d.Init()
+	c.debugger = d
+}
+
+// UnmarshalRequest creates a Request from serialized data
+func (c *Collector) UnmarshalRequest(r []byte) (*Request, error) {
+ req := &serializableRequest{}
+ err := json.Unmarshal(r, req)
+ if err != nil {
+ return nil, err
+ }
+
+ u, err := url.Parse(req.URL)
+ if err != nil {
+ return nil, err
+ }
+
+ ctx := NewContext()
+ for k, v := range req.Ctx {
+ ctx.Put(k, v)
+ }
+
+ return &Request{
+ Method: req.Method,
+ URL: u,
+ Body: bytes.NewReader(req.Body),
+ Ctx: ctx,
+ ID: atomic.AddUint32(&c.requestCount, 1),
+ Headers: &req.Headers,
+ collector: c,
+ }, nil
+}
+
+func (c *Collector) scrape(u, method string, depth int, requestData io.Reader, ctx *Context, hdr http.Header, checkRevisit bool) error {
+ if err := c.requestCheck(u, method, depth, checkRevisit); err != nil {
+ return err
+ }
+ parsedURL, err := url.Parse(u)
+ if err != nil {
+ return err
+ }
+ if parsedURL.Scheme == "" {
+ parsedURL.Scheme = "http"
+ }
+ if !c.isDomainAllowed(parsedURL.Host) {
+ return ErrForbiddenDomain
+ }
+ if method != "HEAD" && !c.IgnoreRobotsTxt {
+ if err = c.checkRobots(parsedURL); err != nil {
+ return err
+ }
+ }
+ if hdr == nil {
+ hdr = http.Header{"User-Agent": []string{c.UserAgent}}
+ }
+ rc, ok := requestData.(io.ReadCloser)
+ if !ok && requestData != nil {
+ rc = ioutil.NopCloser(requestData)
+ }
+ req := &http.Request{
+ Method: method,
+ URL: parsedURL,
+ Proto: "HTTP/1.1",
+ ProtoMajor: 1,
+ ProtoMinor: 1,
+ Header: hdr,
+ Body: rc,
+ Host: parsedURL.Host,
+ }
+ setRequestBody(req, requestData)
+ u = parsedURL.String()
+ c.wg.Add(1)
+ if c.Async {
+ go c.fetch(u, method, depth, requestData, ctx, hdr, req)
+ return nil
+ }
+ return c.fetch(u, method, depth, requestData, ctx, hdr, req)
+}
+
+// setRequestBody fills in req.ContentLength and req.GetBody when body is a
+// *bytes.Buffer, *bytes.Reader or *strings.Reader, so that the body can be
+// replayed. A recognized body of length zero is replaced by http.NoBody.
+// Other reader types, and a nil body, leave req untouched.
+func setRequestBody(req *http.Request, body io.Reader) {
+	if body == nil {
+		return
+	}
+
+	switch src := body.(type) {
+	case *bytes.Buffer:
+		req.ContentLength = int64(src.Len())
+		content := src.Bytes()
+		req.GetBody = func() (io.ReadCloser, error) {
+			return ioutil.NopCloser(bytes.NewReader(content)), nil
+		}
+	case *bytes.Reader:
+		req.ContentLength = int64(src.Len())
+		// Copy the reader so every GetBody call starts at the same offset.
+		saved := *src
+		req.GetBody = func() (io.ReadCloser, error) {
+			replay := saved
+			return ioutil.NopCloser(&replay), nil
+		}
+	case *strings.Reader:
+		req.ContentLength = int64(src.Len())
+		saved := *src
+		req.GetBody = func() (io.ReadCloser, error) {
+			replay := saved
+			return ioutil.NopCloser(&replay), nil
+		}
+	}
+
+	if req.GetBody == nil || req.ContentLength != 0 {
+		return
+	}
+	// Known-empty body: signal it explicitly with http.NoBody.
+	req.Body = http.NoBody
+	req.GetBody = func() (io.ReadCloser, error) { return http.NoBody, nil }
+}
+
+// fetch performs one request and drives the callback chain in this order:
+// OnRequest, the HTTP round trip through the backend, OnError (on failure),
+// OnResponse, OnHTML, OnXML and OnScraped. It releases the wait-group slot
+// taken by scrape when it returns. The u and hdr parameters are not read by
+// the body; the URL and headers are taken from req.
+func (c *Collector) fetch(u, method string, depth int, requestData io.Reader, ctx *Context, hdr http.Header, req *http.Request) error {
+ defer c.wg.Done()
+ if ctx == nil {
+ ctx = NewContext()
+ }
+ // Request shares the header map with req, so header changes made by
+ // OnRequest callbacks are sent on the wire.
+ request := &Request{
+ URL: req.URL,
+ Headers: &req.Header,
+ Ctx: ctx,
+ Depth: depth,
+ Method: method,
+ Body: requestData,
+ collector: c,
+ ID: atomic.AddUint32(&c.requestCount, 1),
+ }
+
+ c.handleOnRequest(request)
+
+ // An OnRequest callback may have aborted the request.
+ if request.abort {
+ return nil
+ }
+
+ // Default headers, applied only when the callbacks did not set them.
+ if method == "POST" && req.Header.Get("Content-Type") == "" {
+ req.Header.Add("Content-Type", "application/x-www-form-urlencoded")
+ }
+
+ if req.Header.Get("Accept") == "" {
+ req.Header.Set("Accept", "*/*")
+ }
+
+ origURL := req.URL
+ response, err := c.backend.Cache(req, c.MaxBodySize, c.CacheDir)
+ if proxyURL, ok := req.Context().Value(ProxyURLKey).(string); ok {
+ request.ProxyURL = proxyURL
+ }
+ // handleOnError returns nil when the response is acceptable.
+ if err := c.handleOnError(response, err, request, ctx); err != nil {
+ return err
+ }
+ // The backend may have replaced req.URL; keep the Request in sync.
+ // NOTE(review): presumably this happens on redirects — confirm in the backend.
+ if req.URL != origURL {
+ request.URL = req.URL
+ request.Headers = &req.Header
+ }
+ atomic.AddUint32(&c.responseCount, 1)
+ response.Ctx = ctx
+ response.Request = request
+
+ err = response.fixCharset(c.DetectCharset, request.ResponseCharacterEncoding)
+ if err != nil {
+ return err
+ }
+
+ c.handleOnResponse(response)
+
+ // Parsing failures are reported to the OnError callbacks but do not stop
+ // the remaining stages.
+ err = c.handleOnHTML(response)
+ if err != nil {
+ c.handleOnError(response, err, request, ctx)
+ }
+
+ // NOTE(review): err is overwritten here, so an OnHTML error is not part
+ // of the return value when OnXML succeeds — confirm this is intended.
+ err = c.handleOnXML(response)
+ if err != nil {
+ c.handleOnError(response, err, request, ctx)
+ }
+
+ c.handleOnScraped(response)
+
+ return err
+}
+
+// requestCheck rejects a request before any network activity. It returns
+// ErrMissingURL, ErrMaxDepth, ErrForbiddenURL or ErrNoURLFiltersMatch when
+// the corresponding rule is violated. For GET requests with checkRevisit
+// set (and AllowURLRevisit unset) it also consults the store: an already
+// visited URL yields ErrAlreadyVisited, otherwise the URL is marked visited.
+func (c *Collector) requestCheck(u, method string, depth int, checkRevisit bool) error {
+	switch {
+	case u == "":
+		return ErrMissingURL
+	case c.MaxDepth > 0 && c.MaxDepth < depth:
+		return ErrMaxDepth
+	case len(c.DisallowedURLFilters) > 0 && isMatchingFilter(c.DisallowedURLFilters, []byte(u)):
+		return ErrForbiddenURL
+	case len(c.URLFilters) > 0 && !isMatchingFilter(c.URLFilters, []byte(u)):
+		return ErrNoURLFiltersMatch
+	}
+
+	if !checkRevisit || c.AllowURLRevisit || method != "GET" {
+		return nil
+	}
+
+	// Visited URLs are tracked by their 64-bit FNV-1a hash.
+	hasher := fnv.New64a()
+	hasher.Write([]byte(u))
+	urlHash := hasher.Sum64()
+
+	seen, err := c.store.IsVisited(urlHash)
+	if err != nil {
+		return err
+	}
+	if seen {
+		return ErrAlreadyVisited
+	}
+	return c.store.Visited(urlHash)
+}
+
+// isDomainAllowed reports whether domain may be visited. A domain listed in
+// DisallowedDomains is always rejected. Otherwise it is accepted when
+// AllowedDomains is empty or contains it. Comparison is exact string
+// equality.
+func (c *Collector) isDomainAllowed(domain string) bool {
+	for i := range c.DisallowedDomains {
+		if c.DisallowedDomains[i] == domain {
+			return false
+		}
+	}
+	if len(c.AllowedDomains) == 0 {
+		return true
+	}
+	for i := range c.AllowedDomains {
+		if c.AllowedDomains[i] == domain {
+			return true
+		}
+	}
+	return false
+}
+
+// checkRobots reports whether the host's robots.txt allows c.UserAgent to
+// fetch u. Parsed robots.txt data is cached per host in c.robotsMap; on a
+// cache miss the file is downloaded from <scheme>://<host>/robots.txt.
+// It returns ErrRobotsTxtBlocked when the path is disallowed, the download
+// or parse error when robots.txt cannot be obtained, and nil otherwise
+// (including when no rule group applies to the user agent).
+func (c *Collector) checkRobots(u *url.URL) error {
+	c.lock.RLock()
+	robot, ok := c.robotsMap[u.Host]
+	c.lock.RUnlock()
+
+	if !ok {
+		// no robots file cached
+		resp, err := c.backend.Client.Get(u.Scheme + "://" + u.Host + "/robots.txt")
+		if err != nil {
+			return err
+		}
+		// The caller owns the response body; close it so the underlying
+		// connection is released instead of leaking on every cache miss.
+		defer resp.Body.Close()
+
+		robot, err = robotstxt.FromResponse(resp)
+		if err != nil {
+			return err
+		}
+		c.lock.Lock()
+		c.robotsMap[u.Host] = robot
+		c.lock.Unlock()
+	}
+
+	uaGroup := robot.FindGroup(c.UserAgent)
+	if uaGroup == nil {
+		return nil
+	}
+
+	// Only the escaped path is tested against the rules.
+	if !uaGroup.Test(u.EscapedPath()) {
+		return ErrRobotsTxtBlocked
+	}
+	return nil
+}
+
+// String is the text representation of the collector.
+// It contains useful debug information about the collector's internals
+func (c *Collector) String() string {
+ return fmt.Sprintf(
+ "Requests made: %d (%d responses) | Callbacks: OnRequest: %d, OnHTML: %d, OnResponse: %d, OnError: %d",
+ c.requestCount,
+ c.responseCount,
+ len(c.requestCallbacks),
+ len(c.htmlCallbacks),
+ len(c.responseCallbacks),
+ len(c.errorCallbacks),
+ )
+}
+
+// Wait blocks until every job started by this collector has finished.
+func (c *Collector) Wait() {
+	pending := c.wg
+	pending.Wait()
+}
+
+// OnRequest registers f to be called for every request the Collector
+// makes, before the request is sent.
+func (c *Collector) OnRequest(f RequestCallback) {
+	c.lock.Lock()
+	defer c.lock.Unlock()
+
+	callbacks := c.requestCallbacks
+	if callbacks == nil {
+		callbacks = make([]RequestCallback, 0, 4)
+	}
+	c.requestCallbacks = append(callbacks, f)
+}
+
+// OnResponse registers f to be called for every response received.
+func (c *Collector) OnResponse(f ResponseCallback) {
+	c.lock.Lock()
+	defer c.lock.Unlock()
+
+	callbacks := c.responseCallbacks
+	if callbacks == nil {
+		callbacks = make([]ResponseCallback, 0, 4)
+	}
+	c.responseCallbacks = append(callbacks, f)
+}
+
+// OnHTML registers f to be called for every HTML element matched by
+// goquerySelector, a selector understood by
+// https://github.com/PuerkitoBio/goquery
+func (c *Collector) OnHTML(goquerySelector string, f HTMLCallback) {
+	entry := &htmlCallbackContainer{
+		Selector: goquerySelector,
+		Function: f,
+	}
+
+	c.lock.Lock()
+	defer c.lock.Unlock()
+
+	if c.htmlCallbacks == nil {
+		c.htmlCallbacks = make([]*htmlCallbackContainer, 0, 4)
+	}
+	c.htmlCallbacks = append(c.htmlCallbacks, entry)
+}
+
+// OnXML registers f to be called for every XML element matched by
+// xpathQuery, an xpath expression understood by
+// https://github.com/antchfx/xmlquery
+func (c *Collector) OnXML(xpathQuery string, f XMLCallback) {
+	entry := &xmlCallbackContainer{
+		Query:    xpathQuery,
+		Function: f,
+	}
+
+	c.lock.Lock()
+	defer c.lock.Unlock()
+
+	if c.xmlCallbacks == nil {
+		c.xmlCallbacks = make([]*xmlCallbackContainer, 0, 4)
+	}
+	c.xmlCallbacks = append(c.xmlCallbacks, entry)
+}
+
+// OnHTMLDetach deregisters the first OnHTML callback that was registered
+// with goquerySelector. It does nothing when no callback uses that selector.
+func (c *Collector) OnHTMLDetach(goquerySelector string) {
+	c.lock.Lock()
+	defer c.lock.Unlock()
+
+	for idx, entry := range c.htmlCallbacks {
+		if entry.Selector != goquerySelector {
+			continue
+		}
+		c.htmlCallbacks = append(c.htmlCallbacks[:idx], c.htmlCallbacks[idx+1:]...)
+		return
+	}
+}
+
+// OnXMLDetach deregisters the first OnXML callback that was registered with
+// xpathQuery. It does nothing when no callback uses that query.
+func (c *Collector) OnXMLDetach(xpathQuery string) {
+	c.lock.Lock()
+	defer c.lock.Unlock()
+
+	for idx, entry := range c.xmlCallbacks {
+		if entry.Query != xpathQuery {
+			continue
+		}
+		c.xmlCallbacks = append(c.xmlCallbacks[:idx], c.xmlCallbacks[idx+1:]...)
+		return
+	}
+}
+
+// OnError registers f to be called when an error occurs while handling a
+// request.
+func (c *Collector) OnError(f ErrorCallback) {
+	c.lock.Lock()
+	defer c.lock.Unlock()
+
+	callbacks := c.errorCallbacks
+	if callbacks == nil {
+		callbacks = make([]ErrorCallback, 0, 4)
+	}
+	c.errorCallbacks = append(callbacks, f)
+}
+
+// OnScraped registers f to be called as the final step of scraping a
+// response, after the OnHTML and OnXML callbacks.
+func (c *Collector) OnScraped(f ScrapedCallback) {
+	c.lock.Lock()
+	defer c.lock.Unlock()
+
+	callbacks := c.scrapedCallbacks
+	if callbacks == nil {
+		callbacks = make([]ScrapedCallback, 0, 4)
+	}
+	c.scrapedCallbacks = append(callbacks, f)
+}
+
+// WithTransport installs a custom http.RoundTripper (transport) on the
+// collector's HTTP client.
+func (c *Collector) WithTransport(transport http.RoundTripper) {
+	client := c.backend.Client
+	client.Transport = transport
+}
+
+// DisableCookies turns off cookie handling by removing the HTTP client's
+// cookie jar.
+func (c *Collector) DisableCookies() {
+	client := c.backend.Client
+	client.Jar = nil
+}
+
+// SetCookieJar replaces the HTTP client's cookie jar with j.
+func (c *Collector) SetCookieJar(j *cookiejar.Jar) {
+	client := c.backend.Client
+	client.Jar = j
+}
+
+// SetRequestTimeout overrides the default timeout (10 seconds) for this collector
+// by setting the timeout of its HTTP client.
+func (c *Collector) SetRequestTimeout(timeout time.Duration) {
+	client := c.backend.Client
+	client.Timeout = timeout
+}
+
+// SetStorage replaces the default in-memory storage with s. The storage
+// holds scraping related data like cookies and visited urls. s is
+// initialized first; if that fails the error is returned and the collector
+// is left unchanged. On success the cookie jar is rebuilt on top of s.
+func (c *Collector) SetStorage(s storage.Storage) error {
+	err := s.Init()
+	if err != nil {
+		return err
+	}
+	jar := createJar(s)
+	c.store = s
+	c.backend.Client.Jar = jar
+	return nil
+}
+
+// SetProxy routes the collector's requests through the proxy at proxyURL.
+// If the HTTP client's transport is not an *http.Transport, the transport
+// is replaced by a new one (see SetProxyFunc).
+// The proxy type is determined by the URL scheme. "http"
+// and "socks5" are supported. If the scheme is empty,
+// "http" is assumed.
+// It returns an error when proxyURL cannot be parsed.
+func (c *Collector) SetProxy(proxyURL string) error {
+	parsed, err := url.Parse(proxyURL)
+	if err != nil {
+		return err
+	}
+	fixedProxy := http.ProxyURL(parsed)
+	c.SetProxyFunc(fixedProxy)
+	return nil
+}
+
+// SetProxyFunc sets a custom proxy setter/switcher function.
+// See built-in ProxyFuncs for more details.
+// When the HTTP client's transport is an *http.Transport its Proxy field is
+// updated in place; any other transport (or none) is replaced by a new
+// http.Transport that only has Proxy set.
+// The proxy type is determined by the URL scheme. "http"
+// and "socks5" are supported. If the scheme is empty,
+// "http" is assumed.
+func (c *Collector) SetProxyFunc(p ProxyFunc) {
+	if existing, isHTTPTransport := c.backend.Client.Transport.(*http.Transport); isHTTPTransport {
+		existing.Proxy = p
+		return
+	}
+	c.backend.Client.Transport = &http.Transport{Proxy: p}
+}
+
+// createEvent builds the debug.Event reported to the debugger for one
+// callback stage of a request.
+func createEvent(eventType string, requestID, collectorID uint32, kvargs map[string]string) *debug.Event {
+	event := new(debug.Event)
+	event.CollectorID = collectorID
+	event.RequestID = requestID
+	event.Type = eventType
+	event.Values = kvargs
+	return event
+}
+
+// handleOnRequest emits a "request" debug event (when a debugger is
+// attached) and then runs the OnRequest callbacks in registration order.
+func (c *Collector) handleOnRequest(r *Request) {
+	if c.debugger != nil {
+		values := map[string]string{
+			"url": r.URL.String(),
+		}
+		c.debugger.Event(createEvent("request", r.ID, c.ID, values))
+	}
+	for i := range c.requestCallbacks {
+		c.requestCallbacks[i](r)
+	}
+}
+
+// handleOnResponse emits a "response" debug event (when a debugger is
+// attached) and then runs the OnResponse callbacks in registration order.
+func (c *Collector) handleOnResponse(r *Response) {
+	if c.debugger != nil {
+		values := map[string]string{
+			"url":    r.Request.URL.String(),
+			"status": http.StatusText(r.StatusCode),
+		}
+		c.debugger.Event(createEvent("response", r.Request.ID, c.ID, values))
+	}
+	for i := range c.responseCallbacks {
+		c.responseCallbacks[i](r)
+	}
+}
+
+// handleOnHTML parses the response body as HTML and invokes every OnHTML
+// callback for each node matched by its selector. It does nothing when no
+// OnHTML callback is registered or the Content-Type does not mention
+// "html". A <base href> element, when present, becomes the request's base
+// URL. It returns the parse error, if any.
+func (c *Collector) handleOnHTML(resp *Response) error {
+	if len(c.htmlCallbacks) == 0 {
+		return nil
+	}
+	contentType := strings.ToLower(resp.Headers.Get("Content-Type"))
+	if !strings.Contains(contentType, "html") {
+		return nil
+	}
+
+	doc, err := goquery.NewDocumentFromReader(bytes.NewBuffer(resp.Body))
+	if err != nil {
+		return err
+	}
+	if baseHref, hasBase := doc.Find("base[href]").Attr("href"); hasBase {
+		resp.Request.baseURL, _ = url.Parse(baseHref)
+	}
+
+	for _, entry := range c.htmlCallbacks {
+		// matchIndex numbers the matched nodes per callback, starting at 0.
+		matchIndex := 0
+		doc.Find(entry.Selector).Each(func(_ int, sel *goquery.Selection) {
+			for _, node := range sel.Nodes {
+				element := NewHTMLElementFromSelectionNode(resp, sel, node, matchIndex)
+				matchIndex++
+				if c.debugger != nil {
+					values := map[string]string{
+						"selector": entry.Selector,
+						"url":      resp.Request.URL.String(),
+					}
+					c.debugger.Event(createEvent("html", resp.Request.ID, c.ID, values))
+				}
+				entry.Function(element)
+			}
+		})
+	}
+	return nil
+}
+
+// handleOnXML invokes every OnXML callback for each node matched by its
+// xpath query. It does nothing when no OnXML callback is registered or the
+// Content-Type mentions neither "html" nor "xml". HTML responses are parsed
+// with htmlquery (and a <base href> updates the request's base URL); XML
+// responses are parsed with xmlquery. It returns the parse error, if any.
+func (c *Collector) handleOnXML(resp *Response) error {
+	if len(c.xmlCallbacks) == 0 {
+		return nil
+	}
+	contentType := strings.ToLower(resp.Headers.Get("Content-Type"))
+	isHTML := strings.Contains(contentType, "html")
+	isXML := strings.Contains(contentType, "xml")
+
+	switch {
+	case isHTML:
+		root, err := htmlquery.Parse(bytes.NewBuffer(resp.Body))
+		if err != nil {
+			return err
+		}
+		if base := htmlquery.FindOne(root, "//base"); base != nil {
+			for _, attr := range base.Attr {
+				if attr.Key != "href" {
+					continue
+				}
+				resp.Request.baseURL, _ = url.Parse(attr.Val)
+				break
+			}
+		}
+
+		for _, entry := range c.xmlCallbacks {
+			for _, node := range htmlquery.Find(root, entry.Query) {
+				element := NewXMLElementFromHTMLNode(resp, node)
+				if c.debugger != nil {
+					values := map[string]string{
+						"selector": entry.Query,
+						"url":      resp.Request.URL.String(),
+					}
+					c.debugger.Event(createEvent("xml", resp.Request.ID, c.ID, values))
+				}
+				entry.Function(element)
+			}
+		}
+	case isXML:
+		root, err := xmlquery.Parse(bytes.NewBuffer(resp.Body))
+		if err != nil {
+			return err
+		}
+
+		for _, entry := range c.xmlCallbacks {
+			xmlquery.FindEach(root, entry.Query, func(_ int, node *xmlquery.Node) {
+				element := NewXMLElementFromXMLNode(resp, node)
+				if c.debugger != nil {
+					values := map[string]string{
+						"selector": entry.Query,
+						"url":      resp.Request.URL.String(),
+					}
+					c.debugger.Event(createEvent("xml", resp.Request.ID, c.ID, values))
+				}
+				entry.Function(element)
+			})
+		}
+	}
+	return nil
+}
+
+// handleOnError decides whether the outcome of a request is an error and,
+// if so, reports it. With a nil err the response is accepted (nil is
+// returned) when ParseHTTPErrorResponse is set or the status code is below
+// 203; otherwise an error is built from the status text. For every error
+// the debugger is notified, the OnError callbacks run with a non-nil
+// response, and the error is returned.
+func (c *Collector) handleOnError(response *Response, err error, request *Request, ctx *Context) error {
+	if err == nil {
+		if c.ParseHTTPErrorResponse || response.StatusCode < 203 {
+			return nil
+		}
+		// Status is 203 or above here: turn it into an error.
+		err = errors.New(http.StatusText(response.StatusCode))
+	}
+
+	// Transport failures produce no response; give the callbacks a stub.
+	if response == nil {
+		response = &Response{
+			Request: request,
+			Ctx:     ctx,
+		}
+	}
+	if c.debugger != nil {
+		values := map[string]string{
+			"url":    request.URL.String(),
+			"status": http.StatusText(response.StatusCode),
+		}
+		c.debugger.Event(createEvent("error", request.ID, c.ID, values))
+	}
+	if response.Request == nil {
+		response.Request = request
+	}
+	if response.Ctx == nil {
+		response.Ctx = request.Ctx
+	}
+	for i := range c.errorCallbacks {
+		c.errorCallbacks[i](response, err)
+	}
+	return err
+}
+
+// handleOnScraped emits a "scraped" debug event (when a debugger is
+// attached) and then runs the OnScraped callbacks in registration order.
+func (c *Collector) handleOnScraped(r *Response) {
+	if c.debugger != nil {
+		values := map[string]string{
+			"url": r.Request.URL.String(),
+		}
+		c.debugger.Event(createEvent("scraped", r.Request.ID, c.ID, values))
+	}
+	for i := range c.scrapedCallbacks {
+		c.scrapedCallbacks[i](r)
+	}
+}
+
+// Limit adds a new LimitRule to the collector's HTTP backend and returns
+// the backend's error, if any.
+func (c *Collector) Limit(rule *LimitRule) error {
+	err := c.backend.Limit(rule)
+	return err
+}
+
+// Limits adds several LimitRules to the collector's HTTP backend and
+// returns the backend's error, if any.
+func (c *Collector) Limits(rules []*LimitRule) error {
+	err := c.backend.Limits(rules)
+	return err
+}
+
+// SetCookies stores cookies in the collector's cookie jar for the given
+// URL. It returns ErrNoCookieJar when cookies are disabled and the parse
+// error when URL is invalid.
+func (c *Collector) SetCookies(URL string, cookies []*http.Cookie) error {
+	jar := c.backend.Client.Jar
+	if jar == nil {
+		return ErrNoCookieJar
+	}
+	target, err := url.Parse(URL)
+	if err != nil {
+		return err
+	}
+	jar.SetCookies(target, cookies)
+	return nil
+}
+
+// Cookies returns the cookies the jar would send in a request for the given
+// URL. It returns nil when cookies are disabled or URL cannot be parsed.
+func (c *Collector) Cookies(URL string) []*http.Cookie {
+	jar := c.backend.Client.Jar
+	if jar == nil {
+		return nil
+	}
+	target, err := url.Parse(URL)
+	if err != nil {
+		return nil
+	}
+	return jar.Cookies(target)
+}
+
+// Clone creates a copy of the Collector's configuration without its
+// callbacks. The clone gets a new ID and its own wait group, so Wait on the
+// clone tracks only the clone's jobs. The HTTP backend (and therefore the
+// cookie jar), the storage, the debugger, the lock and the robots.txt cache
+// are shared between the collectors.
+func (c *Collector) Clone() *Collector {
+	return &Collector{
+		AllowedDomains:         c.AllowedDomains,
+		AllowURLRevisit:        c.AllowURLRevisit,
+		CacheDir:               c.CacheDir,
+		DetectCharset:          c.DetectCharset,
+		DisallowedDomains:      c.DisallowedDomains,
+		ID:                     atomic.AddUint32(&collectorCounter, 1),
+		IgnoreRobotsTxt:        c.IgnoreRobotsTxt,
+		MaxBodySize:            c.MaxBodySize,
+		MaxDepth:               c.MaxDepth,
+		DisallowedURLFilters:   c.DisallowedURLFilters,
+		URLFilters:             c.URLFilters,
+		ParseHTTPErrorResponse: c.ParseHTTPErrorResponse,
+		UserAgent:              c.UserAgent,
+		// CheckHead was previously dropped, so clones silently skipped the
+		// HEAD pre-check configured on the original.
+		CheckHead:         c.CheckHead,
+		store:             c.store,
+		backend:           c.backend,
+		debugger:          c.debugger,
+		Async:             c.Async,
+		RedirectHandler:   c.RedirectHandler,
+		errorCallbacks:    make([]ErrorCallback, 0, 8),
+		htmlCallbacks:     make([]*htmlCallbackContainer, 0, 8),
+		xmlCallbacks:      make([]*xmlCallbackContainer, 0, 8),
+		scrapedCallbacks:  make([]ScrapedCallback, 0, 8),
+		lock:              c.lock,
+		requestCallbacks:  make([]RequestCallback, 0, 8),
+		responseCallbacks: make([]ResponseCallback, 0, 8),
+		robotsMap:         c.robotsMap,
+		wg:                &sync.WaitGroup{},
+	}
+}
+
+// checkRedirectFunc returns a redirect policy with the signature of
+// http.Client.CheckRedirect. The returned function
+//   - refuses redirects to hosts rejected by c.isDomainAllowed,
+//   - hands the decision to c.RedirectHandler when one is set,
+//   - returns http.ErrUseLastResponse once via holds 10 or more requests,
+//   - otherwise copies the previous request's headers onto the new request
+//     and removes Authorization when the host has changed.
+func (c *Collector) checkRedirectFunc() func(req *http.Request, via []*http.Request) error {
+	return func(req *http.Request, via []*http.Request) error {
+		if !c.isDomainAllowed(req.URL.Host) {
+			return fmt.Errorf("Not following redirect to %s because its not in AllowedDomains", req.URL.Host)
+		}
+
+		if c.RedirectHandler != nil {
+			return c.RedirectHandler(req, via)
+		}
+
+		// Honor golangs default of maximum of 10 redirects
+		if len(via) >= 10 {
+			return http.ErrUseLastResponse
+		}
+
+		lastRequest := via[len(via)-1]
+
+		// Copy the headers from last request
+		for hName, hValues := range lastRequest.Header {
+			for i, hValue := range hValues {
+				if i == 0 {
+					// Replace whatever req already carries for this header.
+					req.Header.Set(hName, hValue)
+					continue
+				}
+				// Append the remaining values: calling Set for each of them
+				// would leave only the last value of a multi-valued header.
+				req.Header.Add(hName, hValue)
+			}
+		}
+
+		// If domain has changed, remove the Authorization-header if it exists
+		if req.URL.Host != lastRequest.URL.Host {
+			req.Header.Del("Authorization")
+		}
+
+		return nil
+	}
+}
+
+func (c *Collector) parseSettingsFromEnv() {
+ for _, e := range os.Environ() {
+ if !strings.HasPrefix(e, "COLLY_") {
+ continue
+ }
+ pair := strings.SplitN(e[6:], "=", 2)
+ if f, ok := envMap[pair[0]]; ok {
+ f(c, pair[1])
+ } else {
+ log.Println("Unknown environment variable:", pair[0])
+ }
+ }
+}
+
+// SanitizeFileName replaces dangerous characters in a string
+// so the return value can be used as a safe file name.
+//
+// The extension reported by filepath.Ext and the part before it are each
+// cleaned with sanitize.BaseName, joined as "<name>.<ext>", and every "-" in
+// the result is replaced by "_". When the cleaned extension is empty,
+// "unknown" is used as the extension.
+func SanitizeFileName(fileName string) string {
+	ext := filepath.Ext(fileName)
+
+	suffix := sanitize.BaseName(ext)
+	if suffix == "" {
+		suffix = ".unknown"
+	}
+	stem := sanitize.BaseName(fileName[:len(fileName)-len(ext)])
+
+	// suffix[1:] drops the first character, presumably the leading dot,
+	// because the format string supplies the separator itself.
+	joined := fmt.Sprintf("%s.%s", stem, suffix[1:])
+	return strings.Replace(joined, "-", "_", -1)
+}
+
+// createFormReader URL-encodes data as form values (url.Values.Encode, which
+// sorts by key) and returns a reader over the encoded string.
+func createFormReader(data map[string]string) io.Reader {
+	values := make(url.Values, len(data))
+	for name, value := range data {
+		values.Set(name, value)
+	}
+	encoded := values.Encode()
+	return strings.NewReader(encoded)
+}
+
+// createMultipartReader renders data as a multipart/form-data style payload
+// and returns a reader over it.
+//
+// The payload starts with a "Content-type" line naming boundary. Each map
+// entry then becomes one part: the key is written as the form field name and
+// the value as the part content, preceded by its length. Parts appear in map
+// iteration order, which Go does not define. Lines end with "\n".
+func createMultipartReader(boundary string, data map[string][]byte) io.Reader {
+	delimiter := "--" + boundary
+	payload := new(bytes.Buffer)
+
+	payload.WriteString("Content-type: multipart/form-data; boundary=" + boundary + "\n\n")
+	for fieldName, content := range data {
+		partHeader := delimiter + "\n" +
+			"Content-Disposition: form-data; name=" + fieldName + "\n" +
+			fmt.Sprintf("Content-Length: %d \n\n", len(content))
+		payload.WriteString(partHeader)
+		payload.Write(content)
+		payload.WriteString("\n")
+	}
+	// Closing delimiter.
+	payload.WriteString(delimiter + "--\n\n")
+	return payload
+}
+
+// randomBoundary was borrowed from
+// github.com/golang/go/mime/multipart/writer.go#randomBoundary
+func randomBoundary() string {
+ var buf [30]byte
+ _, err := io.ReadFull(rand.Reader, buf[:])
+ if err != nil {
+ panic(err)
+ }
+ return fmt.Sprintf("%x", buf[:])
+}
+
+// isYesString reports whether s, compared case-insensitively, is one of
+// "1", "yes", "true" or "y".
+func isYesString(s string) bool {
+	lowered := strings.ToLower(s)
+	for _, accepted := range [...]string{"1", "yes", "true", "y"} {
+		if lowered == accepted {
+			return true
+		}
+	}
+	return false
+}
+
+// createJar wraps s in a cookieJarSerializer, guarded by a new RWMutex, and
+// returns it as an http.CookieJar.
+func createJar(s storage.Storage) http.CookieJar {
+	jar := cookieJarSerializer{store: s, lock: new(sync.RWMutex)}
+	return &jar
+}
+
+// SetCookies merges cookies into the cookies already stored for u and writes
+// the merged list back to the store.
+//
+// New cookies have precedence: a stored cookie is appended only when
+// storage.ContainsCookie finds no cookie of the same name in the merged list.
+// The whole read-merge-write sequence runs while holding j.lock.
+func (j *cookieJarSerializer) SetCookies(u *url.URL, cookies []*http.Cookie) {
+	j.lock.Lock()
+	defer j.lock.Unlock()
+
+	stored := storage.UnstringifyCookies(j.store.Cookies(u))
+
+	// Start from a copy so that the caller's slice is never modified.
+	merged := make([]*http.Cookie, len(cookies), len(cookies)+len(stored))
+	copy(merged, cookies)
+	for _, old := range stored {
+		if storage.ContainsCookie(merged, old.Name) {
+			continue
+		}
+		merged = append(merged, old)
+	}
+
+	j.store.SetCookies(u, storage.StringifyCookies(merged))
+}
+
+// Cookies returns the cookies stored for u, leaving out cookies that have
+// expired and cookies marked Secure when u is not an https URL.
+//
+// NOTE(review): unlike SetCookies, this method does not take j.lock; confirm
+// that the store is safe for concurrent use on its own.
+func (j *cookieJarSerializer) Cookies(u *url.URL) []*http.Cookie {
+	stored := storage.UnstringifyCookies(j.store.Cookies(u))
+	now := time.Now()
+
+	usable := make([]*http.Cookie, 0, len(stored))
+	for _, ck := range stored {
+		switch {
+		case ck.RawExpires != "" && ck.Expires.Before(now):
+			// Expired: drop it.
+		case ck.Secure && u.Scheme != "https":
+			// Secure cookie on a non-https URL: drop it.
+		default:
+			usable = append(usable, ck)
+		}
+	}
+	return usable
+}
+
+// isMatchingFilter reports whether d matches at least one of the regular
+// expressions in fs. It returns false when fs is empty.
+func isMatchingFilter(fs []*regexp.Regexp, d []byte) bool {
+	for i := 0; i < len(fs); i++ {
+		if fs[i].Match(d) {
+			return true
+		}
+	}
+	return false
+}
diff --git a/vendor/github.com/gocolly/colly/context.go b/vendor/github.com/gocolly/colly/context.go
new file mode 100644
index 000000000..4bc11b95e
--- /dev/null
+++ b/vendor/github.com/gocolly/colly/context.go
@@ -0,0 +1,87 @@
+// Copyright 2018 Adam Tauber
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package colly
+
+import (
+ "sync"
+)
+
+// Context provides a tiny layer for passing data between callbacks.
+// Every access to the underlying map is guarded by lock.
+type Context struct {
+	contextMap map[string]interface{}
+	lock       *sync.RWMutex
+}
+
+// NewContext initializes a new, empty Context instance.
+func NewContext() *Context {
+	return &Context{
+		contextMap: make(map[string]interface{}),
+		lock:       &sync.RWMutex{},
+	}
+}
+
+// UnmarshalBinary ignores its input and leaves the Context unchanged.
+// This function is used by request caching.
+func (c *Context) UnmarshalBinary(_ []byte) error {
+	return nil
+}
+
+// MarshalBinary always returns nil data and a nil error: the stored values
+// are not serialized. This function is used by request caching.
+func (c *Context) MarshalBinary() (_ []byte, _ error) {
+	return nil, nil
+}
+
+// Put stores a value of any type in Context, replacing any value previously
+// stored under key.
+func (c *Context) Put(key string, value interface{}) {
+	c.lock.Lock()
+	defer c.lock.Unlock()
+	c.contextMap[key] = value
+}
+
+// Get retrieves a string value from Context.
+// Get returns an empty string if key is not found or if the value stored
+// under key is not a string; use GetAny to read values of other types.
+func (c *Context) Get(key string) string {
+	c.lock.RLock()
+	defer c.lock.RUnlock()
+	// The comma-ok assertion yields "" for a missing key as well as for a
+	// non-string value; a bare v.(string) would panic on the latter.
+	s, _ := c.contextMap[key].(string)
+	return s
+}
+
+// GetAny retrieves a value from Context.
+// GetAny returns nil if key is not found.
+func (c *Context) GetAny(key string) interface{} {
+	c.lock.RLock()
+	defer c.lock.RUnlock()
+	// Indexing a missing key yields the zero value of interface{}, i.e. nil.
+	return c.contextMap[key]
+}
+
+// ForEach calls fn once for every key/value pair in the Context and returns
+// the collected results. Pairs are visited in map iteration order, which Go
+// does not define.
+//
+// The read lock is held while fn runs, so fn must not call Put on the same
+// Context: Put needs the write lock and would block forever.
+func (c *Context) ForEach(fn func(k string, v interface{}) interface{}) []interface{} {
+	c.lock.RLock()
+	defer c.lock.RUnlock()
+
+	ret := make([]interface{}, 0, len(c.contextMap))
+	for k, v := range c.contextMap {
+		ret = append(ret, fn(k, v))
+	}
+
+	return ret
+}
diff --git a/vendor/github.com/gocolly/colly/debug/debug.go b/vendor/github.com/gocolly/colly/debug/debug.go
new file mode 100644
index 000000000..705d0f7ae
--- /dev/null
+++ b/vendor/github.com/gocolly/colly/debug/debug.go
@@ -0,0 +1,36 @@
+// Copyright 2018 Adam Tauber
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package debug
+
+// Event represents an action inside a collector. Events are delivered to a
+// Debugger through its Event method.
+type Event struct {
+ // Type is the type of the event, for example "request", "response",
+ // "error" or "scraped".
+ Type string
+ // RequestID identifies the HTTP request of the Event
+ RequestID uint32
+ // CollectorID identifies the collector of the Event
+ CollectorID uint32
+ // Values contains the event's key-value pairs. Different types of events
+ // can carry different key-value pairs, such as "url" or "status".
+ Values map[string]string
+}
+
+// Debugger is an interface for different types of debugging backends.
+// LogDebugger and WebDebugger in this package both provide these methods.
+type Debugger interface {
+ // Init initializes the backend and reports any setup error.
+ Init() error
+ // Event receives a new collector event.
+ Event(e *Event)
+}
diff --git a/vendor/github.com/gocolly/colly/debug/logdebugger.go b/vendor/github.com/gocolly/colly/debug/logdebugger.go
new file mode 100644
index 000000000..f866b6d8a
--- /dev/null
+++ b/vendor/github.com/gocolly/colly/debug/logdebugger.go
@@ -0,0 +1,54 @@
+// Copyright 2018 Adam Tauber
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package debug
+
+import (
+ "io"
+ "log"
+ "os"
+ "sync/atomic"
+ "time"
+)
+
+// LogDebugger is the simplest debugger which prints log messages to the STDERR
+type LogDebugger struct {
+	// Output is the log destination, anything can be used which implements the
+	// io.Writer interface. Leave it blank to use STDERR
+	Output io.Writer
+	// Prefix appears at the beginning of each generated log line
+	Prefix string
+	// Flag defines the logging properties.
+	Flag int
+	// logger writes the formatted events; it is created by Init.
+	logger *log.Logger
+	// counter numbers the events; it is incremented atomically by Event.
+	counter int32
+	// start is the time of the Init call; Event logs the time elapsed since.
+	start time.Time
+}
+
+// Init initializes the LogDebugger: it resets the event counter, records the
+// start time, falls back to os.Stderr when Output is nil and creates the
+// logger from Output, Prefix and Flag. It always returns nil.
+func (l *LogDebugger) Init() error {
+	l.counter, l.start = 0, time.Now()
+	if l.Output == nil {
+		l.Output = os.Stderr
+	}
+	l.logger = log.New(l.Output, l.Prefix, l.Flag)
+	return nil
+}
+
+// Event receives Collector events and prints them to the configured output
+// (STDERR by default). Each line holds the event sequence number, the
+// collector ID, the request ID, the event type, the event values and the
+// time elapsed since Init. Init must have been called before Event, because
+// Init creates the logger used here.
+func (l *LogDebugger) Event(e *Event) {
+	seq := atomic.AddInt32(&l.counter, 1)
+	elapsed := time.Since(l.start)
+	l.logger.Printf("[%06d] %d [%6d - %s] %q (%s)\n", seq, e.CollectorID, e.RequestID, e.Type, e.Values, elapsed)
+}
diff --git a/vendor/github.com/gocolly/colly/debug/webdebugger.go b/vendor/github.com/gocolly/colly/debug/webdebugger.go
new file mode 100644
index 000000000..e246361e1
--- /dev/null
+++ b/vendor/github.com/gocolly/colly/debug/webdebugger.go
@@ -0,0 +1,146 @@
+// Copyright 2018 Adam Tauber
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package debug
+
+import (
+ "encoding/json"
+ "log"
+ "net/http"
+ "time"
+)
+
+// WebDebugger is a web based debugging frontend for colly
+type WebDebugger struct {
+ // Address is the address of the web server. It is 127.0.0.1:7676 by default.
+ Address string
+ // initialized is set once Init has run, so that later Init calls return
+ // immediately.
+ initialized bool
+ // CurrentRequests holds the requests for which a "request" event has been
+ // received but no "response" or "error" event yet, keyed by request ID.
+ CurrentRequests map[uint32]requestInfo
+ // RequestLog holds the finished requests in the order their "response" or
+ // "error" event arrived.
+ RequestLog []requestInfo
+}
+
+// requestInfo is the record WebDebugger keeps for one request.
+type requestInfo struct {
+ // URL is the "url" value of the "request" event.
+ URL string
+ // Started is the time at which the "request" event was received.
+ Started time.Time
+ // Duration is the time between Started and the "response" or "error" event.
+ Duration time.Duration
+ // ResponseStatus is the "status" value of the "response" or "error" event.
+ ResponseStatus string
+ // ID is the request ID taken from the event.
+ ID uint32
+ // CollectorID is the collector ID taken from the event.
+ CollectorID uint32
+}
+
+// Init initializes the WebDebugger. Only the first call has an effect; later
+// calls return nil immediately.
+//
+// Init applies the default address when Address is empty, resets the request
+// bookkeeping, registers the "/" and "/status" handlers with http.HandleFunc
+// (that is, on http.DefaultServeMux) and starts the web server in a new
+// goroutine. The error returned by http.ListenAndServe is discarded, so Init
+// always returns nil.
+func (w *WebDebugger) Init() error {
+	if w.initialized {
+		return nil
+	}
+	// Deferred so that the flag is set however the function is left.
+	defer func() { w.initialized = true }()
+
+	if len(w.Address) == 0 {
+		w.Address = "127.0.0.1:7676"
+	}
+	w.CurrentRequests = map[uint32]requestInfo{}
+	w.RequestLog = []requestInfo{}
+
+	http.HandleFunc("/status", w.statusHandler)
+	http.HandleFunc("/", w.indexHandler)
+
+	log.Println("Starting debug webserver on", w.Address)
+	go http.ListenAndServe(w.Address, nil)
+	return nil
+}
+
+// Event updates the debugger's status.
+//
+// A "request" event records the request in CurrentRequests. A "response" or
+// "error" event fills in the duration and the status, appends the record to
+// RequestLog and removes it from CurrentRequests. All other event types are
+// ignored.
+//
+// NOTE(review): the map and the slice are modified without any locking in
+// this method; confirm how callers serialize access.
+func (w *WebDebugger) Event(e *Event) {
+	id := e.RequestID
+
+	if e.Type == "request" {
+		w.CurrentRequests[id] = requestInfo{
+			URL:         e.Values["url"],
+			Started:     time.Now(),
+			ID:          id,
+			CollectorID: e.CollectorID,
+		}
+		return
+	}
+	if e.Type != "response" && e.Type != "error" {
+		return
+	}
+
+	finished := w.CurrentRequests[id]
+	finished.Duration = time.Since(finished.Started)
+	finished.ResponseStatus = e.Values["status"]
+	w.RequestLog = append(w.RequestLog, finished)
+	delete(w.CurrentRequests, id)
+}
+
+func (w *WebDebugger) indexHandler(wr http.ResponseWriter, r *http.Request) {
+ wr.Write([]byte(`
+
+
+
Current Requests
+
+ Finished Requests
+
+
COPYRIGHT AND PERMISSION NOTICE
+ ++Copyright (c) 1995-2012 International Business Machines Corporation and others +
++All rights reserved. +
++Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), +to deal in the Software without restriction, including without limitation +the rights to use, copy, modify, merge, publish, distribute, and/or sell +copies of the Software, and to permit persons +to whom the Software is furnished to do so, provided that the above +copyright notice(s) and this permission notice appear in all copies +of the Software and that both the above copyright notice(s) and this +permission notice appear in supporting documentation. +
++THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. IN NO EVENT SHALL +THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, +OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER +RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, +NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE +USE OR PERFORMANCE OF THIS SOFTWARE. +
++Except as contained in this notice, the name of a copyright holder shall not be +used in advertising or otherwise to promote the sale, use or other dealings in +this Software without prior written authorization of the copyright holder. +
+ ++All trademarks and registered trademarks mentioned herein are the property of their respective owners. +
+ + diff --git a/vendor/github.com/saintfish/chardet/multi_byte.go b/vendor/github.com/saintfish/chardet/multi_byte.go new file mode 100644 index 000000000..1fab34ce8 --- /dev/null +++ b/vendor/github.com/saintfish/chardet/multi_byte.go @@ -0,0 +1,345 @@ +package chardet + +import ( + "errors" + "math" +) + +type recognizerMultiByte struct { + charset string + language string + decoder charDecoder + commonChars []uint16 +} + +type charDecoder interface { + DecodeOneChar([]byte) (c uint16, remain []byte, err error) +} + +func (r *recognizerMultiByte) Match(input *recognizerInput) (output recognizerOutput) { + return recognizerOutput{ + Charset: r.charset, + Language: r.language, + Confidence: r.matchConfidence(input), + } +} + +func (r *recognizerMultiByte) matchConfidence(input *recognizerInput) int { + raw := input.raw + var c uint16 + var err error + var totalCharCount, badCharCount, singleByteCharCount, doubleByteCharCount, commonCharCount int + for c, raw, err = r.decoder.DecodeOneChar(raw); len(raw) > 0; c, raw, err = r.decoder.DecodeOneChar(raw) { + totalCharCount++ + if err != nil { + badCharCount++ + } else if c <= 0xFF { + singleByteCharCount++ + } else { + doubleByteCharCount++ + if r.commonChars != nil && binarySearch(r.commonChars, c) { + commonCharCount++ + } + } + if badCharCount >= 2 && badCharCount*5 >= doubleByteCharCount { + return 0 + } + } + + if doubleByteCharCount <= 10 && badCharCount == 0 { + if doubleByteCharCount == 0 && totalCharCount < 10 { + return 0 + } else { + return 10 + } + } + + if doubleByteCharCount < 20*badCharCount { + return 0 + } + if r.commonChars == nil { + confidence := 30 + doubleByteCharCount - 20*badCharCount + if confidence > 100 { + confidence = 100 + } + return confidence + } + maxVal := math.Log(float64(doubleByteCharCount) / 4) + scaleFactor := 90 / maxVal + confidence := int(math.Log(float64(commonCharCount)+1)*scaleFactor + 10) + if confidence > 100 { + confidence = 100 + } + if confidence < 0 { + confidence = 0 
+ } + return confidence +} + +func binarySearch(l []uint16, c uint16) bool { + start := 0 + end := len(l) - 1 + for start <= end { + mid := (start + end) / 2 + if c == l[mid] { + return true + } else if c < l[mid] { + end = mid - 1 + } else { + start = mid + 1 + } + } + return false +} + +var eobError = errors.New("End of input buffer") +var badCharError = errors.New("Decode a bad char") + +type charDecoder_sjis struct { +} + +func (charDecoder_sjis) DecodeOneChar(input []byte) (c uint16, remain []byte, err error) { + if len(input) == 0 { + return 0, nil, eobError + } + first := input[0] + c = uint16(first) + remain = input[1:] + if first <= 0x7F || (first > 0xA0 && first <= 0xDF) { + return + } + if len(remain) == 0 { + return c, remain, badCharError + } + second := remain[0] + remain = remain[1:] + c = c<<8 | uint16(second) + if (second >= 0x40 && second <= 0x7F) || (second >= 0x80 && second <= 0xFE) { + } else { + err = badCharError + } + return +} + +var commonChars_sjis = []uint16{ + 0x8140, 0x8141, 0x8142, 0x8145, 0x815b, 0x8169, 0x816a, 0x8175, 0x8176, 0x82a0, + 0x82a2, 0x82a4, 0x82a9, 0x82aa, 0x82ab, 0x82ad, 0x82af, 0x82b1, 0x82b3, 0x82b5, + 0x82b7, 0x82bd, 0x82be, 0x82c1, 0x82c4, 0x82c5, 0x82c6, 0x82c8, 0x82c9, 0x82cc, + 0x82cd, 0x82dc, 0x82e0, 0x82e7, 0x82e8, 0x82e9, 0x82ea, 0x82f0, 0x82f1, 0x8341, + 0x8343, 0x834e, 0x834f, 0x8358, 0x835e, 0x8362, 0x8367, 0x8375, 0x8376, 0x8389, + 0x838a, 0x838b, 0x838d, 0x8393, 0x8e96, 0x93fa, 0x95aa, +} + +func newRecognizer_sjis() *recognizerMultiByte { + return &recognizerMultiByte{ + "Shift_JIS", + "ja", + charDecoder_sjis{}, + commonChars_sjis, + } +} + +type charDecoder_euc struct { +} + +func (charDecoder_euc) DecodeOneChar(input []byte) (c uint16, remain []byte, err error) { + if len(input) == 0 { + return 0, nil, eobError + } + first := input[0] + remain = input[1:] + c = uint16(first) + if first <= 0x8D { + return uint16(first), remain, nil + } + if len(remain) == 0 { + return 0, nil, eobError + } + second := 
remain[0] + remain = remain[1:] + c = c<<8 | uint16(second) + if first >= 0xA1 && first <= 0xFE { + if second < 0xA1 { + err = badCharError + } + return + } + if first == 0x8E { + if second < 0xA1 { + err = badCharError + } + return + } + if first == 0x8F { + if len(remain) == 0 { + return 0, nil, eobError + } + third := remain[0] + remain = remain[1:] + c = c<<0 | uint16(third) + if third < 0xa1 { + err = badCharError + } + } + return +} + +var commonChars_euc_jp = []uint16{ + 0xa1a1, 0xa1a2, 0xa1a3, 0xa1a6, 0xa1bc, 0xa1ca, 0xa1cb, 0xa1d6, 0xa1d7, 0xa4a2, + 0xa4a4, 0xa4a6, 0xa4a8, 0xa4aa, 0xa4ab, 0xa4ac, 0xa4ad, 0xa4af, 0xa4b1, 0xa4b3, + 0xa4b5, 0xa4b7, 0xa4b9, 0xa4bb, 0xa4bd, 0xa4bf, 0xa4c0, 0xa4c1, 0xa4c3, 0xa4c4, + 0xa4c6, 0xa4c7, 0xa4c8, 0xa4c9, 0xa4ca, 0xa4cb, 0xa4ce, 0xa4cf, 0xa4d0, 0xa4de, + 0xa4df, 0xa4e1, 0xa4e2, 0xa4e4, 0xa4e8, 0xa4e9, 0xa4ea, 0xa4eb, 0xa4ec, 0xa4ef, + 0xa4f2, 0xa4f3, 0xa5a2, 0xa5a3, 0xa5a4, 0xa5a6, 0xa5a7, 0xa5aa, 0xa5ad, 0xa5af, + 0xa5b0, 0xa5b3, 0xa5b5, 0xa5b7, 0xa5b8, 0xa5b9, 0xa5bf, 0xa5c3, 0xa5c6, 0xa5c7, + 0xa5c8, 0xa5c9, 0xa5cb, 0xa5d0, 0xa5d5, 0xa5d6, 0xa5d7, 0xa5de, 0xa5e0, 0xa5e1, + 0xa5e5, 0xa5e9, 0xa5ea, 0xa5eb, 0xa5ec, 0xa5ed, 0xa5f3, 0xb8a9, 0xb9d4, 0xbaee, + 0xbbc8, 0xbef0, 0xbfb7, 0xc4ea, 0xc6fc, 0xc7bd, 0xcab8, 0xcaf3, 0xcbdc, 0xcdd1, +} + +var commonChars_euc_kr = []uint16{ + 0xb0a1, 0xb0b3, 0xb0c5, 0xb0cd, 0xb0d4, 0xb0e6, 0xb0ed, 0xb0f8, 0xb0fa, 0xb0fc, + 0xb1b8, 0xb1b9, 0xb1c7, 0xb1d7, 0xb1e2, 0xb3aa, 0xb3bb, 0xb4c2, 0xb4cf, 0xb4d9, + 0xb4eb, 0xb5a5, 0xb5b5, 0xb5bf, 0xb5c7, 0xb5e9, 0xb6f3, 0xb7af, 0xb7c2, 0xb7ce, + 0xb8a6, 0xb8ae, 0xb8b6, 0xb8b8, 0xb8bb, 0xb8e9, 0xb9ab, 0xb9ae, 0xb9cc, 0xb9ce, + 0xb9fd, 0xbab8, 0xbace, 0xbad0, 0xbaf1, 0xbbe7, 0xbbf3, 0xbbfd, 0xbcad, 0xbcba, + 0xbcd2, 0xbcf6, 0xbdba, 0xbdc0, 0xbdc3, 0xbdc5, 0xbec6, 0xbec8, 0xbedf, 0xbeee, + 0xbef8, 0xbefa, 0xbfa1, 0xbfa9, 0xbfc0, 0xbfe4, 0xbfeb, 0xbfec, 0xbff8, 0xc0a7, + 0xc0af, 0xc0b8, 0xc0ba, 0xc0bb, 0xc0bd, 0xc0c7, 0xc0cc, 0xc0ce, 0xc0cf, 0xc0d6, + 
0xc0da, 0xc0e5, 0xc0fb, 0xc0fc, 0xc1a4, 0xc1a6, 0xc1b6, 0xc1d6, 0xc1df, 0xc1f6, + 0xc1f8, 0xc4a1, 0xc5cd, 0xc6ae, 0xc7cf, 0xc7d1, 0xc7d2, 0xc7d8, 0xc7e5, 0xc8ad, +} + +func newRecognizer_euc_jp() *recognizerMultiByte { + return &recognizerMultiByte{ + "EUC-JP", + "ja", + charDecoder_euc{}, + commonChars_euc_jp, + } +} + +func newRecognizer_euc_kr() *recognizerMultiByte { + return &recognizerMultiByte{ + "EUC-KR", + "ko", + charDecoder_euc{}, + commonChars_euc_kr, + } +} + +type charDecoder_big5 struct { +} + +func (charDecoder_big5) DecodeOneChar(input []byte) (c uint16, remain []byte, err error) { + if len(input) == 0 { + return 0, nil, eobError + } + first := input[0] + remain = input[1:] + c = uint16(first) + if first <= 0x7F || first == 0xFF { + return + } + if len(remain) == 0 { + return c, nil, eobError + } + second := remain[0] + remain = remain[1:] + c = c<<8 | uint16(second) + if second < 0x40 || second == 0x7F || second == 0xFF { + err = badCharError + } + return +} + +var commonChars_big5 = []uint16{ + 0xa140, 0xa141, 0xa142, 0xa143, 0xa147, 0xa149, 0xa175, 0xa176, 0xa440, 0xa446, + 0xa447, 0xa448, 0xa451, 0xa454, 0xa457, 0xa464, 0xa46a, 0xa46c, 0xa477, 0xa4a3, + 0xa4a4, 0xa4a7, 0xa4c1, 0xa4ce, 0xa4d1, 0xa4df, 0xa4e8, 0xa4fd, 0xa540, 0xa548, + 0xa558, 0xa569, 0xa5cd, 0xa5e7, 0xa657, 0xa661, 0xa662, 0xa668, 0xa670, 0xa6a8, + 0xa6b3, 0xa6b9, 0xa6d3, 0xa6db, 0xa6e6, 0xa6f2, 0xa740, 0xa751, 0xa759, 0xa7da, + 0xa8a3, 0xa8a5, 0xa8ad, 0xa8d1, 0xa8d3, 0xa8e4, 0xa8fc, 0xa9c0, 0xa9d2, 0xa9f3, + 0xaa6b, 0xaaba, 0xaabe, 0xaacc, 0xaafc, 0xac47, 0xac4f, 0xacb0, 0xacd2, 0xad59, + 0xaec9, 0xafe0, 0xb0ea, 0xb16f, 0xb2b3, 0xb2c4, 0xb36f, 0xb44c, 0xb44e, 0xb54c, + 0xb5a5, 0xb5bd, 0xb5d0, 0xb5d8, 0xb671, 0xb7ed, 0xb867, 0xb944, 0xbad8, 0xbb44, + 0xbba1, 0xbdd1, 0xc2c4, 0xc3b9, 0xc440, 0xc45f, +} + +func newRecognizer_big5() *recognizerMultiByte { + return &recognizerMultiByte{ + "Big5", + "zh", + charDecoder_big5{}, + commonChars_big5, + } +} + +type charDecoder_gb_18030 
struct { // stateless decoder; all behavior lives in DecodeOneChar
+}
+
+func (charDecoder_gb_18030) DecodeOneChar(input []byte) (c uint16, remain []byte, err error) { // reads one 1-, 2- or 4-byte character from the front of input; remain is the unread tail
+	if len(input) == 0 {
+		return 0, nil, eobError
+	}
+	first := input[0]
+	remain = input[1:]
+	c = uint16(first)
+	if first <= 0x80 { // returned as a complete single-byte character
+		return
+	}
+	if len(remain) == 0 {
+		return 0, nil, eobError
+	}
+	second := remain[0]
+	remain = remain[1:]
+	c = c<<8 | uint16(second)
+	if first >= 0x81 && first <= 0xFE { // a lead byte of 0xFF skips this block and is returned as a two-byte value without error
+		if (second >= 0x40 && second <= 0x7E) || (second >= 0x80 && second <= 0xFE) { // accepted two-byte form
+			return
+		}
+
+		if second >= 0x30 && second <= 0x39 { // a digit-range second byte starts the four-byte form
+			if len(remain) == 0 {
+				return 0, nil, eobError
+			}
+			third := remain[0]
+			remain = remain[1:]
+			if third >= 0x81 && third <= 0xFE {
+				if len(remain) == 0 {
+					return 0, nil, eobError
+				}
+				fourth := remain[0]
+				remain = remain[1:]
+				if fourth >= 0x30 && fourth <= 0x39 {
+					c = c<<16 | uint16(third)<<8 | uint16(fourth) // NOTE(review): c is uint16, so c<<16 is always 0 and only third<<8|fourth is kept; widening needs a change to the return type
+					return
+				}
+			}
+		}
+		err = badCharError // every other byte combination after a 0x81..0xFE lead byte is rejected; the bytes read so far stay consumed
+	}
+	return
+}
+
+var commonChars_gb_18030 = []uint16{ // two-byte values in ascending order; NOTE(review): presumably searched by recognizerMultiByte — confirm at the use site
+	0xa1a1, 0xa1a2, 0xa1a3, 0xa1a4, 0xa1b0, 0xa1b1, 0xa1f1, 0xa1f3, 0xa3a1, 0xa3ac,
+	0xa3ba, 0xb1a8, 0xb1b8, 0xb1be, 0xb2bb, 0xb3c9, 0xb3f6, 0xb4f3, 0xb5bd, 0xb5c4,
+	0xb5e3, 0xb6af, 0xb6d4, 0xb6e0, 0xb7a2, 0xb7a8, 0xb7bd, 0xb7d6, 0xb7dd, 0xb8b4,
+	0xb8df, 0xb8f6, 0xb9ab, 0xb9c9, 0xb9d8, 0xb9fa, 0xb9fd, 0xbacd, 0xbba7, 0xbbd6,
+	0xbbe1, 0xbbfa, 0xbcbc, 0xbcdb, 0xbcfe, 0xbdcc, 0xbecd, 0xbedd, 0xbfb4, 0xbfc6,
+	0xbfc9, 0xc0b4, 0xc0ed, 0xc1cb, 0xc2db, 0xc3c7, 0xc4dc, 0xc4ea, 0xc5cc, 0xc6f7,
+	0xc7f8, 0xc8ab, 0xc8cb, 0xc8d5, 0xc8e7, 0xc9cf, 0xc9fa, 0xcab1, 0xcab5, 0xcac7,
+	0xcad0, 0xcad6, 0xcaf5, 0xcafd, 0xccec, 0xcdf8, 0xceaa, 0xcec4, 0xced2, 0xcee5,
+	0xcfb5, 0xcfc2, 0xcfd6, 0xd0c2, 0xd0c5, 0xd0d0, 0xd0d4, 0xd1a7, 0xd2aa, 0xd2b2,
+	0xd2b5, 0xd2bb, 0xd2d4, 0xd3c3, 0xd3d0, 0xd3fd, 0xd4c2, 0xd4da, 0xd5e2, 0xd6d0,
+}
+
+func newRecognizer_gb_18030() *recognizerMultiByte { // GB-18030 recognizer: "zh", the GB-18030 decoder and its common-character table
+	return &recognizerMultiByte{ // positional literal; field order follows recognizerMultiByte, declared outside this view
+		"GB-18030",
+		"zh",
+		charDecoder_gb_18030{},
+		commonChars_gb_18030,
+	}
+}
diff --git
a/vendor/github.com/saintfish/chardet/recognizer.go b/vendor/github.com/saintfish/chardet/recognizer.go
new file mode 100644
index 000000000..1bf8461c3
--- /dev/null
+++ b/vendor/github.com/saintfish/chardet/recognizer.go
@@ -0,0 +1,83 @@
+package chardet
+
+type recognizer interface { // implemented by every charset recognizer
+	Match(*recognizerInput) recognizerOutput // evaluates one prepared input and returns this recognizer's result
+}
+
+type recognizerOutput Result // same underlying type as Result, which is declared outside this file
+
+type recognizerInput struct { // the prepared form of one input, built once by newRecognizerInput
+	raw         []byte // the bytes exactly as passed to newRecognizerInput
+	input       []byte // the bytes to analyse: tag-stripped text or a prefix of raw, at most 8192 bytes
+	tagStripped bool   // true when input is the tag-stripped text rather than a prefix of raw
+	byteStats   []int  // 256 counters: occurrences of each byte value in input
+	hasC1Bytes  bool   // true when input contains at least one byte in 0x80..0x9F
+}
+
+func newRecognizerInput(raw []byte, stripTag bool) *recognizerInput { // prepares raw for matching: optional tag stripping, byte histogram, C1 flag
+	input, stripped := mayStripInput(raw, stripTag)
+	byteStats := computeByteStats(input)
+	return &recognizerInput{
+		raw:         raw,
+		input:       input,
+		tagStripped: stripped,
+		byteStats:   byteStats,
+		hasC1Bytes:  computeHasC1Bytes(byteStats),
+	}
+}
+
+func mayStripInput(raw []byte, stripTag bool) (out []byte, stripped bool) { // returns the text outside <...> tags when stripping is requested and looks sane, else a prefix of raw
+	const inputBufferSize = 8192 // upper bound on len(out) in both paths
+	out = make([]byte, 0, inputBufferSize)
+	var badTags, openTags int32
+	var inMarkup bool = false
+	stripped = false
+	if stripTag {
+		stripped = true
+		for _, c := range raw {
+			if c == '<' {
+				if inMarkup { // a '<' while already inside a tag is counted as malformed markup
+					badTags += 1
+				}
+				inMarkup = true
+				openTags += 1
+			}
+			if !inMarkup { // only bytes outside tags are kept; the '<' and '>' delimiters themselves are dropped
+				out = append(out, c)
+				if len(out) >= inputBufferSize {
+					break
+				}
+			}
+			if c == '>' {
+				inMarkup = false
+			}
+		}
+	}
+	if openTags < 5 || openTags/5 < badTags || (len(out) < 100 && len(raw) > 600) { // fewer than 5 tags (always true when stripTag is false), too many malformed tags, or under 100 bytes left from more than 600: use raw instead
+		limit := len(raw)
+		if limit > inputBufferSize {
+			limit = inputBufferSize
+		}
+		out = make([]byte, limit) // fresh copy, so out never shares storage with raw
+		copy(out, raw[:limit])
+		stripped = false
+	}
+	return
+}
+
+func computeByteStats(input []byte) []int { // histogram: element i is the number of bytes in input equal to i
+	r := make([]int, 256)
+	for _, c := range input {
+		r[c] += 1
+	}
+	return r
+}
+
+func computeHasC1Bytes(byteStats []int) bool { // reports whether any byte value 0x80..0x9F has a non-zero count; byteStats must have at least 0xA0 elements
+	for _, count := range byteStats[0x80 : 0x9F+1] {
+		if count > 0 {
+			return true
+		}
+	}
+	return false
+}
diff --git a/vendor/github.com/saintfish/chardet/single_byte.go b/vendor/github.com/saintfish/chardet/single_byte.go
new file mode 100644
index
000000000..efe41c901
--- /dev/null
+++ b/vendor/github.com/saintfish/chardet/single_byte.go
@@ -0,0 +1,882 @@
+package chardet
+
+// recognizerSingleByte is the recognizer for one (charset, language) pair of the single-byte charset family.
+type recognizerSingleByte struct {
+	charset          string      // name reported by default
+	hasC1ByteCharset string      // if non-empty, name reported instead when the input has C1 bytes
+	language         string      // language code copied into the result
+	charMap          *[256]byte  // per-byte mapping applied to the input before trigram counting
+	ngram            *[64]uint32 // table of 64 three-byte values looked up by ngramState
+}
+
+func (r *recognizerSingleByte) Match(input *recognizerInput) recognizerOutput { // confidence comes from parseNgram; the charset name depends on input.hasC1Bytes
+	var charset string = r.charset
+	if input.hasC1Bytes && len(r.hasC1ByteCharset) > 0 {
+		charset = r.hasC1ByteCharset
+	}
+	return recognizerOutput{
+		Charset:    charset,
+		Language:   r.language,
+		Confidence: r.parseNgram(input.input),
+	}
+}
+
+type ngramState struct { // running trigram window plus hit statistics against one table
+	ngram                uint32      // the last three bytes added, packed into the low 24 bits
+	ignoreSpace          bool        // true when the previous byte was 0x20, so a following 0x20 is skipped
+	ngramCount, ngramHit uint32      // windows examined / windows found in table
+	table                *[64]uint32 // values searched by lookup
+}
+
+func newNgramState(table *[64]uint32) *ngramState { // zeroed state bound to table
+	return &ngramState{
+		ngram:       0,
+		ignoreSpace: false,
+		ngramCount:  0,
+		ngramHit:    0,
+		table:       table,
+	}
+}
+
+func (s *ngramState) AddByte(b byte) { // shifts b into the window and counts a hit if the new window is in the table; repeated spaces are collapsed
+	const ngramMask = 0xFFFFFF // keeps three bytes
+	if !(b == 0x20 && s.ignoreSpace) {
+		s.ngram = ((s.ngram << 8) | uint32(b)) & ngramMask
+		s.ignoreSpace = (s.ngram == 0x20) // no lasting effect: overwritten by the unconditional assignment below
+		s.ngramCount++
+		if s.lookup() {
+			s.ngramHit++
+		}
+	}
+	s.ignoreSpace = (b == 0x20)
+}
+
+func (s *ngramState) HitRate() float32 { // ngramHit/ngramCount, or 0 before any window has been counted
+	if s.ngramCount == 0 {
+		return 0
+	}
+	return float32(s.ngramHit) / float32(s.ngramCount)
+}
+
+func (s *ngramState) lookup() bool { // unrolled binary search for s.ngram over the 64 entries; only correct if the table is sorted ascending
+	var index int
+	if s.table[index+32] <= s.ngram { // halving steps of 32, 16, 8, 4, 2 and 1 keep index <= 63
+		index += 32
+	}
+	if s.table[index+16] <= s.ngram {
+		index += 16
+	}
+	if s.table[index+8] <= s.ngram {
+		index += 8
+	}
+	if s.table[index+4] <= s.ngram {
+		index += 4
+	}
+	if s.table[index+2] <= s.ngram {
+		index += 2
+	}
+	if s.table[index+1] <= s.ngram {
+		index += 1
+	}
+	if s.table[index] > s.ngram { // index only advances onto entries <= ngram, so this can only be true at index 0
+		index -= 1
+	}
+	if index < 0 || s.table[index] != s.ngram { // the index < 0 test runs first, so table[-1] is never read
+		return false
+	}
+	return true
+}
+
+func (r *recognizerSingleByte) parseNgram(input []byte) int {
+	state := newNgramState(r.ngram)
+	for _, inChar := range input {
+		c :=
r.charMap[inChar] + if c != 0 { + state.AddByte(c) + } + } + state.AddByte(0x20) + rate := state.HitRate() + if rate > 0.33 { + return 98 + } + return int(rate * 300) +} + +var charMap_8859_1 = [256]byte{ + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, + 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, + 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, + 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, + 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, + 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, + 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0xAA, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0xB5, 0x20, 0x20, + 0x20, 0x20, 0xBA, 0x20, 0x20, 0x20, 0x20, 0x20, + 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, + 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, + 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20, + 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xDF, + 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, + 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, + 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20, + 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF, +} + +var ngrams_8859_1_en = [64]uint32{ + 0x206120, 0x20616E, 0x206265, 0x20636F, 0x20666F, 0x206861, 0x206865, 0x20696E, 0x206D61, 0x206F66, 0x207072, 0x207265, 0x207361, 0x207374, 0x207468, 
0x20746F, + 0x207768, 0x616964, 0x616C20, 0x616E20, 0x616E64, 0x617320, 0x617420, 0x617465, 0x617469, 0x642061, 0x642074, 0x652061, 0x652073, 0x652074, 0x656420, 0x656E74, + 0x657220, 0x657320, 0x666F72, 0x686174, 0x686520, 0x686572, 0x696420, 0x696E20, 0x696E67, 0x696F6E, 0x697320, 0x6E2061, 0x6E2074, 0x6E6420, 0x6E6720, 0x6E7420, + 0x6F6620, 0x6F6E20, 0x6F7220, 0x726520, 0x727320, 0x732061, 0x732074, 0x736169, 0x737420, 0x742074, 0x746572, 0x746861, 0x746865, 0x74696F, 0x746F20, 0x747320, +} + +var ngrams_8859_1_da = [64]uint32{ + 0x206166, 0x206174, 0x206465, 0x20656E, 0x206572, 0x20666F, 0x206861, 0x206920, 0x206D65, 0x206F67, 0x2070E5, 0x207369, 0x207374, 0x207469, 0x207669, 0x616620, + 0x616E20, 0x616E64, 0x617220, 0x617420, 0x646520, 0x64656E, 0x646572, 0x646574, 0x652073, 0x656420, 0x656465, 0x656E20, 0x656E64, 0x657220, 0x657265, 0x657320, + 0x657420, 0x666F72, 0x676520, 0x67656E, 0x676572, 0x696765, 0x696C20, 0x696E67, 0x6B6520, 0x6B6B65, 0x6C6572, 0x6C6967, 0x6C6C65, 0x6D6564, 0x6E6465, 0x6E6520, + 0x6E6720, 0x6E6765, 0x6F6720, 0x6F6D20, 0x6F7220, 0x70E520, 0x722064, 0x722065, 0x722073, 0x726520, 0x737465, 0x742073, 0x746520, 0x746572, 0x74696C, 0x766572, +} + +var ngrams_8859_1_de = [64]uint32{ + 0x20616E, 0x206175, 0x206265, 0x206461, 0x206465, 0x206469, 0x206569, 0x206765, 0x206861, 0x20696E, 0x206D69, 0x207363, 0x207365, 0x20756E, 0x207665, 0x20766F, + 0x207765, 0x207A75, 0x626572, 0x636820, 0x636865, 0x636874, 0x646173, 0x64656E, 0x646572, 0x646965, 0x652064, 0x652073, 0x65696E, 0x656974, 0x656E20, 0x657220, + 0x657320, 0x67656E, 0x68656E, 0x687420, 0x696368, 0x696520, 0x696E20, 0x696E65, 0x697420, 0x6C6963, 0x6C6C65, 0x6E2061, 0x6E2064, 0x6E2073, 0x6E6420, 0x6E6465, + 0x6E6520, 0x6E6720, 0x6E6765, 0x6E7465, 0x722064, 0x726465, 0x726569, 0x736368, 0x737465, 0x742064, 0x746520, 0x74656E, 0x746572, 0x756E64, 0x756E67, 0x766572, +} + +var ngrams_8859_1_es = [64]uint32{ + 0x206120, 0x206361, 0x20636F, 0x206465, 0x20656C, 0x20656E, 0x206573, 0x20696E, 
0x206C61, 0x206C6F, 0x207061, 0x20706F, 0x207072, 0x207175, 0x207265, 0x207365, + 0x20756E, 0x207920, 0x612063, 0x612064, 0x612065, 0x61206C, 0x612070, 0x616369, 0x61646F, 0x616C20, 0x617220, 0x617320, 0x6369F3, 0x636F6E, 0x646520, 0x64656C, + 0x646F20, 0x652064, 0x652065, 0x65206C, 0x656C20, 0x656E20, 0x656E74, 0x657320, 0x657374, 0x69656E, 0x69F36E, 0x6C6120, 0x6C6F73, 0x6E2065, 0x6E7465, 0x6F2064, + 0x6F2065, 0x6F6E20, 0x6F7220, 0x6F7320, 0x706172, 0x717565, 0x726120, 0x726573, 0x732064, 0x732065, 0x732070, 0x736520, 0x746520, 0x746F20, 0x756520, 0xF36E20, +} + +var ngrams_8859_1_fr = [64]uint32{ + 0x206175, 0x20636F, 0x206461, 0x206465, 0x206475, 0x20656E, 0x206574, 0x206C61, 0x206C65, 0x207061, 0x20706F, 0x207072, 0x207175, 0x207365, 0x20736F, 0x20756E, + 0x20E020, 0x616E74, 0x617469, 0x636520, 0x636F6E, 0x646520, 0x646573, 0x647520, 0x652061, 0x652063, 0x652064, 0x652065, 0x65206C, 0x652070, 0x652073, 0x656E20, + 0x656E74, 0x657220, 0x657320, 0x657420, 0x657572, 0x696F6E, 0x697320, 0x697420, 0x6C6120, 0x6C6520, 0x6C6573, 0x6D656E, 0x6E2064, 0x6E6520, 0x6E7320, 0x6E7420, + 0x6F6E20, 0x6F6E74, 0x6F7572, 0x717565, 0x72206C, 0x726520, 0x732061, 0x732064, 0x732065, 0x73206C, 0x732070, 0x742064, 0x746520, 0x74696F, 0x756520, 0x757220, +} + +var ngrams_8859_1_it = [64]uint32{ + 0x20616C, 0x206368, 0x20636F, 0x206465, 0x206469, 0x206520, 0x20696C, 0x20696E, 0x206C61, 0x207065, 0x207072, 0x20756E, 0x612063, 0x612064, 0x612070, 0x612073, + 0x61746F, 0x636865, 0x636F6E, 0x64656C, 0x646920, 0x652061, 0x652063, 0x652064, 0x652069, 0x65206C, 0x652070, 0x652073, 0x656C20, 0x656C6C, 0x656E74, 0x657220, + 0x686520, 0x692061, 0x692063, 0x692064, 0x692073, 0x696120, 0x696C20, 0x696E20, 0x696F6E, 0x6C6120, 0x6C6520, 0x6C6920, 0x6C6C61, 0x6E6520, 0x6E6920, 0x6E6F20, + 0x6E7465, 0x6F2061, 0x6F2064, 0x6F2069, 0x6F2073, 0x6F6E20, 0x6F6E65, 0x706572, 0x726120, 0x726520, 0x736920, 0x746120, 0x746520, 0x746920, 0x746F20, 0x7A696F, +} + +var ngrams_8859_1_nl = [64]uint32{ + 0x20616C, 
0x206265, 0x206461, 0x206465, 0x206469, 0x206565, 0x20656E, 0x206765, 0x206865, 0x20696E, 0x206D61, 0x206D65, 0x206F70, 0x207465, 0x207661, 0x207665, + 0x20766F, 0x207765, 0x207A69, 0x61616E, 0x616172, 0x616E20, 0x616E64, 0x617220, 0x617420, 0x636874, 0x646520, 0x64656E, 0x646572, 0x652062, 0x652076, 0x65656E, + 0x656572, 0x656E20, 0x657220, 0x657273, 0x657420, 0x67656E, 0x686574, 0x696520, 0x696E20, 0x696E67, 0x697320, 0x6E2062, 0x6E2064, 0x6E2065, 0x6E2068, 0x6E206F, + 0x6E2076, 0x6E6465, 0x6E6720, 0x6F6E64, 0x6F6F72, 0x6F7020, 0x6F7220, 0x736368, 0x737465, 0x742064, 0x746520, 0x74656E, 0x746572, 0x76616E, 0x766572, 0x766F6F, +} + +var ngrams_8859_1_no = [64]uint32{ + 0x206174, 0x206176, 0x206465, 0x20656E, 0x206572, 0x20666F, 0x206861, 0x206920, 0x206D65, 0x206F67, 0x2070E5, 0x207365, 0x20736B, 0x20736F, 0x207374, 0x207469, + 0x207669, 0x20E520, 0x616E64, 0x617220, 0x617420, 0x646520, 0x64656E, 0x646574, 0x652073, 0x656420, 0x656E20, 0x656E65, 0x657220, 0x657265, 0x657420, 0x657474, + 0x666F72, 0x67656E, 0x696B6B, 0x696C20, 0x696E67, 0x6B6520, 0x6B6B65, 0x6C6520, 0x6C6C65, 0x6D6564, 0x6D656E, 0x6E2073, 0x6E6520, 0x6E6720, 0x6E6765, 0x6E6E65, + 0x6F6720, 0x6F6D20, 0x6F7220, 0x70E520, 0x722073, 0x726520, 0x736F6D, 0x737465, 0x742073, 0x746520, 0x74656E, 0x746572, 0x74696C, 0x747420, 0x747465, 0x766572, +} + +var ngrams_8859_1_pt = [64]uint32{ + 0x206120, 0x20636F, 0x206461, 0x206465, 0x20646F, 0x206520, 0x206573, 0x206D61, 0x206E6F, 0x206F20, 0x207061, 0x20706F, 0x207072, 0x207175, 0x207265, 0x207365, + 0x20756D, 0x612061, 0x612063, 0x612064, 0x612070, 0x616465, 0x61646F, 0x616C20, 0x617220, 0x617261, 0x617320, 0x636F6D, 0x636F6E, 0x646120, 0x646520, 0x646F20, + 0x646F73, 0x652061, 0x652064, 0x656D20, 0x656E74, 0x657320, 0x657374, 0x696120, 0x696361, 0x6D656E, 0x6E7465, 0x6E746F, 0x6F2061, 0x6F2063, 0x6F2064, 0x6F2065, + 0x6F2070, 0x6F7320, 0x706172, 0x717565, 0x726120, 0x726573, 0x732061, 0x732064, 0x732065, 0x732070, 0x737461, 0x746520, 0x746F20, 0x756520, 
0xE36F20, 0xE7E36F, +} + +var ngrams_8859_1_sv = [64]uint32{ + 0x206174, 0x206176, 0x206465, 0x20656E, 0x2066F6, 0x206861, 0x206920, 0x20696E, 0x206B6F, 0x206D65, 0x206F63, 0x2070E5, 0x20736B, 0x20736F, 0x207374, 0x207469, + 0x207661, 0x207669, 0x20E472, 0x616465, 0x616E20, 0x616E64, 0x617220, 0x617474, 0x636820, 0x646520, 0x64656E, 0x646572, 0x646574, 0x656420, 0x656E20, 0x657220, + 0x657420, 0x66F672, 0x67656E, 0x696C6C, 0x696E67, 0x6B6120, 0x6C6C20, 0x6D6564, 0x6E2073, 0x6E6120, 0x6E6465, 0x6E6720, 0x6E6765, 0x6E696E, 0x6F6368, 0x6F6D20, + 0x6F6E20, 0x70E520, 0x722061, 0x722073, 0x726120, 0x736B61, 0x736F6D, 0x742073, 0x746120, 0x746520, 0x746572, 0x74696C, 0x747420, 0x766172, 0xE47220, 0xF67220, +} + +func newRecognizer_8859_1(language string, ngram *[64]uint32) *recognizerSingleByte { + return &recognizerSingleByte{ + charset: "ISO-8859-1", + hasC1ByteCharset: "windows-1252", + language: language, + charMap: &charMap_8859_1, + ngram: ngram, + } +} + +func newRecognizer_8859_1_en() *recognizerSingleByte { + return newRecognizer_8859_1("en", &ngrams_8859_1_en) +} +func newRecognizer_8859_1_da() *recognizerSingleByte { + return newRecognizer_8859_1("da", &ngrams_8859_1_da) +} +func newRecognizer_8859_1_de() *recognizerSingleByte { + return newRecognizer_8859_1("de", &ngrams_8859_1_de) +} +func newRecognizer_8859_1_es() *recognizerSingleByte { + return newRecognizer_8859_1("es", &ngrams_8859_1_es) +} +func newRecognizer_8859_1_fr() *recognizerSingleByte { + return newRecognizer_8859_1("fr", &ngrams_8859_1_fr) +} +func newRecognizer_8859_1_it() *recognizerSingleByte { + return newRecognizer_8859_1("it", &ngrams_8859_1_it) +} +func newRecognizer_8859_1_nl() *recognizerSingleByte { + return newRecognizer_8859_1("nl", &ngrams_8859_1_nl) +} +func newRecognizer_8859_1_no() *recognizerSingleByte { + return newRecognizer_8859_1("no", &ngrams_8859_1_no) +} +func newRecognizer_8859_1_pt() *recognizerSingleByte { + return newRecognizer_8859_1("pt", &ngrams_8859_1_pt) +} 
+func newRecognizer_8859_1_sv() *recognizerSingleByte { + return newRecognizer_8859_1("sv", &ngrams_8859_1_sv) +} + +var charMap_8859_2 = [256]byte{ + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, + 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, + 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, + 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, + 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, + 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, + 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0xB1, 0x20, 0xB3, 0x20, 0xB5, 0xB6, 0x20, + 0x20, 0xB9, 0xBA, 0xBB, 0xBC, 0x20, 0xBE, 0xBF, + 0x20, 0xB1, 0x20, 0xB3, 0x20, 0xB5, 0xB6, 0xB7, + 0x20, 0xB9, 0xBA, 0xBB, 0xBC, 0x20, 0xBE, 0xBF, + 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, + 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, + 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20, + 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xDF, + 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, + 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, + 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20, + 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0x20, +} + +var ngrams_8859_2_cs = [64]uint32{ + 0x206120, 0x206279, 0x20646F, 0x206A65, 0x206E61, 0x206E65, 0x206F20, 0x206F64, 0x20706F, 0x207072, 0x2070F8, 0x20726F, 0x207365, 0x20736F, 0x207374, 0x20746F, + 0x207620, 0x207679, 0x207A61, 0x612070, 
0x636520, 0x636820, 0x652070, 0x652073, 0x652076, 0x656D20, 0x656EED, 0x686F20, 0x686F64, 0x697374, 0x6A6520, 0x6B7465, + 0x6C6520, 0x6C6920, 0x6E6120, 0x6EE920, 0x6EEC20, 0x6EED20, 0x6F2070, 0x6F646E, 0x6F6A69, 0x6F7374, 0x6F7520, 0x6F7661, 0x706F64, 0x706F6A, 0x70726F, 0x70F865, + 0x736520, 0x736F75, 0x737461, 0x737469, 0x73746E, 0x746572, 0x746EED, 0x746F20, 0x752070, 0xBE6520, 0xE16EED, 0xE9686F, 0xED2070, 0xED2073, 0xED6D20, 0xF86564, +} + +var ngrams_8859_2_hu = [64]uint32{ + 0x206120, 0x20617A, 0x206265, 0x206567, 0x20656C, 0x206665, 0x206861, 0x20686F, 0x206973, 0x206B65, 0x206B69, 0x206BF6, 0x206C65, 0x206D61, 0x206D65, 0x206D69, + 0x206E65, 0x20737A, 0x207465, 0x20E973, 0x612061, 0x61206B, 0x61206D, 0x612073, 0x616B20, 0x616E20, 0x617A20, 0x62616E, 0x62656E, 0x656779, 0x656B20, 0x656C20, + 0x656C65, 0x656D20, 0x656E20, 0x657265, 0x657420, 0x657465, 0x657474, 0x677920, 0x686F67, 0x696E74, 0x697320, 0x6B2061, 0x6BF67A, 0x6D6567, 0x6D696E, 0x6E2061, + 0x6E616B, 0x6E656B, 0x6E656D, 0x6E7420, 0x6F6779, 0x732061, 0x737A65, 0x737A74, 0x737AE1, 0x73E967, 0x742061, 0x747420, 0x74E173, 0x7A6572, 0xE16E20, 0xE97320, +} + +var ngrams_8859_2_pl = [64]uint32{ + 0x20637A, 0x20646F, 0x206920, 0x206A65, 0x206B6F, 0x206D61, 0x206D69, 0x206E61, 0x206E69, 0x206F64, 0x20706F, 0x207072, 0x207369, 0x207720, 0x207769, 0x207779, + 0x207A20, 0x207A61, 0x612070, 0x612077, 0x616E69, 0x636820, 0x637A65, 0x637A79, 0x646F20, 0x647A69, 0x652070, 0x652073, 0x652077, 0x65207A, 0x65676F, 0x656A20, + 0x656D20, 0x656E69, 0x676F20, 0x696120, 0x696520, 0x69656A, 0x6B6120, 0x6B6920, 0x6B6965, 0x6D6965, 0x6E6120, 0x6E6961, 0x6E6965, 0x6F2070, 0x6F7761, 0x6F7769, + 0x706F6C, 0x707261, 0x70726F, 0x70727A, 0x727A65, 0x727A79, 0x7369EA, 0x736B69, 0x737461, 0x776965, 0x796368, 0x796D20, 0x7A6520, 0x7A6965, 0x7A7920, 0xF37720, +} + +var ngrams_8859_2_ro = [64]uint32{ + 0x206120, 0x206163, 0x206361, 0x206365, 0x20636F, 0x206375, 0x206465, 0x206469, 0x206C61, 0x206D61, 0x207065, 0x207072, 0x207365, 
0x2073E3, 0x20756E, 0x20BA69, + 0x20EE6E, 0x612063, 0x612064, 0x617265, 0x617420, 0x617465, 0x617520, 0x636172, 0x636F6E, 0x637520, 0x63E320, 0x646520, 0x652061, 0x652063, 0x652064, 0x652070, + 0x652073, 0x656120, 0x656920, 0x656C65, 0x656E74, 0x657374, 0x692061, 0x692063, 0x692064, 0x692070, 0x696520, 0x696920, 0x696E20, 0x6C6120, 0x6C6520, 0x6C6F72, + 0x6C7569, 0x6E6520, 0x6E7472, 0x6F7220, 0x70656E, 0x726520, 0x726561, 0x727520, 0x73E320, 0x746520, 0x747275, 0x74E320, 0x756920, 0x756C20, 0xBA6920, 0xEE6E20, +} + +func newRecognizer_8859_2(language string, ngram *[64]uint32) *recognizerSingleByte { + return &recognizerSingleByte{ + charset: "ISO-8859-2", + hasC1ByteCharset: "windows-1250", + language: language, + charMap: &charMap_8859_2, + ngram: ngram, + } +} + +func newRecognizer_8859_2_cs() *recognizerSingleByte { + return newRecognizer_8859_1("cs", &ngrams_8859_2_cs) +} +func newRecognizer_8859_2_hu() *recognizerSingleByte { + return newRecognizer_8859_1("hu", &ngrams_8859_2_hu) +} +func newRecognizer_8859_2_pl() *recognizerSingleByte { + return newRecognizer_8859_1("pl", &ngrams_8859_2_pl) +} +func newRecognizer_8859_2_ro() *recognizerSingleByte { + return newRecognizer_8859_1("ro", &ngrams_8859_2_ro) +} + +var charMap_8859_5 = [256]byte{ + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, + 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, + 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, + 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, + 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, + 0x70, 0x71, 0x72, 0x73, 0x74, 
0x75, 0x76, 0x77, + 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, + 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0x20, 0xFE, 0xFF, + 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, + 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, + 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, + 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, + 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, + 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, + 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, + 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, + 0x20, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, + 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0x20, 0xFE, 0xFF, +} + +var ngrams_8859_5_ru = [64]uint32{ + 0x20D220, 0x20D2DE, 0x20D4DE, 0x20D7D0, 0x20D820, 0x20DAD0, 0x20DADE, 0x20DDD0, 0x20DDD5, 0x20DED1, 0x20DFDE, 0x20DFE0, 0x20E0D0, 0x20E1DE, 0x20E1E2, 0x20E2DE, + 0x20E7E2, 0x20EDE2, 0xD0DDD8, 0xD0E2EC, 0xD3DE20, 0xD5DBEC, 0xD5DDD8, 0xD5E1E2, 0xD5E220, 0xD820DF, 0xD8D520, 0xD8D820, 0xD8EF20, 0xDBD5DD, 0xDBD820, 0xDBECDD, + 0xDDD020, 0xDDD520, 0xDDD8D5, 0xDDD8EF, 0xDDDE20, 0xDDDED2, 0xDE20D2, 0xDE20DF, 0xDE20E1, 0xDED220, 0xDED2D0, 0xDED3DE, 0xDED920, 0xDEDBEC, 0xDEDC20, 0xDEE1E2, + 0xDFDEDB, 0xDFE0D5, 0xDFE0D8, 0xDFE0DE, 0xE0D0D2, 0xE0D5D4, 0xE1E2D0, 0xE1E2D2, 0xE1E2D8, 0xE1EF20, 0xE2D5DB, 0xE2DE20, 0xE2DEE0, 0xE2EC20, 0xE7E2DE, 0xEBE520, +} + +func newRecognizer_8859_5(language string, ngram *[64]uint32) *recognizerSingleByte { + return &recognizerSingleByte{ + charset: "ISO-8859-5", + language: language, + charMap: &charMap_8859_5, + ngram: ngram, + } +} + +func newRecognizer_8859_5_ru() *recognizerSingleByte { + return newRecognizer_8859_5("ru", &ngrams_8859_5_ru) +} + +var charMap_8859_6 = [256]byte{ + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 
0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, + 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, + 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, + 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, + 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, + 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, + 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, + 0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF, + 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, + 0xD8, 0xD9, 0xDA, 0x20, 0x20, 0x20, 0x20, 0x20, + 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, + 0xE8, 0xE9, 0xEA, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, +} + +var ngrams_8859_6_ar = [64]uint32{ + 0x20C7E4, 0x20C7E6, 0x20C8C7, 0x20D9E4, 0x20E1EA, 0x20E4E4, 0x20E5E6, 0x20E8C7, 0xC720C7, 0xC7C120, 0xC7CA20, 0xC7D120, 0xC7E420, 0xC7E4C3, 0xC7E4C7, 0xC7E4C8, + 0xC7E4CA, 0xC7E4CC, 0xC7E4CD, 0xC7E4CF, 0xC7E4D3, 0xC7E4D9, 0xC7E4E2, 0xC7E4E5, 0xC7E4E8, 0xC7E4EA, 0xC7E520, 0xC7E620, 0xC7E6CA, 0xC820C7, 0xC920C7, 0xC920E1, + 0xC920E4, 0xC920E5, 0xC920E8, 0xCA20C7, 0xCF20C7, 0xCFC920, 0xD120C7, 0xD1C920, 0xD320C7, 0xD920C7, 
0xD9E4E9, 0xE1EA20, 0xE420C7, 0xE4C920, 0xE4E920, 0xE4EA20, + 0xE520C7, 0xE5C720, 0xE5C920, 0xE5E620, 0xE620C7, 0xE720C7, 0xE7C720, 0xE8C7E4, 0xE8E620, 0xE920C7, 0xEA20C7, 0xEA20E5, 0xEA20E8, 0xEAC920, 0xEAD120, 0xEAE620, +} + +func newRecognizer_8859_6(language string, ngram *[64]uint32) *recognizerSingleByte { + return &recognizerSingleByte{ + charset: "ISO-8859-6", + language: language, + charMap: &charMap_8859_6, + ngram: ngram, + } +} + +func newRecognizer_8859_6_ar() *recognizerSingleByte { + return newRecognizer_8859_6("ar", &ngrams_8859_6_ar) +} + +var charMap_8859_7 = [256]byte{ + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, + 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, + 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, + 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, + 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, + 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, + 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0xA1, 0xA2, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0xDC, 0x20, + 0xDD, 0xDE, 0xDF, 0x20, 0xFC, 0x20, 0xFD, 0xFE, + 0xC0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, + 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, + 0xF0, 0xF1, 0x20, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, + 0xF8, 0xF9, 0xFA, 0xFB, 0xDC, 0xDD, 0xDE, 0xDF, + 
0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, + 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, + 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, + 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0x20, +} + +var ngrams_8859_7_el = [64]uint32{ + 0x20E1ED, 0x20E1F0, 0x20E3E9, 0x20E4E9, 0x20E5F0, 0x20E720, 0x20EAE1, 0x20ECE5, 0x20EDE1, 0x20EF20, 0x20F0E1, 0x20F0EF, 0x20F0F1, 0x20F3F4, 0x20F3F5, 0x20F4E7, + 0x20F4EF, 0xDFE120, 0xE120E1, 0xE120F4, 0xE1E920, 0xE1ED20, 0xE1F0FC, 0xE1F220, 0xE3E9E1, 0xE5E920, 0xE5F220, 0xE720F4, 0xE7ED20, 0xE7F220, 0xE920F4, 0xE9E120, + 0xE9EADE, 0xE9F220, 0xEAE1E9, 0xEAE1F4, 0xECE520, 0xED20E1, 0xED20E5, 0xED20F0, 0xEDE120, 0xEFF220, 0xEFF520, 0xF0EFF5, 0xF0F1EF, 0xF0FC20, 0xF220E1, 0xF220E5, + 0xF220EA, 0xF220F0, 0xF220F4, 0xF3E520, 0xF3E720, 0xF3F4EF, 0xF4E120, 0xF4E1E9, 0xF4E7ED, 0xF4E7F2, 0xF4E9EA, 0xF4EF20, 0xF4EFF5, 0xF4F9ED, 0xF9ED20, 0xFEED20, +} + +func newRecognizer_8859_7(language string, ngram *[64]uint32) *recognizerSingleByte { + return &recognizerSingleByte{ + charset: "ISO-8859-7", + hasC1ByteCharset: "windows-1253", + language: language, + charMap: &charMap_8859_7, + ngram: ngram, + } +} + +func newRecognizer_8859_7_el() *recognizerSingleByte { + return newRecognizer_8859_7("el", &ngrams_8859_7_el) +} + +var charMap_8859_8 = [256]byte{ + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, + 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, + 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, + 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, + 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, + 
0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, + 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0xB5, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, + 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, + 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, + 0xF8, 0xF9, 0xFA, 0x20, 0x20, 0x20, 0x20, 0x20, +} + +var ngrams_8859_8_I_he = [64]uint32{ + 0x20E0E5, 0x20E0E7, 0x20E0E9, 0x20E0FA, 0x20E1E9, 0x20E1EE, 0x20E4E0, 0x20E4E5, 0x20E4E9, 0x20E4EE, 0x20E4F2, 0x20E4F9, 0x20E4FA, 0x20ECE0, 0x20ECE4, 0x20EEE0, + 0x20F2EC, 0x20F9EC, 0xE0FA20, 0xE420E0, 0xE420E1, 0xE420E4, 0xE420EC, 0xE420EE, 0xE420F9, 0xE4E5E0, 0xE5E020, 0xE5ED20, 0xE5EF20, 0xE5F820, 0xE5FA20, 0xE920E4, + 0xE9E420, 0xE9E5FA, 0xE9E9ED, 0xE9ED20, 0xE9EF20, 0xE9F820, 0xE9FA20, 0xEC20E0, 0xEC20E4, 0xECE020, 0xECE420, 0xED20E0, 0xED20E1, 0xED20E4, 0xED20EC, 0xED20EE, + 0xED20F9, 0xEEE420, 0xEF20E4, 0xF0E420, 0xF0E920, 0xF0E9ED, 0xF2EC20, 0xF820E4, 0xF8E9ED, 0xF9EC20, 0xFA20E0, 0xFA20E1, 0xFA20E4, 0xFA20EC, 0xFA20EE, 0xFA20F9, +} + +var ngrams_8859_8_he = [64]uint32{ + 0x20E0E5, 0x20E0EC, 0x20E4E9, 0x20E4EC, 0x20E4EE, 0x20E4F0, 0x20E9F0, 0x20ECF2, 0x20ECF9, 0x20EDE5, 0x20EDE9, 0x20EFE5, 0x20EFE9, 0x20F8E5, 0x20F8E9, 0x20FAE0, + 0x20FAE5, 0x20FAE9, 0xE020E4, 0xE020EC, 0xE020ED, 0xE020FA, 0xE0E420, 0xE0E5E4, 0xE0EC20, 0xE0EE20, 0xE120E4, 0xE120ED, 0xE120FA, 0xE420E4, 0xE420E9, 0xE420EC, + 0xE420ED, 0xE420EF, 0xE420F8, 0xE420FA, 
0xE4EC20, 0xE5E020, 0xE5E420, 0xE7E020, 0xE9E020, 0xE9E120, 0xE9E420, 0xEC20E4, 0xEC20ED, 0xEC20FA, 0xECF220, 0xECF920, + 0xEDE9E9, 0xEDE9F0, 0xEDE9F8, 0xEE20E4, 0xEE20ED, 0xEE20FA, 0xEEE120, 0xEEE420, 0xF2E420, 0xF920E4, 0xF920ED, 0xF920FA, 0xF9E420, 0xFAE020, 0xFAE420, 0xFAE5E9, +} + +func newRecognizer_8859_8(language string, ngram *[64]uint32) *recognizerSingleByte { + return &recognizerSingleByte{ + charset: "ISO-8859-8", + hasC1ByteCharset: "windows-1255", + language: language, + charMap: &charMap_8859_8, + ngram: ngram, + } +} + +func newRecognizer_8859_8_I_he() *recognizerSingleByte { + r := newRecognizer_8859_8("he", &ngrams_8859_8_I_he) + r.charset = "ISO-8859-8-I" + return r +} + +func newRecognizer_8859_8_he() *recognizerSingleByte { + return newRecognizer_8859_8("he", &ngrams_8859_8_he) +} + +var charMap_8859_9 = [256]byte{ + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, + 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, + 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, + 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, + 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, + 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, + 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0xAA, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0xB5, 0x20, 0x20, 
+ 0x20, 0x20, 0xBA, 0x20, 0x20, 0x20, 0x20, 0x20, + 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, + 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, + 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20, + 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0x69, 0xFE, 0xDF, + 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, + 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, + 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20, + 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF, +} + +var ngrams_8859_9_tr = [64]uint32{ + 0x206261, 0x206269, 0x206275, 0x206461, 0x206465, 0x206765, 0x206861, 0x20696C, 0x206B61, 0x206B6F, 0x206D61, 0x206F6C, 0x207361, 0x207461, 0x207665, 0x207961, + 0x612062, 0x616B20, 0x616C61, 0x616D61, 0x616E20, 0x616EFD, 0x617220, 0x617261, 0x6172FD, 0x6173FD, 0x617961, 0x626972, 0x646120, 0x646520, 0x646920, 0x652062, + 0x65206B, 0x656469, 0x656E20, 0x657220, 0x657269, 0x657369, 0x696C65, 0x696E20, 0x696E69, 0x697220, 0x6C616E, 0x6C6172, 0x6C6520, 0x6C6572, 0x6E2061, 0x6E2062, + 0x6E206B, 0x6E6461, 0x6E6465, 0x6E6520, 0x6E6920, 0x6E696E, 0x6EFD20, 0x72696E, 0x72FD6E, 0x766520, 0x796120, 0x796F72, 0xFD6E20, 0xFD6E64, 0xFD6EFD, 0xFDF0FD, +} + +func newRecognizer_8859_9(language string, ngram *[64]uint32) *recognizerSingleByte { + return &recognizerSingleByte{ + charset: "ISO-8859-9", + hasC1ByteCharset: "windows-1254", + language: language, + charMap: &charMap_8859_9, + ngram: ngram, + } +} + +func newRecognizer_8859_9_tr() *recognizerSingleByte { + return newRecognizer_8859_9("tr", &ngrams_8859_9_tr) +} + +var charMap_windows_1256 = [256]byte{ + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 
0x67, + 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, + 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, + 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, + 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, + 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, + 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x81, 0x20, 0x83, 0x20, 0x20, 0x20, 0x20, + 0x88, 0x20, 0x8A, 0x20, 0x9C, 0x8D, 0x8E, 0x8F, + 0x90, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x98, 0x20, 0x9A, 0x20, 0x9C, 0x20, 0x20, 0x9F, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0xAA, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0xB5, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, + 0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF, + 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0x20, + 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, + 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, + 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, + 0x20, 0x20, 0x20, 0x20, 0xF4, 0x20, 0x20, 0x20, + 0x20, 0xF9, 0x20, 0xFB, 0xFC, 0x20, 0x20, 0xFF, +} + +var ngrams_windows_1256 = [64]uint32{ + 0x20C7E1, 0x20C7E4, 0x20C8C7, 0x20DAE1, 0x20DDED, 0x20E1E1, 0x20E3E4, 0x20E6C7, 0xC720C7, 0xC7C120, 0xC7CA20, 0xC7D120, 0xC7E120, 0xC7E1C3, 0xC7E1C7, 0xC7E1C8, + 0xC7E1CA, 0xC7E1CC, 0xC7E1CD, 0xC7E1CF, 0xC7E1D3, 0xC7E1DA, 0xC7E1DE, 0xC7E1E3, 0xC7E1E6, 0xC7E1ED, 0xC7E320, 0xC7E420, 0xC7E4CA, 0xC820C7, 0xC920C7, 0xC920DD, + 0xC920E1, 0xC920E3, 0xC920E6, 0xCA20C7, 0xCF20C7, 0xCFC920, 0xD120C7, 0xD1C920, 0xD320C7, 0xDA20C7, 0xDAE1EC, 0xDDED20, 0xE120C7, 0xE1C920, 0xE1EC20, 0xE1ED20, + 0xE320C7, 0xE3C720, 0xE3C920, 0xE3E420, 0xE420C7, 0xE520C7, 0xE5C720, 0xE6C7E1, 0xE6E420, 0xEC20C7, 0xED20C7, 0xED20E3, 0xED20E6, 0xEDC920, 0xEDD120, 0xEDE420, +} + +func newRecognizer_windows_1256() *recognizerSingleByte { + return &recognizerSingleByte{ + charset: "windows-1256", + language: "ar", + charMap: 
&charMap_windows_1256, + ngram: &ngrams_windows_1256, + } +} + +var charMap_windows_1251 = [256]byte{ + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, + 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, + 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, + 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, + 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, + 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, + 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x90, 0x83, 0x20, 0x83, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x9A, 0x20, 0x9C, 0x9D, 0x9E, 0x9F, + 0x90, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x9A, 0x20, 0x9C, 0x9D, 0x9E, 0x9F, + 0x20, 0xA2, 0xA2, 0xBC, 0x20, 0xB4, 0x20, 0x20, + 0xB8, 0x20, 0xBA, 0x20, 0x20, 0x20, 0x20, 0xBF, + 0x20, 0x20, 0xB3, 0xB3, 0xB4, 0xB5, 0x20, 0x20, + 0xB8, 0x20, 0xBA, 0x20, 0xBC, 0xBE, 0xBE, 0xBF, + 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, + 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, + 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, + 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF, + 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, + 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, + 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, + 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF, +} + +var ngrams_windows_1251 = [64]uint32{ + 0x20E220, 0x20E2EE, 0x20E4EE, 0x20E7E0, 0x20E820, 0x20EAE0, 0x20EAEE, 0x20EDE0, 0x20EDE5, 0x20EEE1, 0x20EFEE, 0x20EFF0, 0x20F0E0, 0x20F1EE, 0x20F1F2, 0x20F2EE, + 0x20F7F2, 0x20FDF2, 0xE0EDE8, 0xE0F2FC, 0xE3EE20, 0xE5EBFC, 0xE5EDE8, 0xE5F1F2, 0xE5F220, 
0xE820EF, 0xE8E520, 0xE8E820, 0xE8FF20, 0xEBE5ED, 0xEBE820, 0xEBFCED, + 0xEDE020, 0xEDE520, 0xEDE8E5, 0xEDE8FF, 0xEDEE20, 0xEDEEE2, 0xEE20E2, 0xEE20EF, 0xEE20F1, 0xEEE220, 0xEEE2E0, 0xEEE3EE, 0xEEE920, 0xEEEBFC, 0xEEEC20, 0xEEF1F2, + 0xEFEEEB, 0xEFF0E5, 0xEFF0E8, 0xEFF0EE, 0xF0E0E2, 0xF0E5E4, 0xF1F2E0, 0xF1F2E2, 0xF1F2E8, 0xF1FF20, 0xF2E5EB, 0xF2EE20, 0xF2EEF0, 0xF2FC20, 0xF7F2EE, 0xFBF520, +} + +func newRecognizer_windows_1251() *recognizerSingleByte { + return &recognizerSingleByte{ + charset: "windows-1251", + language: "ar", + charMap: &charMap_windows_1251, + ngram: &ngrams_windows_1251, + } +} + +var charMap_KOI8_R = [256]byte{ + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, + 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, + 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, + 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, + 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, + 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, + 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0xA3, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0xA3, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, + 0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF, + 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, + 0xD8, 
0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, + 0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, + 0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF, + 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, + 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, +} + +var ngrams_KOI8_R = [64]uint32{ + 0x20C4CF, 0x20C920, 0x20CBC1, 0x20CBCF, 0x20CEC1, 0x20CEC5, 0x20CFC2, 0x20D0CF, 0x20D0D2, 0x20D2C1, 0x20D3CF, 0x20D3D4, 0x20D4CF, 0x20D720, 0x20D7CF, 0x20DAC1, + 0x20DCD4, 0x20DED4, 0xC1CEC9, 0xC1D4D8, 0xC5CCD8, 0xC5CEC9, 0xC5D3D4, 0xC5D420, 0xC7CF20, 0xC920D0, 0xC9C520, 0xC9C920, 0xC9D120, 0xCCC5CE, 0xCCC920, 0xCCD8CE, + 0xCEC120, 0xCEC520, 0xCEC9C5, 0xCEC9D1, 0xCECF20, 0xCECFD7, 0xCF20D0, 0xCF20D3, 0xCF20D7, 0xCFC7CF, 0xCFCA20, 0xCFCCD8, 0xCFCD20, 0xCFD3D4, 0xCFD720, 0xCFD7C1, + 0xD0CFCC, 0xD0D2C5, 0xD0D2C9, 0xD0D2CF, 0xD2C1D7, 0xD2C5C4, 0xD3D120, 0xD3D4C1, 0xD3D4C9, 0xD3D4D7, 0xD4C5CC, 0xD4CF20, 0xD4CFD2, 0xD4D820, 0xD9C820, 0xDED4CF, +} + +func newRecognizer_KOI8_R() *recognizerSingleByte { + return &recognizerSingleByte{ + charset: "KOI8-R", + language: "ru", + charMap: &charMap_KOI8_R, + ngram: &ngrams_KOI8_R, + } +} + +var charMap_IBM424_he = [256]byte{ + /* -0 -1 -2 -3 -4 -5 -6 -7 -8 -9 -A -B -C -D -E -F */ + /* 0- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, + /* 1- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, + /* 2- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, + /* 3- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, + /* 4- */ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, + /* 5- */ 0x40, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, + /* 6- */ 0x40, 0x40, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, + /* 7- */ 0x40, 0x71, 0x40, 0x40, 
0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x00, 0x40, 0x40, + /* 8- */ 0x40, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, + /* 9- */ 0x40, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, + /* A- */ 0xA0, 0x40, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, + /* B- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, + /* C- */ 0x40, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, + /* D- */ 0x40, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, + /* E- */ 0x40, 0x40, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, + /* F- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, +} + +var ngrams_IBM424_he_rtl = [64]uint32{ + 0x404146, 0x404148, 0x404151, 0x404171, 0x404251, 0x404256, 0x404541, 0x404546, 0x404551, 0x404556, 0x404562, 0x404569, 0x404571, 0x405441, 0x405445, 0x405641, + 0x406254, 0x406954, 0x417140, 0x454041, 0x454042, 0x454045, 0x454054, 0x454056, 0x454069, 0x454641, 0x464140, 0x465540, 0x465740, 0x466840, 0x467140, 0x514045, + 0x514540, 0x514671, 0x515155, 0x515540, 0x515740, 0x516840, 0x517140, 0x544041, 0x544045, 0x544140, 0x544540, 0x554041, 0x554042, 0x554045, 0x554054, 0x554056, + 0x554069, 0x564540, 0x574045, 0x584540, 0x585140, 0x585155, 0x625440, 0x684045, 0x685155, 0x695440, 0x714041, 0x714042, 0x714045, 0x714054, 0x714056, 0x714069, +} + +var ngrams_IBM424_he_ltr = [64]uint32{ + 0x404146, 0x404154, 0x404551, 0x404554, 0x404556, 0x404558, 0x405158, 0x405462, 0x405469, 0x405546, 0x405551, 0x405746, 0x405751, 0x406846, 0x406851, 0x407141, + 0x407146, 0x407151, 0x414045, 0x414054, 0x414055, 0x414071, 0x414540, 0x414645, 0x415440, 0x415640, 0x424045, 0x424055, 0x424071, 0x454045, 0x454051, 0x454054, + 
0x454055, 0x454057, 0x454068, 0x454071, 0x455440, 0x464140, 0x464540, 0x484140, 0x514140, 0x514240, 0x514540, 0x544045, 0x544055, 0x544071, 0x546240, 0x546940, + 0x555151, 0x555158, 0x555168, 0x564045, 0x564055, 0x564071, 0x564240, 0x564540, 0x624540, 0x694045, 0x694055, 0x694071, 0x694540, 0x714140, 0x714540, 0x714651, +} + +func newRecognizer_IBM424_he(charset string, ngram *[64]uint32) *recognizerSingleByte { + return &recognizerSingleByte{ + charset: charset, + language: "he", + charMap: &charMap_IBM424_he, + ngram: ngram, + } +} + +func newRecognizer_IBM424_he_rtl() *recognizerSingleByte { + return newRecognizer_IBM424_he("IBM424_rtl", &ngrams_IBM424_he_rtl) +} + +func newRecognizer_IBM424_he_ltr() *recognizerSingleByte { + return newRecognizer_IBM424_he("IBM424_ltr", &ngrams_IBM424_he_ltr) +} + +var charMap_IBM420_ar = [256]byte{ + /* -0 -1 -2 -3 -4 -5 -6 -7 -8 -9 -A -B -C -D -E -F */ + /* 0- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, + /* 1- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, + /* 2- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, + /* 3- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, + /* 4- */ 0x40, 0x40, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, + /* 5- */ 0x40, 0x51, 0x52, 0x40, 0x40, 0x55, 0x56, 0x57, 0x58, 0x59, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, + /* 6- */ 0x40, 0x40, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, + /* 7- */ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, + /* 8- */ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F, + /* 9- */ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9D, 0x9E, 0x9F, + /* A- */ 0xA0, 0x40, 
0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF, + /* B- */ 0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0x40, 0x40, 0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF, + /* C- */ 0x40, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x40, 0xCB, 0x40, 0xCD, 0x40, 0xCF, + /* D- */ 0x40, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, + /* E- */ 0x40, 0x40, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xEA, 0xEB, 0x40, 0xED, 0xEE, 0xEF, + /* F- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0xFB, 0xFC, 0xFD, 0xFE, 0x40, +} + +var ngrams_IBM420_ar_rtl = [64]uint32{ + 0x4056B1, 0x4056BD, 0x405856, 0x409AB1, 0x40ABDC, 0x40B1B1, 0x40BBBD, 0x40CF56, 0x564056, 0x564640, 0x566340, 0x567540, 0x56B140, 0x56B149, 0x56B156, 0x56B158, + 0x56B163, 0x56B167, 0x56B169, 0x56B173, 0x56B178, 0x56B19A, 0x56B1AD, 0x56B1BB, 0x56B1CF, 0x56B1DC, 0x56BB40, 0x56BD40, 0x56BD63, 0x584056, 0x624056, 0x6240AB, + 0x6240B1, 0x6240BB, 0x6240CF, 0x634056, 0x734056, 0x736240, 0x754056, 0x756240, 0x784056, 0x9A4056, 0x9AB1DA, 0xABDC40, 0xB14056, 0xB16240, 0xB1DA40, 0xB1DC40, + 0xBB4056, 0xBB5640, 0xBB6240, 0xBBBD40, 0xBD4056, 0xBF4056, 0xBF5640, 0xCF56B1, 0xCFBD40, 0xDA4056, 0xDC4056, 0xDC40BB, 0xDC40CF, 0xDC6240, 0xDC7540, 0xDCBD40, +} + +var ngrams_IBM420_ar_ltr = [64]uint32{ + 0x404656, 0x4056BB, 0x4056BF, 0x406273, 0x406275, 0x4062B1, 0x4062BB, 0x4062DC, 0x406356, 0x407556, 0x4075DC, 0x40B156, 0x40BB56, 0x40BD56, 0x40BDBB, 0x40BDCF, + 0x40BDDC, 0x40DAB1, 0x40DCAB, 0x40DCB1, 0x49B156, 0x564056, 0x564058, 0x564062, 0x564063, 0x564073, 0x564075, 0x564078, 0x56409A, 0x5640B1, 0x5640BB, 0x5640BD, + 0x5640BF, 0x5640DA, 0x5640DC, 0x565840, 0x56B156, 0x56CF40, 0x58B156, 0x63B156, 0x63BD56, 0x67B156, 0x69B156, 0x73B156, 0x78B156, 0x9AB156, 0xAB4062, 0xADB156, + 0xB14062, 0xB15640, 0xB156CF, 0xB19A40, 0xB1B140, 0xBB4062, 0xBB40DC, 0xBBB156, 0xBD5640, 0xBDBB40, 0xCF4062, 0xCF40DC, 0xCFB156, 0xDAB19A, 0xDCAB40, 
0xDCB156, +} + +func newRecognizer_IBM420_ar(charset string, ngram *[64]uint32) *recognizerSingleByte { + return &recognizerSingleByte{ + charset: charset, + language: "ar", + charMap: &charMap_IBM420_ar, + ngram: ngram, + } +} + +func newRecognizer_IBM420_ar_rtl() *recognizerSingleByte { + return newRecognizer_IBM420_ar("IBM420_rtl", &ngrams_IBM420_ar_rtl) +} + +func newRecognizer_IBM420_ar_ltr() *recognizerSingleByte { + return newRecognizer_IBM420_ar("IBM420_ltr", &ngrams_IBM420_ar_ltr) +} diff --git a/vendor/github.com/saintfish/chardet/unicode.go b/vendor/github.com/saintfish/chardet/unicode.go new file mode 100644 index 000000000..6f9fa9e67 --- /dev/null +++ b/vendor/github.com/saintfish/chardet/unicode.go @@ -0,0 +1,103 @@ +package chardet + +import ( + "bytes" +) + +var ( + utf16beBom = []byte{0xFE, 0xFF} + utf16leBom = []byte{0xFF, 0xFE} + utf32beBom = []byte{0x00, 0x00, 0xFE, 0xFF} + utf32leBom = []byte{0xFF, 0xFE, 0x00, 0x00} +) + +type recognizerUtf16be struct { +} + +func newRecognizer_utf16be() *recognizerUtf16be { + return &recognizerUtf16be{} +} + +func (*recognizerUtf16be) Match(input *recognizerInput) (output recognizerOutput) { + output = recognizerOutput{ + Charset: "UTF-16BE", + } + if bytes.HasPrefix(input.raw, utf16beBom) { + output.Confidence = 100 + } + return +} + +type recognizerUtf16le struct { +} + +func newRecognizer_utf16le() *recognizerUtf16le { + return &recognizerUtf16le{} +} + +func (*recognizerUtf16le) Match(input *recognizerInput) (output recognizerOutput) { + output = recognizerOutput{ + Charset: "UTF-16LE", + } + if bytes.HasPrefix(input.raw, utf16leBom) && !bytes.HasPrefix(input.raw, utf32leBom) { + output.Confidence = 100 + } + return +} + +type recognizerUtf32 struct { + name string + bom []byte + decodeChar func(input []byte) uint32 +} + +func decodeUtf32be(input []byte) uint32 { + return uint32(input[0])<<24 | uint32(input[1])<<16 | uint32(input[2])<<8 | uint32(input[3]) +} + +func decodeUtf32le(input []byte) uint32 { + 
return uint32(input[3])<<24 | uint32(input[2])<<16 | uint32(input[1])<<8 | uint32(input[0]) +} + +func newRecognizer_utf32be() *recognizerUtf32 { + return &recognizerUtf32{ + "UTF-32BE", + utf32beBom, + decodeUtf32be, + } +} + +func newRecognizer_utf32le() *recognizerUtf32 { + return &recognizerUtf32{ + "UTF-32LE", + utf32leBom, + decodeUtf32le, + } +} + +func (r *recognizerUtf32) Match(input *recognizerInput) (output recognizerOutput) { + output = recognizerOutput{ + Charset: r.name, + } + hasBom := bytes.HasPrefix(input.raw, r.bom) + var numValid, numInvalid uint32 + for b := input.raw; len(b) >= 4; b = b[4:] { + if c := r.decodeChar(b); c >= 0x10FFFF || (c >= 0xD800 && c <= 0xDFFF) { + numInvalid++ + } else { + numValid++ + } + } + if hasBom && numInvalid == 0 { + output.Confidence = 100 + } else if hasBom && numValid > numInvalid*10 { + output.Confidence = 80 + } else if numValid > 3 && numInvalid == 0 { + output.Confidence = 100 + } else if numValid > 0 && numInvalid == 0 { + output.Confidence = 80 + } else if numValid > numInvalid*10 { + output.Confidence = 25 + } + return +} diff --git a/vendor/github.com/saintfish/chardet/utf8.go b/vendor/github.com/saintfish/chardet/utf8.go new file mode 100644 index 000000000..ae036ad9b --- /dev/null +++ b/vendor/github.com/saintfish/chardet/utf8.go @@ -0,0 +1,71 @@ +package chardet + +import ( + "bytes" +) + +var utf8Bom = []byte{0xEF, 0xBB, 0xBF} + +type recognizerUtf8 struct { +} + +func newRecognizer_utf8() *recognizerUtf8 { + return &recognizerUtf8{} +} + +func (*recognizerUtf8) Match(input *recognizerInput) (output recognizerOutput) { + output = recognizerOutput{ + Charset: "UTF-8", + } + hasBom := bytes.HasPrefix(input.raw, utf8Bom) + inputLen := len(input.raw) + var numValid, numInvalid uint32 + var trailBytes uint8 + for i := 0; i < inputLen; i++ { + c := input.raw[i] + if c&0x80 == 0 { + continue + } + if c&0xE0 == 0xC0 { + trailBytes = 1 + } else if c&0xF0 == 0xE0 { + trailBytes = 2 + } else if c&0xF8 == 0xF0 { 
+ trailBytes = 3 + } else { + numInvalid++ + if numInvalid > 5 { + break + } + trailBytes = 0 + } + + for i++; i < inputLen; i++ { + c = input.raw[i] + if c&0xC0 != 0x80 { + numInvalid++ + break + } + if trailBytes--; trailBytes == 0 { + numValid++ + break + } + } + } + + if hasBom && numInvalid == 0 { + output.Confidence = 100 + } else if hasBom && numValid > numInvalid*10 { + output.Confidence = 80 + } else if numValid > 3 && numInvalid == 0 { + output.Confidence = 100 + } else if numValid > 0 && numInvalid == 0 { + output.Confidence = 80 + } else if numValid == 0 && numInvalid == 0 { + // Plain ASCII + output.Confidence = 10 + } else if numValid > numInvalid*10 { + output.Confidence = 25 + } + return +} diff --git a/vendor/github.com/temoto/robotstxt/.gitignore b/vendor/github.com/temoto/robotstxt/.gitignore new file mode 100644 index 000000000..2ef152f63 --- /dev/null +++ b/vendor/github.com/temoto/robotstxt/.gitignore @@ -0,0 +1,9 @@ +*.cgo?.* +*.o +*.so +*.sublime-* +.DS_Store +_cgo_* +_obj +_test +coverage.txt diff --git a/vendor/github.com/temoto/robotstxt/.travis.yml b/vendor/github.com/temoto/robotstxt/.travis.yml new file mode 100644 index 000000000..94d72beaa --- /dev/null +++ b/vendor/github.com/temoto/robotstxt/.travis.yml @@ -0,0 +1,30 @@ +language: go +sudo: false + +cache: + go: true + directories: + - "$HOME/.cache" +go: +- master +- 1.10.x +- 1.9 +- 1.8 +- 1.7 +- 1.6 + +matrix: + include: + - go: 1.10.x + env: task=bench + - go: 1.7 + env: task=bench + - go: master + env: task=bench + - go: master + env: task=clean + +install: + - go get -u github.com/alecthomas/gometalinter +script: ./script/${task:-test} +after_success: if [[ -z "$task" ]] ; then bash <(curl -s https://codecov.io/bash) ; fi diff --git a/vendor/github.com/temoto/robotstxt/LICENSE b/vendor/github.com/temoto/robotstxt/LICENSE new file mode 100644 index 000000000..c125145b6 --- /dev/null +++ b/vendor/github.com/temoto/robotstxt/LICENSE @@ -0,0 +1,21 @@ +The MIT License + +Copyright 
(c) 2010 Sergey Shepelev