summary refs log tree commit diff stats
path: root/vendor/golang.org/x/text/internal/triegen
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/golang.org/x/text/internal/triegen')
-rw-r--r-- vendor/golang.org/x/text/internal/triegen/LICENSE 27
-rw-r--r-- vendor/golang.org/x/text/internal/triegen/compact.go 58
-rw-r--r-- vendor/golang.org/x/text/internal/triegen/print.go 251
-rw-r--r-- vendor/golang.org/x/text/internal/triegen/triegen.go 494
4 files changed, 0 insertions, 830 deletions
diff --git a/vendor/golang.org/x/text/internal/triegen/LICENSE b/vendor/golang.org/x/text/internal/triegen/LICENSE
deleted file mode 100644
index 6a66aea5..00000000
--- a/vendor/golang.org/x/text/internal/triegen/LICENSE
+++ /dev/null
@@ -1,27 +0,0 @@
-Copyright (c) 2009 The Go Authors. All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are
-met:
-
- * Redistributions of source code must retain the above copyright
-notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above
-copyright notice, this list of conditions and the following disclaimer
-in the documentation and/or other materials provided with the
-distribution.
- * Neither the name of Google Inc. nor the names of its
-contributors may be used to endorse or promote products derived from
-this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/vendor/golang.org/x/text/internal/triegen/compact.go b/vendor/golang.org/x/text/internal/triegen/compact.go
deleted file mode 100644
index 397b975c..00000000
--- a/vendor/golang.org/x/text/internal/triegen/compact.go
+++ /dev/null
@@ -1,58 +0,0 @@
-// Copyright 2014 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package triegen
-
-// This file defines Compacter and its implementations.
-
-import "io"
-
-// A Compacter generates an alternative, more space-efficient way to store a
-// trie value block. A trie value block holds all possible values for the last
-// byte of a UTF-8 encoded rune. Excluding ASCII characters, a trie value block
-// always has 64 values, as a UTF-8 encoding ends with a byte in [0x80, 0xC0).
-type Compacter interface {
- // Size returns whether the Compacter could encode the given block as well
- // as its size in case it can. len(v) is always 64.
- Size(v []uint64) (sz int, ok bool)
-
- // Store stores the block using the Compacter's compression method.
- // It returns a handle with which the block can be retrieved.
- // len(v) is always 64.
- Store(v []uint64) uint32
-
- // Print writes the data structures associated to the given store to w.
- Print(w io.Writer) error
-
- // Handler returns the name of a function that gets called during trie
- // lookup for blocks generated by the Compacter. The function should be of
- // the form func (n uint32, b byte) uint64, where n is the index returned by
- // the Compacter's Store method and b is the last byte of the UTF-8
- // encoding, where 0x80 <= b < 0xC0, for which to do the lookup in the
- // block.
- Handler() string
-}
-
-// simpleCompacter is the default Compacter used by builder. It implements a
-// normal trie block.
-type simpleCompacter builder
-
-func (b *simpleCompacter) Size([]uint64) (sz int, ok bool) {
- return blockSize * b.ValueSize, true
-}
-
-func (b *simpleCompacter) Store(v []uint64) uint32 {
- h := uint32(len(b.ValueBlocks) - blockOffset)
- b.ValueBlocks = append(b.ValueBlocks, v)
- return h
-}
-
-func (b *simpleCompacter) Print(io.Writer) error {
- // Structures are printed in print.go.
- return nil
-}
-
-func (b *simpleCompacter) Handler() string {
- panic("Handler should be special-cased for this Compacter")
-}
diff --git a/vendor/golang.org/x/text/internal/triegen/print.go b/vendor/golang.org/x/text/internal/triegen/print.go
deleted file mode 100644
index 8d9f120b..00000000
--- a/vendor/golang.org/x/text/internal/triegen/print.go
+++ /dev/null
@@ -1,251 +0,0 @@
-// Copyright 2014 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package triegen
-
-import (
- "bytes"
- "fmt"
- "io"
- "strings"
- "text/template"
-)
-
-// print writes all the data structures as well as the code necessary to use the
-// trie to w.
-func (b *builder) print(w io.Writer) error {
- b.Stats.NValueEntries = len(b.ValueBlocks) * blockSize
- b.Stats.NValueBytes = len(b.ValueBlocks) * blockSize * b.ValueSize
- b.Stats.NIndexEntries = len(b.IndexBlocks) * blockSize
- b.Stats.NIndexBytes = len(b.IndexBlocks) * blockSize * b.IndexSize
- b.Stats.NHandleBytes = len(b.Trie) * 2 * b.IndexSize
-
- // If we only have one root trie, all starter blocks are at position 0 and
- // we can access the arrays directly.
- if len(b.Trie) == 1 {
- // At this point we cannot refer to the generated tables directly.
- b.ASCIIBlock = b.Name + "Values"
- b.StarterBlock = b.Name + "Index"
- } else {
- // Otherwise we need to have explicit starter indexes in the trie
- // structure.
- b.ASCIIBlock = "t.ascii"
- b.StarterBlock = "t.utf8Start"
- }
-
- b.SourceType = "[]byte"
- if err := lookupGen.Execute(w, b); err != nil {
- return err
- }
-
- b.SourceType = "string"
- if err := lookupGen.Execute(w, b); err != nil {
- return err
- }
-
- if err := trieGen.Execute(w, b); err != nil {
- return err
- }
-
- for _, c := range b.Compactions {
- if err := c.c.Print(w); err != nil {
- return err
- }
- }
-
- return nil
-}
-
-func printValues(n int, values []uint64) string {
- w := &bytes.Buffer{}
- boff := n * blockSize
- fmt.Fprintf(w, "\t// Block %#x, offset %#x", n, boff)
- var newline bool
- for i, v := range values {
- if i%6 == 0 {
- newline = true
- }
- if v != 0 {
- if newline {
- fmt.Fprintf(w, "\n")
- newline = false
- }
- fmt.Fprintf(w, "\t%#02x:%#04x, ", boff+i, v)
- }
- }
- return w.String()
-}
-
-func printIndex(b *builder, nr int, n *node) string {
- w := &bytes.Buffer{}
- boff := nr * blockSize
- fmt.Fprintf(w, "\t// Block %#x, offset %#x", nr, boff)
- var newline bool
- for i, c := range n.children {
- if i%8 == 0 {
- newline = true
- }
- if c != nil {
- v := b.Compactions[c.index.compaction].Offset + uint32(c.index.index)
- if v != 0 {
- if newline {
- fmt.Fprintf(w, "\n")
- newline = false
- }
- fmt.Fprintf(w, "\t%#02x:%#02x, ", boff+i, v)
- }
- }
- }
- return w.String()
-}
-
-var (
- trieGen = template.Must(template.New("trie").Funcs(template.FuncMap{
- "printValues": printValues,
- "printIndex": printIndex,
- "title": strings.Title,
- "dec": func(x int) int { return x - 1 },
- "psize": func(n int) string {
- return fmt.Sprintf("%d bytes (%.2f KiB)", n, float64(n)/1024)
- },
- }).Parse(trieTemplate))
- lookupGen = template.Must(template.New("lookup").Parse(lookupTemplate))
-)
-
-// TODO: consider the return type of lookup. It could be uint64, even if the
-// internal value type is smaller. We will have to verify this with the
-// performance of unicode/norm, which is very sensitive to such changes.
-const trieTemplate = `{{$b := .}}{{$multi := gt (len .Trie) 1}}
-// {{.Name}}Trie. Total size: {{psize .Size}}. Checksum: {{printf "%08x" .Checksum}}.
-type {{.Name}}Trie struct { {{if $multi}}
- ascii []{{.ValueType}} // index for ASCII bytes
- utf8Start []{{.IndexType}} // index for UTF-8 bytes >= 0xC0
-{{end}}}
-
-func new{{title .Name}}Trie(i int) *{{.Name}}Trie { {{if $multi}}
- h := {{.Name}}TrieHandles[i]
- return &{{.Name}}Trie{ {{.Name}}Values[uint32(h.ascii)<<6:], {{.Name}}Index[uint32(h.multi)<<6:] }
-}
-
-type {{.Name}}TrieHandle struct {
- ascii, multi {{.IndexType}}
-}
-
-// {{.Name}}TrieHandles: {{len .Trie}} handles, {{.Stats.NHandleBytes}} bytes
-var {{.Name}}TrieHandles = [{{len .Trie}}]{{.Name}}TrieHandle{
-{{range .Trie}} { {{.ASCIIIndex}}, {{.StarterIndex}} }, // {{printf "%08x" .Checksum}}: {{.Name}}
-{{end}}}{{else}}
- return &{{.Name}}Trie{}
-}
-{{end}}
-// lookupValue determines the type of block n and looks up the value for b.
-func (t *{{.Name}}Trie) lookupValue(n uint32, b byte) {{.ValueType}}{{$last := dec (len .Compactions)}} {
- switch { {{range $i, $c := .Compactions}}
- {{if eq $i $last}}default{{else}}case n < {{$c.Cutoff}}{{end}}:{{if ne $i 0}}
- n -= {{$c.Offset}}{{end}}
- return {{print $b.ValueType}}({{$c.Handler}}){{end}}
- }
-}
-
-// {{.Name}}Values: {{len .ValueBlocks}} blocks, {{.Stats.NValueEntries}} entries, {{.Stats.NValueBytes}} bytes
-// The third block is the zero block.
-var {{.Name}}Values = [{{.Stats.NValueEntries}}]{{.ValueType}} {
-{{range $i, $v := .ValueBlocks}}{{printValues $i $v}}
-{{end}}}
-
-// {{.Name}}Index: {{len .IndexBlocks}} blocks, {{.Stats.NIndexEntries}} entries, {{.Stats.NIndexBytes}} bytes
-// Block 0 is the zero block.
-var {{.Name}}Index = [{{.Stats.NIndexEntries}}]{{.IndexType}} {
-{{range $i, $v := .IndexBlocks}}{{printIndex $b $i $v}}
-{{end}}}
-`
-
-// TODO: consider allowing zero-length strings after evaluating performance with
-// unicode/norm.
-const lookupTemplate = `
-// lookup{{if eq .SourceType "string"}}String{{end}} returns the trie value for the first UTF-8 encoding in s and
-// the width in bytes of this encoding. The size will be 0 if s does not
-// hold enough bytes to complete the encoding. len(s) must be greater than 0.
-func (t *{{.Name}}Trie) lookup{{if eq .SourceType "string"}}String{{end}}(s {{.SourceType}}) (v {{.ValueType}}, sz int) {
- c0 := s[0]
- switch {
- case c0 < 0x80: // is ASCII
- return {{.ASCIIBlock}}[c0], 1
- case c0 < 0xC2:
- return 0, 1 // Illegal UTF-8: not a starter, not ASCII.
- case c0 < 0xE0: // 2-byte UTF-8
- if len(s) < 2 {
- return 0, 0
- }
- i := {{.StarterBlock}}[c0]
- c1 := s[1]
- if c1 < 0x80 || 0xC0 <= c1 {
- return 0, 1 // Illegal UTF-8: not a continuation byte.
- }
- return t.lookupValue(uint32(i), c1), 2
- case c0 < 0xF0: // 3-byte UTF-8
- if len(s) < 3 {
- return 0, 0
- }
- i := {{.StarterBlock}}[c0]
- c1 := s[1]
- if c1 < 0x80 || 0xC0 <= c1 {
- return 0, 1 // Illegal UTF-8: not a continuation byte.
- }
- o := uint32(i)<<6 + uint32(c1)
- i = {{.Name}}Index[o]
- c2 := s[2]
- if c2 < 0x80 || 0xC0 <= c2 {
- return 0, 2 // Illegal UTF-8: not a continuation byte.
- }
- return t.lookupValue(uint32(i), c2), 3
- case c0 < 0xF8: // 4-byte UTF-8
- if len(s) < 4 {
- return 0, 0
- }
- i := {{.StarterBlock}}[c0]
- c1 := s[1]
- if c1 < 0x80 || 0xC0 <= c1 {
- return 0, 1 // Illegal UTF-8: not a continuation byte.
- }
- o := uint32(i)<<6 + uint32(c1)
- i = {{.Name}}Index[o]
- c2 := s[2]
- if c2 < 0x80 || 0xC0 <= c2 {
- return 0, 2 // Illegal UTF-8: not a continuation byte.
- }
- o = uint32(i)<<6 + uint32(c2)
- i = {{.Name}}Index[o]
- c3 := s[3]
- if c3 < 0x80 || 0xC0 <= c3 {
- return 0, 3 // Illegal UTF-8: not a continuation byte.
- }
- return t.lookupValue(uint32(i), c3), 4
- }
- // Illegal rune
- return 0, 1
-}
-
-// lookup{{if eq .SourceType "string"}}String{{end}}Unsafe returns the trie value for the first UTF-8 encoding in s.
-// s must start with a full and valid UTF-8 encoded rune.
-func (t *{{.Name}}Trie) lookup{{if eq .SourceType "string"}}String{{end}}Unsafe(s {{.SourceType}}) {{.ValueType}} {
- c0 := s[0]
- if c0 < 0x80 { // is ASCII
- return {{.ASCIIBlock}}[c0]
- }
- i := {{.StarterBlock}}[c0]
- if c0 < 0xE0 { // 2-byte UTF-8
- return t.lookupValue(uint32(i), s[1])
- }
- i = {{.Name}}Index[uint32(i)<<6+uint32(s[1])]
- if c0 < 0xF0 { // 3-byte UTF-8
- return t.lookupValue(uint32(i), s[2])
- }
- i = {{.Name}}Index[uint32(i)<<6+uint32(s[2])]
- if c0 < 0xF8 { // 4-byte UTF-8
- return t.lookupValue(uint32(i), s[3])
- }
- return 0
-}
-`
diff --git a/vendor/golang.org/x/text/internal/triegen/triegen.go b/vendor/golang.org/x/text/internal/triegen/triegen.go
deleted file mode 100644
index adb01081..00000000
--- a/vendor/golang.org/x/text/internal/triegen/triegen.go
+++ /dev/null
@@ -1,494 +0,0 @@
-// Copyright 2014 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// Package triegen implements a code generator for a trie for associating
-// unsigned integer values with UTF-8 encoded runes.
-//
-// Many of the go.text packages use tries for storing per-rune information. A
-// trie is especially useful if many of the runes have the same value. If this
-// is the case, many blocks can be expected to be shared allowing for
-// information on many runes to be stored in little space.
-//
-// As most of the lookups are done directly on []byte slices, the tries use the
-// UTF-8 bytes directly for the lookup. This saves a conversion from UTF-8 to
-// runes and contributes a little bit to better performance. It also naturally
-// provides a fast path for ASCII.
-//
-// Space is also an issue. There are many code points defined in Unicode and as
-// a result tables can get quite large. So every byte counts. The triegen
-// package automatically chooses the smallest integer values to represent the
-// tables. Compacters allow further compression of the trie by allowing for
-// alternative representations of individual trie blocks.
-//
-// triegen allows generating multiple tries as a single structure. This is
-// useful when, for example, one wants to generate tries for several languages
-// that have a lot of values in common. Some existing libraries for
-// internationalization store all per-language data as a dynamically loadable
-// chunk. The go.text packages are designed with the assumption that the user
-// typically wants to compile in support for all supported languages, in line
-// with the approach common to Go to create a single standalone binary. The
-// multi-root trie approach can give significant storage savings in this
-// scenario.
-//
-// triegen generates both tables and code. The code is optimized to use the
-// automatically chosen data types. The following code is generated for a Trie
-// or multiple Tries named "foo":
-// - type fooTrie
-// The trie type.
-//
-// - func newFooTrie(x int) *fooTrie
-// Trie constructor, where x is the index of the trie passed to Gen.
-//
-// - func (t *fooTrie) lookup(s []byte) (v uintX, sz int)
-// The lookup method, where uintX is automatically chosen.
-//
-// - func lookupString, lookupUnsafe and lookupStringUnsafe
-// Variants of the above.
-//
-// - var fooValues and fooIndex and any tables generated by Compacters.
-// The core trie data.
-//
-// - var fooTrieHandles
-// Indexes of starter blocks in case of multiple trie roots.
-//
-// It is recommended that users test the generated trie by checking the returned
-// value for every rune. Such exhaustive tests are possible as the the number of
-// runes in Unicode is limited.
-package triegen // import "golang.org/x/text/internal/triegen"
-
-// TODO: Arguably, the internally optimized data types would not have to be
-// exposed in the generated API. We could also investigate not generating the
-// code, but using it through a package. We would have to investigate the impact
-// on performance of making such change, though. For packages like unicode/norm,
-// small changes like this could tank performance.
-
-import (
- "encoding/binary"
- "fmt"
- "hash/crc64"
- "io"
- "log"
- "unicode/utf8"
-)
-
-// builder builds a set of tries for associating values with runes. The set of
-// tries can share common index and value blocks.
-type builder struct {
- Name string
-
- // ValueType is the type of the trie values looked up.
- ValueType string
-
- // ValueSize is the byte size of the ValueType.
- ValueSize int
-
- // IndexType is the type of trie index values used for all UTF-8 bytes of
- // a rune except the last one.
- IndexType string
-
- // IndexSize is the byte size of the IndexType.
- IndexSize int
-
- // SourceType is used when generating the lookup functions. If the user
- // requests StringSupport, all lookup functions will be generated for
- // string input as well.
- SourceType string
-
- Trie []*Trie
-
- IndexBlocks []*node
- ValueBlocks [][]uint64
- Compactions []compaction
- Checksum uint64
-
- ASCIIBlock string
- StarterBlock string
-
- indexBlockIdx map[uint64]int
- valueBlockIdx map[uint64]nodeIndex
- asciiBlockIdx map[uint64]int
-
- // Stats are used to fill out the template.
- Stats struct {
- NValueEntries int
- NValueBytes int
- NIndexEntries int
- NIndexBytes int
- NHandleBytes int
- }
-
- err error
-}
-
-// A nodeIndex encodes the index of a node, which is defined by the compaction
-// which stores it and an index within the compaction. For internal nodes, the
-// compaction is always 0.
-type nodeIndex struct {
- compaction int
- index int
-}
-
-// compaction keeps track of stats used for the compaction.
-type compaction struct {
- c Compacter
- blocks []*node
- maxHandle uint32
- totalSize int
-
- // Used by template-based generator and thus exported.
- Cutoff uint32
- Offset uint32
- Handler string
-}
-
-func (b *builder) setError(err error) {
- if b.err == nil {
- b.err = err
- }
-}
-
-// An Option can be passed to Gen.
-type Option func(b *builder) error
-
-// Compact configures the trie generator to use the given Compacter.
-func Compact(c Compacter) Option {
- return func(b *builder) error {
- b.Compactions = append(b.Compactions, compaction{
- c: c,
- Handler: c.Handler() + "(n, b)"})
- return nil
- }
-}
-
-// Gen writes Go code for a shared trie lookup structure to w for the given
-// Tries. The generated trie type will be called nameTrie. newNameTrie(x) will
-// return the *nameTrie for tries[x]. A value can be looked up by using one of
-// the various lookup methods defined on nameTrie. It returns the table size of
-// the generated trie.
-func Gen(w io.Writer, name string, tries []*Trie, opts ...Option) (sz int, err error) {
- // The index contains two dummy blocks, followed by the zero block. The zero
- // block is at offset 0x80, so that the offset for the zero block for
- // continuation bytes is 0.
- b := &builder{
- Name: name,
- Trie: tries,
- IndexBlocks: []*node{{}, {}, {}},
- Compactions: []compaction{{
- Handler: name + "Values[n<<6+uint32(b)]",
- }},
- // The 0 key in indexBlockIdx and valueBlockIdx is the hash of the zero
- // block.
- indexBlockIdx: map[uint64]int{0: 0},
- valueBlockIdx: map[uint64]nodeIndex{0: {}},
- asciiBlockIdx: map[uint64]int{},
- }
- b.Compactions[0].c = (*simpleCompacter)(b)
-
- for _, f := range opts {
- if err := f(b); err != nil {
- return 0, err
- }
- }
- b.build()
- if b.err != nil {
- return 0, b.err
- }
- if err = b.print(w); err != nil {
- return 0, err
- }
- return b.Size(), nil
-}
-
-// A Trie represents a single root node of a trie. A builder may build several
-// overlapping tries at once.
-type Trie struct {
- root *node
-
- hiddenTrie
-}
-
-// hiddenTrie contains values we want to be visible to the template generator,
-// but hidden from the API documentation.
-type hiddenTrie struct {
- Name string
- Checksum uint64
- ASCIIIndex int
- StarterIndex int
-}
-
-// NewTrie returns a new trie root.
-func NewTrie(name string) *Trie {
- return &Trie{
- &node{
- children: make([]*node, blockSize),
- values: make([]uint64, utf8.RuneSelf),
- },
- hiddenTrie{Name: name},
- }
-}
-
-// Gen is a convenience wrapper around the Gen func passing t as the only trie
-// and uses the name passed to NewTrie. It returns the size of the generated
-// tables.
-func (t *Trie) Gen(w io.Writer, opts ...Option) (sz int, err error) {
- return Gen(w, t.Name, []*Trie{t}, opts...)
-}
-
-// node is a node of the intermediate trie structure.
-type node struct {
- // children holds this node's children. It is always of length 64.
- // A child node may be nil.
- children []*node
-
- // values contains the values of this node. If it is non-nil, this node is
- // either a root or leaf node:
- // For root nodes, len(values) == 128 and it maps the bytes in [0x00, 0x7F].
- // For leaf nodes, len(values) == 64 and it maps the bytes in [0x80, 0xBF].
- values []uint64
-
- index nodeIndex
-}
-
-// Insert associates value with the given rune. Insert will panic if a non-zero
-// value is passed for an invalid rune.
-func (t *Trie) Insert(r rune, value uint64) {
- if value == 0 {
- return
- }
- s := string(r)
- if []rune(s)[0] != r && value != 0 {
- // Note: The UCD tables will always assign what amounts to a zero value
- // to a surrogate. Allowing a zero value for an illegal rune allows
- // users to iterate over [0..MaxRune] without having to explicitly
- // exclude surrogates, which would be tedious.
- panic(fmt.Sprintf("triegen: non-zero value for invalid rune %U", r))
- }
- if len(s) == 1 {
- // It is a root node value (ASCII).
- t.root.values[s[0]] = value
- return
- }
-
- n := t.root
- for ; len(s) > 1; s = s[1:] {
- if n.children == nil {
- n.children = make([]*node, blockSize)
- }
- p := s[0] % blockSize
- c := n.children[p]
- if c == nil {
- c = &node{}
- n.children[p] = c
- }
- if len(s) > 2 && c.values != nil {
- log.Fatalf("triegen: insert(%U): found internal node with values", r)
- }
- n = c
- }
- if n.values == nil {
- n.values = make([]uint64, blockSize)
- }
- if n.children != nil {
- log.Fatalf("triegen: insert(%U): found leaf node that also has child nodes", r)
- }
- n.values[s[0]-0x80] = value
-}
-
-// Size returns the number of bytes the generated trie will take to store. It
-// needs to be exported as it is used in the templates.
-func (b *builder) Size() int {
- // Index blocks.
- sz := len(b.IndexBlocks) * blockSize * b.IndexSize
-
- // Skip the first compaction, which represents the normal value blocks, as
- // its totalSize does not account for the ASCII blocks, which are managed
- // separately.
- sz += len(b.ValueBlocks) * blockSize * b.ValueSize
- for _, c := range b.Compactions[1:] {
- sz += c.totalSize
- }
-
- // TODO: this computation does not account for the fixed overhead of a using
- // a compaction, either code or data. As for data, though, the typical
- // overhead of data is in the order of bytes (2 bytes for cases). Further,
- // the savings of using a compaction should anyway be substantial for it to
- // be worth it.
-
- // For multi-root tries, we also need to account for the handles.
- if len(b.Trie) > 1 {
- sz += 2 * b.IndexSize * len(b.Trie)
- }
- return sz
-}
-
-func (b *builder) build() {
- // Compute the sizes of the values.
- var vmax uint64
- for _, t := range b.Trie {
- vmax = maxValue(t.root, vmax)
- }
- b.ValueType, b.ValueSize = getIntType(vmax)
-
- // Compute all block allocations.
- // TODO: first compute the ASCII blocks for all tries and then the other
- // nodes. ASCII blocks are more restricted in placement, as they require two
- // blocks to be placed consecutively. Processing them first may improve
- // sharing (at least one zero block can be expected to be saved.)
- for _, t := range b.Trie {
- b.Checksum += b.buildTrie(t)
- }
-
- // Compute the offsets for all the Compacters.
- offset := uint32(0)
- for i := range b.Compactions {
- c := &b.Compactions[i]
- c.Offset = offset
- offset += c.maxHandle + 1
- c.Cutoff = offset
- }
-
- // Compute the sizes of indexes.
- // TODO: different byte positions could have different sizes. So far we have
- // not found a case where this is beneficial.
- imax := uint64(b.Compactions[len(b.Compactions)-1].Cutoff)
- for _, ib := range b.IndexBlocks {
- if x := uint64(ib.index.index); x > imax {
- imax = x
- }
- }
- b.IndexType, b.IndexSize = getIntType(imax)
-}
-
-func maxValue(n *node, max uint64) uint64 {
- if n == nil {
- return max
- }
- for _, c := range n.children {
- max = maxValue(c, max)
- }
- for _, v := range n.values {
- if max < v {
- max = v
- }
- }
- return max
-}
-
-func getIntType(v uint64) (string, int) {
- switch {
- case v < 1<<8:
- return "uint8", 1
- case v < 1<<16:
- return "uint16", 2
- case v < 1<<32:
- return "uint32", 4
- }
- return "uint64", 8
-}
-
-const (
- blockSize = 64
-
- // Subtract two blocks to offset 0x80, the first continuation byte.
- blockOffset = 2
-
- // Subtract three blocks to offset 0xC0, the first non-ASCII starter.
- rootBlockOffset = 3
-)
-
-var crcTable = crc64.MakeTable(crc64.ISO)
-
-func (b *builder) buildTrie(t *Trie) uint64 {
- n := t.root
-
- // Get the ASCII offset. For the first trie, the ASCII block will be at
- // position 0.
- hasher := crc64.New(crcTable)
- binary.Write(hasher, binary.BigEndian, n.values)
- hash := hasher.Sum64()
-
- v, ok := b.asciiBlockIdx[hash]
- if !ok {
- v = len(b.ValueBlocks)
- b.asciiBlockIdx[hash] = v
-
- b.ValueBlocks = append(b.ValueBlocks, n.values[:blockSize], n.values[blockSize:])
- if v == 0 {
- // Add the zero block at position 2 so that it will be assigned a
- // zero reference in the lookup blocks.
- // TODO: always do this? This would allow us to remove a check from
- // the trie lookup, but at the expense of extra space. Analyze
- // performance for unicode/norm.
- b.ValueBlocks = append(b.ValueBlocks, make([]uint64, blockSize))
- }
- }
- t.ASCIIIndex = v
-
- // Compute remaining offsets.
- t.Checksum = b.computeOffsets(n, true)
- // We already subtracted the normal blockOffset from the index. Subtract the
- // difference for starter bytes.
- t.StarterIndex = n.index.index - (rootBlockOffset - blockOffset)
- return t.Checksum
-}
-
-func (b *builder) computeOffsets(n *node, root bool) uint64 {
- // For the first trie, the root lookup block will be at position 3, which is
- // the offset for UTF-8 non-ASCII starter bytes.
- first := len(b.IndexBlocks) == rootBlockOffset
- if first {
- b.IndexBlocks = append(b.IndexBlocks, n)
- }
-
- // We special-case the cases where all values recursively are 0. This allows
- // for the use of a zero block to which all such values can be directed.
- hash := uint64(0)
- if n.children != nil || n.values != nil {
- hasher := crc64.New(crcTable)
- for _, c := range n.children {
- var v uint64
- if c != nil {
- v = b.computeOffsets(c, false)
- }
- binary.Write(hasher, binary.BigEndian, v)
- }
- binary.Write(hasher, binary.BigEndian, n.values)
- hash = hasher.Sum64()
- }
-
- if first {
- b.indexBlockIdx[hash] = rootBlockOffset - blockOffset
- }
-
- // Compacters don't apply to internal nodes.
- if n.children != nil {
- v, ok := b.indexBlockIdx[hash]
- if !ok {
- v = len(b.IndexBlocks) - blockOffset
- b.IndexBlocks = append(b.IndexBlocks, n)
- b.indexBlockIdx[hash] = v
- }
- n.index = nodeIndex{0, v}
- } else {
- h, ok := b.valueBlockIdx[hash]
- if !ok {
- bestI, bestSize := 0, blockSize*b.ValueSize
- for i, c := range b.Compactions[1:] {
- if sz, ok := c.c.Size(n.values); ok && bestSize > sz {
- bestI, bestSize = i+1, sz
- }
- }
- c := &b.Compactions[bestI]
- c.totalSize += bestSize
- v := c.c.Store(n.values)
- if c.maxHandle < v {
- c.maxHandle = v
- }
- h = nodeIndex{bestI, int(v)}
- b.valueBlockIdx[hash] = h
- }
- n.index = h
- }
- return hash
-}