diff options
author | Wim <wim@42.be> | 2018-03-04 23:46:13 +0100 |
---|---|---|
committer | Wim <wim@42.be> | 2018-03-04 23:46:13 +0100 |
commit | 25a72113b122f984c904b24c4af23a1cba1eff45 (patch) | |
tree | f0fb7067d7c958d60ac964afa5b8d5fb79ebc339 /vendor/golang.org/x/text/internal | |
parent | 79c4ad5015bd2be47b32141c6d53f0d128bf865b (diff) | |
download | matterbridge-msglm-25a72113b122f984c904b24c4af23a1cba1eff45.tar.gz matterbridge-msglm-25a72113b122f984c904b24c4af23a1cba1eff45.tar.bz2 matterbridge-msglm-25a72113b122f984c904b24c4af23a1cba1eff45.zip |
Add vendor files for spf13/viper
Diffstat (limited to 'vendor/golang.org/x/text/internal')
-rw-r--r-- | vendor/golang.org/x/text/internal/gen/LICENSE | 27 | ||||
-rw-r--r-- | vendor/golang.org/x/text/internal/gen/bitfield/bitfield.go | 226 | ||||
-rw-r--r-- | vendor/golang.org/x/text/internal/gen/code.go | 371 | ||||
-rw-r--r-- | vendor/golang.org/x/text/internal/gen/gen.go | 333 | ||||
-rw-r--r-- | vendor/golang.org/x/text/internal/triegen/LICENSE | 27 | ||||
-rw-r--r-- | vendor/golang.org/x/text/internal/triegen/compact.go | 58 | ||||
-rw-r--r-- | vendor/golang.org/x/text/internal/triegen/print.go | 251 | ||||
-rw-r--r-- | vendor/golang.org/x/text/internal/triegen/triegen.go | 494 | ||||
-rw-r--r-- | vendor/golang.org/x/text/internal/ucd/LICENSE | 27 | ||||
-rw-r--r-- | vendor/golang.org/x/text/internal/ucd/ucd.go | 371 |
10 files changed, 2185 insertions, 0 deletions
diff --git a/vendor/golang.org/x/text/internal/gen/LICENSE b/vendor/golang.org/x/text/internal/gen/LICENSE new file mode 100644 index 00000000..6a66aea5 --- /dev/null +++ b/vendor/golang.org/x/text/internal/gen/LICENSE @@ -0,0 +1,27 @@ +Copyright (c) 2009 The Go Authors. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/vendor/golang.org/x/text/internal/gen/bitfield/bitfield.go b/vendor/golang.org/x/text/internal/gen/bitfield/bitfield.go new file mode 100644 index 00000000..a8d0a48d --- /dev/null +++ b/vendor/golang.org/x/text/internal/gen/bitfield/bitfield.go @@ -0,0 +1,226 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package bitfield converts annotated structs into integer values. +// +// Any field that is marked with a bitfield tag is compacted. The tag value has +// two parts. The part before the comma determines the method name for a +// generated type. If left blank the name of the field is used. +// The part after the comma determines the number of bits to use for the +// representation. +package bitfield + +import ( + "bytes" + "fmt" + "io" + "reflect" + "strconv" + "strings" +) + +// Config determines settings for packing and generation. If a Config is used, +// the same Config should be used for packing and generation. +type Config struct { + // NumBits fixes the maximum allowed bits for the integer representation. + // If NumBits is not 8, 16, 32, or 64, the actual underlying integer size + // will be the next largest available. + NumBits uint + + // If Package is set, code generation will write a package clause. + Package string + + // TypeName is the name for the generated type. By default it is the name + // of the type of the value passed to Gen. + TypeName string +} + +var nullConfig = &Config{} + +// Pack packs annotated bit ranges of struct x in an integer. +// +// Only fields that have a "bitfield" tag are compacted. +func Pack(x interface{}, c *Config) (packed uint64, err error) { + packed, _, err = pack(x, c) + return +} + +func pack(x interface{}, c *Config) (packed uint64, nBit uint, err error) { + if c == nil { + c = nullConfig + } + nBits := c.NumBits + v := reflect.ValueOf(x) + v = reflect.Indirect(v) + t := v.Type() + pos := 64 - nBits + if nBits == 0 { + pos = 0 + } + for i := 0; i < v.NumField(); i++ { + v := v.Field(i) + field := t.Field(i) + f, err := parseField(field) + + if err != nil { + return 0, 0, err + } + if f.nBits == 0 { + continue + } + value := uint64(0) + switch v.Kind() { + case reflect.Bool: + if v.Bool() { + value = 1 + } + case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64: + value = v.Uint() + case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: + x := v.Int() + if x < 0 { + return 0, 0, fmt.Errorf("bitfield: negative value for field %q not allowed", field.Name) + } + value = uint64(x) + } + if value > (1<<f.nBits)-1 { + return 0, 0, fmt.Errorf("bitfield: value %#x of field %q does not fit in %d bits", value, field.Name, f.nBits) + } + shift := 64 - pos - f.nBits + if pos += f.nBits; pos > 64 { + return 0, 0, fmt.Errorf("bitfield: no more bits left for field %q", field.Name) + } + packed |= value << shift + } + if nBits == 0 { + nBits = posToBits(pos) + packed >>= (64 - nBits) + } + return packed, nBits, nil +} + +type field struct { + name string + value uint64 + nBits uint +} + +// parseField parses a tag of the form [<name>][:<nBits>][,<pos>[..<end>]] +func parseField(field reflect.StructField) (f field, err error) { + s, ok := field.Tag.Lookup("bitfield") + if !ok { + return f, nil + } + switch field.Type.Kind() { + case reflect.Bool: + case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64: + case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: + default: + return f, fmt.Errorf("bitfield: field %q is not an integer or bool type", field.Name) + } + bits := s + f.name = "" + + if i := strings.IndexByte(s, ','); i >= 0 { + bits = s[:i] + f.name = s[i+1:] + } + if bits != "" { + nBits, err := strconv.ParseUint(bits, 10, 8) + if err != nil { + return f, fmt.Errorf("bitfield: invalid bit size for field %q: %v", field.Name, err) + } + f.nBits = uint(nBits) + } + if f.nBits == 0 { + if field.Type.Kind() == reflect.Bool { + f.nBits = 1 + } else { + f.nBits = uint(field.Type.Bits()) + } + } + if f.name == "" { + f.name = field.Name + } + return f, err +} + +func posToBits(pos uint) (bits uint) { + switch { + case pos <= 8: + bits = 8 + case pos <= 16: + bits = 16 + case pos <= 32: + bits = 32 + case pos <= 64: + bits = 64 + default: + panic("unreachable") + } + return bits +} + +// Gen generates code for unpacking integers created with Pack. +func Gen(w io.Writer, x interface{}, c *Config) error { + if c == nil { + c = nullConfig + } + _, nBits, err := pack(x, c) + if err != nil { + return err + } + + t := reflect.TypeOf(x) + if t.Kind() == reflect.Ptr { + t = t.Elem() + } + if c.TypeName == "" { + c.TypeName = t.Name() + } + firstChar := []rune(c.TypeName)[0] + + buf := &bytes.Buffer{} + + print := func(w io.Writer, format string, args ...interface{}) { + if _, e := fmt.Fprintf(w, format+"\n", args...); e != nil && err == nil { + err = fmt.Errorf("bitfield: write failed: %v", err) + } + } + + pos := uint(0) + for i := 0; i < t.NumField(); i++ { + field := t.Field(i) + f, _ := parseField(field) + if f.nBits == 0 { + continue + } + shift := nBits - pos - f.nBits + pos += f.nBits + + retType := field.Type.Name() + print(buf, "\nfunc (%c %s) %s() %s {", firstChar, c.TypeName, f.name, retType) + if field.Type.Kind() == reflect.Bool { + print(buf, "\tconst bit = 1 << %d", shift) + print(buf, "\treturn %c&bit == bit", firstChar) + } else { + print(buf, "\treturn %s((%c >> %d) & %#x)", retType, firstChar, shift, (1<<f.nBits)-1) + } + print(buf, "}") + } + + if c.Package != "" { + print(w, "// Code generated by golang.org/x/text/internal/gen/bitfield. DO NOT EDIT.\n") + print(w, "package %s\n", c.Package) + } + + bits := posToBits(pos) + + print(w, "type %s uint%d", c.TypeName, bits) + + if _, err := io.Copy(w, buf); err != nil { + return fmt.Errorf("bitfield: write failed: %v", err) + } + return nil +} diff --git a/vendor/golang.org/x/text/internal/gen/code.go b/vendor/golang.org/x/text/internal/gen/code.go new file mode 100644 index 00000000..25aaa033 --- /dev/null +++ b/vendor/golang.org/x/text/internal/gen/code.go @@ -0,0 +1,371 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gen + +import ( + "bytes" + "encoding/gob" + "fmt" + "hash" + "hash/fnv" + "io" + "log" + "os" + "reflect" + "strings" + "unicode" + "unicode/utf8" +) + +// This file contains utilities for generating code. + +// TODO: other write methods like: +// - slices, maps, types, etc. + +// CodeWriter is a utility for writing structured code. It computes the content +// hash and size of written content. It ensures there are newlines between +// written code blocks. +type CodeWriter struct { + buf bytes.Buffer + Size int + Hash hash.Hash32 // content hash + gob *gob.Encoder + // For comments we skip the usual one-line separator if they are followed by + // a code block. + skipSep bool +} + +func (w *CodeWriter) Write(p []byte) (n int, err error) { + return w.buf.Write(p) +} + +// NewCodeWriter returns a new CodeWriter. +func NewCodeWriter() *CodeWriter { + h := fnv.New32() + return &CodeWriter{Hash: h, gob: gob.NewEncoder(h)} +} + +// WriteGoFile appends the buffer with the total size of all created structures +// and writes it as a Go file to the the given file with the given package name. +func (w *CodeWriter) WriteGoFile(filename, pkg string) { + f, err := os.Create(filename) + if err != nil { + log.Fatalf("Could not create file %s: %v", filename, err) + } + defer f.Close() + if _, err = w.WriteGo(f, pkg, ""); err != nil { + log.Fatalf("Error writing file %s: %v", filename, err) + } +} + +// WriteVersionedGoFile appends the buffer with the total size of all created +// structures and writes it as a Go file to the the given file with the given +// package name and build tags for the current Unicode version, +func (w *CodeWriter) WriteVersionedGoFile(filename, pkg string) { + tags := buildTags() + if tags != "" { + filename = insertVersion(filename, UnicodeVersion()) + } + f, err := os.Create(filename) + if err != nil { + log.Fatalf("Could not create file %s: %v", filename, err) + } + defer f.Close() + if _, err = w.WriteGo(f, pkg, tags); err != nil { + log.Fatalf("Error writing file %s: %v", filename, err) + } +} + +// WriteGo appends the buffer with the total size of all created structures and +// writes it as a Go file to the the given writer with the given package name. +func (w *CodeWriter) WriteGo(out io.Writer, pkg, tags string) (n int, err error) { + sz := w.Size + w.WriteComment("Total table size %d bytes (%dKiB); checksum: %X\n", sz, sz/1024, w.Hash.Sum32()) + defer w.buf.Reset() + return WriteGo(out, pkg, tags, w.buf.Bytes()) +} + +func (w *CodeWriter) printf(f string, x ...interface{}) { + fmt.Fprintf(w, f, x...) +} + +func (w *CodeWriter) insertSep() { + if w.skipSep { + w.skipSep = false + return + } + // Use at least two newlines to ensure a blank space between the previous + // block. WriteGoFile will remove extraneous newlines. + w.printf("\n\n") +} + +// WriteComment writes a comment block. All line starts are prefixed with "//". +// Initial empty lines are gobbled. The indentation for the first line is +// stripped from consecutive lines. +func (w *CodeWriter) WriteComment(comment string, args ...interface{}) { + s := fmt.Sprintf(comment, args...) + s = strings.Trim(s, "\n") + + // Use at least two newlines to ensure a blank space between the previous + // block. WriteGoFile will remove extraneous newlines. + w.printf("\n\n// ") + w.skipSep = true + + // strip first indent level. + sep := "\n" + for ; len(s) > 0 && (s[0] == '\t' || s[0] == ' '); s = s[1:] { + sep += s[:1] + } + + strings.NewReplacer(sep, "\n// ", "\n", "\n// ").WriteString(w, s) + + w.printf("\n") +} + +func (w *CodeWriter) writeSizeInfo(size int) { + w.printf("// Size: %d bytes\n", size) +} + +// WriteConst writes a constant of the given name and value. +func (w *CodeWriter) WriteConst(name string, x interface{}) { + w.insertSep() + v := reflect.ValueOf(x) + + switch v.Type().Kind() { + case reflect.String: + w.printf("const %s %s = ", name, typeName(x)) + w.WriteString(v.String()) + w.printf("\n") + default: + w.printf("const %s = %#v\n", name, x) + } +} + +// WriteVar writes a variable of the given name and value. +func (w *CodeWriter) WriteVar(name string, x interface{}) { + w.insertSep() + v := reflect.ValueOf(x) + oldSize := w.Size + sz := int(v.Type().Size()) + w.Size += sz + + switch v.Type().Kind() { + case reflect.String: + w.printf("var %s %s = ", name, typeName(x)) + w.WriteString(v.String()) + case reflect.Struct: + w.gob.Encode(x) + fallthrough + case reflect.Slice, reflect.Array: + w.printf("var %s = ", name) + w.writeValue(v) + w.writeSizeInfo(w.Size - oldSize) + default: + w.printf("var %s %s = ", name, typeName(x)) + w.gob.Encode(x) + w.writeValue(v) + w.writeSizeInfo(w.Size - oldSize) + } + w.printf("\n") +} + +func (w *CodeWriter) writeValue(v reflect.Value) { + x := v.Interface() + switch v.Kind() { + case reflect.String: + w.WriteString(v.String()) + case reflect.Array: + // Don't double count: callers of WriteArray count on the size being + // added, so we need to discount it here. + w.Size -= int(v.Type().Size()) + w.writeSlice(x, true) + case reflect.Slice: + w.writeSlice(x, false) + case reflect.Struct: + w.printf("%s{\n", typeName(v.Interface())) + t := v.Type() + for i := 0; i < v.NumField(); i++ { + w.printf("%s: ", t.Field(i).Name) + w.writeValue(v.Field(i)) + w.printf(",\n") + } + w.printf("}") + default: + w.printf("%#v", x) + } +} + +// WriteString writes a string literal. +func (w *CodeWriter) WriteString(s string) { + io.WriteString(w.Hash, s) // content hash + w.Size += len(s) + + const maxInline = 40 + if len(s) <= maxInline { + w.printf("%q", s) + return + } + + // We will render the string as a multi-line string. + const maxWidth = 80 - 4 - len(`"`) - len(`" +`) + + // When starting on its own line, go fmt indents line 2+ an extra level. + n, max := maxWidth, maxWidth-4 + + // As per https://golang.org/issue/18078, the compiler has trouble + // compiling the concatenation of many strings, s0 + s1 + s2 + ... + sN, + // for large N. We insert redundant, explicit parentheses to work around + // that, lowering the N at any given step: (s0 + s1 + ... + s63) + (s64 + + // ... + s127) + etc + (etc + ... + sN). + explicitParens, extraComment := len(s) > 128*1024, "" + if explicitParens { + w.printf(`(`) + extraComment = "; the redundant, explicit parens are for https://golang.org/issue/18078" + } + + // Print "" +\n, if a string does not start on its own line. + b := w.buf.Bytes() + if p := len(bytes.TrimRight(b, " \t")); p > 0 && b[p-1] != '\n' { + w.printf("\"\" + // Size: %d bytes%s\n", len(s), extraComment) + n, max = maxWidth, maxWidth + } + + w.printf(`"`) + + for sz, p, nLines := 0, 0, 0; p < len(s); { + var r rune + r, sz = utf8.DecodeRuneInString(s[p:]) + out := s[p : p+sz] + chars := 1 + if !unicode.IsPrint(r) || r == utf8.RuneError || r == '"' { + switch sz { + case 1: + out = fmt.Sprintf("\\x%02x", s[p]) + case 2, 3: + out = fmt.Sprintf("\\u%04x", r) + case 4: + out = fmt.Sprintf("\\U%08x", r) + } + chars = len(out) + } else if r == '\\' { + out = "\\" + string(r) + chars = 2 + } + if n -= chars; n < 0 { + nLines++ + if explicitParens && nLines&63 == 63 { + w.printf("\") + (\"") + } + w.printf("\" +\n\"") + n = max - len(out) + } + w.printf("%s", out) + p += sz + } + w.printf(`"`) + if explicitParens { + w.printf(`)`) + } +} + +// WriteSlice writes a slice value. +func (w *CodeWriter) WriteSlice(x interface{}) { + w.writeSlice(x, false) +} + +// WriteArray writes an array value. +func (w *CodeWriter) WriteArray(x interface{}) { + w.writeSlice(x, true) +} + +func (w *CodeWriter) writeSlice(x interface{}, isArray bool) { + v := reflect.ValueOf(x) + w.gob.Encode(v.Len()) + w.Size += v.Len() * int(v.Type().Elem().Size()) + name := typeName(x) + if isArray { + name = fmt.Sprintf("[%d]%s", v.Len(), name[strings.Index(name, "]")+1:]) + } + if isArray { + w.printf("%s{\n", name) + } else { + w.printf("%s{ // %d elements\n", name, v.Len()) + } + + switch kind := v.Type().Elem().Kind(); kind { + case reflect.String: + for _, s := range x.([]string) { + w.WriteString(s) + w.printf(",\n") + } + case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, + reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64: + // nLine and nBlock are the number of elements per line and block. + nLine, nBlock, format := 8, 64, "%d," + switch kind { + case reflect.Uint8: + format = "%#02x," + case reflect.Uint16: + format = "%#04x," + case reflect.Uint32: + nLine, nBlock, format = 4, 32, "%#08x," + case reflect.Uint, reflect.Uint64: + nLine, nBlock, format = 4, 32, "%#016x," + case reflect.Int8: + nLine = 16 + } + n := nLine + for i := 0; i < v.Len(); i++ { + if i%nBlock == 0 && v.Len() > nBlock { + w.printf("// Entry %X - %X\n", i, i+nBlock-1) + } + x := v.Index(i).Interface() + w.gob.Encode(x) + w.printf(format, x) + if n--; n == 0 { + n = nLine + w.printf("\n") + } + } + w.printf("\n") + case reflect.Struct: + zero := reflect.Zero(v.Type().Elem()).Interface() + for i := 0; i < v.Len(); i++ { + x := v.Index(i).Interface() + w.gob.EncodeValue(v) + if !reflect.DeepEqual(zero, x) { + line := fmt.Sprintf("%#v,\n", x) + line = line[strings.IndexByte(line, '{'):] + w.printf("%d: ", i) + w.printf(line) + } + } + case reflect.Array: + for i := 0; i < v.Len(); i++ { + w.printf("%d: %#v,\n", i, v.Index(i).Interface()) + } + default: + panic("gen: slice elem type not supported") + } + w.printf("}") +} + +// WriteType writes a definition of the type of the given value and returns the +// type name. +func (w *CodeWriter) WriteType(x interface{}) string { + t := reflect.TypeOf(x) + w.printf("type %s struct {\n", t.Name()) + for i := 0; i < t.NumField(); i++ { + w.printf("\t%s %s\n", t.Field(i).Name, t.Field(i).Type) + } + w.printf("}\n") + return t.Name() +} + +// typeName returns the name of the go type of x. +func typeName(x interface{}) string { + t := reflect.ValueOf(x).Type() + return strings.Replace(fmt.Sprint(t), "main.", "", 1) +} diff --git a/vendor/golang.org/x/text/internal/gen/gen.go b/vendor/golang.org/x/text/internal/gen/gen.go new file mode 100644 index 00000000..4c3f7606 --- /dev/null +++ b/vendor/golang.org/x/text/internal/gen/gen.go @@ -0,0 +1,333 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package gen contains common code for the various code generation tools in the +// text repository. Its usage ensures consistency between tools. +// +// This package defines command line flags that are common to most generation +// tools. The flags allow for specifying specific Unicode and CLDR versions +// in the public Unicode data repository (http://www.unicode.org/Public). +// +// A local Unicode data mirror can be set through the flag -local or the +// environment variable UNICODE_DIR. The former takes precedence. The local +// directory should follow the same structure as the public repository. +// +// IANA data can also optionally be mirrored by putting it in the iana directory +// rooted at the top of the local mirror. Beware, though, that IANA data is not +// versioned. So it is up to the developer to use the right version. +package gen // import "golang.org/x/text/internal/gen" + +import ( + "bytes" + "flag" + "fmt" + "go/build" + "go/format" + "io" + "io/ioutil" + "log" + "net/http" + "os" + "path" + "path/filepath" + "strings" + "sync" + "unicode" + + "golang.org/x/text/unicode/cldr" +) + +var ( + url = flag.String("url", + "http://www.unicode.org/Public", + "URL of Unicode database directory") + iana = flag.String("iana", + "http://www.iana.org", + "URL of the IANA repository") + unicodeVersion = flag.String("unicode", + getEnv("UNICODE_VERSION", unicode.Version), + "unicode version to use") + cldrVersion = flag.String("cldr", + getEnv("CLDR_VERSION", cldr.Version), + "cldr version to use") +) + +func getEnv(name, def string) string { + if v := os.Getenv(name); v != "" { + return v + } + return def +} + +// Init performs common initialization for a gen command. It parses the flags +// and sets up the standard logging parameters. +func Init() { + log.SetPrefix("") + log.SetFlags(log.Lshortfile) + flag.Parse() +} + +const header = `// Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT. + +` + +// UnicodeVersion reports the requested Unicode version. +func UnicodeVersion() string { + return *unicodeVersion +} + +// CLDRVersion reports the requested CLDR version. +func CLDRVersion() string { + return *cldrVersion +} + +var tags = []struct{ version, buildTags string }{ + {"10.0.0", "go1.10"}, + {"", "!go1.10"}, +} + +// buildTags reports the build tags used for the current Unicode version. +func buildTags() string { + v := UnicodeVersion() + for _, x := range tags { + // We should do a numeric comparison, but including the collate package + // would create an import cycle. We approximate it by assuming that + // longer version strings are later. + if len(x.version) <= len(v) { + return x.buildTags + } + if len(x.version) == len(v) && x.version <= v { + return x.buildTags + } + } + return tags[0].buildTags +} + +// IsLocal reports whether data files are available locally. +func IsLocal() bool { + dir, err := localReadmeFile() + if err != nil { + return false + } + if _, err = os.Stat(dir); err != nil { + return false + } + return true +} + +// OpenUCDFile opens the requested UCD file. The file is specified relative to +// the public Unicode root directory. It will call log.Fatal if there are any +// errors. +func OpenUCDFile(file string) io.ReadCloser { + return openUnicode(path.Join(*unicodeVersion, "ucd", file)) +} + +// OpenCLDRCoreZip opens the CLDR core zip file. It will call log.Fatal if there +// are any errors. +func OpenCLDRCoreZip() io.ReadCloser { + return OpenUnicodeFile("cldr", *cldrVersion, "core.zip") +} + +// OpenUnicodeFile opens the requested file of the requested category from the +// root of the Unicode data archive. The file is specified relative to the +// public Unicode root directory. If version is "", it will use the default +// Unicode version. It will call log.Fatal if there are any errors. +func OpenUnicodeFile(category, version, file string) io.ReadCloser { + if version == "" { + version = UnicodeVersion() + } + return openUnicode(path.Join(category, version, file)) +} + +// OpenIANAFile opens the requested IANA file. The file is specified relative +// to the IANA root, which is typically either http://www.iana.org or the +// iana directory in the local mirror. It will call log.Fatal if there are any +// errors. +func OpenIANAFile(path string) io.ReadCloser { + return Open(*iana, "iana", path) +} + +var ( + dirMutex sync.Mutex + localDir string +) + +const permissions = 0755 + +func localReadmeFile() (string, error) { + p, err := build.Import("golang.org/x/text", "", build.FindOnly) + if err != nil { + return "", fmt.Errorf("Could not locate package: %v", err) + } + return filepath.Join(p.Dir, "DATA", "README"), nil +} + +func getLocalDir() string { + dirMutex.Lock() + defer dirMutex.Unlock() + + readme, err := localReadmeFile() + if err != nil { + log.Fatal(err) + } + dir := filepath.Dir(readme) + if _, err := os.Stat(readme); err != nil { + if err := os.MkdirAll(dir, permissions); err != nil { + log.Fatalf("Could not create directory: %v", err) + } + ioutil.WriteFile(readme, []byte(readmeTxt), permissions) + } + return dir +} + +const readmeTxt = `Generated by golang.org/x/text/internal/gen. DO NOT EDIT. + +This directory contains downloaded files used to generate the various tables +in the golang.org/x/text subrepo. + +Note that the language subtag repo (iana/assignments/language-subtag-registry) +and all other times in the iana subdirectory are not versioned and will need +to be periodically manually updated. The easiest way to do this is to remove +the entire iana directory. This is mostly of concern when updating the language +package. +` + +// Open opens subdir/path if a local directory is specified and the file exists, +// where subdir is a directory relative to the local root, or fetches it from +// urlRoot/path otherwise. It will call log.Fatal if there are any errors. +func Open(urlRoot, subdir, path string) io.ReadCloser { + file := filepath.Join(getLocalDir(), subdir, filepath.FromSlash(path)) + return open(file, urlRoot, path) +} + +func openUnicode(path string) io.ReadCloser { + file := filepath.Join(getLocalDir(), filepath.FromSlash(path)) + return open(file, *url, path) +} + +// TODO: automatically periodically update non-versioned files. + +func open(file, urlRoot, path string) io.ReadCloser { + if f, err := os.Open(file); err == nil { + return f + } + r := get(urlRoot, path) + defer r.Close() + b, err := ioutil.ReadAll(r) + if err != nil { + log.Fatalf("Could not download file: %v", err) + } + os.MkdirAll(filepath.Dir(file), permissions) + if err := ioutil.WriteFile(file, b, permissions); err != nil { + log.Fatalf("Could not create file: %v", err) + } + return ioutil.NopCloser(bytes.NewReader(b)) +} + +func get(root, path string) io.ReadCloser { + url := root + "/" + path + fmt.Printf("Fetching %s...", url) + defer fmt.Println(" done.") + resp, err := http.Get(url) + if err != nil { + log.Fatalf("HTTP GET: %v", err) + } + if resp.StatusCode != 200 { + log.Fatalf("Bad GET status for %q: %q", url, resp.Status) + } + return resp.Body +} + +// TODO: use Write*Version in all applicable packages. + +// WriteUnicodeVersion writes a constant for the Unicode version from which the +// tables are generated. +func WriteUnicodeVersion(w io.Writer) { + fmt.Fprintf(w, "// UnicodeVersion is the Unicode version from which the tables in this package are derived.\n") + fmt.Fprintf(w, "const UnicodeVersion = %q\n\n", UnicodeVersion()) +} + +// WriteCLDRVersion writes a constant for the CLDR version from which the +// tables are generated. +func WriteCLDRVersion(w io.Writer) { + fmt.Fprintf(w, "// CLDRVersion is the CLDR version from which the tables in this package are derived.\n") + fmt.Fprintf(w, "const CLDRVersion = %q\n\n", CLDRVersion()) +} + +// WriteGoFile prepends a standard file comment and package statement to the +// given bytes, applies gofmt, and writes them to a file with the given name. +// It will call log.Fatal if there are any errors. +func WriteGoFile(filename, pkg string, b []byte) { + w, err := os.Create(filename) + if err != nil { + log.Fatalf("Could not create file %s: %v", filename, err) + } + defer w.Close() + if _, err = WriteGo(w, pkg, "", b); err != nil { + log.Fatalf("Error writing file %s: %v", filename, err) + } +} + +func insertVersion(filename, version string) string { + suffix := ".go" + if strings.HasSuffix(filename, "_test.go") { + suffix = "_test.go" + } + return fmt.Sprint(filename[:len(filename)-len(suffix)], version, suffix) +} + +// WriteVersionedGoFile prepends a standard file comment, adds build tags to +// version the file for the current Unicode version, and package statement to +// the given bytes, applies gofmt, and writes them to a file with the given +// name. It will call log.Fatal if there are any errors. +func WriteVersionedGoFile(filename, pkg string, b []byte) { + tags := buildTags() + if tags != "" { + filename = insertVersion(filename, UnicodeVersion()) + } + w, err := os.Create(filename) + if err != nil { + log.Fatalf("Could not create file %s: %v", filename, err) + } + defer w.Close() + if _, err = WriteGo(w, pkg, tags, b); err != nil { + log.Fatalf("Error writing file %s: %v", filename, err) + } +} + +// WriteGo prepends a standard file comment and package statement to the given +// bytes, applies gofmt, and writes them to w. +func WriteGo(w io.Writer, pkg, tags string, b []byte) (n int, err error) { + src := []byte(header) + if tags != "" { + src = append(src, fmt.Sprintf("// +build %s\n\n", tags)...) + } + src = append(src, fmt.Sprintf("package %s\n\n", pkg)...) + src = append(src, b...) + formatted, err := format.Source(src) + if err != nil { + // Print the generated code even in case of an error so that the + // returned error can be meaningfully interpreted. + n, _ = w.Write(src) + return n, err + } + return w.Write(formatted) +} + +// Repackage rewrites a Go file from belonging to package main to belonging to +// the given package. +func Repackage(inFile, outFile, pkg string) { + src, err := ioutil.ReadFile(inFile) + if err != nil { + log.Fatalf("reading %s: %v", inFile, err) + } + const toDelete = "package main\n\n" + i := bytes.Index(src, []byte(toDelete)) + if i < 0 { + log.Fatalf("Could not find %q in %s.", toDelete, inFile) + } + w := &bytes.Buffer{} + w.Write(src[i+len(toDelete):]) + WriteGoFile(outFile, pkg, w.Bytes()) +} diff --git a/vendor/golang.org/x/text/internal/triegen/LICENSE b/vendor/golang.org/x/text/internal/triegen/LICENSE new file mode 100644 index 00000000..6a66aea5 --- /dev/null +++ b/vendor/golang.org/x/text/internal/triegen/LICENSE @@ -0,0 +1,27 @@ +Copyright (c) 2009 The Go Authors. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/vendor/golang.org/x/text/internal/triegen/compact.go b/vendor/golang.org/x/text/internal/triegen/compact.go new file mode 100644 index 00000000..397b975c --- /dev/null +++ b/vendor/golang.org/x/text/internal/triegen/compact.go @@ -0,0 +1,58 @@ +// Copyright 2014 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package triegen + +// This file defines Compacter and its implementations. + +import "io" + +// A Compacter generates an alternative, more space-efficient way to store a +// trie value block. A trie value block holds all possible values for the last +// byte of a UTF-8 encoded rune. Excluding ASCII characters, a trie value block +// always has 64 values, as a UTF-8 encoding ends with a byte in [0x80, 0xC0). +type Compacter interface { + // Size returns whether the Compacter could encode the given block as well + // as its size in case it can. len(v) is always 64. + Size(v []uint64) (sz int, ok bool) + + // Store stores the block using the Compacter's compression method. + // It returns a handle with which the block can be retrieved. + // len(v) is always 64. + Store(v []uint64) uint32 + + // Print writes the data structures associated to the given store to w. + Print(w io.Writer) error + + // Handler returns the name of a function that gets called during trie + // lookup for blocks generated by the Compacter. The function should be of + // the form func (n uint32, b byte) uint64, where n is the index returned by + // the Compacter's Store method and b is the last byte of the UTF-8 + // encoding, where 0x80 <= b < 0xC0, for which to do the lookup in the + // block. + Handler() string +} + +// simpleCompacter is the default Compacter used by builder. It implements a +// normal trie block. +type simpleCompacter builder + +func (b *simpleCompacter) Size([]uint64) (sz int, ok bool) { + return blockSize * b.ValueSize, true +} + +func (b *simpleCompacter) Store(v []uint64) uint32 { + h := uint32(len(b.ValueBlocks) - blockOffset) + b.ValueBlocks = append(b.ValueBlocks, v) + return h +} + +func (b *simpleCompacter) Print(io.Writer) error { + // Structures are printed in print.go. + return nil +} + +func (b *simpleCompacter) Handler() string { + panic("Handler should be special-cased for this Compacter") +} diff --git a/vendor/golang.org/x/text/internal/triegen/print.go b/vendor/golang.org/x/text/internal/triegen/print.go new file mode 100644 index 00000000..8d9f120b --- /dev/null +++ b/vendor/golang.org/x/text/internal/triegen/print.go @@ -0,0 +1,251 @@ +// Copyright 2014 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package triegen + +import ( + "bytes" + "fmt" + "io" + "strings" + "text/template" +) + +// print writes all the data structures as well as the code necessary to use the +// trie to w. +func (b *builder) print(w io.Writer) error { + b.Stats.NValueEntries = len(b.ValueBlocks) * blockSize + b.Stats.NValueBytes = len(b.ValueBlocks) * blockSize * b.ValueSize + b.Stats.NIndexEntries = len(b.IndexBlocks) * blockSize + b.Stats.NIndexBytes = len(b.IndexBlocks) * blockSize * b.IndexSize + b.Stats.NHandleBytes = len(b.Trie) * 2 * b.IndexSize + + // If we only have one root trie, all starter blocks are at position 0 and + // we can access the arrays directly. + if len(b.Trie) == 1 { + // At this point we cannot refer to the generated tables directly. + b.ASCIIBlock = b.Name + "Values" + b.StarterBlock = b.Name + "Index" + } else { + // Otherwise we need to have explicit starter indexes in the trie + // structure. + b.ASCIIBlock = "t.ascii" + b.StarterBlock = "t.utf8Start" + } + + b.SourceType = "[]byte" + if err := lookupGen.Execute(w, b); err != nil { + return err + } + + b.SourceType = "string" + if err := lookupGen.Execute(w, b); err != nil { + return err + } + + if err := trieGen.Execute(w, b); err != nil { + return err + } + + for _, c := range b.Compactions { + if err := c.c.Print(w); err != nil { + return err + } + } + + return nil +} + +func printValues(n int, values []uint64) string { + w := &bytes.Buffer{} + boff := n * blockSize + fmt.Fprintf(w, "\t// Block %#x, offset %#x", n, boff) + var newline bool + for i, v := range values { + if i%6 == 0 { + newline = true + } + if v != 0 { + if newline { + fmt.Fprintf(w, "\n") + newline = false + } + fmt.Fprintf(w, "\t%#02x:%#04x, ", boff+i, v) + } + } + return w.String() +} + +func printIndex(b *builder, nr int, n *node) string { + w := &bytes.Buffer{} + boff := nr * blockSize + fmt.Fprintf(w, "\t// Block %#x, offset %#x", nr, boff) + var newline bool + for i, c := range n.children { + if i%8 == 0 { + newline = true + } + if c != nil { + v := b.Compactions[c.index.compaction].Offset + uint32(c.index.index) + if v != 0 { + if newline { + fmt.Fprintf(w, "\n") + newline = false + } + fmt.Fprintf(w, "\t%#02x:%#02x, ", boff+i, v) + } + } + } + return w.String() +} + +var ( + trieGen = template.Must(template.New("trie").Funcs(template.FuncMap{ + "printValues": printValues, + "printIndex": printIndex, + "title": strings.Title, + "dec": func(x int) int { return x - 1 }, + "psize": func(n int) string { + return fmt.Sprintf("%d bytes (%.2f KiB)", n, float64(n)/1024) + }, + }).Parse(trieTemplate)) + lookupGen = template.Must(template.New("lookup").Parse(lookupTemplate)) +) + +// TODO: consider the return type of lookup. It could be uint64, even if the +// internal value type is smaller. We will have to verify this with the +// performance of unicode/norm, which is very sensitive to such changes. +const trieTemplate = `{{$b := .}}{{$multi := gt (len .Trie) 1}} +// {{.Name}}Trie. Total size: {{psize .Size}}. Checksum: {{printf "%08x" .Checksum}}. +type {{.Name}}Trie struct { {{if $multi}} + ascii []{{.ValueType}} // index for ASCII bytes + utf8Start []{{.IndexType}} // index for UTF-8 bytes >= 0xC0 +{{end}}} + +func new{{title .Name}}Trie(i int) *{{.Name}}Trie { {{if $multi}} + h := {{.Name}}TrieHandles[i] + return &{{.Name}}Trie{ {{.Name}}Values[uint32(h.ascii)<<6:], {{.Name}}Index[uint32(h.multi)<<6:] } +} + +type {{.Name}}TrieHandle struct { + ascii, multi {{.IndexType}} +} + +// {{.Name}}TrieHandles: {{len .Trie}} handles, {{.Stats.NHandleBytes}} bytes +var {{.Name}}TrieHandles = [{{len .Trie}}]{{.Name}}TrieHandle{ +{{range .Trie}} { {{.ASCIIIndex}}, {{.StarterIndex}} }, // {{printf "%08x" .Checksum}}: {{.Name}} +{{end}}}{{else}} + return &{{.Name}}Trie{} +} +{{end}} +// lookupValue determines the type of block n and looks up the value for b. +func (t *{{.Name}}Trie) lookupValue(n uint32, b byte) {{.ValueType}}{{$last := dec (len .Compactions)}} { + switch { {{range $i, $c := .Compactions}} + {{if eq $i $last}}default{{else}}case n < {{$c.Cutoff}}{{end}}:{{if ne $i 0}} + n -= {{$c.Offset}}{{end}} + return {{print $b.ValueType}}({{$c.Handler}}){{end}} + } +} + +// {{.Name}}Values: {{len .ValueBlocks}} blocks, {{.Stats.NValueEntries}} entries, {{.Stats.NValueBytes}} bytes +// The third block is the zero block. +var {{.Name}}Values = [{{.Stats.NValueEntries}}]{{.ValueType}} { +{{range $i, $v := .ValueBlocks}}{{printValues $i $v}} +{{end}}} + +// {{.Name}}Index: {{len .IndexBlocks}} blocks, {{.Stats.NIndexEntries}} entries, {{.Stats.NIndexBytes}} bytes +// Block 0 is the zero block. +var {{.Name}}Index = [{{.Stats.NIndexEntries}}]{{.IndexType}} { +{{range $i, $v := .IndexBlocks}}{{printIndex $b $i $v}} +{{end}}} +` + +// TODO: consider allowing zero-length strings after evaluating performance with +// unicode/norm. +const lookupTemplate = ` +// lookup{{if eq .SourceType "string"}}String{{end}} returns the trie value for the first UTF-8 encoding in s and +// the width in bytes of this encoding. The size will be 0 if s does not +// hold enough bytes to complete the encoding. len(s) must be greater than 0. +func (t *{{.Name}}Trie) lookup{{if eq .SourceType "string"}}String{{end}}(s {{.SourceType}}) (v {{.ValueType}}, sz int) { + c0 := s[0] + switch { + case c0 < 0x80: // is ASCII + return {{.ASCIIBlock}}[c0], 1 + case c0 < 0xC2: + return 0, 1 // Illegal UTF-8: not a starter, not ASCII. + case c0 < 0xE0: // 2-byte UTF-8 + if len(s) < 2 { + return 0, 0 + } + i := {{.StarterBlock}}[c0] + c1 := s[1] + if c1 < 0x80 || 0xC0 <= c1 { + return 0, 1 // Illegal UTF-8: not a continuation byte. + } + return t.lookupValue(uint32(i), c1), 2 + case c0 < 0xF0: // 3-byte UTF-8 + if len(s) < 3 { + return 0, 0 + } + i := {{.StarterBlock}}[c0] + c1 := s[1] + if c1 < 0x80 || 0xC0 <= c1 { + return 0, 1 // Illegal UTF-8: not a continuation byte. + } + o := uint32(i)<<6 + uint32(c1) + i = {{.Name}}Index[o] + c2 := s[2] + if c2 < 0x80 || 0xC0 <= c2 { + return 0, 2 // Illegal UTF-8: not a continuation byte. + } + return t.lookupValue(uint32(i), c2), 3 + case c0 < 0xF8: // 4-byte UTF-8 + if len(s) < 4 { + return 0, 0 + } + i := {{.StarterBlock}}[c0] + c1 := s[1] + if c1 < 0x80 || 0xC0 <= c1 { + return 0, 1 // Illegal UTF-8: not a continuation byte. + } + o := uint32(i)<<6 + uint32(c1) + i = {{.Name}}Index[o] + c2 := s[2] + if c2 < 0x80 || 0xC0 <= c2 { + return 0, 2 // Illegal UTF-8: not a continuation byte. + } + o = uint32(i)<<6 + uint32(c2) + i = {{.Name}}Index[o] + c3 := s[3] + if c3 < 0x80 || 0xC0 <= c3 { + return 0, 3 // Illegal UTF-8: not a continuation byte. + } + return t.lookupValue(uint32(i), c3), 4 + } + // Illegal rune + return 0, 1 +} + +// lookup{{if eq .SourceType "string"}}String{{end}}Unsafe returns the trie value for the first UTF-8 encoding in s. +// s must start with a full and valid UTF-8 encoded rune. +func (t *{{.Name}}Trie) lookup{{if eq .SourceType "string"}}String{{end}}Unsafe(s {{.SourceType}}) {{.ValueType}} { + c0 := s[0] + if c0 < 0x80 { // is ASCII + return {{.ASCIIBlock}}[c0] + } + i := {{.StarterBlock}}[c0] + if c0 < 0xE0 { // 2-byte UTF-8 + return t.lookupValue(uint32(i), s[1]) + } + i = {{.Name}}Index[uint32(i)<<6+uint32(s[1])] + if c0 < 0xF0 { // 3-byte UTF-8 + return t.lookupValue(uint32(i), s[2]) + } + i = {{.Name}}Index[uint32(i)<<6+uint32(s[2])] + if c0 < 0xF8 { // 4-byte UTF-8 + return t.lookupValue(uint32(i), s[3]) + } + return 0 +} +` diff --git a/vendor/golang.org/x/text/internal/triegen/triegen.go b/vendor/golang.org/x/text/internal/triegen/triegen.go new file mode 100644 index 00000000..adb01081 --- /dev/null +++ b/vendor/golang.org/x/text/internal/triegen/triegen.go @@ -0,0 +1,494 @@ +// Copyright 2014 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package triegen implements a code generator for a trie for associating +// unsigned integer values with UTF-8 encoded runes. +// +// Many of the go.text packages use tries for storing per-rune information. A +// trie is especially useful if many of the runes have the same value. If this +// is the case, many blocks can be expected to be shared allowing for +// information on many runes to be stored in little space. +// +// As most of the lookups are done directly on []byte slices, the tries use the +// UTF-8 bytes directly for the lookup. This saves a conversion from UTF-8 to +// runes and contributes a little bit to better performance. It also naturally +// provides a fast path for ASCII. +// +// Space is also an issue. There are many code points defined in Unicode and as +// a result tables can get quite large. So every byte counts. The triegen +// package automatically chooses the smallest integer values to represent the +// tables. Compacters allow further compression of the trie by allowing for +// alternative representations of individual trie blocks. +// +// triegen allows generating multiple tries as a single structure. This is +// useful when, for example, one wants to generate tries for several languages +// that have a lot of values in common. Some existing libraries for +// internationalization store all per-language data as a dynamically loadable +// chunk. The go.text packages are designed with the assumption that the user +// typically wants to compile in support for all supported languages, in line +// with the approach common to Go to create a single standalone binary. The +// multi-root trie approach can give significant storage savings in this +// scenario. +// +// triegen generates both tables and code. The code is optimized to use the +// automatically chosen data types. The following code is generated for a Trie +// or multiple Tries named "foo": +// - type fooTrie +// The trie type. +// +// - func newFooTrie(x int) *fooTrie +// Trie constructor, where x is the index of the trie passed to Gen. +// +// - func (t *fooTrie) lookup(s []byte) (v uintX, sz int) +// The lookup method, where uintX is automatically chosen. +// +// - func lookupString, lookupUnsafe and lookupStringUnsafe +// Variants of the above. +// +// - var fooValues and fooIndex and any tables generated by Compacters. +// The core trie data. +// +// - var fooTrieHandles +// Indexes of starter blocks in case of multiple trie roots. +// +// It is recommended that users test the generated trie by checking the returned +// value for every rune. Such exhaustive tests are possible as the the number of +// runes in Unicode is limited. +package triegen // import "golang.org/x/text/internal/triegen" + +// TODO: Arguably, the internally optimized data types would not have to be +// exposed in the generated API. We could also investigate not generating the +// code, but using it through a package. We would have to investigate the impact +// on performance of making such change, though. For packages like unicode/norm, +// small changes like this could tank performance. + +import ( + "encoding/binary" + "fmt" + "hash/crc64" + "io" + "log" + "unicode/utf8" +) + +// builder builds a set of tries for associating values with runes. The set of +// tries can share common index and value blocks. +type builder struct { + Name string + + // ValueType is the type of the trie values looked up. + ValueType string + + // ValueSize is the byte size of the ValueType. + ValueSize int + + // IndexType is the type of trie index values used for all UTF-8 bytes of + // a rune except the last one. + IndexType string + + // IndexSize is the byte size of the IndexType. + IndexSize int + + // SourceType is used when generating the lookup functions. If the user + // requests StringSupport, all lookup functions will be generated for + // string input as well. + SourceType string + + Trie []*Trie + + IndexBlocks []*node + ValueBlocks [][]uint64 + Compactions []compaction + Checksum uint64 + + ASCIIBlock string + StarterBlock string + + indexBlockIdx map[uint64]int + valueBlockIdx map[uint64]nodeIndex + asciiBlockIdx map[uint64]int + + // Stats are used to fill out the template. + Stats struct { + NValueEntries int + NValueBytes int + NIndexEntries int + NIndexBytes int + NHandleBytes int + } + + err error +} + +// A nodeIndex encodes the index of a node, which is defined by the compaction +// which stores it and an index within the compaction. For internal nodes, the +// compaction is always 0. +type nodeIndex struct { + compaction int + index int +} + +// compaction keeps track of stats used for the compaction. +type compaction struct { + c Compacter + blocks []*node + maxHandle uint32 + totalSize int + + // Used by template-based generator and thus exported. + Cutoff uint32 + Offset uint32 + Handler string +} + +func (b *builder) setError(err error) { + if b.err == nil { + b.err = err + } +} + +// An Option can be passed to Gen. +type Option func(b *builder) error + +// Compact configures the trie generator to use the given Compacter. +func Compact(c Compacter) Option { + return func(b *builder) error { + b.Compactions = append(b.Compactions, compaction{ + c: c, + Handler: c.Handler() + "(n, b)"}) + return nil + } +} + +// Gen writes Go code for a shared trie lookup structure to w for the given +// Tries. The generated trie type will be called nameTrie. newNameTrie(x) will +// return the *nameTrie for tries[x]. A value can be looked up by using one of +// the various lookup methods defined on nameTrie. It returns the table size of +// the generated trie. +func Gen(w io.Writer, name string, tries []*Trie, opts ...Option) (sz int, err error) { + // The index contains two dummy blocks, followed by the zero block. The zero + // block is at offset 0x80, so that the offset for the zero block for + // continuation bytes is 0. + b := &builder{ + Name: name, + Trie: tries, + IndexBlocks: []*node{{}, {}, {}}, + Compactions: []compaction{{ + Handler: name + "Values[n<<6+uint32(b)]", + }}, + // The 0 key in indexBlockIdx and valueBlockIdx is the hash of the zero + // block. + indexBlockIdx: map[uint64]int{0: 0}, + valueBlockIdx: map[uint64]nodeIndex{0: {}}, + asciiBlockIdx: map[uint64]int{}, + } + b.Compactions[0].c = (*simpleCompacter)(b) + + for _, f := range opts { + if err := f(b); err != nil { + return 0, err + } + } + b.build() + if b.err != nil { + return 0, b.err + } + if err = b.print(w); err != nil { + return 0, err + } + return b.Size(), nil +} + +// A Trie represents a single root node of a trie. A builder may build several +// overlapping tries at once. +type Trie struct { + root *node + + hiddenTrie +} + +// hiddenTrie contains values we want to be visible to the template generator, +// but hidden from the API documentation. +type hiddenTrie struct { + Name string + Checksum uint64 + ASCIIIndex int + StarterIndex int +} + +// NewTrie returns a new trie root. +func NewTrie(name string) *Trie { + return &Trie{ + &node{ + children: make([]*node, blockSize), + values: make([]uint64, utf8.RuneSelf), + }, + hiddenTrie{Name: name}, + } +} + +// Gen is a convenience wrapper around the Gen func passing t as the only trie +// and uses the name passed to NewTrie. It returns the size of the generated +// tables. +func (t *Trie) Gen(w io.Writer, opts ...Option) (sz int, err error) { + return Gen(w, t.Name, []*Trie{t}, opts...) +} + +// node is a node of the intermediate trie structure. +type node struct { + // children holds this node's children. It is always of length 64. + // A child node may be nil. + children []*node + + // values contains the values of this node. If it is non-nil, this node is + // either a root or leaf node: + // For root nodes, len(values) == 128 and it maps the bytes in [0x00, 0x7F]. + // For leaf nodes, len(values) == 64 and it maps the bytes in [0x80, 0xBF]. + values []uint64 + + index nodeIndex +} + +// Insert associates value with the given rune. Insert will panic if a non-zero +// value is passed for an invalid rune. +func (t *Trie) Insert(r rune, value uint64) { + if value == 0 { + return + } + s := string(r) + if []rune(s)[0] != r && value != 0 { + // Note: The UCD tables will always assign what amounts to a zero value + // to a surrogate. Allowing a zero value for an illegal rune allows + // users to iterate over [0..MaxRune] without having to explicitly + // exclude surrogates, which would be tedious. + panic(fmt.Sprintf("triegen: non-zero value for invalid rune %U", r)) + } + if len(s) == 1 { + // It is a root node value (ASCII). + t.root.values[s[0]] = value + return + } + + n := t.root + for ; len(s) > 1; s = s[1:] { + if n.children == nil { + n.children = make([]*node, blockSize) + } + p := s[0] % blockSize + c := n.children[p] + if c == nil { + c = &node{} + n.children[p] = c + } + if len(s) > 2 && c.values != nil { + log.Fatalf("triegen: insert(%U): found internal node with values", r) + } + n = c + } + if n.values == nil { + n.values = make([]uint64, blockSize) + } + if n.children != nil { + log.Fatalf("triegen: insert(%U): found leaf node that also has child nodes", r) + } + n.values[s[0]-0x80] = value +} + +// Size returns the number of bytes the generated trie will take to store. It +// needs to be exported as it is used in the templates. +func (b *builder) Size() int { + // Index blocks. + sz := len(b.IndexBlocks) * blockSize * b.IndexSize + + // Skip the first compaction, which represents the normal value blocks, as + // its totalSize does not account for the ASCII blocks, which are managed + // separately. + sz += len(b.ValueBlocks) * blockSize * b.ValueSize + for _, c := range b.Compactions[1:] { + sz += c.totalSize + } + + // TODO: this computation does not account for the fixed overhead of a using + // a compaction, either code or data. As for data, though, the typical + // overhead of data is in the order of bytes (2 bytes for cases). Further, + // the savings of using a compaction should anyway be substantial for it to + // be worth it. + + // For multi-root tries, we also need to account for the handles. + if len(b.Trie) > 1 { + sz += 2 * b.IndexSize * len(b.Trie) + } + return sz +} + +func (b *builder) build() { + // Compute the sizes of the values. + var vmax uint64 + for _, t := range b.Trie { + vmax = maxValue(t.root, vmax) + } + b.ValueType, b.ValueSize = getIntType(vmax) + + // Compute all block allocations. + // TODO: first compute the ASCII blocks for all tries and then the other + // nodes. ASCII blocks are more restricted in placement, as they require two + // blocks to be placed consecutively. Processing them first may improve + // sharing (at least one zero block can be expected to be saved.) + for _, t := range b.Trie { + b.Checksum += b.buildTrie(t) + } + + // Compute the offsets for all the Compacters. + offset := uint32(0) + for i := range b.Compactions { + c := &b.Compactions[i] + c.Offset = offset + offset += c.maxHandle + 1 + c.Cutoff = offset + } + + // Compute the sizes of indexes. + // TODO: different byte positions could have different sizes. So far we have + // not found a case where this is beneficial. + imax := uint64(b.Compactions[len(b.Compactions)-1].Cutoff) + for _, ib := range b.IndexBlocks { + if x := uint64(ib.index.index); x > imax { + imax = x + } + } + b.IndexType, b.IndexSize = getIntType(imax) +} + +func maxValue(n *node, max uint64) uint64 { + if n == nil { + return max + } + for _, c := range n.children { + max = maxValue(c, max) + } + for _, v := range n.values { + if max < v { + max = v + } + } + return max +} + +func getIntType(v uint64) (string, int) { + switch { + case v < 1<<8: + return "uint8", 1 + case v < 1<<16: + return "uint16", 2 + case v < 1<<32: + return "uint32", 4 + } + return "uint64", 8 +} + +const ( + blockSize = 64 + + // Subtract two blocks to offset 0x80, the first continuation byte. + blockOffset = 2 + + // Subtract three blocks to offset 0xC0, the first non-ASCII starter. + rootBlockOffset = 3 +) + +var crcTable = crc64.MakeTable(crc64.ISO) + +func (b *builder) buildTrie(t *Trie) uint64 { + n := t.root + + // Get the ASCII offset. For the first trie, the ASCII block will be at + // position 0. + hasher := crc64.New(crcTable) + binary.Write(hasher, binary.BigEndian, n.values) + hash := hasher.Sum64() + + v, ok := b.asciiBlockIdx[hash] + if !ok { + v = len(b.ValueBlocks) + b.asciiBlockIdx[hash] = v + + b.ValueBlocks = append(b.ValueBlocks, n.values[:blockSize], n.values[blockSize:]) + if v == 0 { + // Add the zero block at position 2 so that it will be assigned a + // zero reference in the lookup blocks. + // TODO: always do this? This would allow us to remove a check from + // the trie lookup, but at the expense of extra space. Analyze + // performance for unicode/norm. + b.ValueBlocks = append(b.ValueBlocks, make([]uint64, blockSize)) + } + } + t.ASCIIIndex = v + + // Compute remaining offsets. + t.Checksum = b.computeOffsets(n, true) + // We already subtracted the normal blockOffset from the index. Subtract the + // difference for starter bytes. + t.StarterIndex = n.index.index - (rootBlockOffset - blockOffset) + return t.Checksum +} + +func (b *builder) computeOffsets(n *node, root bool) uint64 { + // For the first trie, the root lookup block will be at position 3, which is + // the offset for UTF-8 non-ASCII starter bytes. + first := len(b.IndexBlocks) == rootBlockOffset + if first { + b.IndexBlocks = append(b.IndexBlocks, n) + } + + // We special-case the cases where all values recursively are 0. This allows + // for the use of a zero block to which all such values can be directed. + hash := uint64(0) + if n.children != nil || n.values != nil { + hasher := crc64.New(crcTable) + for _, c := range n.children { + var v uint64 + if c != nil { + v = b.computeOffsets(c, false) + } + binary.Write(hasher, binary.BigEndian, v) + } + binary.Write(hasher, binary.BigEndian, n.values) + hash = hasher.Sum64() + } + + if first { + b.indexBlockIdx[hash] = rootBlockOffset - blockOffset + } + + // Compacters don't apply to internal nodes. + if n.children != nil { + v, ok := b.indexBlockIdx[hash] + if !ok { + v = len(b.IndexBlocks) - blockOffset + b.IndexBlocks = append(b.IndexBlocks, n) + b.indexBlockIdx[hash] = v + } + n.index = nodeIndex{0, v} + } else { + h, ok := b.valueBlockIdx[hash] + if !ok { + bestI, bestSize := 0, blockSize*b.ValueSize + for i, c := range b.Compactions[1:] { + if sz, ok := c.c.Size(n.values); ok && bestSize > sz { + bestI, bestSize = i+1, sz + } + } + c := &b.Compactions[bestI] + c.totalSize += bestSize + v := c.c.Store(n.values) + if c.maxHandle < v { + c.maxHandle = v + } + h = nodeIndex{bestI, int(v)} + b.valueBlockIdx[hash] = h + } + n.index = h + } + return hash +} diff --git a/vendor/golang.org/x/text/internal/ucd/LICENSE b/vendor/golang.org/x/text/internal/ucd/LICENSE new file mode 100644 index 00000000..6a66aea5 --- /dev/null +++ b/vendor/golang.org/x/text/internal/ucd/LICENSE @@ -0,0 +1,27 @@ +Copyright (c) 2009 The Go Authors. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/vendor/golang.org/x/text/internal/ucd/ucd.go b/vendor/golang.org/x/text/internal/ucd/ucd.go new file mode 100644 index 00000000..8c45b5f3 --- /dev/null +++ b/vendor/golang.org/x/text/internal/ucd/ucd.go @@ -0,0 +1,371 @@ +// Copyright 2014 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package ucd provides a parser for Unicode Character Database files, the +// format of which is defined in http://www.unicode.org/reports/tr44/. See +// http://www.unicode.org/Public/UCD/latest/ucd/ for example files. +// +// It currently does not support substitutions of missing fields. +package ucd // import "golang.org/x/text/internal/ucd" + +import ( + "bufio" + "errors" + "fmt" + "io" + "log" + "regexp" + "strconv" + "strings" +) + +// UnicodeData.txt fields. +const ( + CodePoint = iota + Name + GeneralCategory + CanonicalCombiningClass + BidiClass + DecompMapping + DecimalValue + DigitValue + NumericValue + BidiMirrored + Unicode1Name + ISOComment + SimpleUppercaseMapping + SimpleLowercaseMapping + SimpleTitlecaseMapping +) + +// Parse calls f for each entry in the given reader of a UCD file. It will close +// the reader upon return. It will call log.Fatal if any error occurred. +// +// This implements the most common usage pattern of using Parser. +func Parse(r io.ReadCloser, f func(p *Parser)) { + defer r.Close() + + p := New(r) + for p.Next() { + f(p) + } + if err := p.Err(); err != nil { + r.Close() // os.Exit will cause defers not to be called. + log.Fatal(err) + } +} + +// An Option is used to configure a Parser. +type Option func(p *Parser) + +func keepRanges(p *Parser) { + p.keepRanges = true +} + +var ( + // KeepRanges prevents the expansion of ranges. The raw ranges can be + // obtained by calling Range(0) on the parser. + KeepRanges Option = keepRanges +) + +// The Part option register a handler for lines starting with a '@'. The text +// after a '@' is available as the first field. Comments are handled as usual. +func Part(f func(p *Parser)) Option { + return func(p *Parser) { + p.partHandler = f + } +} + +// The CommentHandler option passes comments that are on a line by itself to +// a given handler. +func CommentHandler(f func(s string)) Option { + return func(p *Parser) { + p.commentHandler = f + } +} + +// A Parser parses Unicode Character Database (UCD) files. +type Parser struct { + scanner *bufio.Scanner + + keepRanges bool // Don't expand rune ranges in field 0. + + err error + comment string + field []string + // parsedRange is needed in case Range(0) is called more than once for one + // field. In some cases this requires scanning ahead. + line int + parsedRange bool + rangeStart, rangeEnd rune + + partHandler func(p *Parser) + commentHandler func(s string) +} + +func (p *Parser) setError(err error, msg string) { + if p.err == nil && err != nil { + if msg == "" { + p.err = fmt.Errorf("ucd:line:%d: %v", p.line, err) + } else { + p.err = fmt.Errorf("ucd:line:%d:%s: %v", p.line, msg, err) + } + } +} + +func (p *Parser) getField(i int) string { + if i >= len(p.field) { + return "" + } + return p.field[i] +} + +// Err returns a non-nil error if any error occurred during parsing. +func (p *Parser) Err() error { + return p.err +} + +// New returns a Parser for the given Reader. +func New(r io.Reader, o ...Option) *Parser { + p := &Parser{ + scanner: bufio.NewScanner(r), + } + for _, f := range o { + f(p) + } + return p +} + +// Next parses the next line in the file. It returns true if a line was parsed +// and false if it reached the end of the file. +func (p *Parser) Next() bool { + if !p.keepRanges && p.rangeStart < p.rangeEnd { + p.rangeStart++ + return true + } + p.comment = "" + p.field = p.field[:0] + p.parsedRange = false + + for p.scanner.Scan() && p.err == nil { + p.line++ + s := p.scanner.Text() + if s == "" { + continue + } + if s[0] == '#' { + if p.commentHandler != nil { + p.commentHandler(strings.TrimSpace(s[1:])) + } + continue + } + + // Parse line + if i := strings.IndexByte(s, '#'); i != -1 { + p.comment = strings.TrimSpace(s[i+1:]) + s = s[:i] + } + if s[0] == '@' { + if p.partHandler != nil { + p.field = append(p.field, strings.TrimSpace(s[1:])) + p.partHandler(p) + p.field = p.field[:0] + } + p.comment = "" + continue + } + for { + i := strings.IndexByte(s, ';') + if i == -1 { + p.field = append(p.field, strings.TrimSpace(s)) + break + } + p.field = append(p.field, strings.TrimSpace(s[:i])) + s = s[i+1:] + } + if !p.keepRanges { + p.rangeStart, p.rangeEnd = p.getRange(0) + } + return true + } + p.setError(p.scanner.Err(), "scanner failed") + return false +} + +func parseRune(b string) (rune, error) { + if len(b) > 2 && b[0] == 'U' && b[1] == '+' { + b = b[2:] + } + x, err := strconv.ParseUint(b, 16, 32) + return rune(x), err +} + +func (p *Parser) parseRune(s string) rune { + x, err := parseRune(s) + p.setError(err, "failed to parse rune") + return x +} + +// Rune parses and returns field i as a rune. +func (p *Parser) Rune(i int) rune { + if i > 0 || p.keepRanges { + return p.parseRune(p.getField(i)) + } + return p.rangeStart +} + +// Runes interprets and returns field i as a sequence of runes. +func (p *Parser) Runes(i int) (runes []rune) { + add := func(s string) { + if s = strings.TrimSpace(s); len(s) > 0 { + runes = append(runes, p.parseRune(s)) + } + } + for b := p.getField(i); ; { + i := strings.IndexByte(b, ' ') + if i == -1 { + add(b) + break + } + add(b[:i]) + b = b[i+1:] + } + return +} + +var ( + errIncorrectLegacyRange = errors.New("ucd: unmatched <* First>") + + // reRange matches one line of a legacy rune range. + reRange = regexp.MustCompile("^([0-9A-F]*);<([^,]*), ([^>]*)>(.*)$") +) + +// Range parses and returns field i as a rune range. A range is inclusive at +// both ends. If the field only has one rune, first and last will be identical. +// It supports the legacy format for ranges used in UnicodeData.txt. +func (p *Parser) Range(i int) (first, last rune) { + if !p.keepRanges { + return p.rangeStart, p.rangeStart + } + return p.getRange(i) +} + +func (p *Parser) getRange(i int) (first, last rune) { + b := p.getField(i) + if k := strings.Index(b, ".."); k != -1 { + return p.parseRune(b[:k]), p.parseRune(b[k+2:]) + } + // The first field may not be a rune, in which case we may ignore any error + // and set the range as 0..0. + x, err := parseRune(b) + if err != nil { + // Disable range parsing henceforth. This ensures that an error will be + // returned if the user subsequently will try to parse this field as + // a Rune. + p.keepRanges = true + } + // Special case for UnicodeData that was retained for backwards compatibility. + if i == 0 && len(p.field) > 1 && strings.HasSuffix(p.field[1], "First>") { + if p.parsedRange { + return p.rangeStart, p.rangeEnd + } + mf := reRange.FindStringSubmatch(p.scanner.Text()) + p.line++ + if mf == nil || !p.scanner.Scan() { + p.setError(errIncorrectLegacyRange, "") + return x, x + } + // Using Bytes would be more efficient here, but Text is a lot easier + // and this is not a frequent case. + ml := reRange.FindStringSubmatch(p.scanner.Text()) + if ml == nil || mf[2] != ml[2] || ml[3] != "Last" || mf[4] != ml[4] { + p.setError(errIncorrectLegacyRange, "") + return x, x + } + p.rangeStart, p.rangeEnd = x, p.parseRune(p.scanner.Text()[:len(ml[1])]) + p.parsedRange = true + return p.rangeStart, p.rangeEnd + } + return x, x +} + +// bools recognizes all valid UCD boolean values. +var bools = map[string]bool{ + "": false, + "N": false, + "No": false, + "F": false, + "False": false, + "Y": true, + "Yes": true, + "T": true, + "True": true, +} + +// Bool parses and returns field i as a boolean value. +func (p *Parser) Bool(i int) bool { + f := p.getField(i) + for s, v := range bools { + if f == s { + return v + } + } + p.setError(strconv.ErrSyntax, "error parsing bool") + return false +} + +// Int parses and returns field i as an integer value. +func (p *Parser) Int(i int) int { + x, err := strconv.ParseInt(string(p.getField(i)), 10, 64) + p.setError(err, "error parsing int") + return int(x) +} + +// Uint parses and returns field i as an unsigned integer value. +func (p *Parser) Uint(i int) uint { + x, err := strconv.ParseUint(string(p.getField(i)), 10, 64) + p.setError(err, "error parsing uint") + return uint(x) +} + +// Float parses and returns field i as a decimal value. +func (p *Parser) Float(i int) float64 { + x, err := strconv.ParseFloat(string(p.getField(i)), 64) + p.setError(err, "error parsing float") + return x +} + +// String parses and returns field i as a string value. +func (p *Parser) String(i int) string { + return string(p.getField(i)) +} + +// Strings parses and returns field i as a space-separated list of strings. +func (p *Parser) Strings(i int) []string { + ss := strings.Split(string(p.getField(i)), " ") + for i, s := range ss { + ss[i] = strings.TrimSpace(s) + } + return ss +} + +// Comment returns the comments for the current line. +func (p *Parser) Comment() string { + return string(p.comment) +} + +var errUndefinedEnum = errors.New("ucd: undefined enum value") + +// Enum interprets and returns field i as a value that must be one of the values +// in enum. +func (p *Parser) Enum(i int, enum ...string) string { + f := p.getField(i) + for _, s := range enum { + if f == s { + return s + } + } + p.setError(errUndefinedEnum, "error parsing enum") + return "" +} |