diff options
Diffstat (limited to 'vendor/golang.org/x/text/unicode')
-rw-r--r-- | vendor/golang.org/x/text/unicode/bidi/gen.go | 133 | ||||
-rw-r--r-- | vendor/golang.org/x/text/unicode/bidi/gen_ranges.go | 57 | ||||
-rw-r--r-- | vendor/golang.org/x/text/unicode/bidi/gen_trieval.go | 64 | ||||
-rw-r--r-- | vendor/golang.org/x/text/unicode/norm/maketables.go | 986 | ||||
-rw-r--r-- | vendor/golang.org/x/text/unicode/norm/triegen.go | 117 | ||||
-rw-r--r-- | vendor/golang.org/x/text/unicode/rangetable/gen.go | 115 |
6 files changed, 0 insertions, 1472 deletions
diff --git a/vendor/golang.org/x/text/unicode/bidi/gen.go b/vendor/golang.org/x/text/unicode/bidi/gen.go deleted file mode 100644 index 987fc169..00000000 --- a/vendor/golang.org/x/text/unicode/bidi/gen.go +++ /dev/null @@ -1,133 +0,0 @@ -// Copyright 2015 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// +build ignore - -package main - -import ( - "flag" - "log" - - "golang.org/x/text/internal/gen" - "golang.org/x/text/internal/triegen" - "golang.org/x/text/internal/ucd" -) - -var outputFile = flag.String("out", "tables.go", "output file") - -func main() { - gen.Init() - gen.Repackage("gen_trieval.go", "trieval.go", "bidi") - gen.Repackage("gen_ranges.go", "ranges_test.go", "bidi") - - genTables() -} - -// bidiClass names and codes taken from class "bc" in -// https://www.unicode.org/Public/8.0.0/ucd/PropertyValueAliases.txt -var bidiClass = map[string]Class{ - "AL": AL, // ArabicLetter - "AN": AN, // ArabicNumber - "B": B, // ParagraphSeparator - "BN": BN, // BoundaryNeutral - "CS": CS, // CommonSeparator - "EN": EN, // EuropeanNumber - "ES": ES, // EuropeanSeparator - "ET": ET, // EuropeanTerminator - "L": L, // LeftToRight - "NSM": NSM, // NonspacingMark - "ON": ON, // OtherNeutral - "R": R, // RightToLeft - "S": S, // SegmentSeparator - "WS": WS, // WhiteSpace - - "FSI": Control, - "PDF": Control, - "PDI": Control, - "LRE": Control, - "LRI": Control, - "LRO": Control, - "RLE": Control, - "RLI": Control, - "RLO": Control, -} - -func genTables() { - if numClass > 0x0F { - log.Fatalf("Too many Class constants (%#x > 0x0F).", numClass) - } - w := gen.NewCodeWriter() - defer w.WriteVersionedGoFile(*outputFile, "bidi") - - gen.WriteUnicodeVersion(w) - - t := triegen.NewTrie("bidi") - - // Build data about bracket mapping. These bits need to be or-ed with - // any other bits. - orMask := map[rune]uint64{} - - xorMap := map[rune]int{} - xorMasks := []rune{0} // First value is no-op. - - ucd.Parse(gen.OpenUCDFile("BidiBrackets.txt"), func(p *ucd.Parser) { - r1 := p.Rune(0) - r2 := p.Rune(1) - xor := r1 ^ r2 - if _, ok := xorMap[xor]; !ok { - xorMap[xor] = len(xorMasks) - xorMasks = append(xorMasks, xor) - } - entry := uint64(xorMap[xor]) << xorMaskShift - switch p.String(2) { - case "o": - entry |= openMask - case "c", "n": - default: - log.Fatalf("Unknown bracket class %q.", p.String(2)) - } - orMask[r1] = entry - }) - - w.WriteComment(` - xorMasks contains masks to be xor-ed with brackets to get the reverse - version.`) - w.WriteVar("xorMasks", xorMasks) - - done := map[rune]bool{} - - insert := func(r rune, c Class) { - if !done[r] { - t.Insert(r, orMask[r]|uint64(c)) - done[r] = true - } - } - - // Insert the derived BiDi properties. - ucd.Parse(gen.OpenUCDFile("extracted/DerivedBidiClass.txt"), func(p *ucd.Parser) { - r := p.Rune(0) - class, ok := bidiClass[p.String(1)] - if !ok { - log.Fatalf("%U: Unknown BiDi class %q", r, p.String(1)) - } - insert(r, class) - }) - visitDefaults(insert) - - // TODO: use sparse blocks. This would reduce table size considerably - // from the looks of it. - - sz, err := t.Gen(w) - if err != nil { - log.Fatal(err) - } - w.Size += sz -} - -// dummy values to make methods in gen_common compile. The real versions -// will be generated by this file to tables.go. -var ( - xorMasks []rune -) diff --git a/vendor/golang.org/x/text/unicode/bidi/gen_ranges.go b/vendor/golang.org/x/text/unicode/bidi/gen_ranges.go deleted file mode 100644 index 02c3b505..00000000 --- a/vendor/golang.org/x/text/unicode/bidi/gen_ranges.go +++ /dev/null @@ -1,57 +0,0 @@ -// Copyright 2015 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// +build ignore - -package main - -import ( - "unicode" - - "golang.org/x/text/internal/gen" - "golang.org/x/text/internal/ucd" - "golang.org/x/text/unicode/rangetable" -) - -// These tables are hand-extracted from: -// https://www.unicode.org/Public/8.0.0/ucd/extracted/DerivedBidiClass.txt -func visitDefaults(fn func(r rune, c Class)) { - // first write default values for ranges listed above. - visitRunes(fn, AL, []rune{ - 0x0600, 0x07BF, // Arabic - 0x08A0, 0x08FF, // Arabic Extended-A - 0xFB50, 0xFDCF, // Arabic Presentation Forms - 0xFDF0, 0xFDFF, - 0xFE70, 0xFEFF, - 0x0001EE00, 0x0001EEFF, // Arabic Mathematical Alpha Symbols - }) - visitRunes(fn, R, []rune{ - 0x0590, 0x05FF, // Hebrew - 0x07C0, 0x089F, // Nko et al. - 0xFB1D, 0xFB4F, - 0x00010800, 0x00010FFF, // Cypriot Syllabary et. al. - 0x0001E800, 0x0001EDFF, - 0x0001EF00, 0x0001EFFF, - }) - visitRunes(fn, ET, []rune{ // European Terminator - 0x20A0, 0x20Cf, // Currency symbols - }) - rangetable.Visit(unicode.Noncharacter_Code_Point, func(r rune) { - fn(r, BN) // Boundary Neutral - }) - ucd.Parse(gen.OpenUCDFile("DerivedCoreProperties.txt"), func(p *ucd.Parser) { - if p.String(1) == "Default_Ignorable_Code_Point" { - fn(p.Rune(0), BN) // Boundary Neutral - } - }) -} - -func visitRunes(fn func(r rune, c Class), c Class, runes []rune) { - for i := 0; i < len(runes); i += 2 { - lo, hi := runes[i], runes[i+1] - for j := lo; j <= hi; j++ { - fn(j, c) - } - } -} diff --git a/vendor/golang.org/x/text/unicode/bidi/gen_trieval.go b/vendor/golang.org/x/text/unicode/bidi/gen_trieval.go deleted file mode 100644 index 9cb99428..00000000 --- a/vendor/golang.org/x/text/unicode/bidi/gen_trieval.go +++ /dev/null @@ -1,64 +0,0 @@ -// Copyright 2015 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// +build ignore - -package main - -// Class is the Unicode BiDi class. Each rune has a single class. -type Class uint - -const ( - L Class = iota // LeftToRight - R // RightToLeft - EN // EuropeanNumber - ES // EuropeanSeparator - ET // EuropeanTerminator - AN // ArabicNumber - CS // CommonSeparator - B // ParagraphSeparator - S // SegmentSeparator - WS // WhiteSpace - ON // OtherNeutral - BN // BoundaryNeutral - NSM // NonspacingMark - AL // ArabicLetter - Control // Control LRO - PDI - - numClass - - LRO // LeftToRightOverride - RLO // RightToLeftOverride - LRE // LeftToRightEmbedding - RLE // RightToLeftEmbedding - PDF // PopDirectionalFormat - LRI // LeftToRightIsolate - RLI // RightToLeftIsolate - FSI // FirstStrongIsolate - PDI // PopDirectionalIsolate - - unknownClass = ^Class(0) -) - -var controlToClass = map[rune]Class{ - 0x202D: LRO, // LeftToRightOverride, - 0x202E: RLO, // RightToLeftOverride, - 0x202A: LRE, // LeftToRightEmbedding, - 0x202B: RLE, // RightToLeftEmbedding, - 0x202C: PDF, // PopDirectionalFormat, - 0x2066: LRI, // LeftToRightIsolate, - 0x2067: RLI, // RightToLeftIsolate, - 0x2068: FSI, // FirstStrongIsolate, - 0x2069: PDI, // PopDirectionalIsolate, -} - -// A trie entry has the following bits: -// 7..5 XOR mask for brackets -// 4 1: Bracket open, 0: Bracket close -// 3..0 Class type - -const ( - openMask = 0x10 - xorMaskShift = 5 -) diff --git a/vendor/golang.org/x/text/unicode/norm/maketables.go b/vendor/golang.org/x/text/unicode/norm/maketables.go deleted file mode 100644 index 30a3aa93..00000000 --- a/vendor/golang.org/x/text/unicode/norm/maketables.go +++ /dev/null @@ -1,986 +0,0 @@ -// Copyright 2011 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// +build ignore - -// Normalization table generator. -// Data read from the web. -// See forminfo.go for a description of the trie values associated with each rune. - -package main - -import ( - "bytes" - "encoding/binary" - "flag" - "fmt" - "io" - "log" - "sort" - "strconv" - "strings" - - "golang.org/x/text/internal/gen" - "golang.org/x/text/internal/triegen" - "golang.org/x/text/internal/ucd" -) - -func main() { - gen.Init() - loadUnicodeData() - compactCCC() - loadCompositionExclusions() - completeCharFields(FCanonical) - completeCharFields(FCompatibility) - computeNonStarterCounts() - verifyComputed() - printChars() - testDerived() - printTestdata() - makeTables() -} - -var ( - tablelist = flag.String("tables", - "all", - "comma-separated list of which tables to generate; "+ - "can be 'decomp', 'recomp', 'info' and 'all'") - test = flag.Bool("test", - false, - "test existing tables against DerivedNormalizationProps and generate test data for regression testing") - verbose = flag.Bool("verbose", - false, - "write data to stdout as it is parsed") -) - -const MaxChar = 0x10FFFF // anything above this shouldn't exist - -// Quick Check properties of runes allow us to quickly -// determine whether a rune may occur in a normal form. -// For a given normal form, a rune may be guaranteed to occur -// verbatim (QC=Yes), may or may not combine with another -// rune (QC=Maybe), or may not occur (QC=No). -type QCResult int - -const ( - QCUnknown QCResult = iota - QCYes - QCNo - QCMaybe -) - -func (r QCResult) String() string { - switch r { - case QCYes: - return "Yes" - case QCNo: - return "No" - case QCMaybe: - return "Maybe" - } - return "***UNKNOWN***" -} - -const ( - FCanonical = iota // NFC or NFD - FCompatibility // NFKC or NFKD - FNumberOfFormTypes -) - -const ( - MComposed = iota // NFC or NFKC - MDecomposed // NFD or NFKD - MNumberOfModes -) - -// This contains only the properties we're interested in. -type Char struct { - name string - codePoint rune // if zero, this index is not a valid code point. - ccc uint8 // canonical combining class - origCCC uint8 - excludeInComp bool // from CompositionExclusions.txt - compatDecomp bool // it has a compatibility expansion - - nTrailingNonStarters uint8 - nLeadingNonStarters uint8 // must be equal to trailing if non-zero - - forms [FNumberOfFormTypes]FormInfo // For FCanonical and FCompatibility - - state State -} - -var chars = make([]Char, MaxChar+1) -var cccMap = make(map[uint8]uint8) - -func (c Char) String() string { - buf := new(bytes.Buffer) - - fmt.Fprintf(buf, "%U [%s]:\n", c.codePoint, c.name) - fmt.Fprintf(buf, " ccc: %v\n", c.ccc) - fmt.Fprintf(buf, " excludeInComp: %v\n", c.excludeInComp) - fmt.Fprintf(buf, " compatDecomp: %v\n", c.compatDecomp) - fmt.Fprintf(buf, " state: %v\n", c.state) - fmt.Fprintf(buf, " NFC:\n") - fmt.Fprint(buf, c.forms[FCanonical]) - fmt.Fprintf(buf, " NFKC:\n") - fmt.Fprint(buf, c.forms[FCompatibility]) - - return buf.String() -} - -// In UnicodeData.txt, some ranges are marked like this: -// 3400;<CJK Ideograph Extension A, First>;Lo;0;L;;;;;N;;;;; -// 4DB5;<CJK Ideograph Extension A, Last>;Lo;0;L;;;;;N;;;;; -// parseCharacter keeps a state variable indicating the weirdness. -type State int - -const ( - SNormal State = iota // known to be zero for the type - SFirst - SLast - SMissing -) - -var lastChar = rune('\u0000') - -func (c Char) isValid() bool { - return c.codePoint != 0 && c.state != SMissing -} - -type FormInfo struct { - quickCheck [MNumberOfModes]QCResult // index: MComposed or MDecomposed - verified [MNumberOfModes]bool // index: MComposed or MDecomposed - - combinesForward bool // May combine with rune on the right - combinesBackward bool // May combine with rune on the left - isOneWay bool // Never appears in result - inDecomp bool // Some decompositions result in this char. - decomp Decomposition - expandedDecomp Decomposition -} - -func (f FormInfo) String() string { - buf := bytes.NewBuffer(make([]byte, 0)) - - fmt.Fprintf(buf, " quickCheck[C]: %v\n", f.quickCheck[MComposed]) - fmt.Fprintf(buf, " quickCheck[D]: %v\n", f.quickCheck[MDecomposed]) - fmt.Fprintf(buf, " cmbForward: %v\n", f.combinesForward) - fmt.Fprintf(buf, " cmbBackward: %v\n", f.combinesBackward) - fmt.Fprintf(buf, " isOneWay: %v\n", f.isOneWay) - fmt.Fprintf(buf, " inDecomp: %v\n", f.inDecomp) - fmt.Fprintf(buf, " decomposition: %X\n", f.decomp) - fmt.Fprintf(buf, " expandedDecomp: %X\n", f.expandedDecomp) - - return buf.String() -} - -type Decomposition []rune - -func parseDecomposition(s string, skipfirst bool) (a []rune, err error) { - decomp := strings.Split(s, " ") - if len(decomp) > 0 && skipfirst { - decomp = decomp[1:] - } - for _, d := range decomp { - point, err := strconv.ParseUint(d, 16, 64) - if err != nil { - return a, err - } - a = append(a, rune(point)) - } - return a, nil -} - -func loadUnicodeData() { - f := gen.OpenUCDFile("UnicodeData.txt") - defer f.Close() - p := ucd.New(f) - for p.Next() { - r := p.Rune(ucd.CodePoint) - char := &chars[r] - - char.ccc = uint8(p.Uint(ucd.CanonicalCombiningClass)) - decmap := p.String(ucd.DecompMapping) - - exp, err := parseDecomposition(decmap, false) - isCompat := false - if err != nil { - if len(decmap) > 0 { - exp, err = parseDecomposition(decmap, true) - if err != nil { - log.Fatalf(`%U: bad decomp |%v|: "%s"`, r, decmap, err) - } - isCompat = true - } - } - - char.name = p.String(ucd.Name) - char.codePoint = r - char.forms[FCompatibility].decomp = exp - if !isCompat { - char.forms[FCanonical].decomp = exp - } else { - char.compatDecomp = true - } - if len(decmap) > 0 { - char.forms[FCompatibility].decomp = exp - } - } - if err := p.Err(); err != nil { - log.Fatal(err) - } -} - -// compactCCC converts the sparse set of CCC values to a continguous one, -// reducing the number of bits needed from 8 to 6. -func compactCCC() { - m := make(map[uint8]uint8) - for i := range chars { - c := &chars[i] - m[c.ccc] = 0 - } - cccs := []int{} - for v, _ := range m { - cccs = append(cccs, int(v)) - } - sort.Ints(cccs) - for i, c := range cccs { - cccMap[uint8(i)] = uint8(c) - m[uint8(c)] = uint8(i) - } - for i := range chars { - c := &chars[i] - c.origCCC = c.ccc - c.ccc = m[c.ccc] - } - if len(m) >= 1<<6 { - log.Fatalf("too many difference CCC values: %d >= 64", len(m)) - } -} - -// CompositionExclusions.txt has form: -// 0958 # ... -// See https://unicode.org/reports/tr44/ for full explanation -func loadCompositionExclusions() { - f := gen.OpenUCDFile("CompositionExclusions.txt") - defer f.Close() - p := ucd.New(f) - for p.Next() { - c := &chars[p.Rune(0)] - if c.excludeInComp { - log.Fatalf("%U: Duplicate entry in exclusions.", c.codePoint) - } - c.excludeInComp = true - } - if e := p.Err(); e != nil { - log.Fatal(e) - } -} - -// hasCompatDecomp returns true if any of the recursive -// decompositions contains a compatibility expansion. -// In this case, the character may not occur in NFK*. -func hasCompatDecomp(r rune) bool { - c := &chars[r] - if c.compatDecomp { - return true - } - for _, d := range c.forms[FCompatibility].decomp { - if hasCompatDecomp(d) { - return true - } - } - return false -} - -// Hangul related constants. -const ( - HangulBase = 0xAC00 - HangulEnd = 0xD7A4 // hangulBase + Jamo combinations (19 * 21 * 28) - - JamoLBase = 0x1100 - JamoLEnd = 0x1113 - JamoVBase = 0x1161 - JamoVEnd = 0x1176 - JamoTBase = 0x11A8 - JamoTEnd = 0x11C3 - - JamoLVTCount = 19 * 21 * 28 - JamoTCount = 28 -) - -func isHangul(r rune) bool { - return HangulBase <= r && r < HangulEnd -} - -func isHangulWithoutJamoT(r rune) bool { - if !isHangul(r) { - return false - } - r -= HangulBase - return r < JamoLVTCount && r%JamoTCount == 0 -} - -func ccc(r rune) uint8 { - return chars[r].ccc -} - -// Insert a rune in a buffer, ordered by Canonical Combining Class. -func insertOrdered(b Decomposition, r rune) Decomposition { - n := len(b) - b = append(b, 0) - cc := ccc(r) - if cc > 0 { - // Use bubble sort. - for ; n > 0; n-- { - if ccc(b[n-1]) <= cc { - break - } - b[n] = b[n-1] - } - } - b[n] = r - return b -} - -// Recursively decompose. -func decomposeRecursive(form int, r rune, d Decomposition) Decomposition { - dcomp := chars[r].forms[form].decomp - if len(dcomp) == 0 { - return insertOrdered(d, r) - } - for _, c := range dcomp { - d = decomposeRecursive(form, c, d) - } - return d -} - -func completeCharFields(form int) { - // Phase 0: pre-expand decomposition. - for i := range chars { - f := &chars[i].forms[form] - if len(f.decomp) == 0 { - continue - } - exp := make(Decomposition, 0) - for _, c := range f.decomp { - exp = decomposeRecursive(form, c, exp) - } - f.expandedDecomp = exp - } - - // Phase 1: composition exclusion, mark decomposition. - for i := range chars { - c := &chars[i] - f := &c.forms[form] - - // Marks script-specific exclusions and version restricted. - f.isOneWay = c.excludeInComp - - // Singletons - f.isOneWay = f.isOneWay || len(f.decomp) == 1 - - // Non-starter decompositions - if len(f.decomp) > 1 { - chk := c.ccc != 0 || chars[f.decomp[0]].ccc != 0 - f.isOneWay = f.isOneWay || chk - } - - // Runes that decompose into more than two runes. - f.isOneWay = f.isOneWay || len(f.decomp) > 2 - - if form == FCompatibility { - f.isOneWay = f.isOneWay || hasCompatDecomp(c.codePoint) - } - - for _, r := range f.decomp { - chars[r].forms[form].inDecomp = true - } - } - - // Phase 2: forward and backward combining. - for i := range chars { - c := &chars[i] - f := &c.forms[form] - - if !f.isOneWay && len(f.decomp) == 2 { - f0 := &chars[f.decomp[0]].forms[form] - f1 := &chars[f.decomp[1]].forms[form] - if !f0.isOneWay { - f0.combinesForward = true - } - if !f1.isOneWay { - f1.combinesBackward = true - } - } - if isHangulWithoutJamoT(rune(i)) { - f.combinesForward = true - } - } - - // Phase 3: quick check values. - for i := range chars { - c := &chars[i] - f := &c.forms[form] - - switch { - case len(f.decomp) > 0: - f.quickCheck[MDecomposed] = QCNo - case isHangul(rune(i)): - f.quickCheck[MDecomposed] = QCNo - default: - f.quickCheck[MDecomposed] = QCYes - } - switch { - case f.isOneWay: - f.quickCheck[MComposed] = QCNo - case (i & 0xffff00) == JamoLBase: - f.quickCheck[MComposed] = QCYes - if JamoLBase <= i && i < JamoLEnd { - f.combinesForward = true - } - if JamoVBase <= i && i < JamoVEnd { - f.quickCheck[MComposed] = QCMaybe - f.combinesBackward = true - f.combinesForward = true - } - if JamoTBase <= i && i < JamoTEnd { - f.quickCheck[MComposed] = QCMaybe - f.combinesBackward = true - } - case !f.combinesBackward: - f.quickCheck[MComposed] = QCYes - default: - f.quickCheck[MComposed] = QCMaybe - } - } -} - -func computeNonStarterCounts() { - // Phase 4: leading and trailing non-starter count - for i := range chars { - c := &chars[i] - - runes := []rune{rune(i)} - // We always use FCompatibility so that the CGJ insertion points do not - // change for repeated normalizations with different forms. - if exp := c.forms[FCompatibility].expandedDecomp; len(exp) > 0 { - runes = exp - } - // We consider runes that combine backwards to be non-starters for the - // purpose of Stream-Safe Text Processing. - for _, r := range runes { - if cr := &chars[r]; cr.ccc == 0 && !cr.forms[FCompatibility].combinesBackward { - break - } - c.nLeadingNonStarters++ - } - for i := len(runes) - 1; i >= 0; i-- { - if cr := &chars[runes[i]]; cr.ccc == 0 && !cr.forms[FCompatibility].combinesBackward { - break - } - c.nTrailingNonStarters++ - } - if c.nTrailingNonStarters > 3 { - log.Fatalf("%U: Decomposition with more than 3 (%d) trailing modifiers (%U)", i, c.nTrailingNonStarters, runes) - } - - if isHangul(rune(i)) { - c.nTrailingNonStarters = 2 - if isHangulWithoutJamoT(rune(i)) { - c.nTrailingNonStarters = 1 - } - } - - if l, t := c.nLeadingNonStarters, c.nTrailingNonStarters; l > 0 && l != t { - log.Fatalf("%U: number of leading and trailing non-starters should be equal (%d vs %d)", i, l, t) - } - if t := c.nTrailingNonStarters; t > 3 { - log.Fatalf("%U: number of trailing non-starters is %d > 3", t) - } - } -} - -func printBytes(w io.Writer, b []byte, name string) { - fmt.Fprintf(w, "// %s: %d bytes\n", name, len(b)) - fmt.Fprintf(w, "var %s = [...]byte {", name) - for i, c := range b { - switch { - case i%64 == 0: - fmt.Fprintf(w, "\n// Bytes %x - %x\n", i, i+63) - case i%8 == 0: - fmt.Fprintf(w, "\n") - } - fmt.Fprintf(w, "0x%.2X, ", c) - } - fmt.Fprint(w, "\n}\n\n") -} - -// See forminfo.go for format. -func makeEntry(f *FormInfo, c *Char) uint16 { - e := uint16(0) - if r := c.codePoint; HangulBase <= r && r < HangulEnd { - e |= 0x40 - } - if f.combinesForward { - e |= 0x20 - } - if f.quickCheck[MDecomposed] == QCNo { - e |= 0x4 - } - switch f.quickCheck[MComposed] { - case QCYes: - case QCNo: - e |= 0x10 - case QCMaybe: - e |= 0x18 - default: - log.Fatalf("Illegal quickcheck value %v.", f.quickCheck[MComposed]) - } - e |= uint16(c.nTrailingNonStarters) - return e -} - -// decompSet keeps track of unique decompositions, grouped by whether -// the decomposition is followed by a trailing and/or leading CCC. -type decompSet [7]map[string]bool - -const ( - normalDecomp = iota - firstMulti - firstCCC - endMulti - firstLeadingCCC - firstCCCZeroExcept - firstStarterWithNLead - lastDecomp -) - -var cname = []string{"firstMulti", "firstCCC", "endMulti", "firstLeadingCCC", "firstCCCZeroExcept", "firstStarterWithNLead", "lastDecomp"} - -func makeDecompSet() decompSet { - m := decompSet{} - for i := range m { - m[i] = make(map[string]bool) - } - return m -} -func (m *decompSet) insert(key int, s string) { - m[key][s] = true -} - -func printCharInfoTables(w io.Writer) int { - mkstr := func(r rune, f *FormInfo) (int, string) { - d := f.expandedDecomp - s := string([]rune(d)) - if max := 1 << 6; len(s) >= max { - const msg = "%U: too many bytes in decomposition: %d >= %d" - log.Fatalf(msg, r, len(s), max) - } - head := uint8(len(s)) - if f.quickCheck[MComposed] != QCYes { - head |= 0x40 - } - if f.combinesForward { - head |= 0x80 - } - s = string([]byte{head}) + s - - lccc := ccc(d[0]) - tccc := ccc(d[len(d)-1]) - cc := ccc(r) - if cc != 0 && lccc == 0 && tccc == 0 { - log.Fatalf("%U: trailing and leading ccc are 0 for non-zero ccc %d", r, cc) - } - if tccc < lccc && lccc != 0 { - const msg = "%U: lccc (%d) must be <= tcc (%d)" - log.Fatalf(msg, r, lccc, tccc) - } - index := normalDecomp - nTrail := chars[r].nTrailingNonStarters - nLead := chars[r].nLeadingNonStarters - if tccc > 0 || lccc > 0 || nTrail > 0 { - tccc <<= 2 - tccc |= nTrail - s += string([]byte{tccc}) - index = endMulti - for _, r := range d[1:] { - if ccc(r) == 0 { - index = firstCCC - } - } - if lccc > 0 || nLead > 0 { - s += string([]byte{lccc}) - if index == firstCCC { - log.Fatalf("%U: multi-segment decomposition not supported for decompositions with leading CCC != 0", r) - } - index = firstLeadingCCC - } - if cc != lccc { - if cc != 0 { - log.Fatalf("%U: for lccc != ccc, expected ccc to be 0; was %d", r, cc) - } - index = firstCCCZeroExcept - } - } else if len(d) > 1 { - index = firstMulti - } - return index, s - } - - decompSet := makeDecompSet() - const nLeadStr = "\x00\x01" // 0-byte length and tccc with nTrail. - decompSet.insert(firstStarterWithNLead, nLeadStr) - - // Store the uniqued decompositions in a byte buffer, - // preceded by their byte length. - for _, c := range chars { - for _, f := range c.forms { - if len(f.expandedDecomp) == 0 { - continue - } - if f.combinesBackward { - log.Fatalf("%U: combinesBackward and decompose", c.codePoint) - } - index, s := mkstr(c.codePoint, &f) - decompSet.insert(index, s) - } - } - - decompositions := bytes.NewBuffer(make([]byte, 0, 10000)) - size := 0 - positionMap := make(map[string]uint16) - decompositions.WriteString("\000") - fmt.Fprintln(w, "const (") - for i, m := range decompSet { - sa := []string{} - for s := range m { - sa = append(sa, s) - } - sort.Strings(sa) - for _, s := range sa { - p := decompositions.Len() - decompositions.WriteString(s) - positionMap[s] = uint16(p) - } - if cname[i] != "" { - fmt.Fprintf(w, "%s = 0x%X\n", cname[i], decompositions.Len()) - } - } - fmt.Fprintln(w, "maxDecomp = 0x8000") - fmt.Fprintln(w, ")") - b := decompositions.Bytes() - printBytes(w, b, "decomps") - size += len(b) - - varnames := []string{"nfc", "nfkc"} - for i := 0; i < FNumberOfFormTypes; i++ { - trie := triegen.NewTrie(varnames[i]) - - for r, c := range chars { - f := c.forms[i] - d := f.expandedDecomp - if len(d) != 0 { - _, key := mkstr(c.codePoint, &f) - trie.Insert(rune(r), uint64(positionMap[key])) - if c.ccc != ccc(d[0]) { - // We assume the lead ccc of a decomposition !=0 in this case. - if ccc(d[0]) == 0 { - log.Fatalf("Expected leading CCC to be non-zero; ccc is %d", c.ccc) - } - } - } else if c.nLeadingNonStarters > 0 && len(f.expandedDecomp) == 0 && c.ccc == 0 && !f.combinesBackward { - // Handle cases where it can't be detected that the nLead should be equal - // to nTrail. - trie.Insert(c.codePoint, uint64(positionMap[nLeadStr])) - } else if v := makeEntry(&f, &c)<<8 | uint16(c.ccc); v != 0 { - trie.Insert(c.codePoint, uint64(0x8000|v)) - } - } - sz, err := trie.Gen(w, triegen.Compact(&normCompacter{name: varnames[i]})) - if err != nil { - log.Fatal(err) - } - size += sz - } - return size -} - -func contains(sa []string, s string) bool { - for _, a := range sa { - if a == s { - return true - } - } - return false -} - -func makeTables() { - w := &bytes.Buffer{} - - size := 0 - if *tablelist == "" { - return - } - list := strings.Split(*tablelist, ",") - if *tablelist == "all" { - list = []string{"recomp", "info"} - } - - // Compute maximum decomposition size. - max := 0 - for _, c := range chars { - if n := len(string(c.forms[FCompatibility].expandedDecomp)); n > max { - max = n - } - } - fmt.Fprintln(w, `import "sync"`) - fmt.Fprintln(w) - - fmt.Fprintln(w, "const (") - fmt.Fprintln(w, "\t// Version is the Unicode edition from which the tables are derived.") - fmt.Fprintf(w, "\tVersion = %q\n", gen.UnicodeVersion()) - fmt.Fprintln(w) - fmt.Fprintln(w, "\t// MaxTransformChunkSize indicates the maximum number of bytes that Transform") - fmt.Fprintln(w, "\t// may need to write atomically for any Form. Making a destination buffer at") - fmt.Fprintln(w, "\t// least this size ensures that Transform can always make progress and that") - fmt.Fprintln(w, "\t// the user does not need to grow the buffer on an ErrShortDst.") - fmt.Fprintf(w, "\tMaxTransformChunkSize = %d+maxNonStarters*4\n", len(string(0x034F))+max) - fmt.Fprintln(w, ")\n") - - // Print the CCC remap table. - size += len(cccMap) - fmt.Fprintf(w, "var ccc = [%d]uint8{", len(cccMap)) - for i := 0; i < len(cccMap); i++ { - if i%8 == 0 { - fmt.Fprintln(w) - } - fmt.Fprintf(w, "%3d, ", cccMap[uint8(i)]) - } - fmt.Fprintln(w, "\n}\n") - - if contains(list, "info") { - size += printCharInfoTables(w) - } - - if contains(list, "recomp") { - // Note that we use 32 bit keys, instead of 64 bit. - // This clips the bits of three entries, but we know - // this won't cause a collision. The compiler will catch - // any changes made to UnicodeData.txt that introduces - // a collision. - // Note that the recomposition map for NFC and NFKC - // are identical. - - // Recomposition map - nrentries := 0 - for _, c := range chars { - f := c.forms[FCanonical] - if !f.isOneWay && len(f.decomp) > 0 { - nrentries++ - } - } - sz := nrentries * 8 - size += sz - fmt.Fprintf(w, "// recompMap: %d bytes (entries only)\n", sz) - fmt.Fprintln(w, "var recompMap map[uint32]rune") - fmt.Fprintln(w, "var recompMapOnce sync.Once\n") - fmt.Fprintln(w, `const recompMapPacked = "" +`) - var buf [8]byte - for i, c := range chars { - f := c.forms[FCanonical] - d := f.decomp - if !f.isOneWay && len(d) > 0 { - key := uint32(uint16(d[0]))<<16 + uint32(uint16(d[1])) - binary.BigEndian.PutUint32(buf[:4], key) - binary.BigEndian.PutUint32(buf[4:], uint32(i)) - fmt.Fprintf(w, "\t\t%q + // 0x%.8X: 0x%.8X\n", string(buf[:]), key, uint32(i)) - } - } - // hack so we don't have to special case the trailing plus sign - fmt.Fprintf(w, ` ""`) - fmt.Fprintln(w) - } - - fmt.Fprintf(w, "// Total size of tables: %dKB (%d bytes)\n", (size+512)/1024, size) - gen.WriteVersionedGoFile("tables.go", "norm", w.Bytes()) -} - -func printChars() { - if *verbose { - for _, c := range chars { - if !c.isValid() || c.state == SMissing { - continue - } - fmt.Println(c) - } - } -} - -// verifyComputed does various consistency tests. -func verifyComputed() { - for i, c := range chars { - for _, f := range c.forms { - isNo := (f.quickCheck[MDecomposed] == QCNo) - if (len(f.decomp) > 0) != isNo && !isHangul(rune(i)) { - log.Fatalf("%U: NF*D QC must be No if rune decomposes", i) - } - - isMaybe := f.quickCheck[MComposed] == QCMaybe - if f.combinesBackward != isMaybe { - log.Fatalf("%U: NF*C QC must be Maybe if combinesBackward", i) - } - if len(f.decomp) > 0 && f.combinesForward && isMaybe { - log.Fatalf("%U: NF*C QC must be Yes or No if combinesForward and decomposes", i) - } - - if len(f.expandedDecomp) != 0 { - continue - } - if a, b := c.nLeadingNonStarters > 0, (c.ccc > 0 || f.combinesBackward); a != b { - // We accept these runes to be treated differently (it only affects - // segment breaking in iteration, most likely on improper use), but - // reconsider if more characters are added. - // U+FF9E HALFWIDTH KATAKANA VOICED SOUND MARK;Lm;0;L;<narrow> 3099;;;;N;;;;; - // U+FF9F HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK;Lm;0;L;<narrow> 309A;;;;N;;;;; - // U+3133 HANGUL LETTER KIYEOK-SIOS;Lo;0;L;<compat> 11AA;;;;N;HANGUL LETTER GIYEOG SIOS;;;; - // U+318E HANGUL LETTER ARAEAE;Lo;0;L;<compat> 11A1;;;;N;HANGUL LETTER ALAE AE;;;; - // U+FFA3 HALFWIDTH HANGUL LETTER KIYEOK-SIOS;Lo;0;L;<narrow> 3133;;;;N;HALFWIDTH HANGUL LETTER GIYEOG SIOS;;;; - // U+FFDC HALFWIDTH HANGUL LETTER I;Lo;0;L;<narrow> 3163;;;;N;;;;; - if i != 0xFF9E && i != 0xFF9F && !(0x3133 <= i && i <= 0x318E) && !(0xFFA3 <= i && i <= 0xFFDC) { - log.Fatalf("%U: nLead was %v; want %v", i, a, b) - } - } - } - nfc := c.forms[FCanonical] - nfkc := c.forms[FCompatibility] - if nfc.combinesBackward != nfkc.combinesBackward { - log.Fatalf("%U: Cannot combine combinesBackward\n", c.codePoint) - } - } -} - -// Use values in DerivedNormalizationProps.txt to compare against the -// values we computed. -// DerivedNormalizationProps.txt has form: -// 00C0..00C5 ; NFD_QC; N # ... -// 0374 ; NFD_QC; N # ... -// See https://unicode.org/reports/tr44/ for full explanation -func testDerived() { - f := gen.OpenUCDFile("DerivedNormalizationProps.txt") - defer f.Close() - p := ucd.New(f) - for p.Next() { - r := p.Rune(0) - c := &chars[r] - - var ftype, mode int - qt := p.String(1) - switch qt { - case "NFC_QC": - ftype, mode = FCanonical, MComposed - case "NFD_QC": - ftype, mode = FCanonical, MDecomposed - case "NFKC_QC": - ftype, mode = FCompatibility, MComposed - case "NFKD_QC": - ftype, mode = FCompatibility, MDecomposed - default: - continue - } - var qr QCResult - switch p.String(2) { - case "Y": - qr = QCYes - case "N": - qr = QCNo - case "M": - qr = QCMaybe - default: - log.Fatalf(`Unexpected quick check value "%s"`, p.String(2)) - } - if got := c.forms[ftype].quickCheck[mode]; got != qr { - log.Printf("%U: FAILED %s (was %v need %v)\n", r, qt, got, qr) - } - c.forms[ftype].verified[mode] = true - } - if err := p.Err(); err != nil { - log.Fatal(err) - } - // Any unspecified value must be QCYes. Verify this. - for i, c := range chars { - for j, fd := range c.forms { - for k, qr := range fd.quickCheck { - if !fd.verified[k] && qr != QCYes { - m := "%U: FAIL F:%d M:%d (was %v need Yes) %s\n" - log.Printf(m, i, j, k, qr, c.name) - } - } - } - } -} - -var testHeader = `const ( - Yes = iota - No - Maybe -) - -type formData struct { - qc uint8 - combinesForward bool - decomposition string -} - -type runeData struct { - r rune - ccc uint8 - nLead uint8 - nTrail uint8 - f [2]formData // 0: canonical; 1: compatibility -} - -func f(qc uint8, cf bool, dec string) [2]formData { - return [2]formData{{qc, cf, dec}, {qc, cf, dec}} -} - -func g(qc, qck uint8, cf, cfk bool, d, dk string) [2]formData { - return [2]formData{{qc, cf, d}, {qck, cfk, dk}} -} - -var testData = []runeData{ -` - -func printTestdata() { - type lastInfo struct { - ccc uint8 - nLead uint8 - nTrail uint8 - f string - } - - last := lastInfo{} - w := &bytes.Buffer{} - fmt.Fprintf(w, testHeader) - for r, c := range chars { - f := c.forms[FCanonical] - qc, cf, d := f.quickCheck[MComposed], f.combinesForward, string(f.expandedDecomp) - f = c.forms[FCompatibility] - qck, cfk, dk := f.quickCheck[MComposed], f.combinesForward, string(f.expandedDecomp) - s := "" - if d == dk && qc == qck && cf == cfk { - s = fmt.Sprintf("f(%s, %v, %q)", qc, cf, d) - } else { - s = fmt.Sprintf("g(%s, %s, %v, %v, %q, %q)", qc, qck, cf, cfk, d, dk) - } - current := lastInfo{c.ccc, c.nLeadingNonStarters, c.nTrailingNonStarters, s} - if last != current { - fmt.Fprintf(w, "\t{0x%x, %d, %d, %d, %s},\n", r, c.origCCC, c.nLeadingNonStarters, c.nTrailingNonStarters, s) - last = current - } - } - fmt.Fprintln(w, "}") - gen.WriteVersionedGoFile("data_test.go", "norm", w.Bytes()) -} diff --git a/vendor/golang.org/x/text/unicode/norm/triegen.go b/vendor/golang.org/x/text/unicode/norm/triegen.go deleted file mode 100644 index 45d71190..00000000 --- a/vendor/golang.org/x/text/unicode/norm/triegen.go +++ /dev/null @@ -1,117 +0,0 @@ -// Copyright 2011 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// +build ignore - -// Trie table generator. -// Used by make*tables tools to generate a go file with trie data structures -// for mapping UTF-8 to a 16-bit value. All but the last byte in a UTF-8 byte -// sequence are used to lookup offsets in the index table to be used for the -// next byte. The last byte is used to index into a table with 16-bit values. - -package main - -import ( - "fmt" - "io" -) - -const maxSparseEntries = 16 - -type normCompacter struct { - sparseBlocks [][]uint64 - sparseOffset []uint16 - sparseCount int - name string -} - -func mostFrequentStride(a []uint64) int { - counts := make(map[int]int) - var v int - for _, x := range a { - if stride := int(x) - v; v != 0 && stride >= 0 { - counts[stride]++ - } - v = int(x) - } - var maxs, maxc int - for stride, cnt := range counts { - if cnt > maxc || (cnt == maxc && stride < maxs) { - maxs, maxc = stride, cnt - } - } - return maxs -} - -func countSparseEntries(a []uint64) int { - stride := mostFrequentStride(a) - var v, count int - for _, tv := range a { - if int(tv)-v != stride { - if tv != 0 { - count++ - } - } - v = int(tv) - } - return count -} - -func (c *normCompacter) Size(v []uint64) (sz int, ok bool) { - if n := countSparseEntries(v); n <= maxSparseEntries { - return (n+1)*4 + 2, true - } - return 0, false -} - -func (c *normCompacter) Store(v []uint64) uint32 { - h := uint32(len(c.sparseOffset)) - c.sparseBlocks = append(c.sparseBlocks, v) - c.sparseOffset = append(c.sparseOffset, uint16(c.sparseCount)) - c.sparseCount += countSparseEntries(v) + 1 - return h -} - -func (c *normCompacter) Handler() string { - return c.name + "Sparse.lookup" -} - -func (c *normCompacter) Print(w io.Writer) (retErr error) { - p := func(f string, x ...interface{}) { - if _, err := fmt.Fprintf(w, f, x...); retErr == nil && err != nil { - retErr = err - } - } - - ls := len(c.sparseBlocks) - p("// %sSparseOffset: %d entries, %d bytes\n", c.name, ls, ls*2) - p("var %sSparseOffset = %#v\n\n", c.name, c.sparseOffset) - - ns := c.sparseCount - p("// %sSparseValues: %d entries, %d bytes\n", c.name, ns, ns*4) - p("var %sSparseValues = [%d]valueRange {", c.name, ns) - for i, b := range c.sparseBlocks { - p("\n// Block %#x, offset %#x", i, c.sparseOffset[i]) - var v int - stride := mostFrequentStride(b) - n := countSparseEntries(b) - p("\n{value:%#04x,lo:%#02x},", stride, uint8(n)) - for i, nv := range b { - if int(nv)-v != stride { - if v != 0 { - p(",hi:%#02x},", 0x80+i-1) - } - if nv != 0 { - p("\n{value:%#04x,lo:%#02x", nv, 0x80+i) - } - } - v = int(nv) - } - if v != 0 { - p(",hi:%#02x},", 0x80+len(b)-1) - } - } - p("\n}\n\n") - return -} diff --git a/vendor/golang.org/x/text/unicode/rangetable/gen.go b/vendor/golang.org/x/text/unicode/rangetable/gen.go deleted file mode 100644 index c2d36741..00000000 --- a/vendor/golang.org/x/text/unicode/rangetable/gen.go +++ /dev/null @@ -1,115 +0,0 @@ -// Copyright 2015 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// +build ignore - -package main - -import ( - "bytes" - "flag" - "fmt" - "io" - "log" - "reflect" - "strings" - "unicode" - - "golang.org/x/text/collate" - "golang.org/x/text/internal/gen" - "golang.org/x/text/internal/ucd" - "golang.org/x/text/language" - "golang.org/x/text/unicode/rangetable" -) - -var versionList = flag.String("versions", "", - "list of versions for which to generate RangeTables") - -const bootstrapMessage = `No versions specified. -To bootstrap the code generation, run: - go run gen.go --versions=4.1.0,5.0.0,6.0.0,6.1.0,6.2.0,6.3.0,7.0.0 - -and ensure that the latest versions are included by checking: - https://www.unicode.org/Public/` - -func getVersions() []string { - if *versionList == "" { - log.Fatal(bootstrapMessage) - } - - c := collate.New(language.Und, collate.Numeric) - versions := strings.Split(*versionList, ",") - c.SortStrings(versions) - - // Ensure that at least the current version is included. - for _, v := range versions { - if v == gen.UnicodeVersion() { - return versions - } - } - - versions = append(versions, gen.UnicodeVersion()) - c.SortStrings(versions) - return versions -} - -func main() { - gen.Init() - - versions := getVersions() - - w := &bytes.Buffer{} - - fmt.Fprintf(w, "//go:generate go run gen.go --versions=%s\n\n", strings.Join(versions, ",")) - fmt.Fprintf(w, "import \"unicode\"\n\n") - - vstr := func(s string) string { return strings.Replace(s, ".", "_", -1) } - - fmt.Fprintf(w, "var assigned = map[string]*unicode.RangeTable{\n") - for _, v := range versions { - fmt.Fprintf(w, "\t%q: assigned%s,\n", v, vstr(v)) - } - fmt.Fprintf(w, "}\n\n") - - var size int - for _, v := range versions { - assigned := []rune{} - - r := gen.Open("https://www.unicode.org/Public/", "", v+"/ucd/UnicodeData.txt") - ucd.Parse(r, func(p *ucd.Parser) { - assigned = append(assigned, p.Rune(0)) - }) - - rt := rangetable.New(assigned...) - sz := int(reflect.TypeOf(unicode.RangeTable{}).Size()) - sz += int(reflect.TypeOf(unicode.Range16{}).Size()) * len(rt.R16) - sz += int(reflect.TypeOf(unicode.Range32{}).Size()) * len(rt.R32) - - fmt.Fprintf(w, "// size %d bytes (%d KiB)\n", sz, sz/1024) - fmt.Fprintf(w, "var assigned%s = ", vstr(v)) - print(w, rt) - - size += sz - } - - fmt.Fprintf(w, "// Total size %d bytes (%d KiB)\n", size, size/1024) - - gen.WriteVersionedGoFile("tables.go", "rangetable", w.Bytes()) -} - -func print(w io.Writer, rt *unicode.RangeTable) { - fmt.Fprintln(w, "&unicode.RangeTable{") - fmt.Fprintln(w, "\tR16: []unicode.Range16{") - for _, r := range rt.R16 { - fmt.Fprintf(w, "\t\t{%#04x, %#04x, %d},\n", r.Lo, r.Hi, r.Stride) - } - fmt.Fprintln(w, "\t},") - fmt.Fprintln(w, "\tR32: []unicode.Range32{") - for _, r := range rt.R32 { - fmt.Fprintf(w, "\t\t{%#08x, %#08x, %d},\n", r.Lo, r.Hi, r.Stride) - } - fmt.Fprintln(w, "\t},") - fmt.Fprintf(w, "\tLatinOffset: %d,\n", rt.LatinOffset) - fmt.Fprintf(w, "}\n\n") -} |