summaryrefslogtreecommitdiffstats
path: root/vendor/golang.org/x/text/language/gen.go
diff options
context:
space:
mode:
authorWim <wim@42.be>2018-05-11 21:54:32 +0200
committerWim <wim@42.be>2018-05-11 21:54:32 +0200
commitbf0b9959d1b81d6362a390baaee70f2102fb2d58 (patch)
tree61f2e71367884883cb9f68d7ac55cdcbb5dce908 /vendor/golang.org/x/text/language/gen.go
parent406a54b597271add1e297231e6f260895eff8f1d (diff)
downloadmatterbridge-msglm-bf0b9959d1b81d6362a390baaee70f2102fb2d58.tar.gz
matterbridge-msglm-bf0b9959d1b81d6362a390baaee70f2102fb2d58.tar.bz2
matterbridge-msglm-bf0b9959d1b81d6362a390baaee70f2102fb2d58.zip
Add vendor github.com/dfordsoft/golib/ic
Diffstat (limited to 'vendor/golang.org/x/text/language/gen.go')
-rw-r--r--vendor/golang.org/x/text/language/gen.go305
1 files changed, 305 insertions, 0 deletions
diff --git a/vendor/golang.org/x/text/language/gen.go b/vendor/golang.org/x/text/language/gen.go
new file mode 100644
index 00000000..3004eb42
--- /dev/null
+++ b/vendor/golang.org/x/text/language/gen.go
@@ -0,0 +1,305 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build ignore
+
+// Language tag table generator.
+// Data read from the web.
+
+package main
+
+import (
+ "flag"
+ "fmt"
+ "io"
+ "log"
+ "sort"
+ "strconv"
+ "strings"
+
+ "golang.org/x/text/internal/gen"
+ "golang.org/x/text/internal/language"
+ "golang.org/x/text/unicode/cldr"
+)
+
+var (
+ test = flag.Bool("test",
+ false,
+ "test existing tables; can be used to compare web data with package data.")
+ outputFile = flag.String("output",
+ "tables.go",
+ "output file for generated tables")
+)
+
+func main() {
+ gen.Init()
+
+ w := gen.NewCodeWriter()
+ defer w.WriteGoFile("tables.go", "language")
+
+ b := newBuilder(w)
+ gen.WriteCLDRVersion(w)
+
+ b.writeConstants()
+ b.writeMatchData()
+}
+
+type builder struct {
+ w *gen.CodeWriter
+ hw io.Writer // MultiWriter for w and w.Hash
+ data *cldr.CLDR
+ supp *cldr.SupplementalData
+}
+
+func (b *builder) langIndex(s string) uint16 {
+ return uint16(language.MustParseBase(s))
+}
+
+func (b *builder) regionIndex(s string) int {
+ return int(language.MustParseRegion(s))
+}
+
+func (b *builder) scriptIndex(s string) int {
+ return int(language.MustParseScript(s))
+}
+
+func newBuilder(w *gen.CodeWriter) *builder {
+ r := gen.OpenCLDRCoreZip()
+ defer r.Close()
+ d := &cldr.Decoder{}
+ data, err := d.DecodeZip(r)
+ if err != nil {
+ log.Fatal(err)
+ }
+ b := builder{
+ w: w,
+ hw: io.MultiWriter(w, w.Hash),
+ data: data,
+ supp: data.Supplemental(),
+ }
+ return &b
+}
+
+// writeConsts computes f(v) for all v in values and writes the results
+// as constants named _v to a single constant block.
+func (b *builder) writeConsts(f func(string) int, values ...string) {
+ fmt.Fprintln(b.w, "const (")
+ for _, v := range values {
+ fmt.Fprintf(b.w, "\t_%s = %v\n", v, f(v))
+ }
+ fmt.Fprintln(b.w, ")")
+}
+
+// TODO: region inclusion data will probably not be use used in future matchers.
+
+var langConsts = []string{
+ "de", "en", "fr", "it", "mo", "no", "nb", "pt", "sh", "mul", "und",
+}
+
+var scriptConsts = []string{
+ "Latn", "Hani", "Hans", "Hant", "Qaaa", "Qaai", "Qabx", "Zinh", "Zyyy",
+ "Zzzz",
+}
+
+var regionConsts = []string{
+ "001", "419", "BR", "CA", "ES", "GB", "MD", "PT", "UK", "US",
+ "ZZ", "XA", "XC", "XK", // Unofficial tag for Kosovo.
+}
+
+func (b *builder) writeConstants() {
+ b.writeConsts(func(s string) int { return int(b.langIndex(s)) }, langConsts...)
+ b.writeConsts(b.regionIndex, regionConsts...)
+ b.writeConsts(b.scriptIndex, scriptConsts...)
+}
+
+type mutualIntelligibility struct {
+ want, have uint16
+ distance uint8
+ oneway bool
+}
+
+type scriptIntelligibility struct {
+ wantLang, haveLang uint16
+ wantScript, haveScript uint8
+ distance uint8
+ // Always oneway
+}
+
+type regionIntelligibility struct {
+ lang uint16 // compact language id
+ script uint8 // 0 means any
+ group uint8 // 0 means any; if bit 7 is set it means inverse
+ distance uint8
+ // Always twoway.
+}
+
+// writeMatchData writes tables with languages and scripts for which there is
+// mutual intelligibility. The data is based on CLDR's languageMatching data.
+// Note that we use a different algorithm than the one defined by CLDR and that
+// we slightly modify the data. For example, we convert scores to confidence levels.
+// We also drop all region-related data as we use a different algorithm to
+// determine region equivalence.
+func (b *builder) writeMatchData() {
+ lm := b.supp.LanguageMatching.LanguageMatches
+ cldr.MakeSlice(&lm).SelectAnyOf("type", "written_new")
+
+ regionHierarchy := map[string][]string{}
+ for _, g := range b.supp.TerritoryContainment.Group {
+ regions := strings.Split(g.Contains, " ")
+ regionHierarchy[g.Type] = append(regionHierarchy[g.Type], regions...)
+ }
+ regionToGroups := make([]uint8, language.NumRegions)
+
+ idToIndex := map[string]uint8{}
+ for i, mv := range lm[0].MatchVariable {
+ if i > 6 {
+ log.Fatalf("Too many groups: %d", i)
+ }
+ idToIndex[mv.Id] = uint8(i + 1)
+ // TODO: also handle '-'
+ for _, r := range strings.Split(mv.Value, "+") {
+ todo := []string{r}
+ for k := 0; k < len(todo); k++ {
+ r := todo[k]
+ regionToGroups[b.regionIndex(r)] |= 1 << uint8(i)
+ todo = append(todo, regionHierarchy[r]...)
+ }
+ }
+ }
+ b.w.WriteVar("regionToGroups", regionToGroups)
+
+ // maps language id to in- and out-of-group region.
+ paradigmLocales := [][3]uint16{}
+ locales := strings.Split(lm[0].ParadigmLocales[0].Locales, " ")
+ for i := 0; i < len(locales); i += 2 {
+ x := [3]uint16{}
+ for j := 0; j < 2; j++ {
+ pc := strings.SplitN(locales[i+j], "-", 2)
+ x[0] = b.langIndex(pc[0])
+ if len(pc) == 2 {
+ x[1+j] = uint16(b.regionIndex(pc[1]))
+ }
+ }
+ paradigmLocales = append(paradigmLocales, x)
+ }
+ b.w.WriteVar("paradigmLocales", paradigmLocales)
+
+ b.w.WriteType(mutualIntelligibility{})
+ b.w.WriteType(scriptIntelligibility{})
+ b.w.WriteType(regionIntelligibility{})
+
+ matchLang := []mutualIntelligibility{}
+ matchScript := []scriptIntelligibility{}
+ matchRegion := []regionIntelligibility{}
+ // Convert the languageMatch entries in lists keyed by desired language.
+ for _, m := range lm[0].LanguageMatch {
+ // Different versions of CLDR use different separators.
+ desired := strings.Replace(m.Desired, "-", "_", -1)
+ supported := strings.Replace(m.Supported, "-", "_", -1)
+ d := strings.Split(desired, "_")
+ s := strings.Split(supported, "_")
+ if len(d) != len(s) {
+ log.Fatalf("not supported: desired=%q; supported=%q", desired, supported)
+ continue
+ }
+ distance, _ := strconv.ParseInt(m.Distance, 10, 8)
+ switch len(d) {
+ case 2:
+ if desired == supported && desired == "*_*" {
+ continue
+ }
+ // language-script pair.
+ matchScript = append(matchScript, scriptIntelligibility{
+ wantLang: uint16(b.langIndex(d[0])),
+ haveLang: uint16(b.langIndex(s[0])),
+ wantScript: uint8(b.scriptIndex(d[1])),
+ haveScript: uint8(b.scriptIndex(s[1])),
+ distance: uint8(distance),
+ })
+ if m.Oneway != "true" {
+ matchScript = append(matchScript, scriptIntelligibility{
+ wantLang: uint16(b.langIndex(s[0])),
+ haveLang: uint16(b.langIndex(d[0])),
+ wantScript: uint8(b.scriptIndex(s[1])),
+ haveScript: uint8(b.scriptIndex(d[1])),
+ distance: uint8(distance),
+ })
+ }
+ case 1:
+ if desired == supported && desired == "*" {
+ continue
+ }
+ if distance == 1 {
+ // nb == no is already handled by macro mapping. Check there
+ // really is only this case.
+ if d[0] != "no" || s[0] != "nb" {
+ log.Fatalf("unhandled equivalence %s == %s", s[0], d[0])
+ }
+ continue
+ }
+ // TODO: consider dropping oneway field and just doubling the entry.
+ matchLang = append(matchLang, mutualIntelligibility{
+ want: uint16(b.langIndex(d[0])),
+ have: uint16(b.langIndex(s[0])),
+ distance: uint8(distance),
+ oneway: m.Oneway == "true",
+ })
+ case 3:
+ if desired == supported && desired == "*_*_*" {
+ continue
+ }
+ if desired != supported {
+ // This is now supported by CLDR, but only one case, which
+ // should already be covered by paradigm locales. For instance,
+ // test case "und, en, en-GU, en-IN, en-GB ; en-ZA ; en-GB" in
+ // testdata/CLDRLocaleMatcherTest.txt tests this.
+ if supported != "en_*_GB" {
+ log.Fatalf("not supported: desired=%q; supported=%q", desired, supported)
+ }
+ continue
+ }
+ ri := regionIntelligibility{
+ lang: b.langIndex(d[0]),
+ distance: uint8(distance),
+ }
+ if d[1] != "*" {
+ ri.script = uint8(b.scriptIndex(d[1]))
+ }
+ switch {
+ case d[2] == "*":
+ ri.group = 0x80 // not contained in anything
+ case strings.HasPrefix(d[2], "$!"):
+ ri.group = 0x80
+ d[2] = "$" + d[2][len("$!"):]
+ fallthrough
+ case strings.HasPrefix(d[2], "$"):
+ ri.group |= idToIndex[d[2]]
+ }
+ matchRegion = append(matchRegion, ri)
+ default:
+ log.Fatalf("not supported: desired=%q; supported=%q", desired, supported)
+ }
+ }
+ sort.SliceStable(matchLang, func(i, j int) bool {
+ return matchLang[i].distance < matchLang[j].distance
+ })
+ b.w.WriteComment(`
+ matchLang holds pairs of langIDs of base languages that are typically
+ mutually intelligible. Each pair is associated with a confidence and
+ whether the intelligibility goes one or both ways.`)
+ b.w.WriteVar("matchLang", matchLang)
+
+ b.w.WriteComment(`
+ matchScript holds pairs of scriptIDs where readers of one script
+ can typically also read the other. Each is associated with a confidence.`)
+ sort.SliceStable(matchScript, func(i, j int) bool {
+ return matchScript[i].distance < matchScript[j].distance
+ })
+ b.w.WriteVar("matchScript", matchScript)
+
+ sort.SliceStable(matchRegion, func(i, j int) bool {
+ return matchRegion[i].distance < matchRegion[j].distance
+ })
+ b.w.WriteVar("matchRegion", matchRegion)
+}