diff options
Diffstat (limited to 'vendor/golang.org/x/text/internal/language/lookup.go')
-rw-r--r-- | vendor/golang.org/x/text/internal/language/lookup.go | 412 |
1 files changed, 0 insertions, 412 deletions
diff --git a/vendor/golang.org/x/text/internal/language/lookup.go b/vendor/golang.org/x/text/internal/language/lookup.go deleted file mode 100644 index 6294b815..00000000 --- a/vendor/golang.org/x/text/internal/language/lookup.go +++ /dev/null @@ -1,412 +0,0 @@ -// Copyright 2013 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package language - -import ( - "bytes" - "fmt" - "sort" - "strconv" - - "golang.org/x/text/internal/tag" -) - -// findIndex tries to find the given tag in idx and returns a standardized error -// if it could not be found. -func findIndex(idx tag.Index, key []byte, form string) (index int, err error) { - if !tag.FixCase(form, key) { - return 0, ErrSyntax - } - i := idx.Index(key) - if i == -1 { - return 0, NewValueError(key) - } - return i, nil -} - -func searchUint(imap []uint16, key uint16) int { - return sort.Search(len(imap), func(i int) bool { - return imap[i] >= key - }) -} - -type Language uint16 - -// getLangID returns the langID of s if s is a canonical subtag -// or langUnknown if s is not a canonical subtag. -func getLangID(s []byte) (Language, error) { - if len(s) == 2 { - return getLangISO2(s) - } - return getLangISO3(s) -} - -// TODO language normalization as well as the AliasMaps could be moved to the -// higher level package, but it is a bit tricky to separate the generation. - -func (id Language) Canonicalize() (Language, AliasType) { - return normLang(id) -} - -// mapLang returns the mapped langID of id according to mapping m. -func normLang(id Language) (Language, AliasType) { - k := sort.Search(len(AliasMap), func(i int) bool { - return AliasMap[i].From >= uint16(id) - }) - if k < len(AliasMap) && AliasMap[k].From == uint16(id) { - return Language(AliasMap[k].To), AliasTypes[k] - } - return id, AliasTypeUnknown -} - -// getLangISO2 returns the langID for the given 2-letter ISO language code -// or unknownLang if this does not exist. -func getLangISO2(s []byte) (Language, error) { - if !tag.FixCase("zz", s) { - return 0, ErrSyntax - } - if i := lang.Index(s); i != -1 && lang.Elem(i)[3] != 0 { - return Language(i), nil - } - return 0, NewValueError(s) -} - -const base = 'z' - 'a' + 1 - -func strToInt(s []byte) uint { - v := uint(0) - for i := 0; i < len(s); i++ { - v *= base - v += uint(s[i] - 'a') - } - return v -} - -// converts the given integer to the original ASCII string passed to strToInt. -// len(s) must match the number of characters obtained. -func intToStr(v uint, s []byte) { - for i := len(s) - 1; i >= 0; i-- { - s[i] = byte(v%base) + 'a' - v /= base - } -} - -// getLangISO3 returns the langID for the given 3-letter ISO language code -// or unknownLang if this does not exist. -func getLangISO3(s []byte) (Language, error) { - if tag.FixCase("und", s) { - // first try to match canonical 3-letter entries - for i := lang.Index(s[:2]); i != -1; i = lang.Next(s[:2], i) { - if e := lang.Elem(i); e[3] == 0 && e[2] == s[2] { - // We treat "und" as special and always translate it to "unspecified". - // Note that ZZ and Zzzz are private use and are not treated as - // unspecified by default. - id := Language(i) - if id == nonCanonicalUnd { - return 0, nil - } - return id, nil - } - } - if i := altLangISO3.Index(s); i != -1 { - return Language(altLangIndex[altLangISO3.Elem(i)[3]]), nil - } - n := strToInt(s) - if langNoIndex[n/8]&(1<<(n%8)) != 0 { - return Language(n) + langNoIndexOffset, nil - } - // Check for non-canonical uses of ISO3. - for i := lang.Index(s[:1]); i != -1; i = lang.Next(s[:1], i) { - if e := lang.Elem(i); e[2] == s[1] && e[3] == s[2] { - return Language(i), nil - } - } - return 0, NewValueError(s) - } - return 0, ErrSyntax -} - -// StringToBuf writes the string to b and returns the number of bytes -// written. cap(b) must be >= 3. -func (id Language) StringToBuf(b []byte) int { - if id >= langNoIndexOffset { - intToStr(uint(id)-langNoIndexOffset, b[:3]) - return 3 - } else if id == 0 { - return copy(b, "und") - } - l := lang[id<<2:] - if l[3] == 0 { - return copy(b, l[:3]) - } - return copy(b, l[:2]) -} - -// String returns the BCP 47 representation of the langID. -// Use b as variable name, instead of id, to ensure the variable -// used is consistent with that of Base in which this type is embedded. -func (b Language) String() string { - if b == 0 { - return "und" - } else if b >= langNoIndexOffset { - b -= langNoIndexOffset - buf := [3]byte{} - intToStr(uint(b), buf[:]) - return string(buf[:]) - } - l := lang.Elem(int(b)) - if l[3] == 0 { - return l[:3] - } - return l[:2] -} - -// ISO3 returns the ISO 639-3 language code. -func (b Language) ISO3() string { - if b == 0 || b >= langNoIndexOffset { - return b.String() - } - l := lang.Elem(int(b)) - if l[3] == 0 { - return l[:3] - } else if l[2] == 0 { - return altLangISO3.Elem(int(l[3]))[:3] - } - // This allocation will only happen for 3-letter ISO codes - // that are non-canonical BCP 47 language identifiers. - return l[0:1] + l[2:4] -} - -// IsPrivateUse reports whether this language code is reserved for private use. -func (b Language) IsPrivateUse() bool { - return langPrivateStart <= b && b <= langPrivateEnd -} - -// SuppressScript returns the script marked as SuppressScript in the IANA -// language tag repository, or 0 if there is no such script. -func (b Language) SuppressScript() Script { - if b < langNoIndexOffset { - return Script(suppressScript[b]) - } - return 0 -} - -type Region uint16 - -// getRegionID returns the region id for s if s is a valid 2-letter region code -// or unknownRegion. -func getRegionID(s []byte) (Region, error) { - if len(s) == 3 { - if isAlpha(s[0]) { - return getRegionISO3(s) - } - if i, err := strconv.ParseUint(string(s), 10, 10); err == nil { - return getRegionM49(int(i)) - } - } - return getRegionISO2(s) -} - -// getRegionISO2 returns the regionID for the given 2-letter ISO country code -// or unknownRegion if this does not exist. -func getRegionISO2(s []byte) (Region, error) { - i, err := findIndex(regionISO, s, "ZZ") - if err != nil { - return 0, err - } - return Region(i) + isoRegionOffset, nil -} - -// getRegionISO3 returns the regionID for the given 3-letter ISO country code -// or unknownRegion if this does not exist. -func getRegionISO3(s []byte) (Region, error) { - if tag.FixCase("ZZZ", s) { - for i := regionISO.Index(s[:1]); i != -1; i = regionISO.Next(s[:1], i) { - if e := regionISO.Elem(i); e[2] == s[1] && e[3] == s[2] { - return Region(i) + isoRegionOffset, nil - } - } - for i := 0; i < len(altRegionISO3); i += 3 { - if tag.Compare(altRegionISO3[i:i+3], s) == 0 { - return Region(altRegionIDs[i/3]), nil - } - } - return 0, NewValueError(s) - } - return 0, ErrSyntax -} - -func getRegionM49(n int) (Region, error) { - if 0 < n && n <= 999 { - const ( - searchBits = 7 - regionBits = 9 - regionMask = 1<<regionBits - 1 - ) - idx := n >> searchBits - buf := fromM49[m49Index[idx]:m49Index[idx+1]] - val := uint16(n) << regionBits // we rely on bits shifting out - i := sort.Search(len(buf), func(i int) bool { - return buf[i] >= val - }) - if r := fromM49[int(m49Index[idx])+i]; r&^regionMask == val { - return Region(r & regionMask), nil - } - } - var e ValueError - fmt.Fprint(bytes.NewBuffer([]byte(e.v[:])), n) - return 0, e -} - -// normRegion returns a region if r is deprecated or 0 otherwise. -// TODO: consider supporting BYS (-> BLR), CSK (-> 200 or CZ), PHI (-> PHL) and AFI (-> DJ). -// TODO: consider mapping split up regions to new most populous one (like CLDR). -func normRegion(r Region) Region { - m := regionOldMap - k := sort.Search(len(m), func(i int) bool { - return m[i].From >= uint16(r) - }) - if k < len(m) && m[k].From == uint16(r) { - return Region(m[k].To) - } - return 0 -} - -const ( - iso3166UserAssigned = 1 << iota - ccTLD - bcp47Region -) - -func (r Region) typ() byte { - return regionTypes[r] -} - -// String returns the BCP 47 representation for the region. -// It returns "ZZ" for an unspecified region. -func (r Region) String() string { - if r < isoRegionOffset { - if r == 0 { - return "ZZ" - } - return fmt.Sprintf("%03d", r.M49()) - } - r -= isoRegionOffset - return regionISO.Elem(int(r))[:2] -} - -// ISO3 returns the 3-letter ISO code of r. -// Note that not all regions have a 3-letter ISO code. -// In such cases this method returns "ZZZ". -func (r Region) ISO3() string { - if r < isoRegionOffset { - return "ZZZ" - } - r -= isoRegionOffset - reg := regionISO.Elem(int(r)) - switch reg[2] { - case 0: - return altRegionISO3[reg[3]:][:3] - case ' ': - return "ZZZ" - } - return reg[0:1] + reg[2:4] -} - -// M49 returns the UN M.49 encoding of r, or 0 if this encoding -// is not defined for r. -func (r Region) M49() int { - return int(m49[r]) -} - -// IsPrivateUse reports whether r has the ISO 3166 User-assigned status. This -// may include private-use tags that are assigned by CLDR and used in this -// implementation. So IsPrivateUse and IsCountry can be simultaneously true. -func (r Region) IsPrivateUse() bool { - return r.typ()&iso3166UserAssigned != 0 -} - -type Script uint8 - -// getScriptID returns the script id for string s. It assumes that s -// is of the format [A-Z][a-z]{3}. -func getScriptID(idx tag.Index, s []byte) (Script, error) { - i, err := findIndex(idx, s, "Zzzz") - return Script(i), err -} - -// String returns the script code in title case. -// It returns "Zzzz" for an unspecified script. -func (s Script) String() string { - if s == 0 { - return "Zzzz" - } - return script.Elem(int(s)) -} - -// IsPrivateUse reports whether this script code is reserved for private use. -func (s Script) IsPrivateUse() bool { - return _Qaaa <= s && s <= _Qabx -} - -const ( - maxAltTaglen = len("en-US-POSIX") - maxLen = maxAltTaglen -) - -var ( - // grandfatheredMap holds a mapping from legacy and grandfathered tags to - // their base language or index to more elaborate tag. - grandfatheredMap = map[[maxLen]byte]int16{ - [maxLen]byte{'a', 'r', 't', '-', 'l', 'o', 'j', 'b', 'a', 'n'}: _jbo, // art-lojban - [maxLen]byte{'i', '-', 'a', 'm', 'i'}: _ami, // i-ami - [maxLen]byte{'i', '-', 'b', 'n', 'n'}: _bnn, // i-bnn - [maxLen]byte{'i', '-', 'h', 'a', 'k'}: _hak, // i-hak - [maxLen]byte{'i', '-', 'k', 'l', 'i', 'n', 'g', 'o', 'n'}: _tlh, // i-klingon - [maxLen]byte{'i', '-', 'l', 'u', 'x'}: _lb, // i-lux - [maxLen]byte{'i', '-', 'n', 'a', 'v', 'a', 'j', 'o'}: _nv, // i-navajo - [maxLen]byte{'i', '-', 'p', 'w', 'n'}: _pwn, // i-pwn - [maxLen]byte{'i', '-', 't', 'a', 'o'}: _tao, // i-tao - [maxLen]byte{'i', '-', 't', 'a', 'y'}: _tay, // i-tay - [maxLen]byte{'i', '-', 't', 's', 'u'}: _tsu, // i-tsu - [maxLen]byte{'n', 'o', '-', 'b', 'o', 'k'}: _nb, // no-bok - [maxLen]byte{'n', 'o', '-', 'n', 'y', 'n'}: _nn, // no-nyn - [maxLen]byte{'s', 'g', 'n', '-', 'b', 'e', '-', 'f', 'r'}: _sfb, // sgn-BE-FR - [maxLen]byte{'s', 'g', 'n', '-', 'b', 'e', '-', 'n', 'l'}: _vgt, // sgn-BE-NL - [maxLen]byte{'s', 'g', 'n', '-', 'c', 'h', '-', 'd', 'e'}: _sgg, // sgn-CH-DE - [maxLen]byte{'z', 'h', '-', 'g', 'u', 'o', 'y', 'u'}: _cmn, // zh-guoyu - [maxLen]byte{'z', 'h', '-', 'h', 'a', 'k', 'k', 'a'}: _hak, // zh-hakka - [maxLen]byte{'z', 'h', '-', 'm', 'i', 'n', '-', 'n', 'a', 'n'}: _nan, // zh-min-nan - [maxLen]byte{'z', 'h', '-', 'x', 'i', 'a', 'n', 'g'}: _hsn, // zh-xiang - - // Grandfathered tags with no modern replacement will be converted as - // follows: - [maxLen]byte{'c', 'e', 'l', '-', 'g', 'a', 'u', 'l', 'i', 's', 'h'}: -1, // cel-gaulish - [maxLen]byte{'e', 'n', '-', 'g', 'b', '-', 'o', 'e', 'd'}: -2, // en-GB-oed - [maxLen]byte{'i', '-', 'd', 'e', 'f', 'a', 'u', 'l', 't'}: -3, // i-default - [maxLen]byte{'i', '-', 'e', 'n', 'o', 'c', 'h', 'i', 'a', 'n'}: -4, // i-enochian - [maxLen]byte{'i', '-', 'm', 'i', 'n', 'g', 'o'}: -5, // i-mingo - [maxLen]byte{'z', 'h', '-', 'm', 'i', 'n'}: -6, // zh-min - - // CLDR-specific tag. - [maxLen]byte{'r', 'o', 'o', 't'}: 0, // root - [maxLen]byte{'e', 'n', '-', 'u', 's', '-', 'p', 'o', 's', 'i', 'x'}: -7, // en_US_POSIX" - } - - altTagIndex = [...]uint8{0, 17, 31, 45, 61, 74, 86, 102} - - altTags = "xtg-x-cel-gaulishen-GB-oxendicten-x-i-defaultund-x-i-enochiansee-x-i-mingonan-x-zh-minen-US-u-va-posix" -) - -func grandfathered(s [maxAltTaglen]byte) (t Tag, ok bool) { - if v, ok := grandfatheredMap[s]; ok { - if v < 0 { - return Make(altTags[altTagIndex[-v-1]:altTagIndex[-v]]), true - } - t.LangID = Language(v) - return t, true - } - return t, false -} |