diff options
Diffstat (limited to 'vendor/golang.org/x/net/idna/idna10.0.0.go')
-rw-r--r-- | vendor/golang.org/x/net/idna/idna10.0.0.go | 734 |
1 files changed, 734 insertions, 0 deletions
diff --git a/vendor/golang.org/x/net/idna/idna10.0.0.go b/vendor/golang.org/x/net/idna/idna10.0.0.go new file mode 100644 index 00000000..a98a31f4 --- /dev/null +++ b/vendor/golang.org/x/net/idna/idna10.0.0.go @@ -0,0 +1,734 @@ +// Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT. + +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build go1.10 + +// Package idna implements IDNA2008 using the compatibility processing +// defined by UTS (Unicode Technical Standard) #46, which defines a standard to +// deal with the transition from IDNA2003. +// +// IDNA2008 (Internationalized Domain Names for Applications), is defined in RFC +// 5890, RFC 5891, RFC 5892, RFC 5893 and RFC 5894. +// UTS #46 is defined in https://www.unicode.org/reports/tr46. +// See https://unicode.org/cldr/utility/idna.jsp for a visualization of the +// differences between these two standards. +package idna // import "golang.org/x/net/idna" + +import ( + "fmt" + "strings" + "unicode/utf8" + + "golang.org/x/text/secure/bidirule" + "golang.org/x/text/unicode/bidi" + "golang.org/x/text/unicode/norm" +) + +// NOTE: Unlike common practice in Go APIs, the functions will return a +// sanitized domain name in case of errors. Browsers sometimes use a partially +// evaluated string as lookup. +// TODO: the current error handling is, in my opinion, the least opinionated. +// Other strategies are also viable, though: +// Option 1) Return an empty string in case of error, but allow the user to +// specify explicitly which errors to ignore. +// Option 2) Return the partially evaluated string if it is itself a valid +// string, otherwise return the empty string in case of error. +// Option 3) Option 1 and 2. 
+// Option 4) Always return an empty string for now and implement Option 1 as +// needed, and document that the return string may not be empty in case of +// error in the future. +// I think Option 1 is best, but it is quite opinionated. + +// ToASCII is a wrapper for Punycode.ToASCII. +func ToASCII(s string) (string, error) { + return Punycode.process(s, true) +} + +// ToUnicode is a wrapper for Punycode.ToUnicode. +func ToUnicode(s string) (string, error) { + return Punycode.process(s, false) +} + +// An Option configures a Profile at creation time. +type Option func(*options) + +// Transitional sets a Profile to use the Transitional mapping as defined in UTS +// #46. This will cause, for example, "ß" to be mapped to "ss". Using the +// transitional mapping provides a compromise between IDNA2003 and IDNA2008 +// compatibility. It is used by most browsers when resolving domain names. This +// option is only meaningful if combined with MapForLookup. +func Transitional(transitional bool) Option { + return func(o *options) { o.transitional = true } +} + +// VerifyDNSLength sets whether a Profile should fail if any of the IDN parts +// are longer than allowed by the RFC. +func VerifyDNSLength(verify bool) Option { + return func(o *options) { o.verifyDNSLength = verify } +} + +// RemoveLeadingDots removes leading label separators. Leading runes that map to +// dots, such as U+3002 IDEOGRAPHIC FULL STOP, are removed as well. +// +// This is the behavior suggested by the UTS #46 and is adopted by some +// browsers. +func RemoveLeadingDots(remove bool) Option { + return func(o *options) { o.removeLeadingDots = remove } +} + +// ValidateLabels sets whether to check the mandatory label validation criteria +// as defined in Section 5.4 of RFC 5891. This includes testing for correct use +// of hyphens ('-'), normalization, validity of runes, and the context rules. 
+func ValidateLabels(enable bool) Option { + return func(o *options) { + // Don't override existing mappings, but set one that at least checks + // normalization if it is not set. + if o.mapping == nil && enable { + o.mapping = normalize + } + o.trie = trie + o.validateLabels = enable + o.fromPuny = validateFromPunycode + } +} + +// StrictDomainName limits the set of permissible ASCII characters to those +// allowed in domain names as defined in RFC 1034 (A-Z, a-z, 0-9 and the +// hyphen). This is set by default for MapForLookup and ValidateForRegistration. +// +// This option is useful, for instance, for browsers that allow characters +// outside this range, for example a '_' (U+005F LOW LINE). See +// http://www.rfc-editor.org/std/std3.txt for more details This option +// corresponds to the UseSTD3ASCIIRules option in UTS #46. +func StrictDomainName(use bool) Option { + return func(o *options) { + o.trie = trie + o.useSTD3Rules = use + o.fromPuny = validateFromPunycode + } +} + +// NOTE: the following options pull in tables. The tables should not be linked +// in as long as the options are not used. + +// BidiRule enables the Bidi rule as defined in RFC 5893. Any application +// that relies on proper validation of labels should include this rule. +func BidiRule() Option { + return func(o *options) { o.bidirule = bidirule.ValidString } +} + +// ValidateForRegistration sets validation options to verify that a given IDN is +// properly formatted for registration as defined by Section 4 of RFC 5891. +func ValidateForRegistration() Option { + return func(o *options) { + o.mapping = validateRegistration + StrictDomainName(true)(o) + ValidateLabels(true)(o) + VerifyDNSLength(true)(o) + BidiRule()(o) + } +} + +// MapForLookup sets validation and mapping options such that a given IDN is +// transformed for domain name lookup according to the requirements set out in +// Section 5 of RFC 5891. The mappings follow the recommendations of RFC 5894, +// RFC 5895 and UTS 46. 
It does not add the Bidi Rule. Use the BidiRule option +// to add this check. +// +// The mappings include normalization and mapping case, width and other +// compatibility mappings. +func MapForLookup() Option { + return func(o *options) { + o.mapping = validateAndMap + StrictDomainName(true)(o) + ValidateLabels(true)(o) + } +} + +type options struct { + transitional bool + useSTD3Rules bool + validateLabels bool + verifyDNSLength bool + removeLeadingDots bool + + trie *idnaTrie + + // fromPuny calls validation rules when converting A-labels to U-labels. + fromPuny func(p *Profile, s string) error + + // mapping implements a validation and mapping step as defined in RFC 5895 + // or UTS 46, tailored to, for example, domain registration or lookup. + mapping func(p *Profile, s string) (mapped string, isBidi bool, err error) + + // bidirule, if specified, checks whether s conforms to the Bidi Rule + // defined in RFC 5893. + bidirule func(s string) bool +} + +// A Profile defines the configuration of an IDNA mapper. +type Profile struct { + options +} + +func apply(o *options, opts []Option) { + for _, f := range opts { + f(o) + } +} + +// New creates a new Profile. +// +// With no options, the returned Profile is the most permissive and equals the +// Punycode Profile. Options can be passed to further restrict the Profile. The +// MapForLookup and ValidateForRegistration options set a collection of options, +// for lookup and registration purposes respectively, which can be tailored by +// adding more fine-grained options, where later options override earlier +// options. +func New(o ...Option) *Profile { + p := &Profile{} + apply(&p.options, o) + return p +} + +// ToASCII converts a domain or domain label to its ASCII form. For example, +// ToASCII("bücher.example.com") is "xn--bcher-kva.example.com", and +// ToASCII("golang") is "golang". If an error is encountered it will return +// an error and a (partially) processed result. 
func (p *Profile) ToASCII(s string) (string, error) {
	return p.process(s, true)
}

// ToUnicode converts a domain or domain label to its Unicode form. For example,
// ToUnicode("xn--bcher-kva.example.com") is "bücher.example.com", and
// ToUnicode("golang") is "golang". If an error is encountered it will return
// an error and a (partially) processed result.
func (p *Profile) ToUnicode(s string) (string, error) {
	// Work on a copy so the receiver is not modified; the copy always runs
	// with transitional processing switched off.
	pp := *p
	pp.transitional = false
	return pp.process(s, false)
}

// String reports a string with a description of the profile for debugging
// purposes. The string format may change with different versions.
//
// The result starts with "Transitional" or "NonTransitional" and is followed
// by one ":Name" suffix for each of useSTD3Rules, validateLabels and
// verifyDNSLength that is enabled.
func (p *Profile) String() string {
	s := ""
	if p.transitional {
		s = "Transitional"
	} else {
		s = "NonTransitional"
	}
	if p.useSTD3Rules {
		s += ":UseSTD3Rules"
	}
	if p.validateLabels {
		s += ":ValidateLabels"
	}
	if p.verifyDNSLength {
		s += ":VerifyDNSLength"
	}
	return s
}

var (
	// Punycode is a Profile that does raw punycode processing with a minimum
	// of validation.
	Punycode *Profile = punycode

	// Lookup is the recommended profile for looking up domain names, according
	// to Section 5 of RFC 5891. The exact configuration of this profile may
	// change over time.
	Lookup *Profile = lookup

	// Display is the recommended profile for displaying domain names.
	// The configuration of this profile may change over time.
	Display *Profile = display

	// Registration is the recommended profile for checking whether a given
	// IDN is valid for registration, according to Section 4 of RFC 5891.
	Registration *Profile = registration

	// punycode has every option at its zero value: no mapping, no label
	// validation and no length checks.
	punycode = &Profile{}
	// lookup differs from display only in having transitional set.
	lookup = &Profile{options{
		transitional:   true,
		useSTD3Rules:   true,
		validateLabels: true,
		trie:           trie,
		fromPuny:       validateFromPunycode,
		mapping:        validateAndMap,
		bidirule:       bidirule.ValidString,
	}}
	display = &Profile{options{
		useSTD3Rules:   true,
		validateLabels: true,
		trie:           trie,
		fromPuny:       validateFromPunycode,
		mapping:        validateAndMap,
		bidirule:       bidirule.ValidString,
	}}
	// registration validates without mapping (validateRegistration) and also
	// enforces the DNS length limits.
	registration = &Profile{options{
		useSTD3Rules:    true,
		validateLabels:  true,
		verifyDNSLength: true,
		trie:            trie,
		fromPuny:        validateFromPunycode,
		mapping:         validateRegistration,
		bidirule:        bidirule.ValidString,
	}}

	// TODO: profiles
	// Register: recommended for approving domain names: don't do any mappings
	// but rather reject on invalid input. Bundle or block deviation characters.
)

// labelError reports a label (or a whole name) that failed a validation rule.
// label is the offending text and code_ is the short rule identifier given at
// the construction site (for example "A4", "V1" or "B").
type labelError struct{ label, code_ string }

// code returns the rule identifier stored in the error.
func (e labelError) code() string { return e.code_ }

// Error implements the error interface; the message quotes the label but does
// not include the rule identifier.
func (e labelError) Error() string {
	return fmt.Sprintf("idna: invalid label %q", e.label)
}

// runeError reports a rune that is not allowed; the value is the rune itself.
type runeError rune

// code returns the rule identifier for a disallowed rune, which is always "P1".
func (e runeError) code() string { return "P1" }

// Error implements the error interface; the rune is printed in U+XXXX form.
func (e runeError) Error() string {
	return fmt.Sprintf("idna: disallowed rune %U", e)
}

// process implements the algorithm described in section 4 of UTS #46,
// see https://www.unicode.org/reports/tr46.
//
// s is the domain name or label to convert. When toASCII is true non-ASCII
// labels are encoded with the ACE prefix and, if verifyDNSLength is set, the
// label and name length limits are enforced. Labels that start with the ACE
// prefix are decoded in both directions.
//
// Only the first error encountered is kept; processing continues after an
// error so that the returned string is the (partially) processed name.
func (p *Profile) process(s string, toASCII bool) (string, error) {
	var err error
	var isBidi bool
	// Step 1: whole-string mapping/validation, if the profile has one.
	if p.mapping != nil {
		s, isBidi, err = p.mapping(p, s)
	}
	// Remove leading empty labels.
	if p.removeLeadingDots {
		for ; len(s) > 0 && s[0] == '.'; s = s[1:] {
		}
	}
	// TODO: allow for a quick check of the tables data.
	// It seems like we should only create this error on ToASCII, but the
	// UTS 46 conformance tests suggests we should always check this.
	if err == nil && p.verifyDNSLength && s == "" {
		err = &labelError{s, "A4"}
	}
	// Step 2: per-label pass that decodes ACE labels and validates each label.
	labels := labelIter{orig: s}
	for ; !labels.done(); labels.next() {
		label := labels.label()
		if label == "" {
			// Empty labels are not okay. The label iterator skips the last
			// label if it is empty.
			if err == nil && p.verifyDNSLength {
				err = &labelError{s, "A4"}
			}
			continue
		}
		if strings.HasPrefix(label, acePrefix) {
			u, err2 := decode(label[len(acePrefix):])
			if err2 != nil {
				if err == nil {
					err = err2
				}
				// Spec says keep the old label.
				continue
			}
			// A decoded label can introduce right-to-left content that the
			// mapping step did not see.
			isBidi = isBidi || bidirule.DirectionString(u) != bidi.LeftToRight
			labels.set(u)
			if err == nil && p.validateLabels {
				err = p.fromPuny(p, u)
			}
			if err == nil {
				// This should be called on NonTransitional, according to the
				// spec, but that currently does not have any effect. Use the
				// original profile to preserve options.
				err = p.validateLabel(u)
			}
		} else if err == nil {
			err = p.validateLabel(label)
		}
	}
	// Step 3: the Bidi rule is only checked when bidi content was seen, a
	// checker is configured and nothing has failed so far.
	if isBidi && p.bidirule != nil && err == nil {
		for labels.reset(); !labels.done(); labels.next() {
			if !p.bidirule(labels.label()) {
				err = &labelError{s, "B"}
				break
			}
		}
	}
	// Step 4: encode non-ASCII labels and check the per-label length.
	if toASCII {
		for labels.reset(); !labels.done(); labels.next() {
			label := labels.label()
			if !ascii(label) {
				a, err2 := encode(acePrefix, label)
				if err == nil {
					err = err2
				}
				// The encoder's output replaces the label even when it
				// reported an error.
				label = a
				labels.set(a)
			}
			n := len(label)
			if p.verifyDNSLength && err == nil && (n == 0 || n > 63) {
				err = &labelError{label, "A4"}
			}
		}
	}
	s = labels.result()
	if toASCII && p.verifyDNSLength && err == nil {
		// Compute the length of the domain name minus the root label and its dot.
		n := len(s)
		if n > 0 && s[n-1] == '.' {
			n--
		}
		if len(s) < 1 || n > 253 {
			err = &labelError{s, "A4"}
		}
	}
	return s, err
}

// normalize is the mapping used when labels are validated but no other mapping
// is configured. It returns the NFC form of s and reports whether
// bidirule.DirectionString classifies the result as right-to-left. It never
// returns an error, and p is unused.
func normalize(p *Profile, s string) (mapped string, isBidi bool, err error) {
	// TODO: consider first doing a quick check to see if any of these checks
	// need to be done. This will make it slower in the general case, but
	// faster in the common case.
	mapped = norm.NFC.String(s)
	isBidi = bidirule.DirectionString(mapped) == bidi.RightToLeft
	return mapped, isBidi, nil
}

// validateRegistration is the mapping step of the registration profile. It
// performs no mapping: s is returned unchanged on every path. It fails with a
// "V1" labelError when s is not in NFC form, and with a runeError for the
// first rune whose simplified category is anything other than valid or
// deviation. bidi reports whether any rune examined so far is bidirectional
// according to info.isBidi.
func validateRegistration(p *Profile, s string) (idem string, bidi bool, err error) {
	// TODO: filter need for normalization in loop below.
	if !norm.NFC.IsNormalString(s) {
		return s, false, &labelError{s, "V1"}
	}
	for i := 0; i < len(s); {
		v, sz := trie.lookupString(s[i:])
		// A zero size is reported as the Unicode replacement rune.
		if sz == 0 {
			return s, bidi, runeError(utf8.RuneError)
		}
		bidi = bidi || info(v).isBidi(s[i:])
		// Copy bytes not copied so far.
		switch p.simplify(info(v).category()) {
		// TODO: handle the NV8 defined in the Unicode idna data set to allow
		// for strict conformance to IDNA2008.
		case valid, deviation:
		case disallowed, mapped, unknown, ignored:
			r, _ := utf8.DecodeRuneInString(s[i:])
			return s, bidi, runeError(r)
		}
		i += sz
	}
	return s, bidi, nil
}

// isBidi reports whether the rune at the start of s, whose trie entry is c,
// counts as bidirectional. For entries that are not mapped the answer comes
// from the entry's attribute bits; for mapped entries the bidi class of the
// rune is looked up and R, AL and AN count as bidirectional.
func (c info) isBidi(s string) bool {
	if !c.isMapped() {
		return c&attributesMask == rtl
	}
	// TODO: also store bidi info for mapped data. This is possible, but a bit
	// cumbersome and not for the common case.
	p, _ := bidi.LookupString(s)
	switch p.Class() {
	case bidi.R, bidi.AL, bidi.AN:
		return true
	}
	return false
}

// validateAndMap is the mapping step of the lookup and display profiles. It
// walks s rune by rune and, depending on each rune's simplified category,
// keeps it, replaces it with its mapping, drops it, or substitutes U+FFFD.
// The output is NFC-normalized when needed. Only the first error is recorded
// and, apart from a zero-size lookup, processing continues after an error.
// bidi reports whether any rune seen is bidirectional according to
// info.isBidi.
func validateAndMap(p *Profile, s string) (vm string, bidi bool, err error) {
	var (
		b []byte // output buffer; only written to once a rune has to change
		k int    // index in s up to which input has been copied into b
	)
	// combinedInfoBits contains the or-ed bits of all runes. We use this
	// to derive the mayNeedNorm bit later. This may trigger normalization
	// overeagerly, but it will not do so in the common case. The end result
	// is another 10% saving on BenchmarkProfile for the common case.
	var combinedInfoBits info
	for i := 0; i < len(s); {
		v, sz := trie.lookupString(s[i:])
		if sz == 0 {
			// Flush what precedes the bad input, emit a replacement
			// character for the remainder and stop scanning.
			b = append(b, s[k:i]...)
			b = append(b, "\ufffd"...)
			k = len(s)
			if err == nil {
				err = runeError(utf8.RuneError)
			}
			break
		}
		combinedInfoBits |= info(v)
		bidi = bidi || info(v).isBidi(s[i:])
		start := i
		i += sz
		// Copy bytes not copied so far.
		switch p.simplify(info(v).category()) {
		case valid:
			continue
		case disallowed:
			// The rune stays in the output; only the error is recorded.
			if err == nil {
				r, _ := utf8.DecodeRuneInString(s[start:])
				err = runeError(r)
			}
			continue
		case mapped, deviation:
			b = append(b, s[k:start]...)
			b = info(v).appendMapping(b, s[start:i])
		case ignored:
			b = append(b, s[k:start]...)
			// drop the rune
		case unknown:
			b = append(b, s[k:start]...)
			b = append(b, "\ufffd"...)
		}
		// Reached only by the cases that wrote to b: the input up to i is
		// now accounted for.
		k = i
	}
	if k == 0 {
		// No changes so far.
		if combinedInfoBits&mayNeedNorm != 0 {
			s = norm.NFC.String(s)
		}
	} else {
		// Append the untouched tail, then normalize only if the quick check
		// does not cover the whole buffer.
		b = append(b, s[k:]...)
		if norm.NFC.QuickSpan(b) != len(b) {
			b = norm.NFC.Bytes(b)
		}
		// TODO: the punycode converters require strings as input.
		s = string(b)
	}
	return s, bidi, err
}

// A labelIter allows iterating over domain name labels.
//
// It starts out scanning orig directly. After the first call to set, slice
// holds the labels and label, next and result work from slice instead.
type labelIter struct {
	orig     string   // the name being iterated over
	slice    []string // labels of orig split on '.'; nil until set is called
	curStart int      // start of the current label in orig
	curEnd   int      // end of the current label in orig; updated by label
	i        int      // index of the current label
}

// reset rewinds the iterator to the first label. Replacements made with set
// are kept.
func (l *labelIter) reset() {
	l.curStart = 0
	l.curEnd = 0
	l.i = 0
}

// done reports whether the iteration has moved past the last label.
func (l *labelIter) done() bool {
	return l.curStart >= len(l.orig)
}

// result returns the name with any replaced labels: the labels joined with "."
// if set was called, and orig otherwise.
func (l *labelIter) result() string {
	if l.slice != nil {
		return strings.Join(l.slice, ".")
	}
	return l.orig
}

// label returns the current label. When scanning orig it also records the end
// of the label in curEnd, which next relies on.
func (l *labelIter) label() string {
	if l.slice != nil {
		return l.slice[l.i]
	}
	p := strings.IndexByte(l.orig[l.curStart:], '.')
	l.curEnd = l.curStart + p
	// No further dot: the label runs to the end of the string.
	if p == -1 {
		l.curEnd = len(l.orig)
	}
	return l.orig[l.curStart:l.curEnd]
}

// next sets the value to the next label. It skips the last label if it is empty.
+func (l *labelIter) next() { + l.i++ + if l.slice != nil { + if l.i >= len(l.slice) || l.i == len(l.slice)-1 && l.slice[l.i] == "" { + l.curStart = len(l.orig) + } + } else { + l.curStart = l.curEnd + 1 + if l.curStart == len(l.orig)-1 && l.orig[l.curStart] == '.' { + l.curStart = len(l.orig) + } + } +} + +func (l *labelIter) set(s string) { + if l.slice == nil { + l.slice = strings.Split(l.orig, ".") + } + l.slice[l.i] = s +} + +// acePrefix is the ASCII Compatible Encoding prefix. +const acePrefix = "xn--" + +func (p *Profile) simplify(cat category) category { + switch cat { + case disallowedSTD3Mapped: + if p.useSTD3Rules { + cat = disallowed + } else { + cat = mapped + } + case disallowedSTD3Valid: + if p.useSTD3Rules { + cat = disallowed + } else { + cat = valid + } + case deviation: + if !p.transitional { + cat = valid + } + case validNV8, validXV8: + // TODO: handle V2008 + cat = valid + } + return cat +} + +func validateFromPunycode(p *Profile, s string) error { + if !norm.NFC.IsNormalString(s) { + return &labelError{s, "V1"} + } + // TODO: detect whether string may have to be normalized in the following + // loop. 
+ for i := 0; i < len(s); { + v, sz := trie.lookupString(s[i:]) + if sz == 0 { + return runeError(utf8.RuneError) + } + if c := p.simplify(info(v).category()); c != valid && c != deviation { + return &labelError{s, "V6"} + } + i += sz + } + return nil +} + +const ( + zwnj = "\u200c" + zwj = "\u200d" +) + +type joinState int8 + +const ( + stateStart joinState = iota + stateVirama + stateBefore + stateBeforeVirama + stateAfter + stateFAIL +) + +var joinStates = [][numJoinTypes]joinState{ + stateStart: { + joiningL: stateBefore, + joiningD: stateBefore, + joinZWNJ: stateFAIL, + joinZWJ: stateFAIL, + joinVirama: stateVirama, + }, + stateVirama: { + joiningL: stateBefore, + joiningD: stateBefore, + }, + stateBefore: { + joiningL: stateBefore, + joiningD: stateBefore, + joiningT: stateBefore, + joinZWNJ: stateAfter, + joinZWJ: stateFAIL, + joinVirama: stateBeforeVirama, + }, + stateBeforeVirama: { + joiningL: stateBefore, + joiningD: stateBefore, + joiningT: stateBefore, + }, + stateAfter: { + joiningL: stateFAIL, + joiningD: stateBefore, + joiningT: stateAfter, + joiningR: stateStart, + joinZWNJ: stateFAIL, + joinZWJ: stateFAIL, + joinVirama: stateAfter, // no-op as we can't accept joiners here + }, + stateFAIL: { + 0: stateFAIL, + joiningL: stateFAIL, + joiningD: stateFAIL, + joiningT: stateFAIL, + joiningR: stateFAIL, + joinZWNJ: stateFAIL, + joinZWJ: stateFAIL, + joinVirama: stateFAIL, + }, +} + +// validateLabel validates the criteria from Section 4.1. Item 1, 4, and 6 are +// already implicitly satisfied by the overall implementation. +func (p *Profile) validateLabel(s string) (err error) { + if s == "" { + if p.verifyDNSLength { + return &labelError{s, "A4"} + } + return nil + } + if !p.validateLabels { + return nil + } + trie := p.trie // p.validateLabels is only set if trie is set. 
+ if len(s) > 4 && s[2] == '-' && s[3] == '-' { + return &labelError{s, "V2"} + } + if s[0] == '-' || s[len(s)-1] == '-' { + return &labelError{s, "V3"} + } + // TODO: merge the use of this in the trie. + v, sz := trie.lookupString(s) + x := info(v) + if x.isModifier() { + return &labelError{s, "V5"} + } + // Quickly return in the absence of zero-width (non) joiners. + if strings.Index(s, zwj) == -1 && strings.Index(s, zwnj) == -1 { + return nil + } + st := stateStart + for i := 0; ; { + jt := x.joinType() + if s[i:i+sz] == zwj { + jt = joinZWJ + } else if s[i:i+sz] == zwnj { + jt = joinZWNJ + } + st = joinStates[st][jt] + if x.isViramaModifier() { + st = joinStates[st][joinVirama] + } + if i += sz; i == len(s) { + break + } + v, sz = trie.lookupString(s[i:]) + x = info(v) + } + if st == stateFAIL || st == stateAfter { + return &labelError{s, "C"} + } + return nil +} + +func ascii(s string) bool { + for i := 0; i < len(s); i++ { + if s[i] >= utf8.RuneSelf { + return false + } + } + return true +} |