summaryrefslogtreecommitdiffstats
path: root/vendor/golang.org/x/text/encoding/encoding.go
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/golang.org/x/text/encoding/encoding.go')
-rw-r--r--vendor/golang.org/x/text/encoding/encoding.go335
1 files changed, 335 insertions, 0 deletions
diff --git a/vendor/golang.org/x/text/encoding/encoding.go b/vendor/golang.org/x/text/encoding/encoding.go
new file mode 100644
index 00000000..221f175c
--- /dev/null
+++ b/vendor/golang.org/x/text/encoding/encoding.go
@@ -0,0 +1,335 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package encoding defines an interface for character encodings, such as Shift
+// JIS and Windows 1252, that can convert to and from UTF-8.
+//
+// Encoding implementations are provided in other packages, such as
+// golang.org/x/text/encoding/charmap and
+// golang.org/x/text/encoding/japanese.
+package encoding // import "golang.org/x/text/encoding"
+
+import (
+ "errors"
+ "io"
+ "strconv"
+ "unicode/utf8"
+
+ "golang.org/x/text/encoding/internal/identifier"
+ "golang.org/x/text/transform"
+)
+
+// TODO:
+// - There seems to be some inconsistency in when decoders return errors
+// and when not. Also documentation seems to suggest they shouldn't return
+// errors at all (except for UTF-16).
+// - Encoders seem to rely on or at least benefit from the input being in NFC
+// normal form. Perhaps add an example how users could prepare their output.
+
+// Encoding is a character set encoding that can be transformed to and from
+// UTF-8.
+type Encoding interface {
+ // NewDecoder returns a Decoder.
+ NewDecoder() *Decoder
+
+ // NewEncoder returns an Encoder.
+ NewEncoder() *Encoder
+}
+
+// A Decoder converts bytes to UTF-8. It implements transform.Transformer.
+//
+// Transforming source bytes that are not of that encoding will not result in an
+// error per se. Each byte that cannot be transcoded will be represented in the
+// output by the UTF-8 encoding of '\uFFFD', the replacement rune.
+type Decoder struct {
+ transform.Transformer
+
+ // This forces external creators of Decoders to use names in struct
+ // initializers, allowing for future extendibility without having to break
+ // code.
+ _ struct{}
+}
+
+// Bytes converts the given encoded bytes to UTF-8. It returns the converted
+// bytes or nil, err if any error occurred.
+func (d *Decoder) Bytes(b []byte) ([]byte, error) {
+ b, _, err := transform.Bytes(d, b)
+ if err != nil {
+ return nil, err
+ }
+ return b, nil
+}
+
+// String converts the given encoded string to UTF-8. It returns the converted
+// string or "", err if any error occurred.
+func (d *Decoder) String(s string) (string, error) {
+ s, _, err := transform.String(d, s)
+ if err != nil {
+ return "", err
+ }
+ return s, nil
+}
+
+// Reader wraps another Reader to decode its bytes.
+//
+// The Decoder may not be used for any other operation as long as the returned
+// Reader is in use.
+func (d *Decoder) Reader(r io.Reader) io.Reader {
+ return transform.NewReader(r, d)
+}
+
+// An Encoder converts bytes from UTF-8. It implements transform.Transformer.
+//
+// Each rune that cannot be transcoded will result in an error. In this case,
+// the transform will consume all source byte up to, not including the offending
+// rune. Transforming source bytes that are not valid UTF-8 will be replaced by
+// `\uFFFD`. To return early with an error instead, use transform.Chain to
+// preprocess the data with a UTF8Validator.
+type Encoder struct {
+ transform.Transformer
+
+ // This forces external creators of Encoders to use names in struct
+ // initializers, allowing for future extendibility without having to break
+ // code.
+ _ struct{}
+}
+
+// Bytes converts bytes from UTF-8. It returns the converted bytes or nil, err if
+// any error occurred.
+func (e *Encoder) Bytes(b []byte) ([]byte, error) {
+ b, _, err := transform.Bytes(e, b)
+ if err != nil {
+ return nil, err
+ }
+ return b, nil
+}
+
+// String converts a string from UTF-8. It returns the converted string or
+// "", err if any error occurred.
+func (e *Encoder) String(s string) (string, error) {
+ s, _, err := transform.String(e, s)
+ if err != nil {
+ return "", err
+ }
+ return s, nil
+}
+
+// Writer wraps another Writer to encode its UTF-8 output.
+//
+// The Encoder may not be used for any other operation as long as the returned
+// Writer is in use.
+func (e *Encoder) Writer(w io.Writer) io.Writer {
+ return transform.NewWriter(w, e)
+}
+
+// ASCIISub is the ASCII substitute character, as recommended by
+// http://unicode.org/reports/tr36/#Text_Comparison
+const ASCIISub = '\x1a'
+
+// Nop is the nop encoding. Its transformed bytes are the same as the source
+// bytes; it does not replace invalid UTF-8 sequences.
+var Nop Encoding = nop{}
+
+type nop struct{}
+
+func (nop) NewDecoder() *Decoder {
+ return &Decoder{Transformer: transform.Nop}
+}
+func (nop) NewEncoder() *Encoder {
+ return &Encoder{Transformer: transform.Nop}
+}
+
+// Replacement is the replacement encoding. Decoding from the replacement
+// encoding yields a single '\uFFFD' replacement rune. Encoding from UTF-8 to
+// the replacement encoding yields the same as the source bytes except that
+// invalid UTF-8 is converted to '\uFFFD'.
+//
+// It is defined at http://encoding.spec.whatwg.org/#replacement
+var Replacement Encoding = replacement{}
+
+type replacement struct{}
+
+func (replacement) NewDecoder() *Decoder {
+ return &Decoder{Transformer: replacementDecoder{}}
+}
+
+func (replacement) NewEncoder() *Encoder {
+ return &Encoder{Transformer: replacementEncoder{}}
+}
+
+func (replacement) ID() (mib identifier.MIB, other string) {
+ return identifier.Replacement, ""
+}
+
+type replacementDecoder struct{ transform.NopResetter }
+
+func (replacementDecoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
+ if len(dst) < 3 {
+ return 0, 0, transform.ErrShortDst
+ }
+ if atEOF {
+ const fffd = "\ufffd"
+ dst[0] = fffd[0]
+ dst[1] = fffd[1]
+ dst[2] = fffd[2]
+ nDst = 3
+ }
+ return nDst, len(src), nil
+}
+
+type replacementEncoder struct{ transform.NopResetter }
+
+func (replacementEncoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
+ r, size := rune(0), 0
+
+ for ; nSrc < len(src); nSrc += size {
+ r = rune(src[nSrc])
+
+ // Decode a 1-byte rune.
+ if r < utf8.RuneSelf {
+ size = 1
+
+ } else {
+ // Decode a multi-byte rune.
+ r, size = utf8.DecodeRune(src[nSrc:])
+ if size == 1 {
+ // All valid runes of size 1 (those below utf8.RuneSelf) were
+ // handled above. We have invalid UTF-8 or we haven't seen the
+ // full character yet.
+ if !atEOF && !utf8.FullRune(src[nSrc:]) {
+ err = transform.ErrShortSrc
+ break
+ }
+ r = '\ufffd'
+ }
+ }
+
+ if nDst+utf8.RuneLen(r) > len(dst) {
+ err = transform.ErrShortDst
+ break
+ }
+ nDst += utf8.EncodeRune(dst[nDst:], r)
+ }
+ return nDst, nSrc, err
+}
+
+// HTMLEscapeUnsupported wraps encoders to replace source runes outside the
+// repertoire of the destination encoding with HTML escape sequences.
+//
+// This wrapper exists to comply to URL and HTML forms requiring a
+// non-terminating legacy encoder. The produced sequences may lead to data
+// loss as they are indistinguishable from legitimate input. To avoid this
+// issue, use UTF-8 encodings whenever possible.
+func HTMLEscapeUnsupported(e *Encoder) *Encoder {
+ return &Encoder{Transformer: &errorHandler{e, errorToHTML}}
+}
+
+// ReplaceUnsupported wraps encoders to replace source runes outside the
+// repertoire of the destination encoding with an encoding-specific
+// replacement.
+//
+// This wrapper is only provided for backwards compatibility and legacy
+// handling. Its use is strongly discouraged. Use UTF-8 whenever possible.
+func ReplaceUnsupported(e *Encoder) *Encoder {
+ return &Encoder{Transformer: &errorHandler{e, errorToReplacement}}
+}
+
+type errorHandler struct {
+ *Encoder
+ handler func(dst []byte, r rune, err repertoireError) (n int, ok bool)
+}
+
+// TODO: consider making this error public in some form.
+type repertoireError interface {
+ Replacement() byte
+}
+
+func (h errorHandler) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
+ nDst, nSrc, err = h.Transformer.Transform(dst, src, atEOF)
+ for err != nil {
+ rerr, ok := err.(repertoireError)
+ if !ok {
+ return nDst, nSrc, err
+ }
+ r, sz := utf8.DecodeRune(src[nSrc:])
+ n, ok := h.handler(dst[nDst:], r, rerr)
+ if !ok {
+ return nDst, nSrc, transform.ErrShortDst
+ }
+ err = nil
+ nDst += n
+ if nSrc += sz; nSrc < len(src) {
+ var dn, sn int
+ dn, sn, err = h.Transformer.Transform(dst[nDst:], src[nSrc:], atEOF)
+ nDst += dn
+ nSrc += sn
+ }
+ }
+ return nDst, nSrc, err
+}
+
+func errorToHTML(dst []byte, r rune, err repertoireError) (n int, ok bool) {
+ buf := [8]byte{}
+ b := strconv.AppendUint(buf[:0], uint64(r), 10)
+ if n = len(b) + len("&#;"); n >= len(dst) {
+ return 0, false
+ }
+ dst[0] = '&'
+ dst[1] = '#'
+ dst[copy(dst[2:], b)+2] = ';'
+ return n, true
+}
+
+func errorToReplacement(dst []byte, r rune, err repertoireError) (n int, ok bool) {
+ if len(dst) == 0 {
+ return 0, false
+ }
+ dst[0] = err.Replacement()
+ return 1, true
+}
+
+// ErrInvalidUTF8 means that a transformer encountered invalid UTF-8.
+var ErrInvalidUTF8 = errors.New("encoding: invalid UTF-8")
+
+// UTF8Validator is a transformer that returns ErrInvalidUTF8 on the first
+// input byte that is not valid UTF-8.
+var UTF8Validator transform.Transformer = utf8Validator{}
+
+type utf8Validator struct{ transform.NopResetter }
+
+func (utf8Validator) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
+ n := len(src)
+ if n > len(dst) {
+ n = len(dst)
+ }
+ for i := 0; i < n; {
+ if c := src[i]; c < utf8.RuneSelf {
+ dst[i] = c
+ i++
+ continue
+ }
+ _, size := utf8.DecodeRune(src[i:])
+ if size == 1 {
+ // All valid runes of size 1 (those below utf8.RuneSelf) were
+ // handled above. We have invalid UTF-8 or we haven't seen the
+ // full character yet.
+ err = ErrInvalidUTF8
+ if !atEOF && !utf8.FullRune(src[i:]) {
+ err = transform.ErrShortSrc
+ }
+ return i, i, err
+ }
+ if i+size > len(dst) {
+ return i, i, transform.ErrShortDst
+ }
+ for ; size > 0; size-- {
+ dst[i] = src[i]
+ i++
+ }
+ }
+ if len(src) > len(dst) {
+ err = transform.ErrShortDst
+ }
+ return n, n, err
+}