From a0938d93869904ebf6d9938485c248b976150fac Mon Sep 17 00:00:00 2001 From: Wim Date: Fri, 7 Jul 2017 23:34:05 +0200 Subject: Add go-charset and chardet to vendor --- .../paulrosania/go-charset/charset/utf8.go | 51 ++++++++++++++++++++++ 1 file changed, 51 insertions(+) create mode 100644 vendor/github.com/paulrosania/go-charset/charset/utf8.go (limited to 'vendor/github.com/paulrosania/go-charset/charset/utf8.go') diff --git a/vendor/github.com/paulrosania/go-charset/charset/utf8.go b/vendor/github.com/paulrosania/go-charset/charset/utf8.go new file mode 100644 index 00000000..23980b33 --- /dev/null +++ b/vendor/github.com/paulrosania/go-charset/charset/utf8.go @@ -0,0 +1,51 @@ +package charset + +import ( + "unicode/utf8" +) + +func init() { + registerClass("utf8", toUTF8, toUTF8) +} + +type translateToUTF8 struct { + scratch []byte +} + +var errorBytes = []byte(string(utf8.RuneError)) + +const errorRuneLen = len(string(utf8.RuneError)) + +func (p *translateToUTF8) Translate(data []byte, eof bool) (int, []byte, error) { + p.scratch = ensureCap(p.scratch, (len(data))*errorRuneLen) + buf := p.scratch[:0] + for i := 0; i < len(data); { + // fast path for ASCII + if b := data[i]; b < utf8.RuneSelf { + buf = append(buf, b) + i++ + continue + } + _, size := utf8.DecodeRune(data[i:]) + if size == 1 { + if !eof && !utf8.FullRune(data) { + // When DecodeRune has converted only a single + // byte, we know there must be some kind of error + // because we know the byte's not ASCII. + // If we aren't at EOF, and it's an incomplete + // rune encoding, then we return to process + // the final bytes in a subsequent call. + return i, buf, nil + } + buf = append(buf, errorBytes...) + } else { + buf = append(buf, data[i:i+size]...) + } + i += size + } + return len(data), buf, nil +} + +func toUTF8(arg string) (Translator, error) { + return new(translateToUTF8), nil +} -- cgit v1.2.3