diff options
author | Wim <wim@42.be> | 2017-07-07 23:34:05 +0200 |
---|---|---|
committer | Wim <wim@42.be> | 2017-07-07 23:34:05 +0200 |
commit | a0938d93869904ebf6d9938485c248b976150fac (patch) | |
tree | a12fad5acdceeec93a28efb600ca62b9fdfb40a5 /vendor/github.com/paulrosania/go-charset/charset/utf8.go | |
parent | 2338c69d402ad3779f4e2a2f38ac800ceca656b9 (diff) | |
download | matterbridge-msglm-a0938d93869904ebf6d9938485c248b976150fac.tar.gz matterbridge-msglm-a0938d93869904ebf6d9938485c248b976150fac.tar.bz2 matterbridge-msglm-a0938d93869904ebf6d9938485c248b976150fac.zip |
Add go-charset and chardet to vendor
Diffstat (limited to 'vendor/github.com/paulrosania/go-charset/charset/utf8.go')
-rw-r--r-- | vendor/github.com/paulrosania/go-charset/charset/utf8.go | 51 |
1 files changed, 51 insertions, 0 deletions
diff --git a/vendor/github.com/paulrosania/go-charset/charset/utf8.go b/vendor/github.com/paulrosania/go-charset/charset/utf8.go new file mode 100644 index 00000000..23980b33 --- /dev/null +++ b/vendor/github.com/paulrosania/go-charset/charset/utf8.go @@ -0,0 +1,51 @@ +package charset + +import ( + "unicode/utf8" +) + +func init() { + registerClass("utf8", toUTF8, toUTF8) +} + +type translateToUTF8 struct { + scratch []byte +} + +var errorBytes = []byte(string(utf8.RuneError)) + +const errorRuneLen = len(string(utf8.RuneError)) + +func (p *translateToUTF8) Translate(data []byte, eof bool) (int, []byte, error) { + p.scratch = ensureCap(p.scratch, (len(data))*errorRuneLen) + buf := p.scratch[:0] + for i := 0; i < len(data); { + // fast path for ASCII + if b := data[i]; b < utf8.RuneSelf { + buf = append(buf, b) + i++ + continue + } + _, size := utf8.DecodeRune(data[i:]) + if size == 1 { + if !eof && !utf8.FullRune(data) { + // When DecodeRune has converted only a single + // byte, we know there must be some kind of error + // because we know the byte's not ASCII. + // If we aren't at EOF, and it's an incomplete + // rune encoding, then we return to process + // the final bytes in a subsequent call. + return i, buf, nil + } + buf = append(buf, errorBytes...) + } else { + buf = append(buf, data[i:i+size]...) + } + i += size + } + return len(data), buf, nil +} + +func toUTF8(arg string) (Translator, error) { + return new(translateToUTF8), nil +} |