summary refs log tree commit diff stats
path: root/vendor/github.com/paulrosania/go-charset/charset/utf8.go
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/github.com/paulrosania/go-charset/charset/utf8.go')
-rw-r--r-- vendor/github.com/paulrosania/go-charset/charset/utf8.go 51
1 files changed, 51 insertions, 0 deletions
diff --git a/vendor/github.com/paulrosania/go-charset/charset/utf8.go b/vendor/github.com/paulrosania/go-charset/charset/utf8.go
new file mode 100644
index 00000000..23980b33
--- /dev/null
+++ b/vendor/github.com/paulrosania/go-charset/charset/utf8.go
@@ -0,0 +1,51 @@
+package charset
+
+import (
+ "unicode/utf8"
+)
+
+func init() {
+ registerClass("utf8", toUTF8, toUTF8)
+}
+
+type translateToUTF8 struct {
+ scratch []byte
+}
+
+var errorBytes = []byte(string(utf8.RuneError))
+
+const errorRuneLen = len(string(utf8.RuneError))
+
+func (p *translateToUTF8) Translate(data []byte, eof bool) (int, []byte, error) {
+ p.scratch = ensureCap(p.scratch, (len(data))*errorRuneLen)
+ buf := p.scratch[:0]
+ for i := 0; i < len(data); {
+ // fast path for ASCII
+ if b := data[i]; b < utf8.RuneSelf {
+ buf = append(buf, b)
+ i++
+ continue
+ }
+ _, size := utf8.DecodeRune(data[i:])
+ if size == 1 {
+ if !eof && !utf8.FullRune(data) {
+ // When DecodeRune has converted only a single
+ // byte, we know there must be some kind of error
+ // because we know the byte's not ASCII.
+ // If we aren't at EOF, and it's an incomplete
+ // rune encoding, then we return to process
+ // the final bytes in a subsequent call.
+ return i, buf, nil
+ }
+ buf = append(buf, errorBytes...)
+ } else {
+ buf = append(buf, data[i:i+size]...)
+ }
+ i += size
+ }
+ return len(data), buf, nil
+}
+
+func toUTF8(arg string) (Translator, error) {
+ return new(translateToUTF8), nil
+}