summary | refs | log | tree | commit | diff | stats
path: root/vendor/github.com/paulrosania/go-charset/charset/codepage.go
diff options
context:
space:
mode:
author Wim <wim@42.be> 2017-07-07 23:34:05 +0200
committer Wim <wim@42.be> 2017-07-07 23:34:05 +0200
commit a0938d93869904ebf6d9938485c248b976150fac (patch)
tree a12fad5acdceeec93a28efb600ca62b9fdfb40a5 /vendor/github.com/paulrosania/go-charset/charset/codepage.go
parent 2338c69d402ad3779f4e2a2f38ac800ceca656b9 (diff)
download matterbridge-msglm-a0938d93869904ebf6d9938485c248b976150fac.tar.gz
matterbridge-msglm-a0938d93869904ebf6d9938485c248b976150fac.tar.bz2
matterbridge-msglm-a0938d93869904ebf6d9938485c248b976150fac.zip
Add go-charset and chardet to vendor
Diffstat (limited to 'vendor/github.com/paulrosania/go-charset/charset/codepage.go')
-rw-r--r-- vendor/github.com/paulrosania/go-charset/charset/codepage.go 133
1 files changed, 133 insertions, 0 deletions
diff --git a/vendor/github.com/paulrosania/go-charset/charset/codepage.go b/vendor/github.com/paulrosania/go-charset/charset/codepage.go
new file mode 100644
index 00000000..6864c875
--- /dev/null
+++ b/vendor/github.com/paulrosania/go-charset/charset/codepage.go
@@ -0,0 +1,133 @@
+package charset
+
+import (
+ "fmt"
+ "unicode/utf8"
+)
+
+// init passes the class name "cp" together with the fromCodePage and
+// toCodePage constructors to registerClass when the package is initialised.
+func init() {
+ registerClass("cp", fromCodePage, toCodePage)
+}
+
+// translateFromCodePage holds the state used by its Translate method to turn
+// the bytes of a single-byte code page into UTF-8.
+type translateFromCodePage struct {
+ // byte2rune gives, for every possible input byte, the rune it stands for.
+ byte2rune *[256]rune
+ // scratch is the output buffer kept between Translate calls; each call
+ // passes it through ensureCap before writing into it.
+ scratch []byte
+}
+
+// cpKeyFrom wraps the code page name that fromCodePage passes to cache as its
+// key.
+type cpKeyFrom string
+// cpKeyTo wraps the code page name that toCodePage passes to cache as its key.
+// NOTE(review): the two key types are presumably distinct so that the
+// from/to entries for the same name do not collide in cache — confirm.
+type cpKeyTo string
+
+// Translate converts every byte of data into the rune given by the
+// byte2rune table and returns those runes encoded as UTF-8.
+//
+// The whole input is always processed, so the first result is len(data) and
+// the error is always nil; eof is not consulted. The returned slice shares
+// storage with p.scratch.
+func (p *translateFromCodePage) Translate(data []byte, eof bool) (int, []byte, error) {
+	// Reserve room for the worst case: every input byte becoming a
+	// maximum-length UTF-8 sequence.
+	p.scratch = ensureCap(p.scratch, len(data)*utf8.UTFMax)[:0]
+	out := p.scratch
+	for i := 0; i < len(data); i++ {
+		switch r := p.byte2rune[data[i]]; {
+		case r < utf8.RuneSelf:
+			// Values below RuneSelf are a single byte in UTF-8.
+			out = append(out, byte(r))
+		default:
+			// Encode straight into the spare capacity, then extend the
+			// slice over the bytes just written.
+			n := utf8.EncodeRune(out[len(out):cap(out)], r)
+			out = out[:len(out)+n]
+		}
+	}
+	return len(data), out, nil
+}
+
+// toCodePageInfo is the lookup data built by toCodePage for translating
+// runes into the bytes of a single-byte code page.
+type toCodePageInfo struct {
+ // rune2byte maps a rune to its code page byte. toCodePage only fills it in
+ // for table positions at or after same.
+ rune2byte map[rune]byte
+ // same gives the number of runes at start of code page that map exactly to
+ // unicode.
+ same rune
+}
+
+// translateToCodePage holds the state used by its Translate method to turn
+// UTF-8 input into the bytes of a single-byte code page.
+type translateToCodePage struct {
+ // Embedded lookup data (rune2byte and same).
+ toCodePageInfo
+ // scratch is the output buffer kept between Translate calls; each call
+ // passes it through ensureCap before writing into it.
+ scratch []byte
+}
+
+func (p *translateToCodePage) Translate(data []byte, eof bool) (int, []byte, error) {
+ p.scratch = ensureCap(p.scratch, len(data))
+ buf := p.scratch[:0]
+
+ for i := 0; i < len(data); {
+ r := rune(data[i])
+ size := 1
+ if r >= utf8.RuneSelf {
+ r, size = utf8.DecodeRune(data[i:])
+ if size == 1 && !eof && !utf8.FullRune(data[i:]) {
+ return i, buf, nil
+ }
+ }
+
+ var b byte
+ if r < p.same {
+ b = byte(r)
+ } else {
+ var ok bool
+ b, ok = p.rune2byte[r]
+ if !ok {
+ b = '?'
+ }
+ }
+ buf = append(buf, b)
+ i += size
+ }
+ return len(data), buf, nil
+}
+
+func fromCodePage(arg string) (Translator, error) {
+ runes, err := cache(cpKeyFrom(arg), func() (interface{}, error) {
+ data, err := readFile(arg)
+ if err != nil {
+ return nil, err
+ }
+ runes := []rune(string(data))
+ if len(runes) != 256 {
+ return nil, fmt.Errorf("charset: %q has wrong rune count (%d)", arg, len(runes))
+ }
+ r := new([256]rune)
+ copy(r[:], runes)
+ return r, nil
+ })
+ if err != nil {
+ return nil, err
+ }
+ return &translateFromCodePage{byte2rune: runes.(*[256]rune)}, nil
+}
+
+// toCodePage returns a Translator that converts UTF-8 into bytes of the code
+// page named by arg.
+//
+// The lookup data is obtained through cache under the key cpKeyTo(arg). The
+// loader reads the file named by arg with readFile and walks its runes in
+// order. The leading run in which rune i equals i is recorded only as the
+// count info.same; every rune from the first mismatch onwards is entered in
+// info.rune2byte with its position as the byte value. The file must hold
+// exactly 256 runes, otherwise an error is returned.
+func toCodePage(arg string) (Translator, error) {
+	build := func() (interface{}, error) {
+		data, err := readFile(arg)
+		if err != nil {
+			return nil, err
+		}
+
+		info := toCodePageInfo{
+			rune2byte: make(map[rune]byte),
+			same:      256, // stays 256 when every position maps to itself
+		}
+		inPrefix := true
+		idx := rune(0)
+		for _, r := range string(data) {
+			if inPrefix && r == idx {
+				// Still inside the identity-mapped prefix.
+				idx++
+				continue
+			}
+			if inPrefix {
+				// First mismatch: the identity prefix ends here.
+				info.same = idx
+				inPrefix = false
+			}
+			info.rune2byte[r] = byte(idx)
+			idx++
+		}
+		// TODO fix tables
+		if idx != 256 {
+			return nil, fmt.Errorf("charset: %q has wrong rune count (%d)", arg, idx)
+		}
+		return info, nil
+	}
+
+	v, err := cache(cpKeyTo(arg), build)
+	if err != nil {
+		return nil, err
+	}
+	return &translateToCodePage{toCodePageInfo: v.(toCodePageInfo)}, nil
+}