diff options
Diffstat (limited to 'vendor/github.com/paulrosania/go-charset/charset/codepage.go')
-rw-r--r-- | vendor/github.com/paulrosania/go-charset/charset/codepage.go | 133 |
1 files changed, 133 insertions, 0 deletions
diff --git a/vendor/github.com/paulrosania/go-charset/charset/codepage.go b/vendor/github.com/paulrosania/go-charset/charset/codepage.go new file mode 100644 index 00000000..6864c875 --- /dev/null +++ b/vendor/github.com/paulrosania/go-charset/charset/codepage.go @@ -0,0 +1,133 @@ +package charset + +import ( + "fmt" + "unicode/utf8" +) + +func init() { + registerClass("cp", fromCodePage, toCodePage) +} + +type translateFromCodePage struct { + byte2rune *[256]rune + scratch []byte +} + +type cpKeyFrom string +type cpKeyTo string + +func (p *translateFromCodePage) Translate(data []byte, eof bool) (int, []byte, error) { + p.scratch = ensureCap(p.scratch, len(data)*utf8.UTFMax)[:0] + buf := p.scratch + for _, x := range data { + r := p.byte2rune[x] + if r < utf8.RuneSelf { + buf = append(buf, byte(r)) + continue + } + size := utf8.EncodeRune(buf[len(buf):cap(buf)], r) + buf = buf[0 : len(buf)+size] + } + return len(data), buf, nil +} + +type toCodePageInfo struct { + rune2byte map[rune]byte + // same gives the number of runes at start of code page that map exactly to + // unicode. + same rune +} + +type translateToCodePage struct { + toCodePageInfo + scratch []byte +} + +func (p *translateToCodePage) Translate(data []byte, eof bool) (int, []byte, error) { + p.scratch = ensureCap(p.scratch, len(data)) + buf := p.scratch[:0] + + for i := 0; i < len(data); { + r := rune(data[i]) + size := 1 + if r >= utf8.RuneSelf { + r, size = utf8.DecodeRune(data[i:]) + if size == 1 && !eof && !utf8.FullRune(data[i:]) { + return i, buf, nil + } + } + + var b byte + if r < p.same { + b = byte(r) + } else { + var ok bool + b, ok = p.rune2byte[r] + if !ok { + b = '?' + } + } + buf = append(buf, b) + i += size + } + return len(data), buf, nil +} + +func fromCodePage(arg string) (Translator, error) { + runes, err := cache(cpKeyFrom(arg), func() (interface{}, error) { + data, err := readFile(arg) + if err != nil { + return nil, err + } + runes := []rune(string(data)) + if len(runes) != 256 { + return nil, fmt.Errorf("charset: %q has wrong rune count (%d)", arg, len(runes)) + } + r := new([256]rune) + copy(r[:], runes) + return r, nil + }) + if err != nil { + return nil, err + } + return &translateFromCodePage{byte2rune: runes.(*[256]rune)}, nil +} + +func toCodePage(arg string) (Translator, error) { + m, err := cache(cpKeyTo(arg), func() (interface{}, error) { + data, err := readFile(arg) + if err != nil { + return nil, err + } + + info := toCodePageInfo{ + rune2byte: make(map[rune]byte), + same: 256, + } + atStart := true + i := rune(0) + for _, r := range string(data) { + if atStart { + if r == i { + i++ + continue + } + info.same = i + atStart = false + } + info.rune2byte[r] = byte(i) + i++ + } + // TODO fix tables + // fmt.Printf("%s, same = %d\n", arg, info.same) + if i != 256 { + return nil, fmt.Errorf("charset: %q has wrong rune count (%d)", arg, i) + } + return info, nil + }) + if err != nil { + return nil, err + } + return &translateToCodePage{toCodePageInfo: m.(toCodePageInfo)}, nil +} |