diff options
Diffstat (limited to 'vendor/github.com/paulrosania/go-charset/charset/utf16.go')
-rw-r--r-- | vendor/github.com/paulrosania/go-charset/charset/utf16.go | 110 |
1 files changed, 110 insertions, 0 deletions
diff --git a/vendor/github.com/paulrosania/go-charset/charset/utf16.go b/vendor/github.com/paulrosania/go-charset/charset/utf16.go new file mode 100644 index 00000000..ebde794c --- /dev/null +++ b/vendor/github.com/paulrosania/go-charset/charset/utf16.go @@ -0,0 +1,110 @@ +package charset + +import ( + "encoding/binary" + "errors" + "unicode/utf8" +) + +func init() { + registerClass("utf16", fromUTF16, toUTF16) +} + +type translateFromUTF16 struct { + first bool + endian binary.ByteOrder + scratch []byte +} + +func (p *translateFromUTF16) Translate(data []byte, eof bool) (int, []byte, error) { + data = data[0 : len(data)&^1] // round to even number of bytes. + if len(data) < 2 { + return 0, nil, nil + } + n := 0 + if p.first && p.endian == nil { + switch binary.BigEndian.Uint16(data) { + case 0xfeff: + p.endian = binary.BigEndian + data = data[2:] + n += 2 + case 0xfffe: + p.endian = binary.LittleEndian + data = data[2:] + n += 2 + default: + p.endian = guessEndian(data) + } + p.first = false + } + + p.scratch = p.scratch[:0] + for ; len(data) > 0; data = data[2:] { + p.scratch = appendRune(p.scratch, rune(p.endian.Uint16(data))) + n += 2 + } + return n, p.scratch, nil +} + +func guessEndian(data []byte) binary.ByteOrder { + // XXX TODO + return binary.LittleEndian +} + +type translateToUTF16 struct { + first bool + endian binary.ByteOrder + scratch []byte +} + +func (p *translateToUTF16) Translate(data []byte, eof bool) (int, []byte, error) { + p.scratch = ensureCap(p.scratch[:0], (len(data)+1)*2) + if p.first { + p.scratch = p.scratch[0:2] + p.endian.PutUint16(p.scratch, 0xfeff) + p.first = false + } + n := 0 + for len(data) > 0 { + if !utf8.FullRune(data) && !eof { + break + } + r, size := utf8.DecodeRune(data) + // TODO if r > 65535? + + slen := len(p.scratch) + p.scratch = p.scratch[0 : slen+2] + p.endian.PutUint16(p.scratch[slen:], uint16(r)) + data = data[size:] + n += size + } + return n, p.scratch, nil +} + +func getEndian(arg string) (binary.ByteOrder, error) { + switch arg { + case "le": + return binary.LittleEndian, nil + case "be": + return binary.BigEndian, nil + case "": + return nil, nil + } + return nil, errors.New("charset: unknown utf16 endianness") +} + +func fromUTF16(arg string) (Translator, error) { + endian, err := getEndian(arg) + if err != nil { + return nil, err + } + return &translateFromUTF16{first: true, endian: endian}, nil +} + +func toUTF16(arg string) (Translator, error) { + endian, err := getEndian(arg) + if err != nil { + return nil, err + } + return &translateToUTF16{first: false, endian: endian}, nil +} |