summaryrefslogtreecommitdiffstats
path: root/vendor/github.com/paulrosania/go-charset/charset/utf16.go
blob: ebde794c95ad82818733fd6539bb703f6d66f9a8 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
package charset

import (
	"encoding/binary"
	"errors"
	"unicode/utf8"
)

// init registers the UTF-16 translator constructors under the class
// name "utf16": fromUTF16 and toUTF16 are handed to registerClass.
func init() {
	const class = "utf16"
	registerClass(class, fromUTF16, toUTF16)
}

// translateFromUTF16 converts UTF-16 encoded input to UTF-8.
type translateFromUTF16 struct {
	// first is set by the constructor and cleared once the byte order
	// has been settled.
	first bool
	// endian is the byte order of the input. It is nil until it has
	// been determined from the start of the input.
	endian binary.ByteOrder
	// scratch is the output buffer; it is reused by every call to
	// Translate.
	scratch []byte
}

// Translate decodes the UTF-16 code units in data and returns the number
// of input bytes consumed together with their UTF-8 encoding. The error
// result is always nil.
//
// When no byte order is known yet, the first code unit is inspected for a
// byte order mark: 0xFEFF selects big-endian and 0xFFFE (a byte-swapped
// mark) selects little-endian, and in both cases the mark is consumed
// without producing output. Otherwise the order comes from guessEndian.
//
// Surrogate pairs are combined into a single code point. A surrogate
// without its partner is replaced by U+FFFD. Input that cannot be decoded
// yet is left unconsumed: a trailing odd byte, and, unless eof is true, a
// high surrogate at the very end of data whose partner may still arrive
// with the next chunk.
//
// The returned slice aliases an internal buffer and is only valid until
// the next call.
func (p *translateFromUTF16) Translate(data []byte, eof bool) (int, []byte, error) {
	const (
		surrHigh = 0xd800  // first high (leading) surrogate
		surrLow  = 0xdc00  // first low (trailing) surrogate
		surrEnd  = 0xe000  // one past the last low surrogate
		surrBase = 0x10000 // first code point that needs a surrogate pair
	)

	data = data[:len(data)&^1] // only whole 16-bit units can be decoded
	if len(data) < 2 {
		return 0, nil, nil
	}
	n := 0
	// Keying on the nil byte order (rather than on first alone) keeps a
	// zero-value translator from calling a method on a nil interface.
	if p.endian == nil {
		switch binary.BigEndian.Uint16(data) {
		case 0xfeff:
			p.endian = binary.BigEndian
			data = data[2:]
			n += 2
		case 0xfffe:
			p.endian = binary.LittleEndian
			data = data[2:]
			n += 2
		default:
			p.endian = guessEndian(data)
		}
		p.first = false
	}

	p.scratch = p.scratch[:0]
	var enc [utf8.UTFMax]byte
	for len(data) > 0 {
		r := rune(p.endian.Uint16(data))
		size := 2
		switch {
		case r >= surrHigh && r < surrLow:
			if len(data) < 4 && !eof {
				// The low half may be in the next chunk; leave the
				// high half unconsumed so the caller presents it again.
				return n, p.scratch, nil
			}
			lo := rune(-1)
			if len(data) >= 4 {
				lo = rune(p.endian.Uint16(data[2:]))
			}
			if lo >= surrLow && lo < surrEnd {
				r = surrBase + (r-surrHigh)<<10 + (lo - surrLow)
				size = 4
			} else {
				// Unpaired high surrogate. The following unit, if any,
				// is decoded on its own in the next iteration.
				r = utf8.RuneError
			}
		case r >= surrLow && r < surrEnd:
			// Low surrogate with no preceding high surrogate.
			r = utf8.RuneError
		}
		w := utf8.EncodeRune(enc[:], r)
		p.scratch = append(p.scratch, enc[:w]...)
		data = data[size:]
		n += size
	}
	return n, p.scratch, nil
}

// guessEndian chooses a byte order for input that does not start with a
// byte order mark. No heuristic is implemented yet (TODO): data is not
// examined and little-endian is always returned.
func guessEndian(data []byte) binary.ByteOrder {
	return binary.LittleEndian
}

// translateToUTF16 converts UTF-8 input to UTF-16.
type translateToUTF16 struct {
	// first, when true, makes the next call to Translate start its
	// output with a byte order mark.
	first bool
	// endian is the byte order of the output. If it is nil when
	// Translate is first called, little-endian with a byte order mark
	// is used.
	endian binary.ByteOrder
	// scratch is the output buffer; it is reused by every call to
	// Translate.
	scratch []byte
}

// Translate encodes the UTF-8 text in data as UTF-16 and returns the
// number of input bytes consumed together with the encoded output. The
// error result is always nil.
//
// Code points above U+FFFF are written as a surrogate pair. Bytes that
// are not valid UTF-8 are decoded by utf8.DecodeRune as U+FFFD, one byte
// at a time. Unless eof is true, an incomplete rune at the end of data is
// left unconsumed so that it can be completed by the next chunk.
//
// The returned slice aliases an internal buffer and is only valid until
// the next call.
func (p *translateToUTF16) Translate(data []byte, eof bool) (int, []byte, error) {
	const (
		surrHigh = 0xd800  // first high (leading) surrogate
		surrLow  = 0xdc00  // first low (trailing) surrogate
		surrBase = 0x10000 // first code point that needs a surrogate pair
	)

	if p.endian == nil {
		// No byte order was requested. Calling through the nil interface
		// would panic, so pick little-endian and announce the choice
		// with a byte order mark.
		p.endian = binary.LittleEndian
		p.first = true
	}

	// Every input byte produces at most two output bytes (a 4-byte rune
	// becomes a 4-byte surrogate pair); the extra unit is for the BOM.
	if need := (len(data) + 1) * 2; cap(p.scratch) < need {
		p.scratch = make([]byte, 0, need)
	}
	p.scratch = p.scratch[:0]

	put := func(u uint16) {
		at := len(p.scratch)
		p.scratch = append(p.scratch, 0, 0)
		p.endian.PutUint16(p.scratch[at:], u)
	}

	if p.first {
		put(0xfeff)
		p.first = false
	}
	n := 0
	for len(data) > 0 {
		if !eof && !utf8.FullRune(data) {
			break
		}
		r, size := utf8.DecodeRune(data)
		if r >= surrBase {
			r -= surrBase
			put(uint16(surrHigh + (r >> 10)))
			put(uint16(surrLow + (r & 0x3ff)))
		} else {
			put(uint16(r))
		}
		data = data[size:]
		n += size
	}
	return n, p.scratch, nil
}

// getEndian maps a translator argument to a byte order: "le" yields
// little-endian and "be" yields big-endian. The empty string yields a
// nil byte order with a nil error. Any other value is an error.
func getEndian(arg string) (binary.ByteOrder, error) {
	if arg == "" {
		return nil, nil
	}
	if arg == "le" {
		return binary.LittleEndian, nil
	}
	if arg == "be" {
		return binary.BigEndian, nil
	}
	return nil, errors.New("charset: unknown utf16 endianness")
}

// fromUTF16 returns a Translator that decodes UTF-16 input. arg is passed
// to getEndian to select the byte order, and an error from getEndian is
// returned unchanged. The translator is created with first set, and with
// whatever byte order getEndian reported (nil for an empty arg).
func fromUTF16(arg string) (Translator, error) {
	order, err := getEndian(arg)
	if err == nil {
		return &translateFromUTF16{endian: order, first: true}, nil
	}
	return nil, err
}

// toUTF16 returns a Translator that encodes to UTF-16. arg is passed to
// getEndian to select the byte order, and an error from getEndian is
// returned unchanged. The first field is left at its zero value (false),
// so the constructor itself does not ask for a byte order mark.
func toUTF16(arg string) (Translator, error) {
	order, err := getEndian(arg)
	if err == nil {
		return &translateToUTF16{endian: order}, nil
	}
	return nil, err
}