1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
|
package charset
import (
"encoding/binary"
"errors"
"unicode/utf8"
)
// init registers the UTF-16 translator constructors defined in this file
// (fromUTF16 and toUTF16) under the class name "utf16".
func init() {
	const className = "utf16"
	registerClass(className, fromUTF16, toUTF16)
}
// translateFromUTF16 holds the state of a UTF-16 decoder; its Translate
// method converts UTF-16 encoded bytes into runes appended to scratch.
type translateFromUTF16 struct {
// first is set to true by fromUTF16 and cleared by Translate once the byte
// order has been determined from the start of the input.
first bool
// endian is the byte order used to read 16-bit code units. It is nil when no
// explicit order was requested, until Translate detects or guesses one.
endian binary.ByteOrder
// scratch is the output buffer. Translate truncates and refills it on every
// call, so a slice returned by one call is overwritten by the next.
scratch []byte
}
// Translate decodes the UTF-16 code units in data and returns the number of
// input bytes consumed together with the translated output. The returned
// slice aliases p.scratch and is only valid until the next call.
//
// Only whole 16-bit code units are consumed; a trailing odd byte is left for
// a later call. When no byte order was configured, the first call inspects
// the leading code unit: a byte order mark (0xFEFF) selects the order and is
// skipped, otherwise guessEndian picks one.
//
// Surrogate pairs are combined into a single code point. A high surrogate at
// the very end of data is left unconsumed unless eof is true, so that a pair
// split across two calls is still decoded correctly. Unpaired surrogates are
// replaced by utf8.RuneError (U+FFFD).
//
// The returned error is always nil.
func (p *translateFromUTF16) Translate(data []byte, eof bool) (int, []byte, error) {
	data = data[:len(data)&^1] // round down to a whole number of code units
	if len(data) < 2 {
		return 0, nil, nil
	}
	n := 0
	if p.first && p.endian == nil {
		// The BOM is read as big-endian: 0xfeff means the stream really is
		// big-endian, 0xfffe means the bytes are swapped (little-endian).
		switch binary.BigEndian.Uint16(data) {
		case 0xfeff:
			p.endian = binary.BigEndian
			data = data[2:]
			n += 2
		case 0xfffe:
			p.endian = binary.LittleEndian
			data = data[2:]
			n += 2
		default:
			p.endian = guessEndian(data)
		}
		p.first = false
	}
	p.scratch = p.scratch[:0]
	for len(data) > 0 {
		r := rune(p.endian.Uint16(data))
		size := 2
		if r >= 0xd800 && r < 0xdc00 {
			// High surrogate: it needs the following low surrogate.
			if len(data) < 4 {
				if !eof {
					// The low half may arrive with the next chunk; leave the
					// high half unconsumed.
					break
				}
				r = utf8.RuneError
			} else if lo := rune(p.endian.Uint16(data[2:])); lo >= 0xdc00 && lo < 0xe000 {
				r = 0x10000 + ((r - 0xd800) << 10) + (lo - 0xdc00)
				size = 4
			} else {
				// Not followed by a low surrogate: replace only the high half
				// and let the next unit be decoded on its own.
				r = utf8.RuneError
			}
		} else if r >= 0xdc00 && r < 0xe000 {
			// Low surrogate with no preceding high surrogate.
			r = utf8.RuneError
		}
		// NOTE(review): appendRune is defined elsewhere; presumably it appends
		// the UTF-8 encoding of r to the slice — confirm.
		p.scratch = appendRune(p.scratch, r)
		data = data[size:]
		n += size
	}
	return n, p.scratch, nil
}
// guessEndian makes a best-effort guess at the byte order of BOM-less UTF-16
// data.
//
// Heuristic: in text dominated by code points below U+0100 (ASCII, Latin-1),
// the high byte of every code unit is zero. Big-endian data therefore has its
// zero bytes at even offsets and little-endian data at odd offsets. If zero
// bytes are strictly more frequent at even offsets the data is taken to be
// big-endian; in every other case, including empty input and ties, the
// result is little-endian.
func guessEndian(data []byte) binary.ByteOrder {
	var evenZeros, oddZeros int
	for i, b := range data {
		if b != 0 {
			continue
		}
		if i&1 == 0 {
			evenZeros++
		} else {
			oddZeros++
		}
	}
	if evenZeros > oddZeros {
		return binary.BigEndian
	}
	return binary.LittleEndian
}
// translateToUTF16 holds the state of a UTF-16 encoder; its Translate method
// converts UTF-8 input into 16-bit code units written to scratch.
type translateToUTF16 struct {
// first, when true, makes the next Translate call write a byte order mark
// (0xFEFF) before any other output; Translate then clears it.
first bool
// endian is the byte order used to write each 16-bit code unit.
endian binary.ByteOrder
// scratch is the output buffer. Translate truncates and refills it on every
// call, so a slice returned by one call is overwritten by the next.
scratch []byte
}
// Translate encodes the UTF-8 text in data as UTF-16 using p.endian and
// returns the number of input bytes consumed together with the encoded
// output. The returned slice aliases p.scratch and is only valid until the
// next call.
//
// If p.first is set, a byte order mark (0xFEFF) is written first. An
// incomplete UTF-8 sequence at the end of data is left unconsumed unless eof
// is true, in which case it is decoded as utf8.RuneError one byte at a time.
// Code points above U+FFFF are written as a surrogate pair.
//
// The returned error is always nil.
func (p *translateToUTF16) Translate(data []byte, eof bool) (int, []byte, error) {
	// Worst case output: 2 bytes for the BOM plus 2 bytes per input byte. A
	// one-byte rune yields 2 output bytes and a four-byte rune yields 4, so
	// surrogate pairs stay within this bound.
	// NOTE(review): assumes ensureCap returns a slice whose capacity is at
	// least the requested size — confirm against its definition.
	p.scratch = ensureCap(p.scratch[:0], (len(data)+1)*2)
	if p.first {
		p.scratch = p.scratch[0:2]
		p.endian.PutUint16(p.scratch, 0xfeff)
		p.first = false
	}
	n := 0
	for len(data) > 0 {
		if !utf8.FullRune(data) && !eof {
			break
		}
		r, size := utf8.DecodeRune(data)
		slen := len(p.scratch)
		if r >= 0x10000 {
			// Outside the Basic Multilingual Plane: split into a high and a
			// low surrogate carrying 10 bits each.
			r -= 0x10000
			p.scratch = p.scratch[:slen+4]
			p.endian.PutUint16(p.scratch[slen:], uint16(0xd800+(r>>10)))
			p.endian.PutUint16(p.scratch[slen+2:], uint16(0xdc00+(r&0x3ff)))
		} else {
			p.scratch = p.scratch[:slen+2]
			p.endian.PutUint16(p.scratch[slen:], uint16(r))
		}
		data = data[size:]
		n += size
	}
	return n, p.scratch, nil
}
// getEndian maps a translator argument to a byte order: "le" selects
// little-endian, "be" selects big-endian, and the empty string yields a nil
// byte order with a nil error. Any other argument is an error.
func getEndian(arg string) (binary.ByteOrder, error) {
	if arg == "" {
		return nil, nil
	}
	if arg == "le" {
		return binary.LittleEndian, nil
	}
	if arg == "be" {
		return binary.BigEndian, nil
	}
	return nil, errors.New("charset: unknown utf16 endianness")
}
// fromUTF16 builds a UTF-16 decoding translator. arg is passed to getEndian
// and selects the byte order; an argument getEndian rejects is returned as an
// error. The translator starts with first set, so when no byte order was
// given its first Translate call determines one from the input.
func fromUTF16(arg string) (Translator, error) {
	order, err := getEndian(arg)
	if err != nil {
		return nil, err
	}
	decoder := &translateFromUTF16{
		first:  true,
		endian: order,
	}
	return decoder, nil
}
// toUTF16 builds a UTF-16 encoding translator. arg is passed to getEndian and
// selects the byte order; an argument getEndian rejects is returned as an
// error.
//
// With an explicit "le" or "be" the output carries no byte order mark. With
// an empty arg, getEndian returns a nil byte order; storing that nil would
// make the translator panic on its first write, because Translate calls
// p.endian.PutUint16 unconditionally. In that case the translator uses
// little-endian and emits a byte order mark first, so a reader can detect
// the order.
func toUTF16(arg string) (Translator, error) {
	endian, err := getEndian(arg)
	if err != nil {
		return nil, err
	}
	if endian == nil {
		return &translateToUTF16{first: true, endian: binary.LittleEndian}, nil
	}
	return &translateToUTF16{first: false, endian: endian}, nil
}
|