summaryrefslogtreecommitdiffstats
path: root/vendor/github.com/dfordsoft/golib/ic/convutf8.go
blob: b48514977245f4fce23c8f52b2689ffe9a85e8ca (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
// Package ic converts text between CJK encodings and UTF-8 in pure Go.
package ic

import (
	"bytes"
	"errors"
	"io/ioutil"
	"strings"

	"golang.org/x/text/encoding"
	"golang.org/x/text/encoding/japanese"
	"golang.org/x/text/encoding/korean"
	"golang.org/x/text/encoding/simplifiedchinese"
	"golang.org/x/text/encoding/traditionalchinese"
	"golang.org/x/text/transform"
)

var (
	// transformers maps lower-case encoding names (and common aliases) to the
	// golang.org/x/text encoding used to convert to and from UTF-8. Lookups
	// are expected to lower-case the requested name first.
	//
	// Alias notes:
	//   - "gb2312" resolves to GBK, which is a superset of the 8-bit GB2312
	//     (EUC-CN) form that the label denotes in practice. The 7-bit HZ
	//     escape-sequence encoding is a different wire format and is exposed
	//     separately as "hz-gb-2312".
	//   - "cp932" and "windows-31j" name Microsoft's Shift JIS variant, so
	//     they resolve to ShiftJIS rather than ISO-2022-JP.
	transformers = map[string]encoding.Encoding{
		"gbk":         simplifiedchinese.GBK,
		"cp936":       simplifiedchinese.GBK,
		"windows-936": simplifiedchinese.GBK,
		"gb18030":     simplifiedchinese.GB18030,
		"gb2312":      simplifiedchinese.GBK,
		"hz-gb-2312":  simplifiedchinese.HZGB2312,
		"big5":        traditionalchinese.Big5,
		"big-5":       traditionalchinese.Big5,
		"cp950":       traditionalchinese.Big5,
		"euc-kr":      korean.EUCKR,
		"euckr":       korean.EUCKR,
		"cp949":       korean.EUCKR,
		"euc-jp":      japanese.EUCJP,
		"eucjp":       japanese.EUCJP,
		"shift-jis":   japanese.ShiftJIS,
		"shift_jis":   japanese.ShiftJIS,
		"cp932":       japanese.ShiftJIS,
		"windows-31j": japanese.ShiftJIS,
		"iso-2022-jp": japanese.ISO2022JP,
	}
)

// ToUTF8 convert from CJK encoding to UTF-8
func ToUTF8(from string, s []byte) ([]byte, error) {
	var reader *transform.Reader

	transformer, ok := transformers[strings.ToLower(from)]
	if !ok {
		return s, errors.New("Unsupported encoding " + from)
	}
	reader = transform.NewReader(bytes.NewReader(s), transformer.NewDecoder())

	d, e := ioutil.ReadAll(reader)
	if e != nil {
		return nil, e
	}
	return d, nil
}

// FromUTF8 convert from UTF-8 encoding to CJK encoding
func FromUTF8(to string, s []byte) ([]byte, error) {
	var reader *transform.Reader

	transformer, ok := transformers[strings.ToLower(to)]
	if !ok {
		return s, errors.New("Unsupported encoding " + to)
	}
	reader = transform.NewReader(bytes.NewReader(s), transformer.NewEncoder())

	d, e := ioutil.ReadAll(reader)
	if e != nil {
		return nil, e
	}
	return d, nil
}