diff options
Diffstat (limited to 'vendor/github.com/missdeer/golib/ic')
-rw-r--r-- | vendor/github.com/missdeer/golib/ic/convutf8.go | 72 | ||||
-rw-r--r-- | vendor/github.com/missdeer/golib/ic/ic.go | 31 |
2 files changed, 103 insertions, 0 deletions
diff --git a/vendor/github.com/missdeer/golib/ic/convutf8.go b/vendor/github.com/missdeer/golib/ic/convutf8.go new file mode 100644 index 00000000..b4851497 --- /dev/null +++ b/vendor/github.com/missdeer/golib/ic/convutf8.go @@ -0,0 +1,72 @@ +// Package ic convert text between CJK and UTF-8 in pure Go way +package ic + +import ( + "bytes" + "errors" + "io/ioutil" + "strings" + + "golang.org/x/text/encoding" + "golang.org/x/text/encoding/japanese" + "golang.org/x/text/encoding/korean" + "golang.org/x/text/encoding/simplifiedchinese" + "golang.org/x/text/encoding/traditionalchinese" + "golang.org/x/text/transform" +) + +var ( + transformers = map[string]encoding.Encoding{ + "gbk": simplifiedchinese.GBK, + "cp936": simplifiedchinese.GBK, + "windows-936": simplifiedchinese.GBK, + "gb18030": simplifiedchinese.GB18030, + "gb2312": simplifiedchinese.HZGB2312, + "big5": traditionalchinese.Big5, + "big-5": traditionalchinese.Big5, + "cp950": traditionalchinese.Big5, + "euc-kr": korean.EUCKR, + "euckr": korean.EUCKR, + "cp949": korean.EUCKR, + "euc-jp": japanese.EUCJP, + "eucjp": japanese.EUCJP, + "shift-jis": japanese.ShiftJIS, + "iso-2022-jp": japanese.ISO2022JP, + "cp932": japanese.ISO2022JP, + "windows-31j": japanese.ISO2022JP, + } +) + +// ToUTF8 convert from CJK encoding to UTF-8 +func ToUTF8(from string, s []byte) ([]byte, error) { + var reader *transform.Reader + + transformer, ok := transformers[strings.ToLower(from)] + if !ok { + return s, errors.New("Unsupported encoding " + from) + } + reader = transform.NewReader(bytes.NewReader(s), transformer.NewDecoder()) + + d, e := ioutil.ReadAll(reader) + if e != nil { + return nil, e + } + return d, nil +} + +// FromUTF8 convert from UTF-8 encoding to CJK encoding +func FromUTF8(to string, s []byte) ([]byte, error) { + var reader *transform.Reader + + transformer, ok := transformers[strings.ToLower(to)] + if !ok { + return s, errors.New("Unsupported encoding " + to) + } + reader = transform.NewReader(bytes.NewReader(s), transformer.NewEncoder()) + + d, e := ioutil.ReadAll(reader) + if e != nil { + return nil, e + } + return d, nil +} diff --git a/vendor/github.com/missdeer/golib/ic/ic.go b/vendor/github.com/missdeer/golib/ic/ic.go new file mode 100644 index 00000000..9e414e36 --- /dev/null +++ b/vendor/github.com/missdeer/golib/ic/ic.go @@ -0,0 +1,31 @@ +package ic + +import "log" + +// Convert convert bytes from CJK or UTF-8 to UTF-8 or CJK +func Convert(from string, to string, src []byte) []byte { + if to == "utf-8" { + out, e := ToUTF8(from, src) + if e == nil { + return out + } + log.Printf("converting from %s to UTF-8 failed: %v", from, e) + return src + } + + if from == "utf-8" { + out, e := FromUTF8(to, src) + if e == nil { + return out + } + log.Printf("converting from UTF-8 to %s failed: %v", to, e) + return src + } + log.Println("only converting between CJK encodings and UTF-8 is supported") + return src +} + +// ConvertString convert string from CJK or UTF-8 to UTF-8 or CJK +func ConvertString(from string, to string, src string) string { + return string(Convert(from, to, []byte(src))) +} |