summaryrefslogtreecommitdiffstats
path: root/vendor/github.com/paulrosania/go-charset/charset
diff options
context:
space:
mode:
authorWim <wim@42.be>2017-07-07 23:34:05 +0200
committerWim <wim@42.be>2017-07-07 23:34:05 +0200
commita0938d93869904ebf6d9938485c248b976150fac (patch)
treea12fad5acdceeec93a28efb600ca62b9fdfb40a5 /vendor/github.com/paulrosania/go-charset/charset
parent2338c69d402ad3779f4e2a2f38ac800ceca656b9 (diff)
downloadmatterbridge-msglm-a0938d93869904ebf6d9938485c248b976150fac.tar.gz
matterbridge-msglm-a0938d93869904ebf6d9938485c248b976150fac.tar.bz2
matterbridge-msglm-a0938d93869904ebf6d9938485c248b976150fac.zip
Add go-charset and chardet to vendor
Diffstat (limited to 'vendor/github.com/paulrosania/go-charset/charset')
-rw-r--r--vendor/github.com/paulrosania/go-charset/charset/ascii.go65
-rw-r--r--vendor/github.com/paulrosania/go-charset/charset/big5.go88
-rw-r--r--vendor/github.com/paulrosania/go-charset/charset/charset.go301
-rw-r--r--vendor/github.com/paulrosania/go-charset/charset/codepage.go133
-rw-r--r--vendor/github.com/paulrosania/go-charset/charset/cp932.go195
-rw-r--r--vendor/github.com/paulrosania/go-charset/charset/file.go40
-rw-r--r--vendor/github.com/paulrosania/go-charset/charset/iconv/iconv.go184
-rw-r--r--vendor/github.com/paulrosania/go-charset/charset/iconv/list_query.go80
-rw-r--r--vendor/github.com/paulrosania/go-charset/charset/iconv/list_static.go176
-rw-r--r--vendor/github.com/paulrosania/go-charset/charset/local.go162
-rw-r--r--vendor/github.com/paulrosania/go-charset/charset/utf16.go110
-rw-r--r--vendor/github.com/paulrosania/go-charset/charset/utf8.go51
12 files changed, 1585 insertions, 0 deletions
diff --git a/vendor/github.com/paulrosania/go-charset/charset/ascii.go b/vendor/github.com/paulrosania/go-charset/charset/ascii.go
new file mode 100644
index 00000000..ccf3a35b
--- /dev/null
+++ b/vendor/github.com/paulrosania/go-charset/charset/ascii.go
@@ -0,0 +1,65 @@
+package charset
+
+import (
+ "bytes"
+ "fmt"
+ "unicode/utf8"
+)
+
+func init() {
+ registerClass("ascii", fromASCII, toASCII)
+}
+
+const errorByte = '?'
+
+type translateFromASCII bool
+
+type codePointError struct {
+ i int
+ cp rune
+ charset string
+}
+
+func (e *codePointError) Error() string {
+ return fmt.Sprintf("Parse error at index %n: Code point %n is undefined in %s", e.i, e.cp, e.charset)
+}
+
+func (strict translateFromASCII) Translate(data []byte, eof bool) (int, []byte, error) {
+ buf := bytes.NewBuffer(make([]byte, 0, len(data)))
+ for i, c := range data {
+ if c > 0 && c < 128 {
+ buf.WriteByte(c)
+ if c < 32 && c != 10 && c != 13 && c != 9 {
+ // badly formed
+ }
+ } else {
+ if strict {
+ return 0, nil, &codePointError{i, rune(c), "US-ASCII"}
+ }
+ buf.WriteRune(utf8.RuneError)
+ }
+ }
+ return len(data), buf.Bytes(), nil
+}
+
// translateToASCII converts a byte stream to ASCII. Bytes already in
// the range 1..127 are copied through; every other byte is replaced
// with '?'. The conversion never fails and always consumes the whole
// input.
type translateToASCII bool

// Translate implements the Translator interface.
func (strict translateToASCII) Translate(data []byte, eof bool) (int, []byte, error) {
	out := bytes.NewBuffer(make([]byte, 0, len(data)))
	for _, b := range data {
		if b == 0 || b >= 128 {
			out.WriteByte('?') // same value as errorByte
			continue
		}
		out.WriteByte(b)
	}
	return len(data), out.Bytes(), nil
}
+
// fromASCII creates a Translator converting ASCII to UTF-8.
// arg (the registered charset argument) is unused. Note that
// new(translateFromASCII) yields a pointer to a false value,
// i.e. the non-strict translator.
func fromASCII(arg string) (Translator, error) {
	return new(translateFromASCII), nil
}

// toASCII creates a Translator converting UTF-8 to ASCII,
// substituting '?' for unrepresentable characters. arg is unused.
func toASCII(arg string) (Translator, error) {
	return new(translateToASCII), nil
}
diff --git a/vendor/github.com/paulrosania/go-charset/charset/big5.go b/vendor/github.com/paulrosania/go-charset/charset/big5.go
new file mode 100644
index 00000000..e01fa1af
--- /dev/null
+++ b/vendor/github.com/paulrosania/go-charset/charset/big5.go
@@ -0,0 +1,88 @@
+package charset
+
+import (
+ "fmt"
+ "unicode/utf8"
+)
+
// Register the Big5 decoder. Only conversion from Big5 to UTF-8 is
// supported, so the "to" direction is nil.
func init() {
	registerClass("big5", fromBig5, nil)
}
+
// Big5 consists of 89 fonts of 157 chars each
const (
	big5Max  = 13973
	big5Font = 157
	big5Data = "big5.dat"
)

// translateFromBig5 converts Big5-encoded bytes to UTF-8.
type translateFromBig5 struct {
	font    int    // pending lead byte, or -1 when in the idle state
	scratch []byte // reusable output buffer
	big5map []rune // big5 index -> unicode rune (-1 if undefined)
}

// Translate implements the Translator interface. Bytes >= 0xa1 start
// a two-byte Big5 character; anything else in the idle state is a
// single-byte character. A trailing lead byte is remembered in p.font
// across calls, so translation can resume mid-character.
func (p *translateFromBig5) Translate(data []byte, eof bool) (int, []byte, error) {
	p.scratch = p.scratch[:0]
	n := 0
	for len(data) > 0 {
		c := int(data[0])
		data = data[1:]
		n++
		if p.font == -1 {
			// idle state
			if c >= 0xa1 {
				p.font = c
				continue
			}
			if c == 26 {
				// SUB is mapped to newline.
				c = '\n'
			}
			// BUG FIX: single-byte characters were translated and then
			// silently discarded (the assignment above was dead code);
			// they must be emitted.
			var b [utf8.UTFMax]byte
			w := utf8.EncodeRune(b[:], rune(c))
			p.scratch = append(p.scratch, b[:w]...)
			continue
		}
		f := p.font
		p.font = -1
		r := utf8.RuneError
		switch {
		case c >= 64 && c <= 126:
			c -= 64
		case c >= 161 && c <= 254:
			c = c - 161 + 63
		default:
			// bad big5 char; poison f so the lookup below is skipped.
			f = 255
		}
		if f <= 254 {
			f -= 161
			ix := f*big5Font + c
			if ix < len(p.big5map) {
				r = p.big5map[ix]
			}
			if r == -1 {
				// undefined code point
				r = utf8.RuneError
			}
		}
		var b [utf8.UTFMax]byte
		w := utf8.EncodeRune(b[:], r)
		p.scratch = append(p.scratch, b[:w]...)
	}
	return n, p.scratch, nil
}
+
// big5Key is the cache key type for the lazily-loaded Big5 mapping
// table (the bool value itself is unused; only the type matters).
type big5Key bool

// fromBig5 creates a Translator from Big5 to UTF-8. The mapping table
// is read from big5.dat (via readFile) once and memoized with cache.
func fromBig5(arg string) (Translator, error) {
	big5map, err := cache(big5Key(false), func() (interface{}, error) {
		data, err := readFile(big5Data)
		if err != nil {
			return nil, fmt.Errorf("charset: cannot open big5 data file: %v", err)
		}
		big5map := []rune(string(data))
		if len(big5map) != big5Max {
			return nil, fmt.Errorf("charset: corrupt big5 data")
		}
		return big5map, nil
	})
	if err != nil {
		return nil, err
	}
	// font starts at -1: the idle (no pending lead byte) state.
	return &translateFromBig5{big5map: big5map.([]rune), font: -1}, nil
}
diff --git a/vendor/github.com/paulrosania/go-charset/charset/charset.go b/vendor/github.com/paulrosania/go-charset/charset/charset.go
new file mode 100644
index 00000000..6ab6cf89
--- /dev/null
+++ b/vendor/github.com/paulrosania/go-charset/charset/charset.go
@@ -0,0 +1,301 @@
+// The charset package implements translation between character sets.
+// It uses Unicode as the intermediate representation.
+// Because it can be large, the character set data is separated
+// from the charset package. It can be embedded in the Go
+// executable by importing the data package:
+//
+// import _ "github.com/paulrosania/go-charset/data"
+//
+// It can also be made available in a data directory (by setting CharsetDir).
+package charset
+
import (
	"io"
	"sort"
	"strings"
	"unicode/utf8"
)
+
// Charset holds information about a given character set.
type Charset struct {
	Name    string   // Canonical name of character set.
	Aliases []string // Known aliases.
	Desc    string   // Description.
	NoFrom  bool     // Not possible to translate from this charset.
	NoTo    bool     // Not possible to translate to this charset.
}

// Translator represents a character set converter.
// The Translate method translates the given data,
// and returns the number of bytes of data consumed,
// a slice containing the converted data (which may be
// overwritten on the next call to Translate), and any
// conversion error. If eof is true, the data represents
// the final bytes of the input.
type Translator interface {
	Translate(data []byte, eof bool) (n int, cdata []byte, err error)
}

// A Factory can be used to make character set translators.
type Factory interface {
	// TranslatorFrom creates a translator that will translate from the named character
	// set to UTF-8.
	TranslatorFrom(name string) (Translator, error)

	// TranslatorTo creates a translator that will translate from UTF-8 to the named character set.
	TranslatorTo(name string) (Translator, error)

	// Names returns all the character set names accessible through the factory.
	Names() []string

	// Info returns information on the named character set. It returns nil if the
	// factory doesn't recognise the given name.
	Info(name string) *Charset
}

// factories holds every registered Factory; the local (data-file
// based) factory is always present.
var factories = []Factory{localFactory{}}

// Register registers a new Factory which will be consulted when NewReader
// or NewWriter needs a character set translator for a given name.
func Register(factory Factory) {
	factories = append(factories, factory)
}
+
+// NewReader returns a new Reader that translates from the named
+// character set to UTF-8 as it reads r.
+func NewReader(charset string, r io.Reader) (io.Reader, error) {
+ tr, err := TranslatorFrom(charset)
+ if err != nil {
+ return nil, err
+ }
+ return NewTranslatingReader(r, tr), nil
+}
+
+// NewWriter returns a new WriteCloser writing to w. It converts writes
+// of UTF-8 text into writes on w of text in the named character set.
+// The Close is necessary to flush any remaining partially translated
+// characters to the output.
+func NewWriter(charset string, w io.Writer) (io.WriteCloser, error) {
+ tr, err := TranslatorTo(charset)
+ if err != nil {
+ return nil, err
+ }
+ return NewTranslatingWriter(w, tr), nil
+}
+
+// Info returns information about a character set, or nil
+// if the character set is not found.
+func Info(name string) *Charset {
+ for _, f := range factories {
+ if info := f.Info(name); info != nil {
+ return info
+ }
+ }
+ return nil
+}
+
+// Names returns the canonical names of all supported character sets, in alphabetical order.
+func Names() []string {
+ // TODO eliminate duplicates
+ var names []string
+ for _, f := range factories {
+ names = append(names, f.Names()...)
+ }
+ return names
+}
+
+// TranslatorFrom returns a translator that will translate from
+// the named character set to UTF-8.
+func TranslatorFrom(charset string) (Translator, error) {
+ var err error
+ var tr Translator
+ for _, f := range factories {
+ tr, err = f.TranslatorFrom(charset)
+ if err == nil {
+ break
+ }
+ }
+ if tr == nil {
+ return nil, err
+ }
+ return tr, nil
+}
+
+// TranslatorTo returns a translator that will translate from UTF-8
+// to the named character set.
+func TranslatorTo(charset string) (Translator, error) {
+ var err error
+ var tr Translator
+ for _, f := range factories {
+ tr, err = f.TranslatorTo(charset)
+ if err == nil {
+ break
+ }
+ }
+ if tr == nil {
+ return nil, err
+ }
+ return tr, nil
+}
+
// normalizedChar maps a single rune to its normalized form:
// upper-case ASCII letters become lower case, and '_' becomes '-'.
func normalizedChar(c rune) rune {
	if c >= 'A' && c <= 'Z' {
		return c - 'A' + 'a'
	}
	if c == '_' {
		return '-'
	}
	return c
}

// NormalizedName returns s with all Roman capitals
// mapped to lower case, and '_' mapped to '-'.
func NormalizedName(s string) string {
	return strings.Map(normalizedChar, s)
}
+
// translatingWriter wraps an io.Writer, passing everything written
// through a Translator before it reaches w.
type translatingWriter struct {
	w   io.Writer
	tr  Translator
	buf []byte // unconsumed data from writer.
}

// NewTranslatingWriter returns a new WriteCloser writing to w.
// It passes the written bytes through the given Translator.
func NewTranslatingWriter(w io.Writer, tr Translator) io.WriteCloser {
	return &translatingWriter{w: w, tr: tr}
}
+
+func (w *translatingWriter) Write(data []byte) (rn int, rerr error) {
+ wdata := data
+ if len(w.buf) > 0 {
+ w.buf = append(w.buf, data...)
+ wdata = w.buf
+ }
+ n, cdata, err := w.tr.Translate(wdata, false)
+ if err != nil {
+ // TODO
+ }
+ if n > 0 {
+ _, err = w.w.Write(cdata)
+ if err != nil {
+ return 0, err
+ }
+ }
+ w.buf = w.buf[:0]
+ if n < len(wdata) {
+ w.buf = append(w.buf, wdata[n:]...)
+ }
+ return len(data), nil
+}
+
+func (p *translatingWriter) Close() error {
+ for {
+ n, data, err := p.tr.Translate(p.buf, true)
+ p.buf = p.buf[n:]
+ if err != nil {
+ // TODO
+ }
+ // If the Translator produces no data
+ // at EOF, then assume that it never will.
+ if len(data) == 0 {
+ break
+ }
+ n, err = p.w.Write(data)
+ if err != nil {
+ return err
+ }
+ if n < len(data) {
+ return io.ErrShortWrite
+ }
+ if len(p.buf) == 0 {
+ break
+ }
+ }
+ return nil
+}
+
// translatingReader wraps an io.Reader, translating the data with tr
// as it is read.
type translatingReader struct {
	r     io.Reader
	tr    Translator
	cdata []byte // unconsumed data from converter.
	rdata []byte // unconverted data from reader.
	err   error  // final error from reader.
}

// NewTranslatingReader returns a new Reader that
// translates data using the given Translator as it reads r.
func NewTranslatingReader(r io.Reader, tr Translator) io.Reader {
	return &translatingReader{r: r, tr: tr}
}

// Read implements io.Reader. Previously converted data is drained
// first; then more raw bytes are read from the underlying reader and
// converted. The underlying reader's error (including io.EOF) is only
// surfaced once all converted data has been returned.
func (r *translatingReader) Read(buf []byte) (int, error) {
	for {
		if len(r.cdata) > 0 {
			n := copy(buf, r.cdata)
			r.cdata = r.cdata[n:]
			return n, nil
		}
		if r.err == nil {
			r.rdata = ensureCap(r.rdata, len(r.rdata)+len(buf))
			n, err := r.r.Read(r.rdata[len(r.rdata):cap(r.rdata)])
			// Guard against non-compliant Readers.
			if n == 0 && err == nil {
				err = io.EOF
			}
			r.rdata = r.rdata[0 : len(r.rdata)+n]
			r.err = err
		} else if len(r.rdata) == 0 {
			break
		}
		// NOTE(review): conversion errors are silently dropped here
		// (TODO in the original); whatever output the Translator did
		// produce is still passed through.
		nc, cdata, cvterr := r.tr.Translate(r.rdata, r.err != nil)
		if cvterr != nil {
			// TODO
		}
		r.cdata = cdata

		// Ensure that we consume all bytes at eof
		// if the converter refuses them.
		if nc == 0 && r.err != nil {
			nc = len(r.rdata)
		}

		// Copy unconsumed data to the start of the rdata buffer.
		r.rdata = r.rdata[0:copy(r.rdata, r.rdata[nc:])]
	}
	return 0, r.err
}
+
// ensureCap returns s with a capacity of at least n bytes.
// If cap(s) < n, then it returns a new copy of s with the
// required capacity.
func ensureCap(s []byte, n int) []byte {
	if cap(s) >= n {
		return s
	}
	// Growth policy (mirrors the runtime's append): double while
	// small, then grow by 25% at a time; start directly at n when
	// there is no existing capacity.
	m := cap(s)
	if m == 0 {
		m = n
	} else {
		for m < n {
			if m < 1024 {
				m += m
			} else {
				m += m / 4
			}
		}
	}
	grown := make([]byte, len(s), m)
	copy(grown, s)
	return grown
}

// appendRune appends the UTF-8 encoding of r to buf, growing buf as
// necessary, and returns the extended slice.
func appendRune(buf []byte, r rune) []byte {
	n := len(buf)
	buf = ensureCap(buf, n+utf8.UTFMax)
	return buf[:n+utf8.EncodeRune(buf[n:n+utf8.UTFMax], r)]
}
diff --git a/vendor/github.com/paulrosania/go-charset/charset/codepage.go b/vendor/github.com/paulrosania/go-charset/charset/codepage.go
new file mode 100644
index 00000000..6864c875
--- /dev/null
+++ b/vendor/github.com/paulrosania/go-charset/charset/codepage.go
@@ -0,0 +1,133 @@
+package charset
+
+import (
+ "fmt"
+ "unicode/utf8"
+)
+
// Register the generic code-page codec. The charset argument names a
// data file containing exactly 256 runes (byte value -> unicode).
func init() {
	registerClass("cp", fromCodePage, toCodePage)
}

// translateFromCodePage converts single-byte code-page data to UTF-8
// via a 256-entry lookup table.
type translateFromCodePage struct {
	byte2rune *[256]rune // byte value -> unicode rune
	scratch   []byte     // reusable output buffer
}

// cpKeyFrom and cpKeyTo are cache key types for the lazily-loaded
// forward and reverse tables; the string is the data file name.
type cpKeyFrom string
type cpKeyTo string

// Translate implements the Translator interface. Every input byte
// maps to exactly one rune, so the whole of data is always consumed.
func (p *translateFromCodePage) Translate(data []byte, eof bool) (int, []byte, error) {
	// Worst case every byte expands to utf8.UTFMax output bytes.
	p.scratch = ensureCap(p.scratch, len(data)*utf8.UTFMax)[:0]
	buf := p.scratch
	for _, x := range data {
		r := p.byte2rune[x]
		if r < utf8.RuneSelf {
			buf = append(buf, byte(r))
			continue
		}
		// Encode directly into the spare capacity reserved above.
		size := utf8.EncodeRune(buf[len(buf):cap(buf)], r)
		buf = buf[0 : len(buf)+size]
	}
	return len(data), buf, nil
}

// toCodePageInfo holds the reverse (unicode -> byte) mapping.
type toCodePageInfo struct {
	rune2byte map[rune]byte
	// same gives the number of runes at start of code page that map exactly to
	// unicode.
	same rune
}

// translateToCodePage converts UTF-8 to single-byte code-page data.
type translateToCodePage struct {
	toCodePageInfo
	scratch []byte // reusable output buffer
}

// Translate implements the Translator interface. Runes without a
// mapping become '?'. An incomplete trailing UTF-8 sequence is left
// unconsumed unless eof is true.
func (p *translateToCodePage) Translate(data []byte, eof bool) (int, []byte, error) {
	p.scratch = ensureCap(p.scratch, len(data))
	buf := p.scratch[:0]

	for i := 0; i < len(data); {
		r := rune(data[i])
		size := 1
		if r >= utf8.RuneSelf {
			r, size = utf8.DecodeRune(data[i:])
			if size == 1 && !eof && !utf8.FullRune(data[i:]) {
				// Partial rune at the end of the input; wait for more.
				return i, buf, nil
			}
		}

		var b byte
		if r < p.same {
			// The leading range maps identically to unicode.
			b = byte(r)
		} else {
			var ok bool
			b, ok = p.rune2byte[r]
			if !ok {
				b = '?'
			}
		}
		buf = append(buf, b)
		i += size
	}
	return len(data), buf, nil
}

// fromCodePage creates a Translator from the code page named by arg
// (a data file of exactly 256 runes) to UTF-8.
func fromCodePage(arg string) (Translator, error) {
	runes, err := cache(cpKeyFrom(arg), func() (interface{}, error) {
		data, err := readFile(arg)
		if err != nil {
			return nil, err
		}
		runes := []rune(string(data))
		if len(runes) != 256 {
			return nil, fmt.Errorf("charset: %q has wrong rune count (%d)", arg, len(runes))
		}
		r := new([256]rune)
		copy(r[:], runes)
		return r, nil
	})
	if err != nil {
		return nil, err
	}
	return &translateFromCodePage{byte2rune: runes.(*[256]rune)}, nil
}

// toCodePage creates a Translator from UTF-8 to the code page named
// by arg. It builds the reverse map, recording in same how long the
// initial identity-mapped prefix is so those runes can skip the map.
func toCodePage(arg string) (Translator, error) {
	m, err := cache(cpKeyTo(arg), func() (interface{}, error) {
		data, err := readFile(arg)
		if err != nil {
			return nil, err
		}

		info := toCodePageInfo{
			rune2byte: make(map[rune]byte),
			same:      256,
		}
		atStart := true
		i := rune(0)
		for _, r := range string(data) {
			if atStart {
				if r == i {
					i++
					continue
				}
				info.same = i
				atStart = false
			}
			info.rune2byte[r] = byte(i)
			i++
		}
		// TODO fix tables
		// fmt.Printf("%s, same = %d\n", arg, info.same)
		if i != 256 {
			return nil, fmt.Errorf("charset: %q has wrong rune count (%d)", arg, i)
		}
		return info, nil
	})
	if err != nil {
		return nil, err
	}
	return &translateToCodePage{toCodePageInfo: m.(toCodePageInfo)}, nil
}
diff --git a/vendor/github.com/paulrosania/go-charset/charset/cp932.go b/vendor/github.com/paulrosania/go-charset/charset/cp932.go
new file mode 100644
index 00000000..9f46262b
--- /dev/null
+++ b/vendor/github.com/paulrosania/go-charset/charset/cp932.go
@@ -0,0 +1,195 @@
+package charset
+
+import (
+ "fmt"
+ "unicode/utf8"
+)
+
// Register the CP932/Shift-JIS decoder. Only conversion to UTF-8 is
// supported, so the "to" direction is nil.
func init() {
	registerClass("cp932", fromCP932, nil)
}

// encoding details
// (Traditional) Shift-JIS
//
// 00..1f  control characters
// 20      space
// 21..7f  JIS X 0201:1976/1997 roman (see notes)
// 80      undefined
// 81..9f  lead byte of JIS X 0208-1983 or JIS X 0202:1990/1997
// a0      undefined
// a1..df  JIS X 0201:1976/1997 katakana
// e0..ea  lead byte of JIS X 0208-1983 or JIS X 0202:1990/1997
// eb..ff  undefined
//
// CP932 (windows-31J)
//
// this encoding scheme extends Shift-JIS in the following way
//
// eb..ec  undefined (marked as lead bytes - see notes below)
// ed..ee  lead byte of NEC-selected IBM extended characters
// ef      undefined (marked as lead byte - see notes below)
// f0..f9  lead byte of User defined GAIJI (see note below)
// fa..fc  lead byte of IBM extended characters
// fd..ff  undefined
//
//
// Notes
//
// JISX 0201:1976/1997 roman
// this is the same as ASCII but with 0x5c (ASCII code for '\')
// representing the Yen currency symbol '¥' (U+00a5)
// This mapping is contentious; some conversion packages implement it,
// others do not.
// The mapping files from The Unicode Consortium show cp932 mapping
// plain ascii in the range 00..7f whereas shift-jis maps 0x5c ('\') to the yen
// symbol (¥) and 0x7e ('~') to overline (¯)
//
// CP932 double-byte character codes:
//
// eb-ec, ef, f0-f9:
// Marked as DBCS LEAD BYTEs in the unicode mapping data
// obtained from:
// https://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP932.TXT
//
// but there are no defined mappings for codes in this range.
// It is not clear whether or not an implementation should
// consume one or two bytes before emitting an error char.

const (
	kanaPages    = 1
	kanaPageSize = 63
	kanaChar0    = 0xa1

	cp932Pages    = 45  // 81..84, 87..9f, e0..ea, ed..ee, fa..fc
	cp932PageSize = 189 // 40..fc (including 7f)
	cp932Char0    = 0x40
)

// jisTables holds the decode tables for one variant (cp932 or
// traditional shift-jis).
type jisTables struct {
	page0   [256]rune // single-byte mapping; -1 marks a DBCS lead byte
	dbcsoff [256]int  // lead byte -> page number within the cp932 table
	cp932   []rune    // two-byte character mapping, cp932Pages pages
}

// translateFromCP932 converts CP932/Shift-JIS bytes to UTF-8.
type translateFromCP932 struct {
	tables  *jisTables
	scratch []byte // reusable output buffer
}
+
// Translate implements the Translator interface, converting CP932 (or
// Shift-JIS) bytes to UTF-8. Single-byte characters are looked up in
// page0; a page0 value of -1 marks a DBCS lead byte, whose following
// byte selects a rune from the cp932 table. A trailing lead byte with
// no follower is left unconsumed (n excludes it) so translation can
// resume on the next call.
func (p *translateFromCP932) Translate(data []byte, eof bool) (int, []byte, error) {
	tables := p.tables
	p.scratch = p.scratch[:0]
	n := 0
	for i := 0; i < len(data); i++ {
		b := data[i]
		r := tables.page0[b]
		if r != -1 {
			p.scratch = appendRune(p.scratch, r)
			n++
			continue
		}
		// DBCS
		i++
		if i >= len(data) {
			// Lead byte with no follower; leave it unconsumed.
			break
		}
		pnum := tables.dbcsoff[b]
		ix := int(data[i]) - cp932Char0
		if pnum == -1 || ix < 0 || ix >= cp932PageSize {
			// NOTE(review): fromCP932 never stores -1 in dbcsoff, so
			// the pnum == -1 arm looks unreachable — confirm upstream.
			r = utf8.RuneError
		} else {
			r = tables.cp932[pnum*cp932PageSize+ix]
		}
		p.scratch = appendRune(p.scratch, r)
		n += 2
	}
	return n, p.scratch, nil
}
+
+type cp932Key bool
+
+func fromCP932(arg string) (Translator, error) {
+ shiftJIS := arg == "shiftjis"
+ tables, err := cache(cp932Key(shiftJIS), func() (interface{}, error) {
+ tables := new(jisTables)
+ kana, err := jisGetMap("jisx0201kana.dat", kanaPageSize, kanaPages)
+ if err != nil {
+ return nil, err
+ }
+ tables.cp932, err = jisGetMap("cp932.dat", cp932PageSize, cp932Pages)
+ if err != nil {
+ return nil, err
+ }
+
+ // jisx0201kana is mapped into 0xA1..0xDF
+ for i := 0; i < kanaPageSize; i++ {
+ tables.page0[i+kanaChar0] = kana[i]
+ }
+
+ // 00..7f same as ascii in cp932
+ for i := rune(0); i < 0x7f; i++ {
+ tables.page0[i] = i
+ }
+
+ if shiftJIS {
+ // shift-jis uses JIS X 0201 for the ASCII range
+ // this is the same as ASCII apart from
+ // 0x5c ('\') maps to yen symbol (¥) and 0x7e ('~') maps to overline (¯)
+ tables.page0['\\'] = '¥'
+ tables.page0['~'] = '¯'
+ }
+
+ // pre-calculate DBCS page numbers to mapping file page numbers
+ // and mark codes in page0 that are DBCS lead bytes
+ pnum := 0
+ for i := 0x81; i <= 0x84; i++ {
+ tables.page0[i] = -1
+ tables.dbcsoff[i] = pnum
+ pnum++
+ }
+ for i := 0x87; i <= 0x9f; i++ {
+ tables.page0[i] = -1
+ tables.dbcsoff[i] = pnum
+ pnum++
+ }
+ for i := 0xe0; i <= 0xea; i++ {
+ tables.page0[i] = -1
+ tables.dbcsoff[i] = pnum
+ pnum++
+ }
+ if shiftJIS {
+ return tables, nil
+ }
+ // add in cp932 extensions
+ for i := 0xed; i <= 0xee; i++ {
+ tables.page0[i] = -1
+ tables.dbcsoff[i] = pnum
+ pnum++
+ }
+ for i := 0xfa; i <= 0xfc; i++ {
+ tables.page0[i] = -1
+ tables.dbcsoff[i] = pnum
+ pnum++
+ }
+ return tables, nil
+ })
+
+ if err != nil {
+ return nil, err
+ }
+
+ return &translateFromCP932{tables: tables.(*jisTables)}, nil
+}
+
+func jisGetMap(name string, pgsize, npages int) ([]rune, error) {
+ data, err := readFile(name)
+ if err != nil {
+ return nil, err
+ }
+ m := []rune(string(data))
+ if len(m) != pgsize*npages {
+ return nil, fmt.Errorf("%q: incorrect length data", name)
+ }
+ return m, nil
+}
diff --git a/vendor/github.com/paulrosania/go-charset/charset/file.go b/vendor/github.com/paulrosania/go-charset/charset/file.go
new file mode 100644
index 00000000..a0c26225
--- /dev/null
+++ b/vendor/github.com/paulrosania/go-charset/charset/file.go
@@ -0,0 +1,40 @@
+package charset
+
+import (
+ "io"
+ "io/ioutil"
+ "os"
+ "path/filepath"
+)
+
+var files = make(map[string]func() (io.ReadCloser, error))
+
+// RegisterDataFile registers the existence of a given data
+// file with the given name that may be used by a character-set converter.
+// It is intended to be used by packages that wish to embed
+// data in the executable binary, and should not be
+// used normally.
+func RegisterDataFile(name string, open func() (io.ReadCloser, error)) {
+ files[name] = open
+}
+
+// CharsetDir gives the location of the default data file directory.
+// This directory will be used for files with names that have not
+// been registered with RegisterDataFile.
+var CharsetDir = "/usr/local/lib/go-charset/datafiles"
+
+func readFile(name string) (data []byte, err error) {
+ var r io.ReadCloser
+ if open := files[name]; open != nil {
+ r, err = open()
+ if err != nil {
+ return
+ }
+ } else {
+ r, err = os.Open(filepath.Join(CharsetDir, name))
+ if err != nil {
+ return
+ }
+ }
+ return ioutil.ReadAll(r)
+}
diff --git a/vendor/github.com/paulrosania/go-charset/charset/iconv/iconv.go b/vendor/github.com/paulrosania/go-charset/charset/iconv/iconv.go
new file mode 100644
index 00000000..f7187f5f
--- /dev/null
+++ b/vendor/github.com/paulrosania/go-charset/charset/iconv/iconv.go
@@ -0,0 +1,184 @@
+// The iconv package provides an interface to the GNU iconv character set
+// conversion library (see http://www.gnu.org/software/libiconv/).
+// It automatically registers all the character sets with the charset package,
+// so it is usually used simply for the side effects of importing it.
+// Example:
+// import (
+// "go-charset.googlecode.com/hg/charset"
+// _ "go-charset.googlecode.com/hg/charset/iconv"
+// )
+package iconv
+
+//#cgo darwin LDFLAGS: -liconv
+//#include <stdlib.h>
+//#include <iconv.h>
+//#include <errno.h>
+//iconv_t iconv_open_error = (iconv_t)-1;
+//size_t iconv_error = (size_t)-1;
+import "C"
+import (
+ "errors"
+ "fmt"
+ "github.com/paulrosania/go-charset/charset"
+ "runtime"
+ "strings"
+ "syscall"
+ "unicode/utf8"
+ "unsafe"
+)
+
// iconvTranslator adapts an iconv conversion descriptor to the
// charset.Translator interface.
type iconvTranslator struct {
	cd      C.iconv_t // iconv conversion descriptor
	invalid rune      // rune substituted for invalid multibyte sequences
	scratch []byte    // reusable output buffer
}
+
// canonicalChar upper-cases a single lower-case ASCII letter, leaving
// every other rune untouched.
func canonicalChar(c rune) rune {
	if 'a' <= c && c <= 'z' {
		return c + 'A' - 'a'
	}
	return c
}

// canonicalName returns s with all lower-case ASCII letters mapped to
// upper case, the form preferred for iconv charset names.
func canonicalName(s string) string {
	return strings.Map(canonicalChar, s)
}
+
// Register the iconv factory with the charset package so all
// iconv-supported character sets become available.
func init() {
	charset.Register(iconvFactory{})
}

// iconvFactory implements charset.Factory using libiconv.
type iconvFactory struct {
}

// TranslatorFrom creates a translator from the named character set to
// UTF-8, substituting utf8.RuneError for invalid input sequences.
func (iconvFactory) TranslatorFrom(name string) (charset.Translator, error) {
	return Translator("UTF-8", name, utf8.RuneError)
}

// TranslatorTo creates a translator from UTF-8 to the named character
// set, substituting '?' for unrepresentable characters.
func (iconvFactory) TranslatorTo(name string) (charset.Translator, error) {
	// BUG This is wrong. The target character set may not be ASCII
	// compatible. There's no easy solution to this other than
	// removing the offending code point.
	return Translator(name, "UTF-8", '?')
}
+
// Translator returns a Translator that translates between
// the named character sets. When an invalid multibyte
// character is found, the invalid rune is substituted instead.
func Translator(toCharset, fromCharset string, invalid rune) (charset.Translator, error) {
	cto, cfrom := C.CString(toCharset), C.CString(fromCharset)
	cd, err := C.iconv_open(cto, cfrom)

	C.free(unsafe.Pointer(cfrom))
	C.free(unsafe.Pointer(cto))

	if cd == C.iconv_open_error {
		if err == syscall.EINVAL {
			return nil, errors.New("iconv: conversion not supported")
		}
		return nil, err
	}
	t := &iconvTranslator{cd: cd, invalid: invalid}
	// Release the conversion descriptor when the translator is
	// garbage collected.
	runtime.SetFinalizer(t, func(*iconvTranslator) {
		C.iconv_close(cd)
	})
	return t, nil
}

// Names returns the canonical names of all character sets known to
// iconv (one entry per alias group).
func (iconvFactory) Names() []string {
	all := aliases()
	names := make([]string, 0, len(all))
	for name, aliases := range all {
		// Include each charset only once: the canonical name is the
		// first entry of its alias list.
		if aliases[0] == name {
			names = append(names, name)
		}
	}
	return names
}

// Info returns alias information for the named character set, or nil
// if iconv does not know it.
func (iconvFactory) Info(name string) *charset.Charset {
	name = strings.ToLower(name)
	all := aliases()
	a, ok := all[name]
	if !ok {
		return nil
	}
	return &charset.Charset{
		Name:    name,
		Aliases: a,
	}
}
+
// Translate implements charset.Translator by calling C.iconv in a
// loop, growing the output buffer and substituting p.invalid for
// invalid sequences as needed.
func (p *iconvTranslator) Translate(data []byte, eof bool) (rn int, rd []byte, rerr error) {
	n := 0
	p.scratch = p.scratch[:0]
	for len(data) > 0 {
		p.scratch = ensureCap(p.scratch, len(p.scratch)+len(data)*utf8.UTFMax)
		cData := (*C.char)(unsafe.Pointer(&data[:1][0]))
		nData := C.size_t(len(data))

		ns := len(p.scratch)
		cScratch := (*C.char)(unsafe.Pointer(&p.scratch[ns : ns+1][0]))
		nScratch := C.size_t(cap(p.scratch) - ns)
		r, err := C.iconv(p.cd, &cData, &nData, &cScratch, &nScratch)

		// iconv advanced the pointers; recover the consumed/produced
		// counts from the remaining-size out-parameters.
		p.scratch = p.scratch[0 : cap(p.scratch)-int(nScratch)]
		n += len(data) - int(nData)
		data = data[len(data)-int(nData):]

		if r != C.iconv_error || err == nil {
			return n, p.scratch, nil
		}
		switch err := err.(syscall.Errno); err {
		case C.EILSEQ:
			// invalid multibyte sequence - skip one byte and continue
			p.scratch = appendRune(p.scratch, p.invalid)
			n++
			data = data[1:]
		case C.EINVAL:
			// incomplete multibyte sequence
			return n, p.scratch, nil
		case C.E2BIG:
			// output buffer not large enough; try again with larger buffer.
			p.scratch = ensureCap(p.scratch, cap(p.scratch)+utf8.UTFMax)
		default:
			panic(fmt.Sprintf("unexpected error code: %v", err))
		}
	}
	return n, p.scratch, nil
}
+
// ensureCap returns s with a capacity of at least n bytes.
// If cap(s) < n, then it returns a new copy of s with the
// required capacity.
func ensureCap(s []byte, n int) []byte {
	if cap(s) >= n {
		return s
	}
	// Growth policy (mirrors the runtime's append): double while
	// small, grow by 25% once large; start at n when empty.
	m := cap(s)
	if m == 0 {
		m = n
	} else {
		for m < n {
			if m < 1024 {
				m += m
			} else {
				m += m / 4
			}
		}
	}
	out := make([]byte, len(s), m)
	copy(out, s)
	return out
}

// appendRune appends the UTF-8 encoding of r to buf, growing buf as
// necessary, and returns the extended slice.
func appendRune(buf []byte, r rune) []byte {
	n := len(buf)
	buf = ensureCap(buf, n+utf8.UTFMax)
	return buf[:n+utf8.EncodeRune(buf[n:n+utf8.UTFMax], r)]
}
diff --git a/vendor/github.com/paulrosania/go-charset/charset/iconv/list_query.go b/vendor/github.com/paulrosania/go-charset/charset/iconv/list_query.go
new file mode 100644
index 00000000..cda03270
--- /dev/null
+++ b/vendor/github.com/paulrosania/go-charset/charset/iconv/list_query.go
@@ -0,0 +1,80 @@
+// +build !linux
+// This file is system-dependent because not all versions
+// of iconv have the iconvlist function.
+
+package iconv
+
+//#cgo darwin LDFLAGS: -liconv
+//#cgo freebsd LDFLAGS: -liconv
+//#cgo windows LDFLAGS: -liconv
+//#include <stdlib.h>
+//#include <string.h>
+//#include <iconv.h>
+//#include <errno.h>
+//
+//typedef struct nameList nameList;
+//struct nameList {
+// int n;
+// char **names;
+// nameList *next;
+//};
+//
+//int
+//addNames(unsigned int n, const char *const *names, void *data) {
+// // we can't call back to Go because of the stack size issue,
+// // so copy all the names.
+// nameList *hd, *e;
+// int i;
+//
+// hd = data;
+// e = malloc(sizeof(nameList));
+// e->n = n;
+// e->names = malloc(sizeof(char*) * n);
+// for(i = 0; i < n; i++){
+// e->names[i] = strdup(names[i]);
+// }
+// e->next = hd->next;
+// hd->next = e;
+// return 0;
+//}
+//
+//nameList *
+//listNames(void) {
+// nameList hd;
+// hd.next = 0;
+// iconvlist(addNames, &hd);
+// return hd.next;
+//}
+import "C"
+
+import (
+ "strings"
+ "sync"
+ "unsafe"
+)
+
// getAliasesOnce guards the one-time construction of allAliases.
var getAliasesOnce sync.Once
var allAliases = map[string][]string{}

// aliases returns the map from each known charset alias to the full
// alias list for its charset, querying iconv on first use.
func aliases() map[string][]string {
	getAliasesOnce.Do(getAliases)
	return allAliases
}

// getAliases walks the C name list produced by listNames (a wrapper
// around iconvlist), copying every name into Go strings (lower-cased)
// and freeing the C allocations as it goes.
func getAliases() {
	var next *C.nameList
	for p := C.listNames(); p != nil; p = next {
		next = p.next
		aliases := make([]string, p.n)
		pnames := (*[1e9]*C.char)(unsafe.Pointer(p.names))
		for i := range aliases {
			aliases[i] = strings.ToLower(C.GoString(pnames[i]))
			C.free(unsafe.Pointer(pnames[i]))
		}
		C.free(unsafe.Pointer(p.names))
		C.free(unsafe.Pointer(p))
		for _, alias := range aliases {
			allAliases[alias] = aliases
		}
	}
}
diff --git a/vendor/github.com/paulrosania/go-charset/charset/iconv/list_static.go b/vendor/github.com/paulrosania/go-charset/charset/iconv/list_static.go
new file mode 100644
index 00000000..edf9e28a
--- /dev/null
+++ b/vendor/github.com/paulrosania/go-charset/charset/iconv/list_static.go
@@ -0,0 +1,176 @@
+// +build linux
+
+// We just use a list of names obtained from iconv on a platform
+// that allows iconvlist. We could invoke the iconv command,
+// but that might fail too, and it gives no information about aliases.
+
+package iconv
+
+import (
+ "sync"
+)
+
+func aliases() map[string][]string {
+ initAliasesOnce.Do(initAliases)
+ return allAliases
+}
+
+var initAliasesOnce sync.Once
+var allAliases map[string][]string
+
+func initAliases() {
+ allAliases = make(map[string][]string)
+ for _, a := range aliasData {
+ for _, alias := range a {
+ allAliases[alias] = a
+ }
+ }
+}
+
// aliasData groups equivalent character-set names: every name in a group
// is an alias for the same encoding. All names are lowercase with "-"
// separators, matching the normalized form used for lookups.
// (Cleaned up relative to the raw iconv dump: a garbled entry built from
// a description string — " MS-Windows", "Japanese", "(cp932)" — has been
// removed, and duplicate names within a group have been deduplicated;
// duplicates were harmless but redundant, since each name just maps to
// its group.)
var aliasData = [][]string{
	{"437", "cp437", "ibm437", "cspc8codepage437"},
	{"850", "cp850", "ibm850", "cspc850multilingual"},
	{"852", "cp852", "ibm852", "cspcp852"},
	{"855", "cp855", "ibm855", "csibm855"},
	{"857", "cp857", "ibm857", "csibm857"},
	{"860", "cp860", "ibm860", "csibm860"},
	{"861", "cp-is", "cp861", "ibm861", "csibm861"},
	{"862", "cp862", "ibm862", "cspc862latinhebrew"},
	{"863", "cp863", "ibm863", "csibm863"},
	{"865", "cp865", "ibm865", "csibm865"},
	{"866", "cp866", "ibm866", "csibm866"},
	{"869", "cp-gr", "cp869", "ibm869", "csibm869"},
	{"ansi-x3.4-1968", "ansi-x3.4-1986", "ascii", "cp367", "ibm367", "iso-ir-6", "iso646-us", "iso-646.irv:1991", "us", "us-ascii", "csascii"},
	{"arabic", "asmo-708", "ecma-114", "iso-8859-6", "iso-ir-127", "iso8859-6", "iso-8859-6:1987", "csisolatinarabic"},
	{"armscii-8"},
	{"atari", "atarist"},
	{"big5-2003"},
	{"big-5", "big-five", "big5", "bigfive", "cn-big5", "csbig5"},
	{"big5-hkscs:1999"},
	{"big5-hkscs:2001"},
	{"big5-hkscs", "big5-hkscs:2004", "big5hkscs"},
	{"c99"},
	{"chinese", "gb-2312-80", "iso-ir-58", "csiso58gb231280"},
	{"cn", "gb-1988-80", "iso-ir-57", "iso646-cn", "csiso57gb1988"},
	{"cn-gb", "euc-cn", "euccn", "gb2312", "csgb2312"},
	{"cn-gb-isoir165", "iso-ir-165"},
	{"cp1046"},
	{"cp1124"},
	{"cp1125"},
	{"cp1129"},
	{"cp1131"},
	{"cp1133", "ibm-cp1133"},
	{"cp1161", "ibm-1161", "ibm1161", "csibm1161"},
	{"cp1162", "ibm-1162", "ibm1162", "csibm1162"},
	{"cp1163", "ibm-1163", "ibm1163", "csibm1163"},
	{"cp1250", "ms-ee", "windows-1250"},
	{"cp1251", "ms-cyrl", "windows-1251"},
	{"cp1252", "ms-ansi", "windows-1252"},
	{"cp1253", "ms-greek", "windows-1253"},
	{"cp1254", "ms-turk", "windows-1254"},
	{"cp1255", "ms-hebr", "windows-1255"},
	{"cp1256", "ms-arab", "windows-1256"},
	{"cp1257", "winbaltrim", "windows-1257"},
	{"cp1258", "windows-1258"},
	{"cp1361", "johab"},
	{"cp154", "cyrillic-asian", "pt154", "ptcp154", "csptcp154"},
	{"cp737"},
	{"cp775", "ibm775", "cspc775baltic"},
	{"cp819", "ibm819", "iso-8859-1", "iso-ir-100", "iso8859-1", "iso-8859-1:1987", "l1", "latin1", "csisolatin1"},
	{"cp853"},
	{"cp856"},
	{"cp858"},
	{"cp864", "ibm864", "csibm864"},
	{"cp874", "windows-874"},
	{"cp922"},
	{"cp932"},
	{"cp936", "ms936", "windows-936"},
	{"cp943"},
	{"cp949", "uhc"},
	{"cp950"},
	{"cyrillic", "iso-8859-5", "iso-ir-144", "iso8859-5", "iso-8859-5:1988", "csisolatincyrillic"},
	{"dec-hanyu"},
	{"dec-kanji"},
	{"ecma-118", "elot-928", "greek", "greek8", "iso-8859-7", "iso-ir-126", "iso8859-7", "iso-8859-7:1987", "iso-8859-7:2003", "csisolatingreek"},
	{"euc-jis-2004", "euc-jisx0213"},
	{"euc-jp", "eucjp", "extended-unix-code-packed-format-for-japanese", "cseucpkdfmtjapanese"},
	{"euc-kr", "euckr", "cseuckr"},
	{"euc-tw", "euctw", "cseuctw"},
	{"gb18030"},
	{"gbk"},
	{"georgian-academy"},
	{"georgian-ps"},
	{"hebrew", "iso-8859-8", "iso-ir-138", "iso8859-8", "iso-8859-8:1988", "csisolatinhebrew"},
	{"hp-roman8", "r8", "roman8", "cshproman8"},
	{"hz", "hz-gb-2312"},
	{"iso-10646-ucs-2", "ucs-2", "csunicode"},
	{"iso-10646-ucs-4", "ucs-4", "csucs4"},
	{"iso-2022-cn", "csiso2022cn"},
	{"iso-2022-cn-ext"},
	{"iso-2022-jp-1"},
	{"iso-2022-jp-2004", "iso-2022-jp-3"},
	{"iso-2022-jp-2", "csiso2022jp2"},
	{"iso-2022-jp", "csiso2022jp"},
	{"iso-2022-kr", "csiso2022kr"},
	{"iso-8859-10", "iso-ir-157", "iso8859-10", "iso-8859-10:1992", "l6", "latin6", "csisolatin6"},
	{"iso-8859-11", "iso8859-11"},
	{"iso-8859-13", "iso-ir-179", "iso8859-13", "l7", "latin7"},
	{"iso-8859-14", "iso-celtic", "iso-ir-199", "iso8859-14", "iso-8859-14:1998", "l8", "latin8"},
	{"iso-8859-15", "iso-ir-203", "iso8859-15", "iso-8859-15:1998", "latin-9"},
	{"iso-8859-16", "iso-ir-226", "iso8859-16", "iso-8859-16:2001", "l10", "latin10"},
	{"iso-8859-2", "iso-ir-101", "iso8859-2", "iso-8859-2:1987", "l2", "latin2", "csisolatin2"},
	{"iso-8859-3", "iso-ir-109", "iso8859-3", "iso-8859-3:1988", "l3", "latin3", "csisolatin3"},
	{"iso-8859-4", "iso-ir-110", "iso8859-4", "iso-8859-4:1988", "l4", "latin4", "csisolatin4"},
	{"iso-8859-9", "iso-ir-148", "iso8859-9", "iso-8859-9:1989", "l5", "latin5", "csisolatin5"},
	{"iso-ir-149", "korean", "ksc-5601", "ks-c-5601-1987", "ks-c-5601-1989", "csksc56011987"},
	{"iso-ir-14", "iso646-jp", "jis-c6220-1969-ro", "jp", "csiso14jisc6220ro"},
	{"iso-ir-159", "jis-x0212", "jis-x0212-1990", "jis-x0212.1990-0", "x0212", "csiso159jisx02121990"},
	{"iso-ir-166", "tis-620", "tis620", "tis620-0", "tis620.2529-1", "tis620.2533-0", "tis620.2533-1"},
	{"iso-ir-230", "tds565"},
	{"iso-ir-87", "jis0208", "jis-c6226-1983", "jis-x0208", "jis-x0208-1983", "jis-x0208-1990", "x0208", "csiso87jisx0208"},
	{"java"},
	{"jisx0201-1976", "jis-x0201", "x0201", "cshalfwidthkatakana"},
	{"koi8-r", "cskoi8r"},
	{"koi8-ru"},
	{"koi8-t"},
	{"koi8-u"},
	{"kz-1048", "rk1048", "strk1048-2002", "cskz1048"},
	{"macarabic"},
	{"maccentraleurope"},
	{"maccroatian"},
	{"maccyrillic"},
	{"macgreek"},
	{"machebrew"},
	{"maciceland"},
	{"mac", "macintosh", "macroman", "csmacintosh"},
	{"macromania"},
	{"macthai"},
	{"macturkish"},
	{"macukraine"},
	{"ms-kanji", "shift-jis", "sjis", "csshiftjis"},
	{"mulelao-1"},
	{"nextstep"},
	{"riscos-latin1"},
	{"shift-jis-2004", "shift-jisx0213"},
	{"tcvn", "tcvn-5712", "tcvn5712-1", "tcvn5712-1:1993"},
	{"ucs-2be", "unicode-1-1", "unicodebig", "csunicode11"},
	{"ucs-2-internal"},
	{"ucs-2le", "unicodelittle"},
	{"ucs-2-swapped"},
	{"ucs-4be"},
	{"ucs-4-internal"},
	{"ucs-4le"},
	{"ucs-4-swapped"},
	{"unicode-1-1-utf-7", "utf-7", "csunicode11utf7"},
	{"utf-16"},
	{"utf-16be"},
	{"utf-16le"},
	{"utf-32"},
	{"utf-32be"},
	{"utf-32le"},
	{"utf-8"},
	{"utf-8-mac", "utf8-mac"},
	{"viscii", "viscii1.1-1", "csviscii"},
	{"windows-31j", "cp932"},
}
diff --git a/vendor/github.com/paulrosania/go-charset/charset/local.go b/vendor/github.com/paulrosania/go-charset/charset/local.go
new file mode 100644
index 00000000..9776b962
--- /dev/null
+++ b/vendor/github.com/paulrosania/go-charset/charset/local.go
@@ -0,0 +1,162 @@
+package charset
+
+import (
+ "encoding/json"
+ "fmt"
+ "os"
+ "sync"
+)
+
var (
	// readLocalCharsetsOnce guards the one-time parse of charsets.json.
	readLocalCharsetsOnce sync.Once
	// localCharsets maps every normalized charset name and alias to its entry.
	localCharsets = make(map[string]*localCharset)
)
+
// localCharset is one instantiable character set: the public Charset
// metadata plus the class (and class argument) used to build translators.
type localCharset struct {
	Charset
	arg string
	*class
}

// A class of character sets.
// Each class can be instantiated with an argument specified in the config file.
// Many character sets can use a single class.
// Either constructor may be nil if translation in that direction is
// not supported.
type class struct {
	from, to func(arg string) (Translator, error)
}

// The set of classes, indexed by class name.
var classes = make(map[string]*class)

// registerClass makes a translator class available under the given name;
// it is called from init functions of the individual implementations.
func registerClass(charset string, from, to func(arg string) (Translator, error)) {
	classes[charset] = &class{from, to}
}
+
+type localFactory struct{}
+
+func (f localFactory) TranslatorFrom(name string) (Translator, error) {
+ f.init()
+ name = NormalizedName(name)
+ cs := localCharsets[name]
+ if cs == nil {
+ return nil, fmt.Errorf("character set %q not found", name)
+ }
+ if cs.from == nil {
+ return nil, fmt.Errorf("cannot translate from %q", name)
+ }
+ return cs.from(cs.arg)
+}
+
+func (f localFactory) TranslatorTo(name string) (Translator, error) {
+ f.init()
+ name = NormalizedName(name)
+ cs := localCharsets[name]
+ if cs == nil {
+ return nil, fmt.Errorf("character set %q not found", name)
+ }
+ if cs.to == nil {
+ return nil, fmt.Errorf("cannot translate to %q", name)
+ }
+ return cs.to(cs.arg)
+}
+
+func (f localFactory) Names() []string {
+ f.init()
+ var names []string
+ for name, cs := range localCharsets {
+ // add names only for non-aliases.
+ if localCharsets[cs.Name] == cs {
+ names = append(names, name)
+ }
+ }
+ return names
+}
+
+func (f localFactory) Info(name string) *Charset {
+ f.init()
+ lcs := localCharsets[NormalizedName(name)]
+ if lcs == nil {
+ return nil
+ }
+ // copy the charset info so that callers can't mess with it.
+ cs := lcs.Charset
+ return &cs
+}
+
// init lazily loads the charsets.json configuration, exactly once.
func (f localFactory) init() {
	readLocalCharsetsOnce.Do(readLocalCharsets)
}
+
// charsetEntry is the data structure for one entry in the JSON config file.
// Class names an entry in the classes map and Arg is the argument used
// when instantiating that class; Aliases lists alternative names for the
// character set and Desc is a human-readable description.
// (The previous comment referred to an "Alias" field that does not exist.)
type charsetEntry struct {
	Aliases []string
	Desc    string
	Class   string
	Arg     string
}
+
+// readCharsets reads the JSON config file.
+// It's done once only, when first needed.
+func readLocalCharsets() {
+ csdata, err := readFile("charsets.json")
+ if err != nil {
+ fmt.Fprintf(os.Stderr, "charset: cannot open \"charsets.json\": %v\n", err)
+ return
+ }
+
+ var entries map[string]charsetEntry
+ err = json.Unmarshal(csdata, &entries)
+ if err != nil {
+ fmt.Fprintf(os.Stderr, "charset: cannot decode config file: %v\n", err)
+ }
+ for name, e := range entries {
+ class := classes[e.Class]
+ if class == nil {
+ continue
+ }
+ name = NormalizedName(name)
+ for i, a := range e.Aliases {
+ e.Aliases[i] = NormalizedName(a)
+ }
+ cs := &localCharset{
+ Charset: Charset{
+ Name: name,
+ Aliases: e.Aliases,
+ Desc: e.Desc,
+ NoFrom: class.from == nil,
+ NoTo: class.to == nil,
+ },
+ arg: e.Arg,
+ class: class,
+ }
+ localCharsets[cs.Name] = cs
+ for _, a := range cs.Aliases {
+ localCharsets[a] = cs
+ }
+ }
+}
+
// A general cache store that local character set translators
// can use for persistent storage of data.
var (
	cacheMutex sync.Mutex
	cacheStore = make(map[interface{}]interface{})
)

// cache returns the value stored under key, computing and memoizing it
// with f on first use. Errors from f are returned and not cached.
func cache(key interface{}, f func() (interface{}, error)) (interface{}, error) {
	cacheMutex.Lock()
	defer cacheMutex.Unlock()
	if cached, ok := cacheStore[key]; ok && cached != nil {
		return cached, nil
	}
	val, err := f()
	if err != nil {
		return nil, err
	}
	cacheStore[key] = val
	return val, nil
}
diff --git a/vendor/github.com/paulrosania/go-charset/charset/utf16.go b/vendor/github.com/paulrosania/go-charset/charset/utf16.go
new file mode 100644
index 00000000..ebde794c
--- /dev/null
+++ b/vendor/github.com/paulrosania/go-charset/charset/utf16.go
@@ -0,0 +1,110 @@
+package charset
+
+import (
+ "encoding/binary"
+ "errors"
+ "unicode/utf8"
+)
+
// init registers the "utf16" translator class so config entries with
// Class "utf16" can be instantiated; the class argument selects the
// byte order (see getEndian).
func init() {
	registerClass("utf16", fromUTF16, toUTF16)
}
+
+type translateFromUTF16 struct {
+ first bool
+ endian binary.ByteOrder
+ scratch []byte
+}
+
+func (p *translateFromUTF16) Translate(data []byte, eof bool) (int, []byte, error) {
+ data = data[0 : len(data)&^1] // round to even number of bytes.
+ if len(data) < 2 {
+ return 0, nil, nil
+ }
+ n := 0
+ if p.first && p.endian == nil {
+ switch binary.BigEndian.Uint16(data) {
+ case 0xfeff:
+ p.endian = binary.BigEndian
+ data = data[2:]
+ n += 2
+ case 0xfffe:
+ p.endian = binary.LittleEndian
+ data = data[2:]
+ n += 2
+ default:
+ p.endian = guessEndian(data)
+ }
+ p.first = false
+ }
+
+ p.scratch = p.scratch[:0]
+ for ; len(data) > 0; data = data[2:] {
+ p.scratch = appendRune(p.scratch, rune(p.endian.Uint16(data)))
+ n += 2
+ }
+ return n, p.scratch, nil
+}
+
// guessEndian picks a byte order for BOM-less UTF-16 input.
// XXX TODO: this is a stub that always assumes little-endian; a real
// heuristic could inspect the data (e.g. the distribution of zero bytes).
func guessEndian(data []byte) binary.ByteOrder {
	return binary.LittleEndian
}
+
+type translateToUTF16 struct {
+ first bool
+ endian binary.ByteOrder
+ scratch []byte
+}
+
+func (p *translateToUTF16) Translate(data []byte, eof bool) (int, []byte, error) {
+ p.scratch = ensureCap(p.scratch[:0], (len(data)+1)*2)
+ if p.first {
+ p.scratch = p.scratch[0:2]
+ p.endian.PutUint16(p.scratch, 0xfeff)
+ p.first = false
+ }
+ n := 0
+ for len(data) > 0 {
+ if !utf8.FullRune(data) && !eof {
+ break
+ }
+ r, size := utf8.DecodeRune(data)
+ // TODO if r > 65535?
+
+ slen := len(p.scratch)
+ p.scratch = p.scratch[0 : slen+2]
+ p.endian.PutUint16(p.scratch[slen:], uint16(r))
+ data = data[size:]
+ n += size
+ }
+ return n, p.scratch, nil
+}
+
+func getEndian(arg string) (binary.ByteOrder, error) {
+ switch arg {
+ case "le":
+ return binary.LittleEndian, nil
+ case "be":
+ return binary.BigEndian, nil
+ case "":
+ return nil, nil
+ }
+ return nil, errors.New("charset: unknown utf16 endianness")
+}
+
+func fromUTF16(arg string) (Translator, error) {
+ endian, err := getEndian(arg)
+ if err != nil {
+ return nil, err
+ }
+ return &translateFromUTF16{first: true, endian: endian}, nil
+}
+
// toUTF16 builds a Translator encoding UTF-8 into UTF-16 with the byte
// order named by arg ("le", "be", or "" for unspecified).
//
// NOTE(review): first is set to false, so the BOM-writing branch in
// translateToUTF16.Translate never runs — confirm whether a leading BOM
// is intended for the "to" direction.
// NOTE(review): an empty arg yields a nil endian; Translate must
// tolerate nil, or callers must always pass "le"/"be" — verify.
func toUTF16(arg string) (Translator, error) {
	endian, err := getEndian(arg)
	if err != nil {
		return nil, err
	}
	return &translateToUTF16{first: false, endian: endian}, nil
}
diff --git a/vendor/github.com/paulrosania/go-charset/charset/utf8.go b/vendor/github.com/paulrosania/go-charset/charset/utf8.go
new file mode 100644
index 00000000..23980b33
--- /dev/null
+++ b/vendor/github.com/paulrosania/go-charset/charset/utf8.go
@@ -0,0 +1,51 @@
+package charset
+
+import (
+ "unicode/utf8"
+)
+
// init registers the "utf8" translator class; the same sanitizing
// translator is used for both directions.
func init() {
	registerClass("utf8", toUTF8, toUTF8)
}
+
// translateToUTF8 is a Translator that forces its input into valid
// UTF-8, replacing invalid byte sequences with utf8.RuneError (U+FFFD).
type translateToUTF8 struct {
	scratch []byte // reusable output buffer, valid until the next call
}

var errorBytes = []byte(string(utf8.RuneError))

const errorRuneLen = len(string(utf8.RuneError))

// Translate implements Translator. It returns the number of input bytes
// consumed and the sanitized UTF-8 output; an incomplete rune at the end
// of the chunk is deferred to the next call unless eof is true.
func (p *translateToUTF8) Translate(data []byte, eof bool) (int, []byte, error) {
	// Worst case: every input byte becomes a replacement rune (3 bytes).
	if cap(p.scratch) < len(data)*errorRuneLen {
		p.scratch = make([]byte, 0, len(data)*errorRuneLen)
	}
	buf := p.scratch[:0]
	for i := 0; i < len(data); {
		// fast path for ASCII
		if b := data[i]; b < utf8.RuneSelf {
			buf = append(buf, b)
			i++
			continue
		}
		_, size := utf8.DecodeRune(data[i:])
		if size == 1 {
			// DecodeRune consumed only one non-ASCII byte, so the input
			// at i is either malformed or an incomplete rune.
			// Bug fix: test the remaining input (data[i:]), not the whole
			// chunk — utf8.FullRune(data) looked at the chunk's start and
			// so never detected an incomplete rune at the end.
			if !eof && !utf8.FullRune(data[i:]) {
				// Incomplete encoding and more input may follow:
				// leave the final bytes for a subsequent call.
				return i, buf, nil
			}
			buf = append(buf, errorBytes...)
		} else {
			buf = append(buf, data[i:i+size]...)
		}
		i += size
	}
	return len(data), buf, nil
}
+
+func toUTF8(arg string) (Translator, error) {
+ return new(translateToUTF8), nil
+}