diff options
author | Wim <wim@42.be> | 2017-07-07 23:34:05 +0200 |
---|---|---|
committer | Wim <wim@42.be> | 2017-07-07 23:34:05 +0200 |
commit | a0938d93869904ebf6d9938485c248b976150fac (patch) | |
tree | a12fad5acdceeec93a28efb600ca62b9fdfb40a5 /vendor/github.com/paulrosania/go-charset/charset/iconv/iconv.go | |
parent | 2338c69d402ad3779f4e2a2f38ac800ceca656b9 (diff) | |
download | matterbridge-msglm-a0938d93869904ebf6d9938485c248b976150fac.tar.gz matterbridge-msglm-a0938d93869904ebf6d9938485c248b976150fac.tar.bz2 matterbridge-msglm-a0938d93869904ebf6d9938485c248b976150fac.zip |
Add go-charset and chardet to vendor
Diffstat (limited to 'vendor/github.com/paulrosania/go-charset/charset/iconv/iconv.go')
-rw-r--r-- | vendor/github.com/paulrosania/go-charset/charset/iconv/iconv.go | 184 |
1 files changed, 184 insertions, 0 deletions
diff --git a/vendor/github.com/paulrosania/go-charset/charset/iconv/iconv.go b/vendor/github.com/paulrosania/go-charset/charset/iconv/iconv.go new file mode 100644 index 00000000..f7187f5f --- /dev/null +++ b/vendor/github.com/paulrosania/go-charset/charset/iconv/iconv.go @@ -0,0 +1,184 @@ +// The iconv package provides an interface to the GNU iconv character set +// conversion library (see http://www.gnu.org/software/libiconv/). +// It automatically registers all the character sets with the charset package, +// so it is usually used simply for the side effects of importing it. +// Example: +// import ( +// "go-charset.googlecode.com/hg/charset" +// _ "go-charset.googlecode.com/hg/charset/iconv" +// ) +package iconv + +//#cgo darwin LDFLAGS: -liconv +//#include <stdlib.h> +//#include <iconv.h> +//#include <errno.h> +//iconv_t iconv_open_error = (iconv_t)-1; +//size_t iconv_error = (size_t)-1; +import "C" +import ( + "errors" + "fmt" + "github.com/paulrosania/go-charset/charset" + "runtime" + "strings" + "syscall" + "unicode/utf8" + "unsafe" +) + +type iconvTranslator struct { + cd C.iconv_t + invalid rune + scratch []byte +} + +func canonicalChar(c rune) rune { + if c >= 'a' && c <= 'z' { + return c - 'a' + 'A' + } + return c +} + +func canonicalName(s string) string { + return strings.Map(canonicalChar, s) +} + +func init() { + charset.Register(iconvFactory{}) +} + +type iconvFactory struct { +} + +func (iconvFactory) TranslatorFrom(name string) (charset.Translator, error) { + return Translator("UTF-8", name, utf8.RuneError) +} + +func (iconvFactory) TranslatorTo(name string) (charset.Translator, error) { + // BUG This is wrong. The target character set may not be ASCII + // compatible. There's no easy solution to this other than + // removing the offending code point. + return Translator(name, "UTF-8", '?') +} + +// Translator returns a Translator that translates between +// the named character sets. When an invalid multibyte +// character is found, the bytes in invalid are substituted instead. +func Translator(toCharset, fromCharset string, invalid rune) (charset.Translator, error) { + cto, cfrom := C.CString(toCharset), C.CString(fromCharset) + cd, err := C.iconv_open(cto, cfrom) + + C.free(unsafe.Pointer(cfrom)) + C.free(unsafe.Pointer(cto)) + + if cd == C.iconv_open_error { + if err == syscall.EINVAL { + return nil, errors.New("iconv: conversion not supported") + } + return nil, err + } + t := &iconvTranslator{cd: cd, invalid: invalid} + runtime.SetFinalizer(t, func(*iconvTranslator) { + C.iconv_close(cd) + }) + return t, nil +} + +func (iconvFactory) Names() []string { + all := aliases() + names := make([]string, 0, len(all)) + for name, aliases := range all { + if aliases[0] == name { + names = append(names, name) + } + } + return names +} + +func (iconvFactory) Info(name string) *charset.Charset { + name = strings.ToLower(name) + all := aliases() + a, ok := all[name] + if !ok { + return nil + } + return &charset.Charset{ + Name: name, + Aliases: a, + } +} + +func (p *iconvTranslator) Translate(data []byte, eof bool) (rn int, rd []byte, rerr error) { + n := 0 + p.scratch = p.scratch[:0] + for len(data) > 0 { + p.scratch = ensureCap(p.scratch, len(p.scratch)+len(data)*utf8.UTFMax) + cData := (*C.char)(unsafe.Pointer(&data[:1][0])) + nData := C.size_t(len(data)) + + ns := len(p.scratch) + cScratch := (*C.char)(unsafe.Pointer(&p.scratch[ns : ns+1][0])) + nScratch := C.size_t(cap(p.scratch) - ns) + r, err := C.iconv(p.cd, &cData, &nData, &cScratch, &nScratch) + + p.scratch = p.scratch[0 : cap(p.scratch)-int(nScratch)] + n += len(data) - int(nData) + data = data[len(data)-int(nData):] + + if r != C.iconv_error || err == nil { + return n, p.scratch, nil + } + switch err := err.(syscall.Errno); err { + case C.EILSEQ: + // invalid multibyte sequence - skip one byte and continue + p.scratch = appendRune(p.scratch, p.invalid) + n++ + data = data[1:] + case C.EINVAL: + // incomplete multibyte sequence + return n, p.scratch, nil + case C.E2BIG: + // output buffer not large enough; try again with larger buffer. + p.scratch = ensureCap(p.scratch, cap(p.scratch)+utf8.UTFMax) + default: + panic(fmt.Sprintf("unexpected error code: %v", err)) + } + } + return n, p.scratch, nil +} + +// ensureCap returns s with a capacity of at least n bytes. +// If cap(s) < n, then it returns a new copy of s with the +// required capacity. +func ensureCap(s []byte, n int) []byte { + if n <= cap(s) { + return s + } + // logic adapted from appendslice1 in runtime + m := cap(s) + if m == 0 { + m = n + } else { + for { + if m < 1024 { + m += m + } else { + m += m / 4 + } + if m >= n { + break + } + } + } + t := make([]byte, len(s), m) + copy(t, s) + return t +} + +func appendRune(buf []byte, r rune) []byte { + n := len(buf) + buf = ensureCap(buf, n+utf8.UTFMax) + nu := utf8.EncodeRune(buf[n:n+utf8.UTFMax], r) + return buf[0 : n+nu] +} |