summary | refs | log | tree | commit | diff | stats
path: root/vendor/github.com/paulrosania/go-charset/charset/charset.go
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/github.com/paulrosania/go-charset/charset/charset.go')
-rw-r--r-- vendor/github.com/paulrosania/go-charset/charset/charset.go 301
1 files changed, 301 insertions, 0 deletions
diff --git a/vendor/github.com/paulrosania/go-charset/charset/charset.go b/vendor/github.com/paulrosania/go-charset/charset/charset.go
new file mode 100644
index 00000000..6ab6cf89
--- /dev/null
+++ b/vendor/github.com/paulrosania/go-charset/charset/charset.go
@@ -0,0 +1,301 @@
+// Package charset implements translation between character sets.
+// It uses Unicode as the intermediate representation.
+// Because it can be large, the character set data is separated
+// from the charset package. It can be embedded in the Go
+// executable by importing the data package:
+//
+// import _ "github.com/paulrosania/go-charset/data"
+//
+// It can also be made available in a data directory (by setting CharsetDir).
+package charset
+
+import (
+	"io"
+	"sort"
+	"strings"
+	"unicode/utf8"
+)
+
+// Charset describes a single character set: its names, a description,
+// and which translation directions are unavailable for it.
+type Charset struct {
+	// Name is the canonical name of the character set.
+	Name string
+	// Aliases lists the known aliases of the character set.
+	Aliases []string
+	// Desc is a description of the character set.
+	Desc string
+	// NoFrom reports that it is not possible to translate from this charset.
+	NoFrom bool
+	// NoTo reports that it is not possible to translate to this charset.
+	NoTo bool
+}
+
+// Translator is implemented by character set converters.
+//
+// Translate converts data and returns three results: n, the number of
+// bytes of data that were consumed; cdata, a slice holding the converted
+// data; and err, any conversion error. The cdata slice may be overwritten
+// by the next call to Translate, so callers must use or copy it before
+// calling Translate again. A true eof tells the translator that data
+// holds the final bytes of the input.
+type Translator interface {
+	Translate(data []byte, eof bool) (n int, cdata []byte, err error)
+}
+
+// Factory is implemented by sources of character set translators.
+type Factory interface {
+	// TranslatorFrom creates a translator that will translate from the
+	// named character set to UTF-8.
+	TranslatorFrom(name string) (Translator, error)
+
+	// TranslatorTo creates a translator that will translate from UTF-8
+	// to the named character set.
+	TranslatorTo(name string) (Translator, error)
+
+	// Names returns all the character set names accessible through the
+	// factory.
+	Names() []string
+
+	// Info returns information on the named character set. It returns
+	// nil if the factory doesn't recognise the given name.
+	Info(name string) *Charset
+}
+
+// factories lists the registered factories in the order in which they
+// are consulted. It starts out holding only a localFactory.
+var factories = []Factory{
+	localFactory{},
+}
+
+// Register appends factory to the list of factories that is consulted
+// when NewReader or NewWriter needs a character set translator for a
+// given name.
+func Register(factory Factory) {
+	factories = append(factories, factory)
+}
+
+// NewReader wraps r in a Reader that yields UTF-8, translating from the
+// named character set as data is read. It returns an error if no
+// translator from that character set can be obtained.
+func NewReader(charset string, r io.Reader) (io.Reader, error) {
+	translator, err := TranslatorFrom(charset)
+	if err == nil {
+		return NewTranslatingReader(r, translator), nil
+	}
+	return nil, err
+}
+
+// NewWriter wraps w in a WriteCloser that accepts UTF-8 text and writes
+// it to w in the named character set. Close must be called to flush any
+// remaining partially translated characters to the output. It returns an
+// error if no translator to that character set can be obtained.
+func NewWriter(charset string, w io.Writer) (io.WriteCloser, error) {
+	translator, err := TranslatorTo(charset)
+	if err == nil {
+		return NewTranslatingWriter(w, translator), nil
+	}
+	return nil, err
+}
+
+// Info asks each registered factory in turn about the named character
+// set and returns the first non-nil answer. It returns nil if no factory
+// recognises the name.
+func Info(name string) *Charset {
+	for _, factory := range factories {
+		info := factory.Info(name)
+		if info == nil {
+			continue
+		}
+		return info
+	}
+	return nil
+}
+
+// Names returns the names of all character sets reported by the
+// registered factories, in alphabetical order. A name reported by more
+// than one factory appears only once in the result.
+func Names() []string {
+	seen := make(map[string]bool)
+	var names []string
+	for _, f := range factories {
+		for _, name := range f.Names() {
+			if seen[name] {
+				continue
+			}
+			seen[name] = true
+			names = append(names, name)
+		}
+	}
+	// The per-factory lists are simply concatenated above, so sort the
+	// merged list to deliver the documented ordering.
+	sort.Strings(names)
+	return names
+}
+
+// TranslatorFrom returns a translator that will translate from
+// the named character set to UTF-8.
+//
+// The registered factories are tried in order and the first translator
+// obtained without error is returned. If no factory succeeds, the result
+// is a nil Translator together with the error from the last factory
+// consulted.
+func TranslatorFrom(charset string) (Translator, error) {
+	var err error
+	for _, f := range factories {
+		var tr Translator
+		tr, err = f.TranslatorFrom(charset)
+		// Only a non-nil translator with a nil error counts as success;
+		// anything else moves on to the next factory so that a failure
+		// is never reported as (translator, nil).
+		if err == nil && tr != nil {
+			return tr, nil
+		}
+	}
+	return nil, err
+}
+
+// TranslatorTo returns a translator that will translate from UTF-8
+// to the named character set.
+//
+// The registered factories are tried in order and the first translator
+// obtained without error is returned. If no factory succeeds, the result
+// is a nil Translator together with the error from the last factory
+// consulted.
+func TranslatorTo(charset string) (Translator, error) {
+	var err error
+	for _, f := range factories {
+		var tr Translator
+		tr, err = f.TranslatorTo(charset)
+		// Only a non-nil translator with a nil error counts as success;
+		// anything else moves on to the next factory so that a failure
+		// is never reported as (translator, nil).
+		if err == nil && tr != nil {
+			return tr, nil
+		}
+	}
+	return nil, err
+}
+
+// normalizedChar maps one rune of a character set name to its normalized
+// form: 'A' through 'Z' become the matching lower-case letter, '_'
+// becomes '-', and every other rune is returned unchanged.
+func normalizedChar(c rune) rune {
+	if 'A' <= c && c <= 'Z' {
+		return c + ('a' - 'A')
+	}
+	if c == '_' {
+		return '-'
+	}
+	return c
+}
+
+// NormalizedName returns s with all Roman capitals
+// mapped to lower case, and '_' mapped to '-'.
+func NormalizedName(s string) string {
+	normalized := strings.Map(normalizedChar, s)
+	return normalized
+}
+
+// translatingWriter is the WriteCloser returned by NewTranslatingWriter.
+type translatingWriter struct {
+	// w receives the translated output.
+	w io.Writer
+	// tr converts the written bytes.
+	tr Translator
+	// buf holds written data not yet consumed by tr.
+	buf []byte
+}
+
+// NewTranslatingWriter returns a WriteCloser that passes every written
+// byte through tr and writes the translated result to w.
+func NewTranslatingWriter(w io.Writer, tr Translator) io.WriteCloser {
+	tw := new(translatingWriter)
+	tw.w = w
+	tw.tr = tr
+	return tw
+}
+
+// Write translates data, together with any input left unconsumed by
+// earlier calls, and writes the converted bytes to the underlying writer.
+// Input that the translator does not consume is buffered for the next
+// Write or for Close.
+//
+// If the underlying writer fails or accepts fewer bytes than it was
+// given, Write returns 0 and the write error (io.ErrShortWrite for a
+// short write). If the translator reports a conversion error, the
+// converted output is still written and the unconsumed input buffered,
+// and Write returns len(data) together with that error.
+func (w *translatingWriter) Write(data []byte) (rn int, rerr error) {
+	wdata := data
+	if len(w.buf) > 0 {
+		// Leftover input must be translated ahead of the new data.
+		w.buf = append(w.buf, data...)
+		wdata = w.buf
+	}
+	n, cdata, cvterr := w.tr.Translate(wdata, false)
+	if n > 0 {
+		nw, err := w.w.Write(cdata)
+		if err != nil {
+			return 0, err
+		}
+		if nw < len(cdata) {
+			return 0, io.ErrShortWrite
+		}
+	}
+	// Keep whatever the translator did not consume. wdata may alias
+	// w.buf here; the tail is only ever moved towards the front.
+	w.buf = w.buf[:0]
+	if n < len(wdata) {
+		w.buf = append(w.buf, wdata[n:]...)
+	}
+	if cvterr != nil {
+		return len(data), cvterr
+	}
+	return len(data), nil
+}
+
+// Close flushes the writer: it repeatedly passes the buffered,
+// unconsumed input to the translator with eof set and writes whatever
+// the translator produces to the underlying writer. It stops when the
+// translator produces no data or the buffer has been fully consumed.
+//
+// Close returns the first error from the underlying writer,
+// io.ErrShortWrite if the writer accepts fewer bytes than it was given,
+// or the translator's conversion error (after writing the data that
+// accompanied it). It does not close the underlying writer.
+func (w *translatingWriter) Close() error {
+	for {
+		n, data, cvterr := w.tr.Translate(w.buf, true)
+		w.buf = w.buf[n:]
+		if len(data) > 0 {
+			nw, err := w.w.Write(data)
+			if err != nil {
+				return err
+			}
+			if nw < len(data) {
+				return io.ErrShortWrite
+			}
+		}
+		if cvterr != nil {
+			return cvterr
+		}
+		// If the Translator produces no data at EOF, then assume
+		// that it never will; likewise stop once all input is used.
+		if len(data) == 0 || len(w.buf) == 0 {
+			return nil
+		}
+	}
+}
+
+// translatingReader is the Reader returned by NewTranslatingReader.
+type translatingReader struct {
+	// r is the source of untranslated data.
+	r io.Reader
+	// tr converts the data read from r.
+	tr Translator
+	// cdata holds converted data not yet handed to the caller.
+	cdata []byte
+	// rdata holds data read from r that has not been converted yet.
+	rdata []byte
+	// err is the final error from the reader.
+	err error
+}
+
+// NewTranslatingReader returns a Reader that reads from r and passes
+// the data through tr before handing it to the caller.
+func NewTranslatingReader(r io.Reader, tr Translator) io.Reader {
+	tr2 := new(translatingReader)
+	tr2.r = r
+	tr2.tr = tr
+	return tr2
+}
+
+// Read fills buf with translated data. Converted bytes left over from an
+// earlier call are returned first; otherwise more input is read from the
+// underlying reader and passed through the translator until some output
+// is available.
+//
+// Once the underlying reader has returned an error (including io.EOF),
+// the remaining input is translated with eof set, and that error is
+// returned after all converted data has been delivered. A conversion
+// error from the translator is returned the same way: any data converted
+// alongside it is delivered first, the unconverted input is discarded,
+// and the error is reported by every later call.
+//
+// A call with an empty buf returns 0, nil without touching the
+// underlying reader.
+func (r *translatingReader) Read(buf []byte) (int, error) {
+	if len(buf) == 0 {
+		// Without this guard a fresh reader would issue a zero-length
+		// read below, and the resulting (0, nil) would be mistaken for
+		// end of input.
+		return 0, nil
+	}
+	for {
+		if len(r.cdata) > 0 {
+			n := copy(buf, r.cdata)
+			r.cdata = r.cdata[n:]
+			return n, nil
+		}
+		if r.err == nil {
+			r.rdata = ensureCap(r.rdata, len(r.rdata)+len(buf))
+			n, err := r.r.Read(r.rdata[len(r.rdata):cap(r.rdata)])
+			// Guard against non-compliant Readers.
+			if n == 0 && err == nil {
+				err = io.EOF
+			}
+			r.rdata = r.rdata[:len(r.rdata)+n]
+			r.err = err
+		} else if len(r.rdata) == 0 {
+			return 0, r.err
+		}
+		eof := r.err != nil
+		nc, cdata, cvterr := r.tr.Translate(r.rdata, eof)
+		r.cdata = cdata
+		if cvterr != nil {
+			// Make the conversion error final; the next iteration
+			// delivers cdata (if any) and then reports the error.
+			r.err = cvterr
+			r.rdata = r.rdata[:0]
+			continue
+		}
+
+		// Ensure that we consume all bytes at eof
+		// if the converter refuses them.
+		if nc == 0 && eof {
+			nc = len(r.rdata)
+		}
+
+		// Copy unconsumed data to the start of the rdata buffer.
+		r.rdata = r.rdata[:copy(r.rdata, r.rdata[nc:])]
+	}
+}
+
+// ensureCap returns a slice with the contents and length of s and a
+// capacity of at least n bytes. When s is already large enough it is
+// returned as is; otherwise the contents are copied into a newly
+// allocated slice.
+func ensureCap(s []byte, n int) []byte {
+	if cap(s) >= n {
+		return s
+	}
+	// Growth policy adapted from appendslice1 in the runtime: start from
+	// the current capacity (or n itself when there is none), doubling
+	// while below 1024 and growing by a quarter after that.
+	newCap := cap(s)
+	if newCap == 0 {
+		newCap = n
+	}
+	for newCap < n {
+		if newCap < 1024 {
+			newCap *= 2
+		} else {
+			newCap += newCap / 4
+		}
+	}
+	grown := make([]byte, len(s), newCap)
+	copy(grown, s)
+	return grown
+}
+
+// appendRune appends the UTF-8 encoding of r to buf and returns the
+// extended slice. Room for utf8.UTFMax bytes is reserved through
+// ensureCap before encoding.
+func appendRune(buf []byte, r rune) []byte {
+	start := len(buf)
+	end := start + utf8.UTFMax
+	buf = ensureCap(buf, end)
+	width := utf8.EncodeRune(buf[start:end], r)
+	return buf[:start+width]
+}