summaryrefslogblamecommitdiffstats
path: root/vendor/github.com/paulrosania/go-charset/charset/codepage.go
blob: 6864c87532e8e11dae02eb20cfd81c98b04d21ea (plain) (tree)



































































































































                                                                                                        
package charset

import (
	"fmt"
	"unicode/utf8"
)

// init registers the "cp" class by passing its name together with the
// fromCodePage and toCodePage constructors to registerClass.
func init() {
	registerClass("cp", fromCodePage, toCodePage)
}

// translateFromCodePage is the translator returned by fromCodePage. Its
// Translate method maps each input byte through byte2rune (one rune per
// possible byte value) and writes the UTF-8 encoding of the result into
// scratch, which is kept between calls.
type translateFromCodePage struct {
	byte2rune *[256]rune
	scratch   []byte
}

// cpKeyFrom and cpKeyTo are the key types that fromCodePage and toCodePage,
// respectively, wrap the code page name in before passing it to cache.
// NOTE(review): presumably the two distinct types keep the from- and to-tables
// for the same name from sharing a cache entry — confirm against cache.
type cpKeyFrom string
type cpKeyTo string

// Translate converts code page bytes to UTF-8. Each byte of data is looked up
// in p.byte2rune and the resulting rune is appended, UTF-8 encoded, to the
// output.
//
// Every input byte is always consumed, so the returned count is len(data) and
// the returned error is nil; eof is not consulted. The output is built in
// p.scratch, so it is only valid until the next call.
func (p *translateFromCodePage) Translate(data []byte, eof bool) (int, []byte, error) {
	// Reserve room for the worst case, where every byte expands to a
	// maximum-length UTF-8 sequence; this is what lets the loop below encode
	// directly into the spare capacity.
	p.scratch = ensureCap(p.scratch, len(data)*utf8.UTFMax)[:0]
	out := p.scratch
	for _, b := range data {
		if r := p.byte2rune[b]; r < utf8.RuneSelf {
			// Single-byte rune: its UTF-8 encoding is the value itself.
			out = append(out, byte(r))
		} else {
			// Encode into the unused tail of the buffer, then extend the
			// length over the bytes just written.
			end := len(out) + utf8.EncodeRune(out[len(out):cap(out)], r)
			out = out[:end]
		}
	}
	return len(data), out, nil
}

// toCodePageInfo holds the rune-to-byte table that toCodePage builds for a
// code page.
type toCodePageInfo struct {
	// rune2byte maps each rune that lies outside the leading identity range
	// to its byte in the code page.
	rune2byte map[rune]byte
	// same gives the number of runes at the start of the code page that map
	// exactly to Unicode.
	same rune
}

// translateToCodePage is the translator returned by toCodePage. It embeds the
// lookup table for one code page and keeps a scratch buffer that Translate
// reuses for its output.
type translateToCodePage struct {
	toCodePageInfo
	scratch []byte
}

// Translate converts UTF-8 data to code page bytes, one output byte per input
// rune. Runes below p.same are emitted as their own value, other runes are
// looked up in p.rune2byte, and runes with no entry are replaced by '?'.
//
// If data ends in an incomplete UTF-8 sequence and eof is false, translation
// stops before that sequence and the number of bytes consumed so far is
// returned together with the output produced up to that point. Otherwise all
// of data is consumed. The returned error is always nil. The output is built
// in p.scratch, so it is only valid until the next call.
func (p *translateToCodePage) Translate(data []byte, eof bool) (int, []byte, error) {
	// Each rune yields exactly one byte, so len(data) bounds the output.
	p.scratch = ensureCap(p.scratch, len(data))
	out := p.scratch[:0]

	pos := 0
	for pos < len(data) {
		r, width := rune(data[pos]), 1
		if r >= utf8.RuneSelf {
			rest := data[pos:]
			r, width = utf8.DecodeRune(rest)
			// Width 1 for a non-ASCII lead byte means decoding failed. When
			// more input may follow and the tail is merely truncated, stop
			// here instead of emitting a replacement.
			if width == 1 && !eof && !utf8.FullRune(rest) {
				return pos, out, nil
			}
		}

		c := byte('?')
		if r < p.same {
			c = byte(r)
		} else if mapped, ok := p.rune2byte[r]; ok {
			c = mapped
		}
		out = append(out, c)
		pos += width
	}
	return len(data), out, nil
}

// fromCodePage returns a Translator that converts bytes in the code page
// named by arg to UTF-8.
//
// The table is obtained through cache under the key cpKeyFrom(arg). The
// loader reads the file named by arg with readFile and requires it to decode
// to exactly 256 runes, where the rune at index i is the translation of byte
// value i. An error is returned if the file cannot be read or has a different
// rune count.
func fromCodePage(arg string) (Translator, error) {
	load := func() (interface{}, error) {
		data, err := readFile(arg)
		if err != nil {
			return nil, err
		}
		decoded := []rune(string(data))
		if n := len(decoded); n != 256 {
			return nil, fmt.Errorf("charset: %q has wrong rune count (%d)", arg, n)
		}
		var table [256]rune
		copy(table[:], decoded)
		return &table, nil
	}

	cached, err := cache(cpKeyFrom(arg), load)
	if err != nil {
		return nil, err
	}
	return &translateFromCodePage{byte2rune: cached.(*[256]rune)}, nil
}

// toCodePage returns a Translator that converts UTF-8 to bytes in the code
// page named by arg.
//
// The table is obtained through cache under the key cpKeyTo(arg). The loader
// reads the file named by arg with readFile and walks its runes in order,
// treating the rune at index i as the Unicode value of byte i. The leading
// run of runes that equal their own index is recorded only as a count
// (same). Every rune from the first mismatch onward is entered in rune2byte,
// including later ones that happen to equal their index. An error is
// returned if the file cannot be read or does not hold exactly 256 runes.
func toCodePage(arg string) (Translator, error) {
	build := func() (interface{}, error) {
		data, err := readFile(arg)
		if err != nil {
			return nil, err
		}

		table := make(map[rune]byte)
		identity := rune(256) // stays 256 when the whole page is identity
		inPrefix := true
		pos := rune(0)
		for _, r := range string(data) {
			if inPrefix && r != pos {
				// First rune that differs from its index ends the prefix.
				identity = pos
				inPrefix = false
			}
			if !inPrefix {
				table[r] = byte(pos)
			}
			pos++
		}
		// TODO fix tables
		// fmt.Printf("%s, same = %d\n", arg, identity)
		if pos != 256 {
			return nil, fmt.Errorf("charset: %q has wrong rune count (%d)", arg, pos)
		}
		return toCodePageInfo{rune2byte: table, same: identity}, nil
	}

	cached, err := cache(cpKeyTo(arg), build)
	if err != nil {
		return nil, err
	}
	return &translateToCodePage{toCodePageInfo: cached.(toCodePageInfo)}, nil
}