From f0f801402d95b1f49018fa9af77c341ff4c4de5f Mon Sep 17 00:00:00 2001 From: Wim Date: Sat, 19 Mar 2022 23:14:56 +0100 Subject: Refactor utf-8 conversion (irc) (#1767) --- bridge/irc/charset.go | 32 ++++++++++++++++++++++++++++++++ bridge/irc/handlers.go | 7 +++---- 2 files changed, 35 insertions(+), 4 deletions(-) create mode 100644 bridge/irc/charset.go (limited to 'bridge') diff --git a/bridge/irc/charset.go b/bridge/irc/charset.go new file mode 100644 index 00000000..57872ec9 --- /dev/null +++ b/bridge/irc/charset.go @@ -0,0 +1,32 @@ +package birc + +import ( + "golang.org/x/text/encoding" + "golang.org/x/text/encoding/japanese" + "golang.org/x/text/encoding/korean" + "golang.org/x/text/encoding/simplifiedchinese" + "golang.org/x/text/encoding/traditionalchinese" + "golang.org/x/text/encoding/unicode" +) + +var encoders = map[string]encoding.Encoding{ + "utf-8": unicode.UTF8, + "iso-2022-jp": japanese.ISO2022JP, + "big5": traditionalchinese.Big5, + "gbk": simplifiedchinese.GBK, + "euc-kr": korean.EUCKR, + "gb2312": simplifiedchinese.HZGB2312, + "shift-jis": japanese.ShiftJIS, + "euc-jp": japanese.EUCJP, + "gb18030": simplifiedchinese.GB18030, +} + +func toUTF8(from string, input string) string { + enc, ok := encoders[from] + if !ok { + return input + } + + res, _ := enc.NewDecoder().String(input) + return res +} diff --git a/bridge/irc/handlers.go b/bridge/irc/handlers.go index b90fa3af..987df2c5 100644 --- a/bridge/irc/handlers.go +++ b/bridge/irc/handlers.go @@ -11,7 +11,6 @@ import ( "github.com/42wim/matterbridge/bridge/config" "github.com/42wim/matterbridge/bridge/helper" "github.com/lrstanley/girc" - "github.com/missdeer/golib/ic" "github.com/paulrosania/go-charset/charset" "github.com/saintfish/chardet" @@ -24,12 +23,12 @@ func (b *Birc) handleCharset(msg *config.Message) error { if b.GetString("Charset") != "" { switch b.GetString("Charset") { case "gbk", "gb18030", "gb2312", "big5", "euc-kr", "euc-jp", "shift-jis", "iso-2022-jp": - msg.Text = ic.ConvertString("utf-8", b.GetString("Charset"), msg.Text) + msg.Text = toUTF8(b.GetString("Charset"), msg.Text) default: buf := new(bytes.Buffer) w, err := charset.NewWriter(b.GetString("Charset"), buf) if err != nil { - b.Log.Errorf("charset from utf-8 conversion failed: %s", err) + b.Log.Errorf("charset to utf-8 conversion failed: %s", err) return err } fmt.Fprint(w, msg.Text) @@ -227,7 +226,7 @@ func (b *Birc) handlePrivMsg(client *girc.Client, event girc.Event) { } switch mycharset { case "gbk", "gb18030", "gb2312", "big5", "euc-kr", "euc-jp", "shift-jis", "iso-2022-jp": - rmsg.Text = ic.ConvertString("utf-8", b.GetString("Charset"), rmsg.Text) + rmsg.Text = toUTF8(b.GetString("Charset"), rmsg.Text) default: r, err := charset.NewReader(mycharset, strings.NewReader(rmsg.Text)) if err != nil { -- cgit v1.2.3