summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorWim <wim@42.be>2017-08-29 21:30:59 +0200
committerWim <wim@42.be>2017-08-29 21:31:03 +0200
commite7fcb25107ff6f7676624fb3011e2bc1bede4cd0 (patch)
treeeba216fa1aa9078953e40377d92c15724e6b013b
parent5a85258f7423bc21c707a42c537a565a27f2fbc8 (diff)
downloadmatterbridge-msglm-e7fcb25107ff6f7676624fb3011e2bc1bede4cd0.tar.gz
matterbridge-msglm-e7fcb25107ff6f7676624fb3011e2bc1bede4cd0.tar.bz2
matterbridge-msglm-e7fcb25107ff6f7676624fb3011e2bc1bede4cd0.zip
Add a charset option (irc). Closes #247
-rw-r--r--bridge/config/config.go1
-rw-r--r--bridge/irc/irc.go29
-rw-r--r--matterbridge.toml.sample17
3 files changed, 35 insertions, 12 deletions
diff --git a/bridge/config/config.go b/bridge/config/config.go
index 7e6786a8..c5925a1d 100644
--- a/bridge/config/config.go
+++ b/bridge/config/config.go
@@ -43,6 +43,7 @@ type Protocol struct {
AuthCode string // steam
BindAddress string // mattermost, slack // DEPRECATED
Buffer int // api
+ Charset string // irc
EditSuffix string // mattermost, slack, discord, telegram, gitter
EditDisable bool // mattermost, slack, discord, telegram, gitter
IconURL string // mattermost, slack
diff --git a/bridge/irc/irc.go b/bridge/irc/irc.go
index 36b01b6c..5176389b 100644
--- a/bridge/irc/irc.go
+++ b/bridge/irc/irc.go
@@ -265,20 +265,25 @@ func (b *Birc) handlePrivMsg(event *irc.Event) {
re := regexp.MustCompile(`[[:cntrl:]](\d+,|)\d+`)
msg = re.ReplaceAllString(msg, "")
- // detect what were sending so that we convert it to utf-8
- detector := chardet.NewTextDetector()
- result, err := detector.DetectBest([]byte(msg))
- if err != nil {
- flog.Infof("detection failed for msg: %#v", msg)
- return
- }
- flog.Debugf("detected %s confidence %#v", result.Charset, result.Confidence)
var r io.Reader
- r, err = charset.NewReader(result.Charset, strings.NewReader(msg))
- // if we're not sure, just pick ISO-8859-1
- if result.Confidence < 80 {
- r, err = charset.NewReader("ISO-8859-1", strings.NewReader(msg))
+ var err error
+ mycharset := b.Config.Charset
+ if mycharset == "" {
+ // detect what we're sending so that we convert it to utf-8
+ detector := chardet.NewTextDetector()
+ result, err := detector.DetectBest([]byte(msg))
+ if err != nil {
+ flog.Infof("detection failed for msg: %#v", msg)
+ return
+ }
+ flog.Debugf("detected %s confidence %#v", result.Charset, result.Confidence)
+ r, err = charset.NewReader(result.Charset, strings.NewReader(msg))
+ // if we're not sure, just pick ISO-8859-1
+ if result.Confidence < 80 {
+ mycharset = "ISO-8859-1"
+ }
}
+ r, err = charset.NewReader(mycharset, strings.NewReader(msg))
if err != nil {
flog.Errorf("charset to utf-8 conversion failed: %s", err)
return
diff --git a/matterbridge.toml.sample b/matterbridge.toml.sample
index b84cda68..f38d3b0c 100644
--- a/matterbridge.toml.sample
+++ b/matterbridge.toml.sample
@@ -32,6 +32,23 @@ UseSASL=false
#OPTIONAL (default false)
SkipTLSVerify=true
+#If you know your charset, you can specify it manually.
+#Otherwise it tries to detect this automatically. Select one below
+# "iso-8859-2:1987", "iso-8859-9:1989", "866", "latin9", "iso-8859-10:1992", "iso-ir-109", "hebrew",
+# "cp932", "iso-8859-15", "cp437", "utf-16be", "iso-8859-3:1988", "windows-1251", "utf16", "latin6",
+# "latin3", "iso-8859-1:1987", "iso-8859-9", "utf-16le", "big5", "cp819", "asmo-708", "utf-8",
+# "ibm437", "iso-ir-157", "iso-ir-144", "latin4", "850", "iso-8859-5", "iso-8859-5:1988", "l3",
+# "windows-31j", "utf8", "iso-8859-3", "437", "greek", "iso-8859-8", "l6", "l9-iso-8859-15",
+# "iso-8859-2", "latin2", "iso-ir-100", "iso-8859-6", "arabic", "iso-ir-148", "us-ascii", "x-sjis",
+# "utf16be", "iso-8859-8:1988", "utf16le", "l4", "utf-16", "iso-ir-138", "iso-8859-7", "iso-8859-7:1987",
+# "windows-1252", "l2", "koi8-r", "iso8859-1", "latin1", "ecma-114", "iso-ir-110", "elot-928",
+# "iso-ir-126", "iso-8859-1", "iso-ir-127", "cp850", "cyrillic", "greek8", "windows-1250", "iso-latin-1",
+# "l5", "ibm866", "cp866", "ms-kanji", "ibm850", "ecma-118", "iso-ir-101", "ibm819", "l1", "iso-8859-6:1987",
+# "latin5", "ascii", "sjis", "iso-8859-10", "iso-8859-4", "iso-8859-4:1988", "shift-jis"
+# The selected charset will be converted to utf-8 when sent to other bridges.
+#OPTIONAL (default "")
+Charset=""
+
#Your nick on irc.
#REQUIRED
Nick="matterbot"