From 85564a35fd507fca986065c38262a71764ed41a7 Mon Sep 17 00:00:00 2001 From: Duco van Amstel Date: Wed, 14 Nov 2018 21:43:52 +0000 Subject: Fix IRC line splitting. Closes #584 (#587) --- bridge/config/config.go | 3 -- bridge/helper/helper.go | 42 +++++++++++++---- bridge/helper/helper_test.go | 105 +++++++++++++++++++++++++++++++++++++++++++ bridge/irc/helper.go | 61 ------------------------- bridge/irc/irc.go | 38 ++++++++-------- 5 files changed, 158 insertions(+), 91 deletions(-) create mode 100644 bridge/helper/helper_test.go delete mode 100644 bridge/irc/helper.go diff --git a/bridge/config/config.go b/bridge/config/config.go index 258401db..74f7f531 100644 --- a/bridge/config/config.go +++ b/bridge/config/config.go @@ -2,9 +2,7 @@ package config import ( "bytes" - "fmt" "io/ioutil" - "os" "strings" "sync" "time" @@ -316,7 +314,6 @@ type TestConfig struct { func (c *TestConfig) GetBool(key string) (bool, bool) { val, ok := c.Overrides[key] - fmt.Fprintln(os.Stderr, "DEBUG:", c.Overrides, key, ok, val) if ok { return val.(bool), true } diff --git a/bridge/helper/helper.go b/bridge/helper/helper.go index dab2bb47..bd5e140e 100644 --- a/bridge/helper/helper.go +++ b/bridge/helper/helper.go @@ -40,16 +40,42 @@ func DownloadFileAuth(url string, auth string) (*[]byte, error) { return &data, nil } -func SplitStringLength(input string, length int) string { - a := []rune(input) - str := "" - for i, r := range a { - str += string(r) - if i > 0 && (i+1)%length == 0 { - str += "\n" +// GetSubLines splits messages in newline-delimited lines. If maxLineLength is +// specified as non-zero GetSubLines will and also clip long lines to the +// maximum length and insert a warning marker that the line was clipped. +// +// TODO: The current implementation has the inconvenient that it disregards +// word boundaries when splitting but this is hard to solve without potentially +// breaking formatting and other stylistic effects. +func GetSubLines(message string, maxLineLength int) []string { + const clippingMessage = " " + + var lines []string + for _, line := range strings.Split(strings.TrimSpace(message), "\n") { + if maxLineLength == 0 || len([]byte(line)) <= maxLineLength { + lines = append(lines, line) + continue } + + // !!! WARNING !!! + // Before touching the splitting logic below please ensure that you PROPERLY + // understand how strings, runes and range loops over strings work in Go. + // A good place to start is to read https://blog.golang.org/strings. :-) + var splitStart int + var startOfPreviousRune int + for i := range line { + if i-splitStart > maxLineLength-len([]byte(clippingMessage)) { + lines = append(lines, line[splitStart:startOfPreviousRune]+clippingMessage) + splitStart = startOfPreviousRune + } + startOfPreviousRune = i + } + // This last append is safe to do without looking at the remaining byte-length + // as we assume that the byte-length of the last rune will never exceed that of + // the byte-length of the clipping message. + lines = append(lines, line[splitStart:]) } - return str + return lines } // handle all the stuff we put into extra diff --git a/bridge/helper/helper_test.go b/bridge/helper/helper_test.go new file mode 100644 index 00000000..1770acd9 --- /dev/null +++ b/bridge/helper/helper_test.go @@ -0,0 +1,105 @@ +package helper + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +const testLineLength = 64 + +var ( + lineSplittingTestCases = map[string]struct { + input string + splitOutput []string + nonSplitOutput []string + }{ + "Short single-line message": { + input: "short", + splitOutput: []string{"short"}, + nonSplitOutput: []string{"short"}, + }, + "Long single-line message": { + input: "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.", + splitOutput: []string{ + "Lorem ipsum dolor sit amet, consectetur adipis ", + "cing elit, sed do eiusmod tempor incididunt ut ", + " labore et dolore magna aliqua.", + }, + nonSplitOutput: []string{"Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua."}, + }, + "Short multi-line message": { + input: "I\ncan't\nget\nno\nsatisfaction!", + splitOutput: []string{ + "I", + "can't", + "get", + "no", + "satisfaction!", + }, + nonSplitOutput: []string{ + "I", + "can't", + "get", + "no", + "satisfaction!", + }, + }, + "Long multi-line message": { + input: "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.\n" + + "Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.\n" + + "Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur.\n" + + "Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.", + splitOutput: []string{ + "Lorem ipsum dolor sit amet, consectetur adipis ", + "cing elit, sed do eiusmod tempor incididunt ut ", + " labore et dolore magna aliqua.", + "Ut enim ad minim veniam, quis nostrud exercita ", + "tion ullamco laboris nisi ut aliquip ex ea com ", + "modo consequat.", + "Duis aute irure dolor in reprehenderit in volu ", + "ptate velit esse cillum dolore eu fugiat nulla ", + " pariatur.", + "Excepteur sint occaecat cupidatat non proident ", + ", sunt in culpa qui officia deserunt mollit an ", + "im id est laborum.", + }, + nonSplitOutput: []string{ + "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.", + "Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.", + "Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur.", + "Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.", + }, + }, + "Message ending with new-line.": { + input: "Newline ending\n", + splitOutput: []string{"Newline ending"}, + nonSplitOutput: []string{"Newline ending"}, + }, + "Long message containing UTF-8 multi-byte runes": { + input: "不布人個我此而及單石業喜資富下我河下日沒一我臺空達的常景便物沒為……子大我別名解成?生賣的全直黑,我自我結毛分洲了世當,是政福那是東;斯說", + splitOutput: []string{ + "不布人個我此而及單石業喜資富下 ", + "我河下日沒一我臺空達的常景便物 ", + "沒為……子大我別名解成?生賣的 ", + "全直黑,我自我結毛分洲了世當, ", + "是政福那是東;斯說", + }, + nonSplitOutput: []string{"不布人個我此而及單石業喜資富下我河下日沒一我臺空達的常景便物沒為……子大我別名解成?生賣的全直黑,我自我結毛分洲了世當,是政福那是東;斯說"}, + }, + } +) + +func TestGetSubLines(t *testing.T) { + for testname, testcase := range lineSplittingTestCases { + splitLines := GetSubLines(testcase.input, testLineLength) + assert.Equalf(t, testcase.splitOutput, splitLines, "'%s' testcase should give expected lines with splitting.", testname) + for _, splitLine := range splitLines { + byteLength := len([]byte(splitLine)) + assert.True(t, byteLength <= testLineLength, "Splitted line '%s' of testcase '%s' should not exceed the maximum byte-length (%d vs. %d).", splitLine, testcase, byteLength, testLineLength) + } + + nonSplitLines := GetSubLines(testcase.input, 0) + assert.Equalf(t, testcase.nonSplitOutput, nonSplitLines, "'%s' testcase should give expected lines without splitting.", testname) + } +} diff --git a/bridge/irc/helper.go b/bridge/irc/helper.go deleted file mode 100644 index 31382aa4..00000000 --- a/bridge/irc/helper.go +++ /dev/null @@ -1,61 +0,0 @@ -package birc - -import ( - "strings" -) - -/* -func tableformatter(nicks []string, nicksPerRow int, continued bool) string { - result := "|IRC users" - if continued { - result = "|(continued)" - } - for i := 0; i < 2; i++ { - for j := 1; j <= nicksPerRow && j <= len(nicks); j++ { - if i == 0 { - result += "|" - } else { - result += ":-|" - } - } - result += "\r\n|" - } - result += nicks[0] + "|" - for i := 1; i < len(nicks); i++ { - if i%nicksPerRow == 0 { - result += "\r\n|" + nicks[i] + "|" - } else { - result += nicks[i] + "|" - } - } - return result -} -*/ - -func plainformatter(nicks []string) string { - return strings.Join(nicks, ", ") + " currently on IRC" -} - -func IsMarkup(message string) bool { - switch message[0] { - case '|': - fallthrough - case '#': - fallthrough - case '_': - fallthrough - case '*': - fallthrough - case '~': - fallthrough - case '-': - fallthrough - case ':': - fallthrough - case '>': - fallthrough - case '=': - return true - } - return false -} diff --git a/bridge/irc/irc.go b/bridge/irc/irc.go index 0702575b..e14fa9e0 100644 --- a/bridge/irc/irc.go +++ b/bridge/irc/irc.go @@ -13,7 +13,6 @@ import ( "strconv" "strings" "time" - "unicode/utf8" "github.com/42wim/matterbridge/bridge" "github.com/42wim/matterbridge/bridge/config" @@ -21,8 +20,11 @@ import ( "github.com/dfordsoft/golib/ic" "github.com/lrstanley/girc" "github.com/paulrosania/go-charset/charset" - _ "github.com/paulrosania/go-charset/data" "github.com/saintfish/chardet" + + // We need to import the 'data' package as an implicit dependency. + // See: https://godoc.org/github.com/paulrosania/go-charset/charset + _ "github.com/paulrosania/go-charset/data" ) type Birc struct { @@ -222,25 +224,23 @@ func (b *Birc) Send(msg config.Message) (string, error) { } } - // split long messages on messageLength, to avoid clipped messages #281 + var msgLines []string if b.GetBool("MessageSplit") { - msg.Text = helper.SplitStringLength(msg.Text, b.MessageLength) + msgLines = helper.GetSubLines(msg.Text, b.MessageLength) + } else { + msgLines = helper.GetSubLines(msg.Text, 0) } - for _, text := range strings.Split(msg.Text, "\n") { - if len(text) > b.MessageLength { - text = text[:b.MessageLength-len(" ")] - if r, size := utf8.DecodeLastRuneInString(text); r == utf8.RuneError { - text = text[:len(text)-size] - } - text += " " - } - if len(b.Local) < b.MessageQueue { - if len(b.Local) == b.MessageQueue-1 { - text += " " - } - b.Local <- config.Message{Text: text, Username: msg.Username, Channel: msg.Channel, Event: msg.Event} - } else { + for i := range msgLines { + if len(b.Local) >= b.MessageQueue { b.Log.Debugf("flooding, dropping message (queue at %d)", len(b.Local)) + return "", nil + } + + b.Local <- config.Message{ + Text: msgLines[i], + Username: msg.Username, + Channel: msg.Channel, + Event: msg.Event, } } return "", nil @@ -462,5 +462,5 @@ func (b *Birc) storeNames(client *girc.Client, event girc.Event) { } func (b *Birc) formatnicks(nicks []string) string { - return plainformatter(nicks) + return strings.Join(nicks, ", ") + " currently on IRC" } -- cgit v1.2.3