diff options
author | Duco van Amstel <duco.vanamstel@gmail.com> | 2018-11-14 21:43:52 +0000 |
---|---|---|
committer | Wim <wim@42.be> | 2018-11-14 22:43:52 +0100 |
commit | 85564a35fd507fca986065c38262a71764ed41a7 (patch) | |
tree | de3f51e4ec07b3be3b03197acfbbf31f3d69cb74 /bridge/helper | |
parent | 09713d40ba8b78671964efe83cdd1da124d44114 (diff) | |
download | matterbridge-msglm-85564a35fd507fca986065c38262a71764ed41a7.tar.gz matterbridge-msglm-85564a35fd507fca986065c38262a71764ed41a7.tar.bz2 matterbridge-msglm-85564a35fd507fca986065c38262a71764ed41a7.zip |
Fix IRC line splitting. Closes #584 (#587)
Diffstat (limited to 'bridge/helper')
-rw-r--r-- | bridge/helper/helper.go | 42 | ||||
-rw-r--r-- | bridge/helper/helper_test.go | 105 |
2 files changed, 139 insertions, 8 deletions
diff --git a/bridge/helper/helper.go b/bridge/helper/helper.go index dab2bb47..bd5e140e 100644 --- a/bridge/helper/helper.go +++ b/bridge/helper/helper.go @@ -40,16 +40,42 @@ func DownloadFileAuth(url string, auth string) (*[]byte, error) { return &data, nil } -func SplitStringLength(input string, length int) string { - a := []rune(input) - str := "" - for i, r := range a { - str += string(r) - if i > 0 && (i+1)%length == 0 { - str += "\n" +// GetSubLines splits messages in newline-delimited lines. If maxLineLength is +// specified as non-zero GetSubLines will and also clip long lines to the +// maximum length and insert a warning marker that the line was clipped. +// +// TODO: The current implementation has the inconvenient that it disregards +// word boundaries when splitting but this is hard to solve without potentially +// breaking formatting and other stylistic effects. +func GetSubLines(message string, maxLineLength int) []string { + const clippingMessage = " <clipped message>" + + var lines []string + for _, line := range strings.Split(strings.TrimSpace(message), "\n") { + if maxLineLength == 0 || len([]byte(line)) <= maxLineLength { + lines = append(lines, line) + continue } + + // !!! WARNING !!! + // Before touching the splitting logic below please ensure that you PROPERLY + // understand how strings, runes and range loops over strings work in Go. + // A good place to start is to read https://blog.golang.org/strings. :-) + var splitStart int + var startOfPreviousRune int + for i := range line { + if i-splitStart > maxLineLength-len([]byte(clippingMessage)) { + lines = append(lines, line[splitStart:startOfPreviousRune]+clippingMessage) + splitStart = startOfPreviousRune + } + startOfPreviousRune = i + } + // This last append is safe to do without looking at the remaining byte-length + // as we assume that the byte-length of the last rune will never exceed that of + // the byte-length of the clipping message. + lines = append(lines, line[splitStart:]) } - return str + return lines } // handle all the stuff we put into extra diff --git a/bridge/helper/helper_test.go b/bridge/helper/helper_test.go new file mode 100644 index 00000000..1770acd9 --- /dev/null +++ b/bridge/helper/helper_test.go @@ -0,0 +1,105 @@ +package helper + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +const testLineLength = 64 + +var ( + lineSplittingTestCases = map[string]struct { + input string + splitOutput []string + nonSplitOutput []string + }{ + "Short single-line message": { + input: "short", + splitOutput: []string{"short"}, + nonSplitOutput: []string{"short"}, + }, + "Long single-line message": { + input: "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.", + splitOutput: []string{ + "Lorem ipsum dolor sit amet, consectetur adipis <clipped message>", + "cing elit, sed do eiusmod tempor incididunt ut <clipped message>", + " labore et dolore magna aliqua.", + }, + nonSplitOutput: []string{"Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua."}, + }, + "Short multi-line message": { + input: "I\ncan't\nget\nno\nsatisfaction!", + splitOutput: []string{ + "I", + "can't", + "get", + "no", + "satisfaction!", + }, + nonSplitOutput: []string{ + "I", + "can't", + "get", + "no", + "satisfaction!", + }, + }, + "Long multi-line message": { + input: "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.\n" + + "Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.\n" + + "Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur.\n" + + "Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.", + splitOutput: []string{ + "Lorem ipsum dolor sit amet, consectetur adipis <clipped message>", + "cing elit, sed do eiusmod tempor incididunt ut <clipped message>", + " labore et dolore magna aliqua.", + "Ut enim ad minim veniam, quis nostrud exercita <clipped message>", + "tion ullamco laboris nisi ut aliquip ex ea com <clipped message>", + "modo consequat.", + "Duis aute irure dolor in reprehenderit in volu <clipped message>", + "ptate velit esse cillum dolore eu fugiat nulla <clipped message>", + " pariatur.", + "Excepteur sint occaecat cupidatat non proident <clipped message>", + ", sunt in culpa qui officia deserunt mollit an <clipped message>", + "im id est laborum.", + }, + nonSplitOutput: []string{ + "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.", + "Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.", + "Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur.", + "Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.", + }, + }, + "Message ending with new-line.": { + input: "Newline ending\n", + splitOutput: []string{"Newline ending"}, + nonSplitOutput: []string{"Newline ending"}, + }, + "Long message containing UTF-8 multi-byte runes": { + input: "不布人個我此而及單石業喜資富下我河下日沒一我臺空達的常景便物沒為……子大我別名解成?生賣的全直黑,我自我結毛分洲了世當,是政福那是東;斯說", + splitOutput: []string{ + "不布人個我此而及單石業喜資富下 <clipped message>", + "我河下日沒一我臺空達的常景便物 <clipped message>", + "沒為……子大我別名解成?生賣的 <clipped message>", + "全直黑,我自我結毛分洲了世當, <clipped message>", + "是政福那是東;斯說", + }, + nonSplitOutput: []string{"不布人個我此而及單石業喜資富下我河下日沒一我臺空達的常景便物沒為……子大我別名解成?生賣的全直黑,我自我結毛分洲了世當,是政福那是東;斯說"}, + }, + } +) + +func TestGetSubLines(t *testing.T) { + for testname, testcase := range lineSplittingTestCases { + splitLines := GetSubLines(testcase.input, testLineLength) + assert.Equalf(t, testcase.splitOutput, splitLines, "'%s' testcase should give expected lines with splitting.", testname) + for _, splitLine := range splitLines { + byteLength := len([]byte(splitLine)) + assert.True(t, byteLength <= testLineLength, "Splitted line '%s' of testcase '%s' should not exceed the maximum byte-length (%d vs. %d).", splitLine, testcase, byteLength, testLineLength) + } + + nonSplitLines := GetSubLines(testcase.input, 0) + assert.Equalf(t, testcase.nonSplitOutput, nonSplitLines, "'%s' testcase should give expected lines without splitting.", testname) + } +} |