diff options
Diffstat (limited to 'vendor/github.com/mattermost/mattermost-server/utils/markdown/autolink.go')
-rw-r--r-- | vendor/github.com/mattermost/mattermost-server/utils/markdown/autolink.go | 253 |
1 files changed, 253 insertions, 0 deletions
diff --git a/vendor/github.com/mattermost/mattermost-server/utils/markdown/autolink.go b/vendor/github.com/mattermost/mattermost-server/utils/markdown/autolink.go new file mode 100644 index 00000000..7f7d1117 --- /dev/null +++ b/vendor/github.com/mattermost/mattermost-server/utils/markdown/autolink.go @@ -0,0 +1,253 @@ +// Copyright (c) 2017-present Mattermost, Inc. All Rights Reserved. +// See License.txt for license information. + +package markdown + +import ( + "regexp" + "strings" + "unicode" + "unicode/utf8" +) + +// Based off of extensions/autolink.c from https://github.com/github/cmark + +var ( + DefaultUrlSchemes = []string{"http", "https", "ftp", "mailto", "tel"} +) + +// Given a string with a w at the given position, tries to parse and return a range containing a www link. +// if one exists. If the text at the given position isn't a link, returns an empty string. Equivalent to +// www_match from the reference code. +func parseWWWAutolink(data string, position int) (Range, bool) { + // Check that this isn't part of another word + if position > 1 { + prevChar := data[position-1] + + if !isWhitespaceByte(prevChar) && !isAllowedBeforeWWWLink(prevChar) { + return Range{}, false + } + } + + // Check that this starts with www + if len(data)-position < 4 || !regexp.MustCompile(`^www\d{0,3}\.`).MatchString(data[position:]) { + return Range{}, false + } + + end := checkDomain(data[position:], false) + if end == 0 { + return Range{}, false + } + + end += position + + // Grab all text until the end of the string or the next whitespace character + for end < len(data) && !isWhitespaceByte(data[end]) { + end += 1 + } + + // Trim trailing punctuation + end = trimTrailingCharactersFromLink(data, position, end) + if position == end { + return Range{}, false + } + + return Range{position, end}, true +} + +func isAllowedBeforeWWWLink(c byte) bool { + switch c { + case '*', '_', '~', ')': + return true + default: + return false + } +} + +// Given a string with a : at the 
given position, tried to parse and return a range containing a URL scheme +// if one exists. If the text around the given position isn't a link, returns an empty string. Equivalent to +// url_match from the reference code. +func parseURLAutolink(data string, position int) (Range, bool) { + // Check that a :// exists. This doesn't match the clients that treat the slashes as optional. + if len(data)-position < 4 || data[position+1] != '/' || data[position+2] != '/' { + return Range{}, false + } + + start := position - 1 + for start > 0 && isAlphanumericByte(data[start-1]) { + start -= 1 + } + + // Ensure that the URL scheme is allowed and that at least one character after the scheme is valid. + scheme := data[start:position] + if !isSchemeAllowed(scheme) || !isValidHostCharacter(data[position+3:]) { + return Range{}, false + } + + end := checkDomain(data[position+3:], true) + if end == 0 { + return Range{}, false + } + + end += position + + // Grab all text until the end of the string or the next whitespace character + for end < len(data) && !isWhitespaceByte(data[end]) { + end += 1 + } + + // Trim trailing punctuation + end = trimTrailingCharactersFromLink(data, start, end) + if start == end { + return Range{}, false + } + + return Range{start, end}, true +} + +func isSchemeAllowed(scheme string) bool { + // Note that this doesn't support the custom URL schemes implemented by the client + for _, allowed := range DefaultUrlSchemes { + if strings.EqualFold(allowed, scheme) { + return true + } + } + + return false +} + +// Given a string starting with a URL, returns the number of valid characters that make up the URL's domain. +// Returns 0 if the string doesn't start with a domain name. allowShort determines whether or not the domain +// needs to contain a period to be considered valid. Equivalent to check_domain from the reference code. 
func checkDomain(data string, allowShort bool) int {
	// An empty string cannot start with a domain. Without this guard the function would return 1, an
	// offset past the end of the input.
	if len(data) == 0 {
		return 0
	}

	foundUnderscore := false
	foundPeriod := false

	// The first byte is skipped and the last byte is never examined. The scan advances one byte at a time,
	// so it also stops on the second byte of a multi-byte rune, because isValidHostCharacter fails to
	// decode a rune there.
	i := 1
	for ; i < len(data)-1; i++ {
		if data[i] == '_' {
			foundUnderscore = true
			break
		} else if data[i] == '.' {
			foundPeriod = true
		} else if !isValidHostCharacter(data[i:]) && data[i] != '-' {
			break
		}
	}

	// An underscore anywhere in the scanned part rejects the domain
	if foundUnderscore {
		return 0
	}

	if allowShort {
		// If allowShort is set, accept any string of valid domain characters
		return i
	}

	// If allowShort isn't set, a valid domain just requires at least a single period. Note that this
	// logic isn't entirely necessary because we already know the string starts with "www." when
	// this is called from parseWWWAutolink
	if foundPeriod {
		return i
	}

	return 0
}

// Returns true if the provided link starts with a valid character for a domain name, meaning a rune that
// decodes successfully and is neither whitespace nor punctuation. Returns false for an empty string.
// Equivalent to is_valid_hostchar from the reference code.
func isValidHostCharacter(link string) bool {
	// Only the rune matters here; the decoded width is not needed
	c, _ := utf8.DecodeRuneInString(link)
	if c == utf8.RuneError {
		return false
	}

	return !unicode.IsSpace(c) && !unicode.IsPunct(c)
}

// Removes any trailing characters such as punctuation or stray brackets that shouldn't be part of the link.
// Returns a new end position for the link. Equivalent to autolink_delim from the reference code.
func trimTrailingCharactersFromLink(markdown string, start int, end int) int {
	// Every character inspected below is ASCII, and ASCII bytes never occur inside a multi-byte UTF-8
	// sequence, so the link can be processed byte by byte. This keeps the result a true byte offset even
	// when the text contains invalid UTF-8, which a round trip through []rune would expand.
	link := markdown[start:end]
	linkEnd := len(link)

	// Cut off the link before an open angle bracket if it contains one
	if i := strings.IndexByte(link, '<'); i >= 0 {
		linkEnd = i
	}

	// Count the brackets once. The only bracket the loop below ever removes is a trailing ')', so the
	// counts stay accurate by decrementing numClosing whenever that happens.
	numOpening := strings.Count(link[:linkEnd], "(")
	numClosing := strings.Count(link[:linkEnd], ")")

	for linkEnd > 0 {
		c := link[linkEnd-1]

		switch {
		case !canEndAutolink(rune(c)):
			// Trim trailing quotes, periods, etc
			linkEnd--

		case c == ';':
			// Trim a trailing HTML entity: walk back over the letters before the semicolon and check
			// that they are preceded by an ampersand
			newEnd := linkEnd - 2
			for newEnd > 0 && ((link[newEnd] >= 'a' && link[newEnd] <= 'z') || (link[newEnd] >= 'A' && link[newEnd] <= 'Z')) {
				newEnd--
			}

			if newEnd < linkEnd-2 && link[newEnd] == '&' {
				linkEnd = newEnd
			} else {
				// This isn't actually an HTML entity, so just trim the semicolon
				linkEnd--
			}

		case c == ')':
			// Only allow an autolink ending with a bracket if that bracket is part of a matching pair of
			// brackets. If there are more closing brackets than opening ones, remove the extra bracket
			//
			// Examples (input text => output linked portion):
			//
			//  http://www.pokemon.com/Pikachu_(Electric)
			//    => http://www.pokemon.com/Pikachu_(Electric)
			//
			//  http://www.pokemon.com/Pikachu_((Electric)
			//    => http://www.pokemon.com/Pikachu_((Electric)
			//
			//  http://www.pokemon.com/Pikachu_(Electric))
			//    => http://www.pokemon.com/Pikachu_(Electric)
			//
			//  http://www.pokemon.com/Pikachu_((Electric))
			//    => http://www.pokemon.com/Pikachu_((Electric))
			if numClosing <= numOpening {
				// There's fewer or equal closing brackets, so we've found the end of the link
				return start + linkEnd
			}

			numClosing--
			linkEnd--

		default:
			// There's no special characters at the end of the link, so we're at the end
			return start + linkEnd
		}
	}

	return start + linkEnd
}

// Returns false for the punctuation characters that are trimmed from the end of an autolink, and true for
// every other character.
func canEndAutolink(c rune) bool {
	switch c {
	case '?', '!', '.', ',', ':', '*', '_', '~', '\'', '"':
		return false
	default:
		return true
	}
}