diff options
Diffstat (limited to 'vendor/github.com/mattermost/mattermost-server/v5/utils/markdown/autolink.go')
-rw-r--r-- | vendor/github.com/mattermost/mattermost-server/v5/utils/markdown/autolink.go | 255 |
1 files changed, 0 insertions, 255 deletions
diff --git a/vendor/github.com/mattermost/mattermost-server/v5/utils/markdown/autolink.go b/vendor/github.com/mattermost/mattermost-server/v5/utils/markdown/autolink.go deleted file mode 100644 index d06ada66..00000000 --- a/vendor/github.com/mattermost/mattermost-server/v5/utils/markdown/autolink.go +++ /dev/null @@ -1,255 +0,0 @@ -// Copyright (c) 2015-present Mattermost, Inc. All Rights Reserved. -// See LICENSE.txt for license information. - -package markdown - -import ( - "regexp" - "strings" - "unicode" - "unicode/utf8" -) - -// Based off of extensions/autolink.c from https://github.com/github/cmark - -var ( - DefaultUrlSchemes = []string{"http", "https", "ftp", "mailto", "tel"} - wwwAutoLinkRegex = regexp.MustCompile(`^www\d{0,3}\.`) -) - -// Given a string with a w at the given position, tries to parse and return a range containing a www link. -// if one exists. If the text at the given position isn't a link, returns an empty string. Equivalent to -// www_match from the reference code. -func parseWWWAutolink(data string, position int) (Range, bool) { - // Check that this isn't part of another word - if position > 1 { - prevChar := data[position-1] - - if !isWhitespaceByte(prevChar) && !isAllowedBeforeWWWLink(prevChar) { - return Range{}, false - } - } - - // Check that this starts with www - if len(data)-position < 4 || !wwwAutoLinkRegex.MatchString(data[position:]) { - return Range{}, false - } - - end := checkDomain(data[position:], false) - if end == 0 { - return Range{}, false - } - - end += position - - // Grab all text until the end of the string or the next whitespace character - for end < len(data) && !isWhitespaceByte(data[end]) { - end += 1 - } - - // Trim trailing punctuation - end = trimTrailingCharactersFromLink(data, position, end) - if position == end { - return Range{}, false - } - - return Range{position, end}, true -} - -func isAllowedBeforeWWWLink(c byte) bool { - switch c { - case '*', '_', '~', ')': - return true - } - return false -} - -// Given a string with a : at the given position, tried to parse and return a range containing a URL scheme -// if one exists. If the text around the given position isn't a link, returns an empty string. Equivalent to -// url_match from the reference code. -func parseURLAutolink(data string, position int) (Range, bool) { - // Check that a :// exists. This doesn't match the clients that treat the slashes as optional. - if len(data)-position < 4 || data[position+1] != '/' || data[position+2] != '/' { - return Range{}, false - } - - start := position - 1 - for start > 0 && isAlphanumericByte(data[start-1]) { - start -= 1 - } - - if start < 0 || position >= len(data) { - return Range{}, false - } - - // Ensure that the URL scheme is allowed and that at least one character after the scheme is valid. - scheme := data[start:position] - if !isSchemeAllowed(scheme) || !isValidHostCharacter(data[position+3:]) { - return Range{}, false - } - - end := checkDomain(data[position+3:], true) - if end == 0 { - return Range{}, false - } - - end += position - - // Grab all text until the end of the string or the next whitespace character - for end < len(data) && !isWhitespaceByte(data[end]) { - end += 1 - } - - // Trim trailing punctuation - end = trimTrailingCharactersFromLink(data, start, end) - if start == end { - return Range{}, false - } - - return Range{start, end}, true -} - -func isSchemeAllowed(scheme string) bool { - // Note that this doesn't support the custom URL schemes implemented by the client - for _, allowed := range DefaultUrlSchemes { - if strings.EqualFold(allowed, scheme) { - return true - } - } - - return false -} - -// Given a string starting with a URL, returns the number of valid characters that make up the URL's domain. -// Returns 0 if the string doesn't start with a domain name. allowShort determines whether or not the domain -// needs to contain a period to be considered valid. Equivalent to check_domain from the reference code. -func checkDomain(data string, allowShort bool) int { - foundUnderscore := false - foundPeriod := false - - i := 1 - for ; i < len(data)-1; i++ { - if data[i] == '_' { - foundUnderscore = true - break - } else if data[i] == '.' { - foundPeriod = true - } else if !isValidHostCharacter(data[i:]) && data[i] != '-' { - break - } - } - - if foundUnderscore { - return 0 - } - - if allowShort { - // If allowShort is set, accept any string of valid domain characters - return i - } - - // If allowShort isn't set, a valid domain just requires at least a single period. Note that this - // logic isn't entirely necessary because we already know the string starts with "www." when - // this is called from parseWWWAutolink - if foundPeriod { - return i - } - return 0 -} - -// Returns true if the provided link starts with a valid character for a domain name. Equivalent to -// is_valid_hostchar from the reference code. -func isValidHostCharacter(link string) bool { - c, _ := utf8.DecodeRuneInString(link) - if c == utf8.RuneError { - return false - } - - return !unicode.IsSpace(c) && !unicode.IsPunct(c) -} - -// Removes any trailing characters such as punctuation or stray brackets that shouldn't be part of the link. -// Returns a new end position for the link. Equivalent to autolink_delim from the reference code. -func trimTrailingCharactersFromLink(markdown string, start int, end int) int { - runes := []rune(markdown[start:end]) - linkEnd := len(runes) - - // Cut off the link before an open angle bracket if it contains one - for i, c := range runes { - if c == '<' { - linkEnd = i - break - } - } - - for linkEnd > 0 { - c := runes[linkEnd-1] - - if !canEndAutolink(c) { - // Trim trailing quotes, periods, etc - linkEnd = linkEnd - 1 - } else if c == ';' { - // Trim a trailing HTML entity - newEnd := linkEnd - 2 - - for newEnd > 0 && ((runes[newEnd] >= 'a' && runes[newEnd] <= 'z') || (runes[newEnd] >= 'A' && runes[newEnd] <= 'Z')) { - newEnd -= 1 - } - - if newEnd < linkEnd-2 && runes[newEnd] == '&' { - linkEnd = newEnd - } else { - // This isn't actually an HTML entity, so just trim the semicolon - linkEnd = linkEnd - 1 - } - } else if c == ')' { - // Only allow an autolink ending with a bracket if that bracket is part of a matching pair of brackets. - // If there are more closing brackets than opening ones, remove the extra bracket - - numClosing := 0 - numOpening := 0 - - // Examples (input text => output linked portion): - // - // http://www.pokemon.com/Pikachu_(Electric) - // => http://www.pokemon.com/Pikachu_(Electric) - // - // http://www.pokemon.com/Pikachu_((Electric) - // => http://www.pokemon.com/Pikachu_((Electric) - // - // http://www.pokemon.com/Pikachu_(Electric)) - // => http://www.pokemon.com/Pikachu_(Electric) - // - // http://www.pokemon.com/Pikachu_((Electric)) - // => http://www.pokemon.com/Pikachu_((Electric)) - - for i := 0; i < linkEnd; i++ { - if runes[i] == '(' { - numOpening += 1 - } else if runes[i] == ')' { - numClosing += 1 - } - } - - if numClosing <= numOpening { - // There's fewer or equal closing brackets, so we've found the end of the link - break - } - - linkEnd -= 1 - } else { - // There's no special characters at the end of the link, so we're at the end - break - } - } - - return start + len(string(runes[:linkEnd])) -} - -func canEndAutolink(c rune) bool { - switch c { - case '?', '!', '.', ',', ':', '*', '_', '~', '\'', '"': - return false - } - return true -} |