diff options
Diffstat (limited to 'vendor/gitlab.com/golang-commonmark/linkify/linkify.go')
-rw-r--r-- | vendor/gitlab.com/golang-commonmark/linkify/linkify.go | 462 |
1 files changed, 462 insertions, 0 deletions
diff --git a/vendor/gitlab.com/golang-commonmark/linkify/linkify.go b/vendor/gitlab.com/golang-commonmark/linkify/linkify.go new file mode 100644 index 00000000..116d5615 --- /dev/null +++ b/vendor/gitlab.com/golang-commonmark/linkify/linkify.go @@ -0,0 +1,462 @@ +// Copyright 2015 The Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package linkify provides a way to find links in plain text. +package linkify + +import ( + "strings" + "unicode/utf8" +) + +// Link represents a link found in a string with a schema and a position in the string. +type Link struct { + Scheme string + Start, End int +} + +func max(a, b int) int { + if a >= b { + return a + } + return b +} + +// Links returns links found in s. +func Links(s string) (links []Link) { + for i := 0; i < len(s)-2; i++ { + switch s[i] { + case '.': // IP address or domain name + if i == 0 { + continue // . at the start of a line + } + if length := match(s[i+1:]); length > 0 { + pos := i + 1 + length + switch s[pos-1] { + case '.': // IP address + if pos >= len(s) { + continue // . at the end of line + } + if !digit(s[i-1]) { + i = pos + continue // . should be preceded by a digit + } + if !digit(s[pos]) { + i = pos + continue // . should be followed by a digit + } + + // find the start of the IP address + j := i - 2 + m := max(0, j-3) + for j >= m && digit(s[j]) { + j-- + } + if i-2-j > 2 { + i = pos + 1 + continue // at most 3 digits + } + start := 0 + if j >= 0 { + r, rlen := utf8.DecodeLastRuneInString(s[:j+1]) + if !isPunctOrSpaceOrControl(r) { + i = pos + 1 + continue + } + switch r { + case '.', ':', '/', '\\', '-', '_': + i = pos + 1 + continue + } + start = j + 2 - rlen + } + + length, ok := skipIPv4(s[start:]) + if !ok { + i = pos + 1 + continue + } + end := start + length + if end == len(s) { + links = append(links, Link{ + Scheme: "", + Start: start, + End: end, + }) + return + } + + r, _ := utf8.DecodeRuneInString(s[end:]) + if !isPunctOrSpaceOrControl(r) { + continue + } + + end = skipPort(s, end) + end = skipPath(s, end) + end = skipQuery(s, end) + end = skipFragment(s, end) + end = unskipPunct(s, end) + + if end < len(s) { + r, _ = utf8.DecodeRuneInString(s[end:]) + if !isPunctOrSpaceOrControl(r) || r == '%' { + continue + } + } + + links = append(links, Link{ + Scheme: "", + Start: start, + End: end, + }) + i = end + + default: // domain name + r, _ := utf8.DecodeLastRuneInString(s[:i]) + if isPunctOrSpaceOrControl(r) { + continue + } + + if pos == len(s) { + start, ok := findHostnameStart(s, i) + if !ok { + continue + } + links = append(links, Link{ + Scheme: "", + Start: start, + End: pos, + }) + return + } + + if s[i+1:pos] != "xn--" { + r, _ = utf8.DecodeRuneInString(s[pos:]) + if isLetterOrDigit(r) { + continue // should not be followed by a letter or a digit + } + } + + end, dot, ok := findHostnameEnd(s, pos) + if !ok { + continue + } + dot = max(dot, i) + + if !(dot+5 <= len(s) && s[dot+1:dot+5] == "xn--") { + if length := match(s[dot+1:]); dot+length+1 != end { + continue + } + } + + start, ok := findHostnameStart(s, i) + if !ok { + continue + } + + end = skipPort(s, end) + end = skipPath(s, end) + end = skipQuery(s, end) + end = skipFragment(s, end) + end = unskipPunct(s, end) + + if end < len(s) { + r, _ = utf8.DecodeRuneInString(s[end:]) + if !isPunctOrSpaceOrControl(r) || r == '%' { + continue // should be followed by punctuation or space + } + } + + links = append(links, Link{ + Scheme: "", + Start: start, + End: end, + }) + i = end + } + } + + case '/': // schema-less link + if s[i+1] != '/' { + continue + } + + if i > 0 { + if s[i-1] == ':' { + i++ + continue // should not be preceded by a colon + } + r, _ := utf8.DecodeLastRuneInString(s[:i]) + if !isPunctOrSpaceOrControl(r) { + i++ + continue // should be preceded by punctuation or space + } + } + + r, _ := utf8.DecodeRuneInString(s[i+2:]) + if !isLetterOrDigit(r) { + i++ + continue // should be followed by a letter or a digit + } + + start := i + end, dot, ok := findHostnameEnd(s, i+2) + if !ok { + continue + } + if s[i+2:end] != "localhost" { + if dot == -1 { + continue // no dot + } + if length, ok := skipIPv4(s[i+2:]); !ok || i+2+length != end { + if length := match(s[dot+1:]); dot+length+1 != end { + continue + } + } + } + + end = skipPort(s, end) + end = skipPath(s, end) + end = skipQuery(s, end) + end = skipFragment(s, end) + end = unskipPunct(s, end) + + if end < len(s) { + r, _ = utf8.DecodeRuneInString(s[end:]) + if !isPunctOrSpaceOrControl(r) || r == '%' { + continue // should be followed by punctuation or space + } + } + + links = append(links, Link{ + Scheme: "//", + Start: start, + End: end, + }) + i = end + + case ':': // http, https, ftp, mailto or localhost + if i < 3 { // at least ftp: + continue + } + + if i >= 9 && s[i-1] == 't' && s[i-9:i] == "localhost" { + j := i - 9 + if !digit(s[j+10]) { + continue + } + if j > 0 { + r, _ := utf8.DecodeLastRuneInString(s[:j]) + if !isPunctOrSpaceOrControl(r) { + i++ + continue // should be preceded by punctuation or space + } + } + + start := j + pos := j + 9 + end := skipPort(s, pos) + if end == pos { + continue // invalid port + } + end = skipPath(s, end) + end = skipQuery(s, end) + end = skipFragment(s, end) + end = unskipPunct(s, end) + + if end < len(s) { + r, _ := utf8.DecodeRuneInString(s[end:]) + if !isPunctOrSpaceOrControl(r) || r == '%' { + i++ + continue // should be followed by punctuation or space + } + } + + links = append(links, Link{ + Scheme: "", + Start: start, + End: end, + }) + i = end + + break + } + + j := i - 1 + var start int + var schema string + + switch byteToLower(s[j]) { + case 'o': // mailto + if j < 5 { + continue // too short for mailto + } + if len(s)-j < 8 { + continue // insufficient length after + } + if strings.ToLower(s[j-5:j+2]) != "mailto:" { + continue + } + r, _ := utf8.DecodeLastRuneInString(s[:j-5]) + if isLetterOrDigit(r) { + continue // should not be preceded by a letter or a digit + } + r, _ = utf8.DecodeRuneInString(s[j+2:]) + if !isAllowedInEmail(r) { + continue // should be followed by a valid e-mail character + } + + start = j - 5 + end, ok := findEmailEnd(s, j+2) + if !ok { + continue + } + + links = append(links, Link{ + Scheme: "mailto:", + Start: start, + End: end, + }) + i = end + continue // continue processing + + case 'p': // http or ftp + if len(s)-j < 8 { + continue // insufficient length after + } + switch byteToLower(s[j-2]) { + case 'f': + if strings.ToLower(s[j-2:j+4]) != "ftp://" { + continue + } + start = j - 2 + schema = "ftp:" + case 't': + if j < 3 { + continue + } + if strings.ToLower(s[j-3:j+4]) != "http://" { + continue + } + start = j - 3 + schema = "http:" + default: + continue + } + + case 's': // https + if j < 4 { + continue // too short for https + } + if len(s)-j < 8 { + continue // insufficient length after + } + start = j - 4 + if strings.ToLower(s[start:j+4]) != "https://" { + continue + } + schema = "https:" + + default: + continue + } + + // http, https or ftp + + if start > 0 { + r, _ := utf8.DecodeLastRuneInString(s[:start]) + if !isPunctOrSpaceOrControl(r) { + continue // should be preceded by punctuation or space + } + } + + r, _ := utf8.DecodeRuneInString(s[j+4:]) + if isPunctOrSpaceOrControl(r) { + continue + } + + end, dot, ok := findHostnameEnd(s, j+4) + if !ok { + continue + } + if s[j+4:end] != "localhost" { + if dot == -1 { + continue // no dot + } + if length, ok := skipIPv4(s[j+4:]); !ok || j+4+length != end { + if !(dot+5 <= len(s) && s[dot+1:dot+5] == "xn--") { + if length := match(s[dot+1:]); dot+length+1 != end { + continue + } + } + } + } + + end = skipPort(s, end) + end = skipPath(s, end) + end = skipQuery(s, end) + end = skipFragment(s, end) + end = unskipPunct(s, end) + + if end < len(s) { + r, _ = utf8.DecodeRuneInString(s[end:]) + if !isPunctOrSpaceOrControl(r) || r == '%' { + continue // should be followed by punctuation or space + } + } + + links = append(links, Link{ + Scheme: schema, + Start: start, + End: end, + }) + i = end + + case '@': // schema-less e-mail + if i == 0 { + continue // @ at the start of a line + } + + if len(s)-i < 5 { + continue // insufficient length after + } + + r, _ := utf8.DecodeLastRuneInString(s[:i]) + if !isAllowedInEmail(r) { + continue // should be preceded by a valid e-mail character + } + + r, _ = utf8.DecodeRuneInString(s[i+1:]) + if !isLetterOrDigit(r) { + continue // should be followed by a letter or a digit + } + + start, ok := findEmailStart(s, i-1) + if !ok { + continue + } + + end, dot, ok := findHostnameEnd(s, i+1) + if !ok { + continue + } + if dot == -1 { + continue // no dot + } + if !(dot+5 <= len(s) && s[dot+1:dot+5] == "xn--") { + if length := match(s[dot+1:]); dot+length+1 != end { + continue + } + } + + links = append(links, Link{ + Scheme: "mailto:", + Start: start, + End: end, + }) + i = end + } + } + return +} |