diff options
Diffstat (limited to 'vendor/github.com/mattermost/mattermost-server/utils/markdown/inlines.go')
-rw-r--r-- | vendor/github.com/mattermost/mattermost-server/utils/markdown/inlines.go | 664 |
1 files changed, 664 insertions, 0 deletions
diff --git a/vendor/github.com/mattermost/mattermost-server/utils/markdown/inlines.go b/vendor/github.com/mattermost/mattermost-server/utils/markdown/inlines.go new file mode 100644 index 00000000..a3abccef --- /dev/null +++ b/vendor/github.com/mattermost/mattermost-server/utils/markdown/inlines.go @@ -0,0 +1,664 @@ +// Copyright (c) 2017-present Mattermost, Inc. All Rights Reserved. +// See License.txt for license information. + +package markdown + +import ( + "container/list" + "strings" + "unicode" + "unicode/utf8" +) + +type Inline interface { + IsInline() bool +} + +type inlineBase struct{} + +func (inlineBase) IsInline() bool { return true } + +type Text struct { + inlineBase + + Text string + Range Range +} + +type CodeSpan struct { + inlineBase + + Code string +} + +type HardLineBreak struct { + inlineBase +} + +type SoftLineBreak struct { + inlineBase +} + +type InlineLinkOrImage struct { + inlineBase + + Children []Inline + + RawDestination Range + + markdown string + rawTitle string +} + +func (i *InlineLinkOrImage) Destination() string { + return Unescape(i.markdown[i.RawDestination.Position:i.RawDestination.End]) +} + +func (i *InlineLinkOrImage) Title() string { + return Unescape(i.rawTitle) +} + +type InlineLink struct { + InlineLinkOrImage +} + +type InlineImage struct { + InlineLinkOrImage +} + +type ReferenceLinkOrImage struct { + inlineBase + *ReferenceDefinition + + Children []Inline +} + +type ReferenceLink struct { + ReferenceLinkOrImage +} + +type ReferenceImage struct { + ReferenceLinkOrImage +} + +type Autolink struct { + inlineBase + + Children []Inline + + RawDestination Range + + markdown string +} + +func (i *Autolink) Destination() string { + destination := Unescape(i.markdown[i.RawDestination.Position:i.RawDestination.End]) + + if strings.HasPrefix(destination, "www") { + destination = "http://" + destination + } + + return destination +} + +type delimiterType int + +const ( + linkOpeningDelimiter delimiterType = iota + imageOpeningDelimiter +) + +type delimiter struct { + Type delimiterType + IsInactive bool + TextNode int + Range Range +} + +type inlineParser struct { + markdown string + ranges []Range + referenceDefinitions []*ReferenceDefinition + + raw string + position int + inlines []Inline + delimiterStack *list.List +} + +func newInlineParser(markdown string, ranges []Range, referenceDefinitions []*ReferenceDefinition) *inlineParser { + return &inlineParser{ + markdown: markdown, + ranges: ranges, + referenceDefinitions: referenceDefinitions, + delimiterStack: list.New(), + } +} + +func (p *inlineParser) parseBackticks() { + count := 1 + for i := p.position + 1; i < len(p.raw) && p.raw[i] == '`'; i++ { + count++ + } + opening := p.raw[p.position : p.position+count] + search := p.position + count + for search < len(p.raw) { + end := strings.Index(p.raw[search:], opening) + if end == -1 { + break + } + if search+end+count < len(p.raw) && p.raw[search+end+count] == '`' { + search += end + count + for search < len(p.raw) && p.raw[search] == '`' { + search++ + } + continue + } + code := strings.Join(strings.Fields(p.raw[p.position+count:search+end]), " ") + p.position = search + end + count + p.inlines = append(p.inlines, &CodeSpan{ + Code: code, + }) + return + } + p.position += len(opening) + absPos := relativeToAbsolutePosition(p.ranges, p.position-len(opening)) + p.inlines = append(p.inlines, &Text{ + Text: opening, + Range: Range{absPos, absPos + len(opening)}, + }) +} + +func (p *inlineParser) parseLineEnding() { + if p.position >= 1 && p.raw[p.position-1] == '\t' { + p.inlines = append(p.inlines, &HardLineBreak{}) + } else if p.position >= 2 && p.raw[p.position-1] == ' ' && (p.raw[p.position-2] == '\t' || p.raw[p.position-1] == ' ') { + p.inlines = append(p.inlines, &HardLineBreak{}) + } else { + p.inlines = append(p.inlines, &SoftLineBreak{}) + } + p.position++ + if p.position < len(p.raw) && p.raw[p.position] == '\n' { + p.position++ + } +} + +func (p *inlineParser) parseEscapeCharacter() { + if p.position+1 < len(p.raw) && isEscapableByte(p.raw[p.position+1]) { + absPos := relativeToAbsolutePosition(p.ranges, p.position+1) + p.inlines = append(p.inlines, &Text{ + Text: string(p.raw[p.position+1]), + Range: Range{absPos, absPos + len(string(p.raw[p.position+1]))}, + }) + p.position += 2 + } else { + absPos := relativeToAbsolutePosition(p.ranges, p.position) + p.inlines = append(p.inlines, &Text{ + Text: `\`, + Range: Range{absPos, absPos + 1}, + }) + p.position++ + } +} + +func (p *inlineParser) parseText() { + if next := strings.IndexAny(p.raw[p.position:], "\r\n\\`&![]wW:"); next == -1 { + absPos := relativeToAbsolutePosition(p.ranges, p.position) + p.inlines = append(p.inlines, &Text{ + Text: strings.TrimRightFunc(p.raw[p.position:], isWhitespace), + Range: Range{absPos, absPos + len(p.raw[p.position:])}, + }) + p.position = len(p.raw) + } else { + absPos := relativeToAbsolutePosition(p.ranges, p.position) + if p.raw[p.position+next] == '\r' || p.raw[p.position+next] == '\n' { + s := strings.TrimRightFunc(p.raw[p.position:p.position+next], isWhitespace) + p.inlines = append(p.inlines, &Text{ + Text: s, + Range: Range{absPos, absPos + len(s)}, + }) + } else { + if next == 0 { + // Always read at least one character since 'w', 'W', and ':' may not actually match another + // type of node + next = 1 + } + + p.inlines = append(p.inlines, &Text{ + Text: p.raw[p.position : p.position+next], + Range: Range{absPos, absPos + next}, + }) + } + p.position += next + } +} + +func (p *inlineParser) parseLinkOrImageDelimiter() { + absPos := relativeToAbsolutePosition(p.ranges, p.position) + if p.raw[p.position] == '[' { + p.inlines = append(p.inlines, &Text{ + Text: "[", + Range: Range{absPos, absPos + 1}, + }) + p.delimiterStack.PushBack(&delimiter{ + Type: linkOpeningDelimiter, + TextNode: len(p.inlines) - 1, + Range: Range{p.position, p.position + 1}, + }) + p.position++ + } else if p.raw[p.position] == '!' && p.position+1 < len(p.raw) && p.raw[p.position+1] == '[' { + p.inlines = append(p.inlines, &Text{ + Text: "![", + Range: Range{absPos, absPos + 2}, + }) + p.delimiterStack.PushBack(&delimiter{ + Type: imageOpeningDelimiter, + TextNode: len(p.inlines) - 1, + Range: Range{p.position, p.position + 2}, + }) + p.position += 2 + } else { + p.inlines = append(p.inlines, &Text{ + Text: "!", + Range: Range{absPos, absPos + 1}, + }) + p.position++ + } +} + +func (p *inlineParser) peekAtInlineLinkDestinationAndTitle(position int, isImage bool) (destination, title Range, end int, ok bool) { + if position >= len(p.raw) || p.raw[position] != '(' { + return + } + position++ + + destinationStart := nextNonWhitespace(p.raw, position) + if destinationStart >= len(p.raw) { + return + } else if p.raw[destinationStart] == ')' { + return Range{destinationStart, destinationStart}, Range{destinationStart, destinationStart}, destinationStart + 1, true + } + + destination, end, ok = parseLinkDestination(p.raw, destinationStart) + if !ok { + return + } + position = end + + if isImage && position < len(p.raw) && isWhitespaceByte(p.raw[position]) { + dimensionsStart := nextNonWhitespace(p.raw, position) + if dimensionsStart >= len(p.raw) { + return + } + + if p.raw[dimensionsStart] == '=' { + // Read optional image dimensions even if we don't use them + _, end, ok = parseImageDimensions(p.raw, dimensionsStart) + if !ok { + return + } + + position = end + } + } + + if position < len(p.raw) && isWhitespaceByte(p.raw[position]) { + titleStart := nextNonWhitespace(p.raw, position) + if titleStart >= len(p.raw) { + return + } else if p.raw[titleStart] == ')' { + return destination, Range{titleStart, titleStart}, titleStart + 1, true + } + + if p.raw[titleStart] == '"' || p.raw[titleStart] == '\'' || p.raw[titleStart] == '(' { + title, end, ok = parseLinkTitle(p.raw, titleStart) + if !ok { + return + } + position = end + } + } + + closingPosition := nextNonWhitespace(p.raw, position) + if closingPosition >= len(p.raw) || p.raw[closingPosition] != ')' { + return Range{}, Range{}, 0, false + } + + return destination, title, closingPosition + 1, true +} + +func (p *inlineParser) referenceDefinition(label string) *ReferenceDefinition { + clean := strings.Join(strings.Fields(label), " ") + for _, d := range p.referenceDefinitions { + if strings.EqualFold(clean, strings.Join(strings.Fields(d.Label()), " ")) { + return d + } + } + return nil +} + +func (p *inlineParser) lookForLinkOrImage() { + for element := p.delimiterStack.Back(); element != nil; element = element.Prev() { + d := element.Value.(*delimiter) + if d.Type != imageOpeningDelimiter && d.Type != linkOpeningDelimiter { + continue + } + if d.IsInactive { + p.delimiterStack.Remove(element) + break + } + + isImage := d.Type == imageOpeningDelimiter + + var inline Inline + + if destination, title, next, ok := p.peekAtInlineLinkDestinationAndTitle(p.position+1, isImage); ok { + destinationMarkdownPosition := relativeToAbsolutePosition(p.ranges, destination.Position) + linkOrImage := InlineLinkOrImage{ + Children: append([]Inline(nil), p.inlines[d.TextNode+1:]...), + RawDestination: Range{destinationMarkdownPosition, destinationMarkdownPosition + destination.End - destination.Position}, + markdown: p.markdown, + rawTitle: p.raw[title.Position:title.End], + } + if d.Type == imageOpeningDelimiter { + inline = &InlineImage{linkOrImage} + } else { + inline = &InlineLink{linkOrImage} + } + p.position = next + } else { + referenceLabel := "" + label, next, hasLinkLabel := parseLinkLabel(p.raw, p.position+1) + if hasLinkLabel && label.End > label.Position { + referenceLabel = p.raw[label.Position:label.End] + } else { + referenceLabel = p.raw[d.Range.End:p.position] + if !hasLinkLabel { + next = p.position + 1 + } + } + if referenceLabel != "" { + if reference := p.referenceDefinition(referenceLabel); reference != nil { + linkOrImage := ReferenceLinkOrImage{ + ReferenceDefinition: reference, + Children: append([]Inline(nil), p.inlines[d.TextNode+1:]...), + } + if d.Type == imageOpeningDelimiter { + inline = &ReferenceImage{linkOrImage} + } else { + inline = &ReferenceLink{linkOrImage} + } + p.position = next + } + } + } + + if inline != nil { + if d.Type == imageOpeningDelimiter { + p.inlines = append(p.inlines[:d.TextNode], inline) + } else { + p.inlines = append(p.inlines[:d.TextNode], inline) + for element := element.Prev(); element != nil; element = element.Prev() { + if d := element.Value.(*delimiter); d.Type == linkOpeningDelimiter { + d.IsInactive = true + } + } + } + p.delimiterStack.Remove(element) + return + } else { + p.delimiterStack.Remove(element) + break + } + } + absPos := relativeToAbsolutePosition(p.ranges, p.position) + p.inlines = append(p.inlines, &Text{ + Text: "]", + Range: Range{absPos, absPos + 1}, + }) + p.position++ +} + +func CharacterReference(ref string) string { + if ref == "" { + return "" + } + if ref[0] == '#' { + if len(ref) < 2 { + return "" + } + n := 0 + if ref[1] == 'X' || ref[1] == 'x' { + if len(ref) < 3 { + return "" + } + for i := 2; i < len(ref); i++ { + if i > 9 { + return "" + } + d := ref[i] + switch { + case d >= '0' && d <= '9': + n = n*16 + int(d-'0') + case d >= 'a' && d <= 'f': + n = n*16 + 10 + int(d-'a') + case d >= 'A' && d <= 'F': + n = n*16 + 10 + int(d-'A') + default: + return "" + } + } + } else { + for i := 1; i < len(ref); i++ { + if i > 8 || ref[i] < '0' || ref[i] > '9' { + return "" + } + n = n*10 + int(ref[i]-'0') + } + } + c := rune(n) + if c == '\u0000' || !utf8.ValidRune(c) { + return string(unicode.ReplacementChar) + } + return string(c) + } + if entity, ok := htmlEntities[ref]; ok { + return entity + } + return "" +} + +func (p *inlineParser) parseCharacterReference() { + absPos := relativeToAbsolutePosition(p.ranges, p.position) + p.position++ + if semicolon := strings.IndexByte(p.raw[p.position:], ';'); semicolon == -1 { + p.inlines = append(p.inlines, &Text{ + Text: "&", + Range: Range{absPos, absPos + 1}, + }) + } else if s := CharacterReference(p.raw[p.position : p.position+semicolon]); s != "" { + p.position += semicolon + 1 + p.inlines = append(p.inlines, &Text{ + Text: s, + Range: Range{absPos, absPos + len(s)}, + }) + } else { + p.inlines = append(p.inlines, &Text{ + Text: "&", + Range: Range{absPos, absPos + 1}, + }) + } +} + +func (p *inlineParser) parseAutolink(c rune) bool { + for element := p.delimiterStack.Back(); element != nil; element = element.Prev() { + d := element.Value.(*delimiter) + if !d.IsInactive { + return false + } + } + + var link Range + if c == ':' { + var ok bool + link, ok = parseURLAutolink(p.raw, p.position) + + if !ok { + return false + } + + // Since the current position is at the colon, we have to rewind the parsing slightly so that + // we don't duplicate the URL scheme + rewind := strings.Index(p.raw[link.Position:link.End], ":") + if rewind != -1 { + lastInline := p.inlines[len(p.inlines)-1] + lastText, ok := lastInline.(*Text) + + if !ok { + // This should never occur since parseURLAutolink will only return a non-empty value + // when the previous text ends in a valid URL protocol which would mean that the previous + // node is a Text node + return false + } + + p.inlines = p.inlines[0 : len(p.inlines)-1] + p.inlines = append(p.inlines, &Text{ + Text: lastText.Text[:len(lastText.Text)-rewind], + Range: Range{lastText.Range.Position, lastText.Range.End - rewind}, + }) + p.position -= rewind + } + } else if c == 'w' || c == 'W' { + var ok bool + link, ok = parseWWWAutolink(p.raw, p.position) + + if !ok { + return false + } + } + + linkMarkdownPosition := relativeToAbsolutePosition(p.ranges, link.Position) + linkRange := Range{linkMarkdownPosition, linkMarkdownPosition + link.End - link.Position} + + p.inlines = append(p.inlines, &Autolink{ + Children: []Inline{ + &Text{ + Text: p.raw[link.Position:link.End], + Range: linkRange, + }, + }, + RawDestination: linkRange, + markdown: p.markdown, + }) + p.position += (link.End - link.Position) + + return true +} + +func (p *inlineParser) Parse() []Inline { + for _, r := range p.ranges { + p.raw += p.markdown[r.Position:r.End] + } + + for p.position < len(p.raw) { + c, _ := utf8.DecodeRuneInString(p.raw[p.position:]) + + switch c { + case '\r', '\n': + p.parseLineEnding() + case '\\': + p.parseEscapeCharacter() + case '`': + p.parseBackticks() + case '&': + p.parseCharacterReference() + case '!', '[': + p.parseLinkOrImageDelimiter() + case ']': + p.lookForLinkOrImage() + case 'w', 'W', ':': + matched := p.parseAutolink(c) + + if !matched { + p.parseText() + } + default: + p.parseText() + } + } + + return p.inlines +} + +func ParseInlines(markdown string, ranges []Range, referenceDefinitions []*ReferenceDefinition) (inlines []Inline) { + return newInlineParser(markdown, ranges, referenceDefinitions).Parse() +} + +func MergeInlineText(inlines []Inline) []Inline { + var ret []Inline + for i, v := range inlines { + // always add first node + if i == 0 { + ret = append(ret, v) + continue + } + // not a text node? nothing to merge + text, ok := v.(*Text) + if !ok { + ret = append(ret, v) + continue + } + // previous node is not a text node? nothing to merge + prevText, ok := ret[len(ret)-1].(*Text) + if !ok { + ret = append(ret, v) + continue + } + // previous node is not right before this one + if prevText.Range.End != text.Range.Position { + ret = append(ret, v) + continue + } + // we have two consecutive text nodes + ret[len(ret)-1] = &Text{ + Text: prevText.Text + text.Text, + Range: Range{prevText.Range.Position, text.Range.End}, + } + } + return ret +} + +func Unescape(markdown string) string { + ret := "" + + position := 0 + for position < len(markdown) { + c, cSize := utf8.DecodeRuneInString(markdown[position:]) + + switch c { + case '\\': + if position+1 < len(markdown) && isEscapableByte(markdown[position+1]) { + ret += string(markdown[position+1]) + position += 2 + } else { + ret += `\` + position++ + } + case '&': + position++ + if semicolon := strings.IndexByte(markdown[position:], ';'); semicolon == -1 { + ret += "&" + } else if s := CharacterReference(markdown[position : position+semicolon]); s != "" { + position += semicolon + 1 + ret += s + } else { + ret += "&" + } + default: + ret += string(c) + position += cSize + } + } + + return ret +} |