// Vendored copy of github.com/gomarkdown/markdown/ast (attribute.go, doc.go,
// node.go and print.go), added by commit
// 0917dc876613fd71c9726a34bf0138b4f5121be9, "Update markdown parsing library
// to github.com/gomarkdown/markdown (#944)" (Benjamin, 2019-11-18).
//
// Package ast defines tree representation of a parsed markdown document.
//
// All declarations below belong to package ast. The printing helpers at the
// end (originally print.go) use the standard packages bytes, fmt, io and
// strings.

// ---------------------------------------------------------------------------
// attribute.go
// ---------------------------------------------------------------------------

// Attribute can be attached to block elements. Attributes are specified as
// {#id .class key="value"} where quotes for values are mandatory and multiple
// key/value pairs are separated by whitespace.
type Attribute struct {
	ID      []byte
	Classes [][]byte
	Attrs   map[string][]byte
}

// ---------------------------------------------------------------------------
// node.go
// ---------------------------------------------------------------------------

// ListType contains bitwise or'ed flags for list and list item objects.
type ListType int

// These are the possible flag values for the ListItem renderer.
// Multiple flag values may be ORed together.
// These are mostly of interest if you are writing a new output format.
const (
	ListTypeOrdered ListType = 1 << iota
	ListTypeDefinition
	ListTypeTerm

	ListItemContainsBlock
	ListItemBeginningOfList // TODO: figure out if this is of any use now
	ListItemEndOfList
)

// CellAlignFlags holds a type of alignment in a table cell.
type CellAlignFlags int

// These are the possible flag values for the table cell renderer.
// Only a single one of these values will be used; they are not ORed together.
// These are mostly of interest if you are writing a new output format.
const (
	TableAlignmentLeft CellAlignFlags = 1 << iota
	TableAlignmentRight
	TableAlignmentCenter = (TableAlignmentLeft | TableAlignmentRight)
)

// String returns "left", "right" or "center" for the corresponding alignment
// and an empty string for any other value.
func (a CellAlignFlags) String() string {
	switch a {
	case TableAlignmentLeft:
		return "left"
	case TableAlignmentRight:
		return "right"
	case TableAlignmentCenter:
		return "center"
	default:
		return ""
	}
}

// DocumentMatters holds the type of a {front,main,back}matter in the document.
type DocumentMatters int

// These are all possible Document divisions.
const (
	DocumentMatterNone DocumentMatters = iota
	DocumentMatterFront
	DocumentMatterMain
	DocumentMatterBack
)

// CitationTypes holds the type of a citation: informative, normative or
// suppressed.
type CitationTypes int

// These are all possible citation types.
const (
	CitationTypeNone CitationTypes = iota
	CitationTypeSuppressed
	CitationTypeInformative
	CitationTypeNormative
)

// Node defines an ast node.
type Node interface {
	AsContainer() *Container
	AsLeaf() *Leaf
	GetParent() Node
	SetParent(newParent Node)
	GetChildren() []Node
	SetChildren(newChildren []Node)
}

// Container is a type of node that can contain children.
type Container struct {
	Parent   Node
	Children []Node

	Literal []byte // Text contents of the leaf nodes
	Content []byte // Markdown content of the block nodes

	*Attribute // Block level attribute
}

// AsContainer returns itself as *Container.
func (c *Container) AsContainer() *Container {
	return c
}

// AsLeaf returns nil.
func (c *Container) AsLeaf() *Leaf {
	return nil
}

// GetParent returns the parent node.
func (c *Container) GetParent() Node {
	return c.Parent
}

// SetParent sets the parent node.
func (c *Container) SetParent(newParent Node) {
	c.Parent = newParent
}

// GetChildren returns the children nodes.
func (c *Container) GetChildren() []Node {
	return c.Children
}

// SetChildren sets the children nodes.
func (c *Container) SetChildren(newChildren []Node) {
	c.Children = newChildren
}

// Leaf is a type of node that cannot have children.
type Leaf struct {
	Parent Node

	Literal []byte // Text contents of the leaf nodes
	Content []byte // Markdown content of the block nodes

	*Attribute // Block level attribute
}

// AsContainer returns nil.
func (l *Leaf) AsContainer() *Container {
	return nil
}

// AsLeaf returns itself as *Leaf.
func (l *Leaf) AsLeaf() *Leaf {
	return l
}

// GetParent returns the parent node.
func (l *Leaf) GetParent() Node {
	return l.Parent
}

// SetParent sets the parent node.
func (l *Leaf) SetParent(newParent Node) {
	l.Parent = newParent
}

// GetChildren returns nil because Leaf cannot have children.
func (l *Leaf) GetChildren() []Node {
	return nil
}

// SetChildren accepts only a nil or empty slice, which is a no-op, and panics
// when asked to attach actual children because Leaf cannot have any.
//
// The empty case must not panic: RemoveFromTree calls SetChildren(nil) on
// every node that has a parent, so a panic here would make it impossible to
// detach or re-parent a leaf that already sits in a tree.
func (l *Leaf) SetChildren(newChildren []Node) {
	if len(newChildren) != 0 {
		panic("leaf node cannot have children")
	}
}

// Document represents markdown document node, a root of ast.
type Document struct {
	Container
}

// DocumentMatter represents markdown node that signals a document
// division: frontmatter, mainmatter or backmatter.
type DocumentMatter struct {
	Container

	Matter DocumentMatters
}

// BlockQuote represents markdown block quote node.
type BlockQuote struct {
	Container
}

// Aside represents a markdown aside node.
type Aside struct {
	Container
}

// List represents markdown list node.
type List struct {
	Container

	ListFlags       ListType
	Tight           bool   // Skip <p>s around list item data if true
	BulletChar      byte   // '*', '+' or '-' in bullet lists
	Delimiter       byte   // '.' or ')' after the number in ordered lists
	Start           int    // for ordered lists this indicates the starting number if > 0
	RefLink         []byte // If not nil, turns this list item into a footnote item and triggers different rendering
	IsFootnotesList bool   // This is a list of footnotes
}

// ListItem represents markdown list item node.
type ListItem struct {
	Container

	ListFlags       ListType
	Tight           bool   // Skip <p>s around list item data if true
	BulletChar      byte   // '*', '+' or '-' in bullet lists
	Delimiter       byte   // '.' or ')' after the number in ordered lists
	RefLink         []byte // If not nil, turns this list item into a footnote item and triggers different rendering
	IsFootnotesList bool   // This is a list of footnotes
}

// Paragraph represents markdown paragraph node.
type Paragraph struct {
	Container
}

// Math represents markdown inline math node.
type Math struct {
	Leaf
}

// MathBlock represents markdown math block node.
type MathBlock struct {
	Container
}

// Heading represents markdown heading node.
type Heading struct {
	Container

	Level        int    // This holds the heading level number
	HeadingID    string // This might hold heading ID, if present
	IsTitleblock bool   // Specifies whether it's a title block
	IsSpecial    bool   // We are a special heading (starts with .#)
}

// HorizontalRule represents markdown horizontal rule node.
type HorizontalRule struct {
	Leaf
}

// Emph represents markdown emphasis node.
type Emph struct {
	Container
}

// Strong represents markdown strong node.
type Strong struct {
	Container
}

// Del represents markdown del node.
type Del struct {
	Container
}

// Link represents markdown link node.
type Link struct {
	Container

	Destination []byte // Destination is what goes into a href
	Title       []byte // Title is the tooltip thing that goes in a title attribute
	NoteID      int    // NoteID contains a serial number of a footnote, zero if it's not a footnote
	Footnote    Node   // If it's a footnote, this is a direct link to the footnote Node. Otherwise nil.
	DeferredID  []byte // If a deferred link this holds the original ID.
}

// CrossReference is a reference node.
type CrossReference struct {
	Container

	Destination []byte // Destination is where the reference points to
}

// Citation is a citation node.
type Citation struct {
	Leaf

	Destination [][]byte        // Destination is where the citation points to. Multiple ones are allowed.
	Type        []CitationTypes // 1:1 mapping of destination and citation type
	Suffix      [][]byte        // Potential citation suffix, i.e. [@!RFC1035, p. 144]
}

// Image represents markdown image node.
type Image struct {
	Container

	Destination []byte // Destination is what goes into a href
	Title       []byte // Title is the tooltip thing that goes in a title attribute
}

// Text represents markdown text node.
type Text struct {
	Leaf
}

// HTMLBlock represents markdown html node.
type HTMLBlock struct {
	Leaf
}

// CodeBlock represents markdown code block node.
type CodeBlock struct {
	Leaf

	IsFenced    bool   // Specifies whether it's a fenced code block or an indented one
	Info        []byte // This holds the info string
	FenceChar   byte
	FenceLength int
	FenceOffset int
}

// Softbreak represents markdown softbreak node.
// Note: not used currently.
type Softbreak struct {
	Leaf
}

// Hardbreak represents markdown hard break node.
type Hardbreak struct {
	Leaf
}

// NonBlockingSpace represents markdown non-blocking space node.
type NonBlockingSpace struct {
	Leaf
}

// Code represents markdown code node.
type Code struct {
	Leaf
}

// HTMLSpan represents markdown html span node.
type HTMLSpan struct {
	Leaf
}

// Table represents markdown table node.
type Table struct {
	Container
}

// TableCell represents markdown table cell node.
type TableCell struct {
	Container

	IsHeader bool           // This tells if it's under the header row
	Align    CellAlignFlags // This holds the value for align attribute
}

// TableHeader represents markdown table head node.
type TableHeader struct {
	Container
}

// TableBody represents markdown table body node.
type TableBody struct {
	Container
}

// TableRow represents markdown table row node.
type TableRow struct {
	Container
}

// TableFooter represents markdown table foot node.
type TableFooter struct {
	Container
}

// Caption represents a figure, code or quote caption.
type Caption struct {
	Container
}

// CaptionFigure is a node (blockquote or codeblock) that has a caption.
type CaptionFigure struct {
	Container

	HeadingID string // This might hold heading ID, if present
}

// Callout is a node that can exist both in text (where it is an actual node)
// and in a code block.
type Callout struct {
	Leaf

	ID []byte // number of this callout
}

// Index is a node that contains an Index item and an optional subitem.
type Index struct {
	Leaf

	Primary bool
	Item    []byte
	Subitem []byte
	ID      string // ID of the index
}

// Subscript is a subscript node.
type Subscript struct {
	Leaf
}

// Superscript is a superscript node.
type Superscript struct {
	Leaf
}

// Footnotes is a node that contains all footnotes.
type Footnotes struct {
	Container
}

// removeNodeFromArray removes the first occurrence of node from a and returns
// the shortened slice, which shares (and shifts elements within) a's backing
// array. It returns nil when node is not present in a.
func removeNodeFromArray(a []Node, node Node) []Node {
	for i, candidate := range a {
		if candidate == node {
			return append(a[:i], a[i+1:]...)
		}
	}
	return nil
}

// AppendChild appends child to children of parent, first detaching child
// from its current parent (if any).
// It panics if either node is nil.
func AppendChild(parent Node, child Node) {
	RemoveFromTree(child)
	child.SetParent(parent)
	newChildren := append(parent.GetChildren(), child)
	parent.SetChildren(newChildren)
}

// RemoveFromTree removes n from its parent's children. It does nothing when n
// has no parent. When n does have a parent, n's own children are cleared as
// well; n's parent pointer is left untouched.
func RemoveFromTree(n Node) {
	p := n.GetParent()
	if p == nil {
		return
	}
	// important: don't clear n.Children if n has no parent
	// we're called from AppendChild and that might happen on a node
	// that accumulated Children but hasn't been inserted into the tree
	n.SetChildren(nil)
	// A nil result means n was not found among p's children; leave p alone.
	if remaining := removeNodeFromArray(p.GetChildren(), n); remaining != nil {
		p.SetChildren(remaining)
	}
}

// GetLastChild returns last child of node n, or nil if n has no children.
// It's implemented as stand-alone function to keep Node interface small.
func GetLastChild(n Node) Node {
	a := n.GetChildren()
	if len(a) > 0 {
		return a[len(a)-1]
	}
	return nil
}

// GetFirstChild returns first child of node n, or nil if n has no children.
// It's implemented as stand-alone function to keep Node interface small.
func GetFirstChild(n Node) Node {
	a := n.GetChildren()
	if len(a) > 0 {
		return a[0]
	}
	return nil
}

// GetNextNode returns next sibling of node n (node after n), or nil if n has
// no parent or is the last child.
// We can't make it part of Container or Leaf because we lose Node identity.
func GetNextNode(n Node) Node {
	parent := n.GetParent()
	if parent == nil {
		return nil
	}
	siblings := parent.GetChildren()
	// The last sibling has no successor, so stop one short of the end.
	for i := 0; i+1 < len(siblings); i++ {
		if siblings[i] == n {
			return siblings[i+1]
		}
	}
	return nil
}

// GetPrevNode returns previous sibling of node n (node before n), or nil if n
// has no parent or is the first child.
// We can't make it part of Container or Leaf because we lose Node identity.
func GetPrevNode(n Node) Node {
	parent := n.GetParent()
	if parent == nil {
		return nil
	}
	siblings := parent.GetChildren()
	// The first sibling has no predecessor, so start at index 1.
	for i := 1; i < len(siblings); i++ {
		if siblings[i] == n {
			return siblings[i-1]
		}
	}
	return nil
}

// WalkStatus allows NodeVisitor to have some control over the tree traversal.
// It is returned from NodeVisitor and different values allow Walk to
// decide which node to go to next.
type WalkStatus int

const (
	// GoToNext is the default traversal of every node.
	GoToNext WalkStatus = iota
	// SkipChildren tells walker to skip all children of current node.
	SkipChildren
	// Terminate tells walker to terminate the traversal.
	Terminate
)

// NodeVisitor is a callback to be called when traversing the syntax tree.
// Container nodes are visited twice: once with entering=true when the branch
// is first visited, then with entering=false after all the children are done.
// Leaf nodes are visited once, with entering=true.
type NodeVisitor interface {
	Visit(node Node, entering bool) WalkStatus
}

// NodeVisitorFunc casts a function to match NodeVisitor interface.
type NodeVisitorFunc func(node Node, entering bool) WalkStatus

// Walk traverses the tree rooted at n recursively, depth first.
// It returns Terminate if the visitor terminated the traversal and GoToNext
// otherwise.
func Walk(n Node, visitor NodeVisitor) WalkStatus {
	isContainer := n.AsContainer() != nil
	status := visitor.Visit(n, true) // entering
	if status == Terminate {
		// even if terminating, close container node
		if isContainer {
			visitor.Visit(n, false)
		}
		return status
	}
	if isContainer && status != SkipChildren {
		for _, child := range n.GetChildren() {
			if status = Walk(child, visitor); status == Terminate {
				return status
			}
		}
	}
	if isContainer {
		status = visitor.Visit(n, false) // exiting
		if status == Terminate {
			return status
		}
	}
	return GoToNext
}

// Visit calls visitor function.
func (f NodeVisitorFunc) Visit(node Node, entering bool) WalkStatus {
	return f(node, entering)
}

// WalkFunc is like Walk but accepts just a callback function.
func WalkFunc(n Node, f NodeVisitorFunc) {
	Walk(n, f)
}

// ---------------------------------------------------------------------------
// print.go
// ---------------------------------------------------------------------------

// Print is for debugging. It prints a string representation of parsed
// markdown doc (result of parser.Parse()) to dst.
//
// To make output readable, it shortens text output.
func Print(dst io.Writer, doc Node) {
	PrintWithPrefix(dst, doc, "  ")
}

// PrintWithPrefix is like Print but allows customizing prefix used for
// indentation. By default it's 2 spaces. You can change it to e.g. tab
// by passing "\t".
func PrintWithPrefix(w io.Writer, doc Node, prefix string) {
	// for more compact output, don't print outer Document
	if _, ok := doc.(*Document); ok {
		for _, c := range doc.GetChildren() {
			printRecur(w, c, prefix, 0)
		}
		return
	}
	printRecur(w, doc, prefix, 0)
}

// ToString is like Print but returns result as a string.
func ToString(doc Node) string {
	var buf bytes.Buffer
	Print(&buf, doc)
	return buf.String()
}

// contentToString returns d1 as a string if it is non-nil, otherwise d2 if it
// is non-nil, otherwise an empty string.
func contentToString(d1 []byte, d2 []byte) string {
	if d1 != nil {
		return string(d1)
	}
	if d2 != nil {
		return string(d2)
	}
	return ""
}

// getContent returns the Literal of node, falling back to its Content.
// A node that is neither a container nor a leaf yields an empty string.
func getContent(node Node) string {
	if c := node.AsContainer(); c != nil {
		return contentToString(c.Literal, c.Content)
	}
	if leaf := node.AsLeaf(); leaf != nil {
		return contentToString(leaf.Literal, leaf.Content)
	}
	return ""
}

// whitespaceEscaper replaces newline, carriage return and tab with their
// escaped two-character spelling.
var whitespaceEscaper = strings.NewReplacer("\n", `\n`, "\r", `\r`, "\t", `\t`)

// shortenString escapes newlines, carriage returns and tabs in s and then
// limits the result to maxLen bytes, ending it with "..." when text had to be
// dropped. A negative maxLen disables truncation. The cut never lands inside
// a multi-byte UTF-8 sequence, so the result may be slightly shorter than
// maxLen.
func shortenString(s string, maxLen int) string {
	// for cleaner, one-line output, replace some white-space chars
	// with their escaped version
	s = whitespaceEscaper.Replace(s)
	if maxLen < 0 || len(s) <= maxLen {
		return s
	}
	const ellipsis = "..."
	if maxLen <= len(ellipsis) {
		// no room for any text; emit as much of the marker as fits
		return ellipsis[:maxLen]
	}
	cut := maxLen - len(ellipsis)
	// back up over UTF-8 continuation bytes (10xxxxxx) to a rune boundary
	for cut > 0 && s[cut]&0xC0 == 0x80 {
		cut--
	}
	// add "..." to indicate truncation
	return s[:cut] + ellipsis
}

// getNodeType returns a short name of the type of node which excludes the
// package name and strips "()" from the end.
func getNodeType(node Node) string {
	s := fmt.Sprintf("%T", node)
	s = strings.TrimSuffix(s, "()")
	if idx := strings.Index(s, "."); idx != -1 {
		return s[idx+1:]
	}
	return s
}

// printDefault writes one line consisting of indent, typeName and, when it is
// not blank, the space-trimmed content in single quotes.
func printDefault(w io.Writer, indent string, typeName string, content string) {
	content = strings.TrimSpace(content)
	if len(content) > 0 {
		fmt.Fprintf(w, "%s%s '%s'\n", indent, typeName, content)
	} else {
		fmt.Fprintf(w, "%s%s\n", indent, typeName)
	}
}

// listFlagNames maps every ListType flag to the name used in debug output,
// in the order in which the names are printed.
var listFlagNames = []struct {
	flag ListType
	name string
}{
	{ListTypeOrdered, "ordered"},
	{ListTypeDefinition, "definition"},
	{ListTypeTerm, "term"},
	{ListItemContainsBlock, "has_block"},
	{ListItemBeginningOfList, "start"},
	{ListItemEndOfList, "end"},
}

// getListFlags returns the space-separated names of all flags set in f.
func getListFlags(f ListType) string {
	var names []string
	for _, entry := range listFlagNames {
		if f&entry.flag != 0 {
			names = append(names, entry.name)
		}
	}
	return strings.Join(names, " ")
}

// listDetails formats the properties shared by List and ListItem nodes; each
// reported property is followed by a single space.
func listDetails(tight bool, footnotes bool, flags ListType) string {
	var details string
	if tight {
		details += "tight "
	}
	if footnotes {
		details += "footnotes "
	}
	if names := getListFlags(flags); len(names) > 0 {
		details += "flags=" + names + " "
	}
	return details
}

// printRecur prints node on its own line, indented by depth copies of prefix,
// and then prints its children one level deeper. A nil node prints nothing.
func printRecur(w io.Writer, node Node, prefix string, depth int) {
	if node == nil {
		return
	}
	indent := strings.Repeat(prefix, depth)

	content := shortenString(getContent(node), 40)
	typeName := getNodeType(node)
	switch v := node.(type) {
	case *Link:
		// links and images show their destination instead of their content
		content = "url=" + string(v.Destination)
	case *Image:
		content = "url=" + string(v.Destination)
	case *List:
		if v.Start > 1 {
			content += fmt.Sprintf("start=%d ", v.Start)
		}
		content += listDetails(v.Tight, v.IsFootnotesList, v.ListFlags)
	case *ListItem:
		content += listDetails(v.Tight, v.IsFootnotesList, v.ListFlags)
	}
	printDefault(w, indent, typeName, content)

	for _, child := range node.GetChildren() {
		printRecur(w, child, prefix, depth+1)
	}
}