diff options
Diffstat (limited to 'vendor/github.com/russross/blackfriday/markdown.go')
-rw-r--r-- | vendor/github.com/russross/blackfriday/markdown.go | 761 |
1 files changed, 385 insertions, 376 deletions
diff --git a/vendor/github.com/russross/blackfriday/markdown.go b/vendor/github.com/russross/blackfriday/markdown.go index 1722a738..ff61cb05 100644 --- a/vendor/github.com/russross/blackfriday/markdown.go +++ b/vendor/github.com/russross/blackfriday/markdown.go @@ -1,230 +1,200 @@ -// // Blackfriday Markdown Processor // Available at http://github.com/russross/blackfriday // // Copyright © 2011 Russ Ross <russ@russross.com>. // Distributed under the Simplified BSD License. // See README.md for details. -// - -// -// -// Markdown parsing and processing -// -// package blackfriday import ( "bytes" "fmt" + "io" "strings" "unicode/utf8" ) -const VERSION = "1.5" +// +// Markdown parsing and processing +// + +// Version string of the package. Appears in the rendered document when +// CompletePage flag is on. +const Version = "2.0" + +// Extensions is a bitwise or'ed collection of enabled Blackfriday's +// extensions. +type Extensions int // These are the supported markdown parsing extensions. // OR these values together to select multiple extensions. const ( - EXTENSION_NO_INTRA_EMPHASIS = 1 << iota // ignore emphasis markers inside words - EXTENSION_TABLES // render tables - EXTENSION_FENCED_CODE // render fenced code blocks - EXTENSION_AUTOLINK // detect embedded URLs that are not explicitly marked - EXTENSION_STRIKETHROUGH // strikethrough text using ~~test~~ - EXTENSION_LAX_HTML_BLOCKS // loosen up HTML block parsing rules - EXTENSION_SPACE_HEADERS // be strict about prefix header rules - EXTENSION_HARD_LINE_BREAK // translate newlines into line breaks - EXTENSION_TAB_SIZE_EIGHT // expand tabs to eight spaces instead of four - EXTENSION_FOOTNOTES // Pandoc-style footnotes - EXTENSION_NO_EMPTY_LINE_BEFORE_BLOCK // No need to insert an empty line to start a (code, quote, ordered list, unordered list) block - EXTENSION_HEADER_IDS // specify header IDs with {#id} - EXTENSION_TITLEBLOCK // Titleblock ala pandoc - EXTENSION_AUTO_HEADER_IDS // Create the header ID from the text - EXTENSION_BACKSLASH_LINE_BREAK // translate trailing backslashes into line breaks - EXTENSION_DEFINITION_LISTS // render definition lists - EXTENSION_JOIN_LINES // delete newline and join lines - - commonHtmlFlags = 0 | - HTML_USE_XHTML | - HTML_USE_SMARTYPANTS | - HTML_SMARTYPANTS_FRACTIONS | - HTML_SMARTYPANTS_DASHES | - HTML_SMARTYPANTS_LATEX_DASHES - - commonExtensions = 0 | - EXTENSION_NO_INTRA_EMPHASIS | - EXTENSION_TABLES | - EXTENSION_FENCED_CODE | - EXTENSION_AUTOLINK | - EXTENSION_STRIKETHROUGH | - EXTENSION_SPACE_HEADERS | - EXTENSION_HEADER_IDS | - EXTENSION_BACKSLASH_LINE_BREAK | - EXTENSION_DEFINITION_LISTS + NoExtensions Extensions = 0 + NoIntraEmphasis Extensions = 1 << iota // Ignore emphasis markers inside words + Tables // Render tables + FencedCode // Render fenced code blocks + Autolink // Detect embedded URLs that are not explicitly marked + Strikethrough // Strikethrough text using ~~test~~ + LaxHTMLBlocks // Loosen up HTML block parsing rules + SpaceHeadings // Be strict about prefix heading rules + HardLineBreak // Translate newlines into line breaks + TabSizeEight // Expand tabs to eight spaces instead of four + Footnotes // Pandoc-style footnotes + NoEmptyLineBeforeBlock // No need to insert an empty line to start a (code, quote, ordered list, unordered list) block + HeadingIDs // specify heading IDs with {#id} + Titleblock // Titleblock ala pandoc + AutoHeadingIDs // Create the heading ID from the text + BackslashLineBreak // Translate trailing backslashes into line breaks + DefinitionLists // Render definition lists + + CommonHTMLFlags HTMLFlags = UseXHTML | Smartypants | + SmartypantsFractions | SmartypantsDashes | SmartypantsLatexDashes + + CommonExtensions Extensions = NoIntraEmphasis | Tables | FencedCode | + Autolink | Strikethrough | SpaceHeadings | HeadingIDs | + BackslashLineBreak | DefinitionLists ) -// These are the possible flag values for the link renderer. -// Only a single one of these values will be used; they are not ORed together. -// These are mostly of interest if you are writing a new output format. -const ( - LINK_TYPE_NOT_AUTOLINK = iota - LINK_TYPE_NORMAL - LINK_TYPE_EMAIL -) +// ListType contains bitwise or'ed flags for list and list item objects. +type ListType int // These are the possible flag values for the ListItem renderer. // Multiple flag values may be ORed together. // These are mostly of interest if you are writing a new output format. const ( - LIST_TYPE_ORDERED = 1 << iota - LIST_TYPE_DEFINITION - LIST_TYPE_TERM - LIST_ITEM_CONTAINS_BLOCK - LIST_ITEM_BEGINNING_OF_LIST - LIST_ITEM_END_OF_LIST + ListTypeOrdered ListType = 1 << iota + ListTypeDefinition + ListTypeTerm + + ListItemContainsBlock + ListItemBeginningOfList // TODO: figure out if this is of any use now + ListItemEndOfList ) +// CellAlignFlags holds a type of alignment in a table cell. +type CellAlignFlags int + // These are the possible flag values for the table cell renderer. // Only a single one of these values will be used; they are not ORed together. // These are mostly of interest if you are writing a new output format. const ( - TABLE_ALIGNMENT_LEFT = 1 << iota - TABLE_ALIGNMENT_RIGHT - TABLE_ALIGNMENT_CENTER = (TABLE_ALIGNMENT_LEFT | TABLE_ALIGNMENT_RIGHT) + TableAlignmentLeft CellAlignFlags = 1 << iota + TableAlignmentRight + TableAlignmentCenter = (TableAlignmentLeft | TableAlignmentRight) ) // The size of a tab stop. const ( - TAB_SIZE_DEFAULT = 4 - TAB_SIZE_EIGHT = 8 + TabSizeDefault = 4 + TabSizeDouble = 8 ) // blockTags is a set of tags that are recognized as HTML block tags. // Any of these can be included in markdown text without special escaping. var blockTags = map[string]struct{}{ - "blockquote": {}, - "del": {}, - "div": {}, - "dl": {}, - "fieldset": {}, - "form": {}, - "h1": {}, - "h2": {}, - "h3": {}, - "h4": {}, - "h5": {}, - "h6": {}, - "iframe": {}, - "ins": {}, - "math": {}, - "noscript": {}, - "ol": {}, - "pre": {}, - "p": {}, - "script": {}, - "style": {}, - "table": {}, - "ul": {}, + "blockquote": struct{}{}, + "del": struct{}{}, + "div": struct{}{}, + "dl": struct{}{}, + "fieldset": struct{}{}, + "form": struct{}{}, + "h1": struct{}{}, + "h2": struct{}{}, + "h3": struct{}{}, + "h4": struct{}{}, + "h5": struct{}{}, + "h6": struct{}{}, + "iframe": struct{}{}, + "ins": struct{}{}, + "math": struct{}{}, + "noscript": struct{}{}, + "ol": struct{}{}, + "pre": struct{}{}, + "p": struct{}{}, + "script": struct{}{}, + "style": struct{}{}, + "table": struct{}{}, + "ul": struct{}{}, // HTML5 - "address": {}, - "article": {}, - "aside": {}, - "canvas": {}, - "figcaption": {}, - "figure": {}, - "footer": {}, - "header": {}, - "hgroup": {}, - "main": {}, - "nav": {}, - "output": {}, - "progress": {}, - "section": {}, - "video": {}, + "address": struct{}{}, + "article": struct{}{}, + "aside": struct{}{}, + "canvas": struct{}{}, + "figcaption": struct{}{}, + "figure": struct{}{}, + "footer": struct{}{}, + "header": struct{}{}, + "hgroup": struct{}{}, + "main": struct{}{}, + "nav": struct{}{}, + "output": struct{}{}, + "progress": struct{}{}, + "section": struct{}{}, + "video": struct{}{}, } -// Renderer is the rendering interface. -// This is mostly of interest if you are implementing a new rendering format. +// Renderer is the rendering interface. This is mostly of interest if you are +// implementing a new rendering format. // -// When a byte slice is provided, it contains the (rendered) contents of the -// element. -// -// When a callback is provided instead, it will write the contents of the -// respective element directly to the output buffer and return true on success. -// If the callback returns false, the rendering function should reset the -// output buffer as though it had never been called. -// -// Currently Html and Latex implementations are provided +// Only an HTML implementation is provided in this repository, see the README +// for external implementations. type Renderer interface { - // block-level callbacks - BlockCode(out *bytes.Buffer, text []byte, lang string) - BlockQuote(out *bytes.Buffer, text []byte) - BlockHtml(out *bytes.Buffer, text []byte) - Header(out *bytes.Buffer, text func() bool, level int, id string) - HRule(out *bytes.Buffer) - List(out *bytes.Buffer, text func() bool, flags int) - ListItem(out *bytes.Buffer, text []byte, flags int) - Paragraph(out *bytes.Buffer, text func() bool) - Table(out *bytes.Buffer, header []byte, body []byte, columnData []int) - TableRow(out *bytes.Buffer, text []byte) - TableHeaderCell(out *bytes.Buffer, text []byte, flags int) - TableCell(out *bytes.Buffer, text []byte, flags int) - Footnotes(out *bytes.Buffer, text func() bool) - FootnoteItem(out *bytes.Buffer, name, text []byte, flags int) - TitleBlock(out *bytes.Buffer, text []byte) - - // Span-level callbacks - AutoLink(out *bytes.Buffer, link []byte, kind int) - CodeSpan(out *bytes.Buffer, text []byte) - DoubleEmphasis(out *bytes.Buffer, text []byte) - Emphasis(out *bytes.Buffer, text []byte) - Image(out *bytes.Buffer, link []byte, title []byte, alt []byte) - LineBreak(out *bytes.Buffer) - Link(out *bytes.Buffer, link []byte, title []byte, content []byte) - RawHtmlTag(out *bytes.Buffer, tag []byte) - TripleEmphasis(out *bytes.Buffer, text []byte) - StrikeThrough(out *bytes.Buffer, text []byte) - FootnoteRef(out *bytes.Buffer, ref []byte, id int) - - // Low-level callbacks - Entity(out *bytes.Buffer, entity []byte) - NormalText(out *bytes.Buffer, text []byte) - - // Header and footer - DocumentHeader(out *bytes.Buffer) - DocumentFooter(out *bytes.Buffer) - - GetFlags() int + // RenderNode is the main rendering method. It will be called once for + // every leaf node and twice for every non-leaf node (first with + // entering=true, then with entering=false). The method should write its + // rendition of the node to the supplied writer w. + RenderNode(w io.Writer, node *Node, entering bool) WalkStatus + + // RenderHeader is a method that allows the renderer to produce some + // content preceding the main body of the output document. The header is + // understood in the broad sense here. For example, the default HTML + // renderer will write not only the HTML document preamble, but also the + // table of contents if it was requested. + // + // The method will be passed an entire document tree, in case a particular + // implementation needs to inspect it to produce output. + // + // The output should be written to the supplied writer w. If your + // implementation has no header to write, supply an empty implementation. + RenderHeader(w io.Writer, ast *Node) + + // RenderFooter is a symmetric counterpart of RenderHeader. + RenderFooter(w io.Writer, ast *Node) } // Callback functions for inline parsing. One such function is defined // for each character that triggers a response when parsing inline data. -type inlineParser func(p *parser, out *bytes.Buffer, data []byte, offset int) int - -// Parser holds runtime state used by the parser. -// This is constructed by the Markdown function. -type parser struct { - r Renderer - refOverride ReferenceOverrideFunc - refs map[string]*reference - inlineCallback [256]inlineParser - flags int - nesting int - maxNesting int - insideLink bool +type inlineParser func(p *Markdown, data []byte, offset int) (int, *Node) + +// Markdown is a type that holds extensions and the runtime state used by +// Parse, and the renderer. You can not use it directly, construct it with New. +type Markdown struct { + renderer Renderer + referenceOverride ReferenceOverrideFunc + refs map[string]*reference + inlineCallback [256]inlineParser + extensions Extensions + nesting int + maxNesting int + insideLink bool // Footnotes need to be ordered as well as available to quickly check for // presence. If a ref is also a footnote, it's stored both in refs and here // in notes. Slice is nil if footnotes not enabled. - notes []*reference - notesRecord map[string]struct{} + notes []*reference + + doc *Node + tip *Node // = doc + oldTip *Node + lastMatchedContainer *Node // = doc + allClosed bool } -func (p *parser) getRef(refid string) (ref *reference, found bool) { - if p.refOverride != nil { - r, overridden := p.refOverride(refid) +func (p *Markdown) getRef(refid string) (ref *reference, found bool) { + if p.referenceOverride != nil { + r, overridden := p.referenceOverride(refid) if overridden { if r == nil { return nil, false @@ -232,7 +202,7 @@ func (p *parser) getRef(refid string) (ref *reference, found bool) { return &reference{ link: []byte(r.Link), title: []byte(r.Title), - noteId: 0, + noteID: 0, hasBlock: false, text: []byte(r.Text)}, true } @@ -242,9 +212,34 @@ func (p *parser) getRef(refid string) (ref *reference, found bool) { return ref, found } -func (p *parser) isFootnote(ref *reference) bool { - _, ok := p.notesRecord[string(ref.link)] - return ok +func (p *Markdown) finalize(block *Node) { + above := block.Parent + block.open = false + p.tip = above +} + +func (p *Markdown) addChild(node NodeType, offset uint32) *Node { + return p.addExistingChild(NewNode(node), offset) +} + +func (p *Markdown) addExistingChild(node *Node, offset uint32) *Node { + for !p.tip.canContain(node.Type) { + p.finalize(p.tip) + } + p.tip.AppendChild(node) + p.tip = node + return node +} + +func (p *Markdown) closeUnmatchedBlocks() { + if !p.allClosed { + for p.oldTip != p.lastMatchedContainer { + parent := p.oldTip.Parent + p.finalize(p.oldTip) + p.oldTip = parent + } + p.allClosed = true + } } // @@ -271,102 +266,27 @@ type Reference struct { // See the documentation in Options for more details on use-case. type ReferenceOverrideFunc func(reference string) (ref *Reference, overridden bool) -// Options represents configurable overrides and callbacks (in addition to the -// extension flag set) for configuring a Markdown parse. -type Options struct { - // Extensions is a flag set of bit-wise ORed extension bits. See the - // EXTENSION_* flags defined in this package. - Extensions int - - // ReferenceOverride is an optional function callback that is called every - // time a reference is resolved. - // - // In Markdown, the link reference syntax can be made to resolve a link to - // a reference instead of an inline URL, in one of the following ways: - // - // * [link text][refid] - // * [refid][] - // - // Usually, the refid is defined at the bottom of the Markdown document. If - // this override function is provided, the refid is passed to the override - // function first, before consulting the defined refids at the bottom. If - // the override function indicates an override did not occur, the refids at - // the bottom will be used to fill in the link details. - ReferenceOverride ReferenceOverrideFunc -} - -// MarkdownBasic is a convenience function for simple rendering. -// It processes markdown input with no extensions enabled. -func MarkdownBasic(input []byte) []byte { - // set up the HTML renderer - htmlFlags := HTML_USE_XHTML - renderer := HtmlRenderer(htmlFlags, "", "") - - // set up the parser - return MarkdownOptions(input, renderer, Options{Extensions: 0}) -} - -// Call Markdown with most useful extensions enabled -// MarkdownCommon is a convenience function for simple rendering. -// It processes markdown input with common extensions enabled, including: -// -// * Smartypants processing with smart fractions and LaTeX dashes -// -// * Intra-word emphasis suppression -// -// * Tables -// -// * Fenced code blocks -// -// * Autolinking -// -// * Strikethrough support -// -// * Strict header parsing -// -// * Custom Header IDs -func MarkdownCommon(input []byte) []byte { - // set up the HTML renderer - renderer := HtmlRenderer(commonHtmlFlags, "", "") - return MarkdownOptions(input, renderer, Options{ - Extensions: commonExtensions}) -} - -// Markdown is the main rendering function. -// It parses and renders a block of markdown-encoded text. -// The supplied Renderer is used to format the output, and extensions dictates -// which non-standard extensions are enabled. -// -// To use the supplied Html or LaTeX renderers, see HtmlRenderer and -// LatexRenderer, respectively. -func Markdown(input []byte, renderer Renderer, extensions int) []byte { - return MarkdownOptions(input, renderer, Options{ - Extensions: extensions}) -} - -// MarkdownOptions is just like Markdown but takes additional options through -// the Options struct. -func MarkdownOptions(input []byte, renderer Renderer, opts Options) []byte { - // no point in parsing if we can't render - if renderer == nil { - return nil +// New constructs a Markdown processor. You can use the same With* functions as +// for Run() to customize parser's behavior and the renderer. +func New(opts ...Option) *Markdown { + var p Markdown + for _, opt := range opts { + opt(&p) } - - extensions := opts.Extensions - - // fill in the render structure - p := new(parser) - p.r = renderer - p.flags = extensions - p.refOverride = opts.ReferenceOverride p.refs = make(map[string]*reference) p.maxNesting = 16 p.insideLink = false - + docNode := NewNode(Document) + p.doc = docNode + p.tip = docNode + p.oldTip = docNode + p.lastMatchedContainer = docNode + p.allClosed = true // register inline parsers + p.inlineCallback[' '] = maybeLineBreak p.inlineCallback['*'] = emphasis p.inlineCallback['_'] = emphasis - if extensions&EXTENSION_STRIKETHROUGH != 0 { + if p.extensions&Strikethrough != 0 { p.inlineCallback['~'] = emphasis } p.inlineCallback['`'] = codeSpan @@ -375,116 +295,166 @@ func MarkdownOptions(input []byte, renderer Renderer, opts Options) []byte { p.inlineCallback['<'] = leftAngle p.inlineCallback['\\'] = escape p.inlineCallback['&'] = entity - - if extensions&EXTENSION_AUTOLINK != 0 { - p.inlineCallback[':'] = autoLink - } - - if extensions&EXTENSION_FOOTNOTES != 0 { + p.inlineCallback['!'] = maybeImage + p.inlineCallback['^'] = maybeInlineFootnote + if p.extensions&Autolink != 0 { + p.inlineCallback['h'] = maybeAutoLink + p.inlineCallback['m'] = maybeAutoLink + p.inlineCallback['f'] = maybeAutoLink + p.inlineCallback['H'] = maybeAutoLink + p.inlineCallback['M'] = maybeAutoLink + p.inlineCallback['F'] = maybeAutoLink + } + if p.extensions&Footnotes != 0 { p.notes = make([]*reference, 0) - p.notesRecord = make(map[string]struct{}) } - - first := firstPass(p, input) - second := secondPass(p, first) - return second + return &p } -// first pass: -// - normalize newlines -// - extract references (outside of fenced code blocks) -// - expand tabs (outside of fenced code blocks) -// - copy everything else -func firstPass(p *parser, input []byte) []byte { - var out bytes.Buffer - tabSize := TAB_SIZE_DEFAULT - if p.flags&EXTENSION_TAB_SIZE_EIGHT != 0 { - tabSize = TAB_SIZE_EIGHT - } - beg := 0 - lastFencedCodeBlockEnd := 0 - for beg < len(input) { - // Find end of this line, then process the line. - end := beg - for end < len(input) && input[end] != '\n' && input[end] != '\r' { - end++ - } +// Option customizes the Markdown processor's default behavior. +type Option func(*Markdown) - if p.flags&EXTENSION_FENCED_CODE != 0 { - // track fenced code block boundaries to suppress tab expansion - // and reference extraction inside them: - if beg >= lastFencedCodeBlockEnd { - if i := p.fencedCodeBlock(&out, input[beg:], false); i > 0 { - lastFencedCodeBlockEnd = beg + i - } - } - } - - // add the line body if present - if end > beg { - if end < lastFencedCodeBlockEnd { // Do not expand tabs while inside fenced code blocks. - out.Write(input[beg:end]) - } else if refEnd := isReference(p, input[beg:], tabSize); refEnd > 0 { - beg += refEnd - continue - } else { - expandTabs(&out, input[beg:end], tabSize) - } - } - - if end < len(input) && input[end] == '\r' { - end++ - } - if end < len(input) && input[end] == '\n' { - end++ - } - out.WriteByte('\n') - - beg = end +// WithRenderer allows you to override the default renderer. +func WithRenderer(r Renderer) Option { + return func(p *Markdown) { + p.renderer = r } +} - // empty input? - if out.Len() == 0 { - out.WriteByte('\n') +// WithExtensions allows you to pick some of the many extensions provided by +// Blackfriday. You can bitwise OR them. +func WithExtensions(e Extensions) Option { + return func(p *Markdown) { + p.extensions = e } - - return out.Bytes() } -// second pass: actual rendering -func secondPass(p *parser, input []byte) []byte { - var output bytes.Buffer - - p.r.DocumentHeader(&output) - p.block(&output, input) - - if p.flags&EXTENSION_FOOTNOTES != 0 && len(p.notes) > 0 { - p.r.Footnotes(&output, func() bool { - flags := LIST_ITEM_BEGINNING_OF_LIST - for i := 0; i < len(p.notes); i += 1 { - ref := p.notes[i] - var buf bytes.Buffer - if ref.hasBlock { - flags |= LIST_ITEM_CONTAINS_BLOCK - p.block(&buf, ref.title) - } else { - p.inline(&buf, ref.title) - } - p.r.FootnoteItem(&output, ref.link, buf.Bytes(), flags) - flags &^= LIST_ITEM_BEGINNING_OF_LIST | LIST_ITEM_CONTAINS_BLOCK - } - - return true +// WithNoExtensions turns off all extensions and custom behavior. +func WithNoExtensions() Option { + return func(p *Markdown) { + p.extensions = NoExtensions + p.renderer = NewHTMLRenderer(HTMLRendererParameters{ + Flags: HTMLFlagsNone, }) } +} - p.r.DocumentFooter(&output) - - if p.nesting != 0 { - panic("Nesting level did not end at zero") +// WithRefOverride sets an optional function callback that is called every +// time a reference is resolved. +// +// In Markdown, the link reference syntax can be made to resolve a link to +// a reference instead of an inline URL, in one of the following ways: +// +// * [link text][refid] +// * [refid][] +// +// Usually, the refid is defined at the bottom of the Markdown document. If +// this override function is provided, the refid is passed to the override +// function first, before consulting the defined refids at the bottom. If +// the override function indicates an override did not occur, the refids at +// the bottom will be used to fill in the link details. +func WithRefOverride(o ReferenceOverrideFunc) Option { + return func(p *Markdown) { + p.referenceOverride = o } +} - return output.Bytes() +// Run is the main entry point to Blackfriday. It parses and renders a +// block of markdown-encoded text. +// +// The simplest invocation of Run takes one argument, input: +// output := Run(input) +// This will parse the input with CommonExtensions enabled and render it with +// the default HTMLRenderer (with CommonHTMLFlags). +// +// Variadic arguments opts can customize the default behavior. Since Markdown +// type does not contain exported fields, you can not use it directly. Instead, +// use the With* functions. For example, this will call the most basic +// functionality, with no extensions: +// output := Run(input, WithNoExtensions()) +// +// You can use any number of With* arguments, even contradicting ones. They +// will be applied in order of appearance and the latter will override the +// former: +// output := Run(input, WithNoExtensions(), WithExtensions(exts), +// WithRenderer(yourRenderer)) +func Run(input []byte, opts ...Option) []byte { + r := NewHTMLRenderer(HTMLRendererParameters{ + Flags: CommonHTMLFlags, + }) + optList := []Option{WithRenderer(r), WithExtensions(CommonExtensions)} + optList = append(optList, opts...) + parser := New(optList...) + ast := parser.Parse(input) + var buf bytes.Buffer + parser.renderer.RenderHeader(&buf, ast) + ast.Walk(func(node *Node, entering bool) WalkStatus { + return parser.renderer.RenderNode(&buf, node, entering) + }) + parser.renderer.RenderFooter(&buf, ast) + return buf.Bytes() +} + +// Parse is an entry point to the parsing part of Blackfriday. It takes an +// input markdown document and produces a syntax tree for its contents. This +// tree can then be rendered with a default or custom renderer, or +// analyzed/transformed by the caller to whatever non-standard needs they have. +// The return value is the root node of the syntax tree. +func (p *Markdown) Parse(input []byte) *Node { + p.block(input) + // Walk the tree and finish up some of unfinished blocks + for p.tip != nil { + p.finalize(p.tip) + } + // Walk the tree again and process inline markdown in each block + p.doc.Walk(func(node *Node, entering bool) WalkStatus { + if node.Type == Paragraph || node.Type == Heading || node.Type == TableCell { + p.inline(node, node.content) + node.content = nil + } + return GoToNext + }) + p.parseRefsToAST() + return p.doc +} + +func (p *Markdown) parseRefsToAST() { + if p.extensions&Footnotes == 0 || len(p.notes) == 0 { + return + } + p.tip = p.doc + block := p.addBlock(List, nil) + block.IsFootnotesList = true + block.ListFlags = ListTypeOrdered + flags := ListItemBeginningOfList + // Note: this loop is intentionally explicit, not range-form. This is + // because the body of the loop will append nested footnotes to p.notes and + // we need to process those late additions. Range form would only walk over + // the fixed initial set. + for i := 0; i < len(p.notes); i++ { + ref := p.notes[i] + p.addExistingChild(ref.footnote, 0) + block := ref.footnote + block.ListFlags = flags | ListTypeOrdered + block.RefLink = ref.link + if ref.hasBlock { + flags |= ListItemContainsBlock + p.block(ref.title) + } else { + p.inline(block, ref.title) + } + flags &^= ListItemBeginningOfList | ListItemContainsBlock + } + above := block.Parent + finalizeList(block) + p.tip = above + block.Walk(func(node *Node, entering bool) WalkStatus { + if node.Type == Paragraph || node.Type == Heading { + p.inline(node, node.content) + node.content = nil + } + return GoToNext + }) } // @@ -516,18 +486,56 @@ func secondPass(p *parser, input []byte) []byte { // // are not yet supported. -// References are parsed and stored in this struct. +// reference holds all information necessary for a reference-style links or +// footnotes. +// +// Consider this markdown with reference-style links: +// +// [link][ref] +// +// [ref]: /url/ "tooltip title" +// +// It will be ultimately converted to this HTML: +// +// <p><a href=\"/url/\" title=\"title\">link</a></p> +// +// And a reference structure will be populated as follows: +// +// p.refs["ref"] = &reference{ +// link: "/url/", +// title: "tooltip title", +// } +// +// Alternatively, reference can contain information about a footnote. Consider +// this markdown: +// +// Text needing a footnote.[^a] +// +// [^a]: This is the note +// +// A reference structure will be populated as follows: +// +// p.refs["a"] = &reference{ +// link: "a", +// title: "This is the note", +// noteID: <some positive int>, +// } +// +// TODO: As you can see, it begs for splitting into two dedicated structures +// for refs and for footnotes. type reference struct { link []byte title []byte - noteId int // 0 if not a footnote ref + noteID int // 0 if not a footnote ref hasBlock bool - text []byte + footnote *Node // a link to the Item node within a list of footnotes + + text []byte // only gets populated by refOverride feature with Reference.Text } func (r *reference) String() string { - return fmt.Sprintf("{link: %q, title: %q, text: %q, noteId: %d, hasBlock: %v}", - r.link, r.title, r.text, r.noteId, r.hasBlock) + return fmt.Sprintf("{link: %q, title: %q, text: %q, noteID: %d, hasBlock: %v}", + r.link, r.title, r.text, r.noteID, r.hasBlock) } // Check whether or not data starts with a reference link. @@ -535,7 +543,7 @@ func (r *reference) String() string { // (in the render struct). // Returns the number of bytes to skip to move past it, // or zero if the first line is not a reference. -func isReference(p *parser, data []byte, tabSize int) int { +func isReference(p *Markdown, data []byte, tabSize int) int { // up to 3 optional leading spaces if len(data) < 4 { return 0 @@ -545,18 +553,18 @@ func isReference(p *parser, data []byte, tabSize int) int { i++ } - noteId := 0 + noteID := 0 // id part: anything but a newline between brackets if data[i] != '[' { return 0 } i++ - if p.flags&EXTENSION_FOOTNOTES != 0 { + if p.extensions&Footnotes != 0 { if i < len(data) && data[i] == '^' { // we can set it to anything here because the proper noteIds will // be assigned later during the second pass. It just has to be != 0 - noteId = 1 + noteID = 1 i++ } } @@ -568,7 +576,11 @@ func isReference(p *parser, data []byte, tabSize int) int { return 0 } idEnd := i - + // footnotes can have empty ID, like this: [^], but a reference can not be + // empty like this: []. Break early if it's not a footnote and there's no ID + if noteID == 0 && idOffset == idEnd { + return 0 + } // spacer: colon (space | tab)* newline? (space | tab)* i++ if i >= len(data) || data[i] != ':' { @@ -599,7 +611,7 @@ func isReference(p *parser, data []byte, tabSize int) int { hasBlock bool ) - if p.flags&EXTENSION_FOOTNOTES != 0 && noteId != 0 { + if p.extensions&Footnotes != 0 && noteID != 0 { linkOffset, linkEnd, raw, hasBlock = scanFootnote(p, data, i, tabSize) lineEnd = linkEnd } else { @@ -612,11 +624,11 @@ func isReference(p *parser, data []byte, tabSize int) int { // a valid ref has been found ref := &reference{ - noteId: noteId, + noteID: noteID, hasBlock: hasBlock, } - if noteId > 0 { + if noteID > 0 { // reusing the link field for the id since footnotes don't have links ref.link = data[idOffset:idEnd] // if footnote, it's not really a title, it's the contained text @@ -634,15 +646,12 @@ func isReference(p *parser, data []byte, tabSize int) int { return lineEnd } -func scanLinkRef(p *parser, data []byte, i int) (linkOffset, linkEnd, titleOffset, titleEnd, lineEnd int) { +func scanLinkRef(p *Markdown, data []byte, i int) (linkOffset, linkEnd, titleOffset, titleEnd, lineEnd int) { // link: whitespace-free sequence, optionally between angle brackets if data[i] == '<' { i++ } linkOffset = i - if i == len(data) { - return - } for i < len(data) && data[i] != ' ' && data[i] != '\t' && data[i] != '\n' && data[i] != '\r' { i++ } @@ -705,13 +714,13 @@ func scanLinkRef(p *parser, data []byte, i int) (linkOffset, linkEnd, titleOffse return } -// The first bit of this logic is the same as (*parser).listItem, but the rest +// The first bit of this logic is the same as Parser.listItem, but the rest // is much simpler. This function simply finds the entire block and shifts it // over by one tab if it is indeed a block (just returns the line if it's not). // blockEnd is the end of the section in the input buffer, and contents is the // extracted text that was shifted over one tab. It will need to be rendered at // the end of the document. -func scanFootnote(p *parser, data []byte, i, indentSize int) (blockStart, blockEnd int, contents []byte, hasBlock bool) { +func scanFootnote(p *Markdown, data []byte, i, indentSize int) (blockStart, blockEnd int, contents []byte, hasBlock bool) { if i == 0 || len(data) == 0 { return } |