diff options
Diffstat (limited to 'vendor')
-rw-r--r-- | vendor/github.com/russross/blackfriday/LICENSE.txt | 29 | ||||
-rw-r--r-- | vendor/github.com/russross/blackfriday/block.go | 1430 | ||||
-rw-r--r-- | vendor/github.com/russross/blackfriday/html.go | 949 | ||||
-rw-r--r-- | vendor/github.com/russross/blackfriday/inline.go | 1148 | ||||
-rw-r--r-- | vendor/github.com/russross/blackfriday/latex.go | 332 | ||||
-rw-r--r-- | vendor/github.com/russross/blackfriday/markdown.go | 926 | ||||
-rw-r--r-- | vendor/github.com/russross/blackfriday/smartypants.go | 400 | ||||
-rw-r--r-- | vendor/github.com/shurcooL/sanitized_anchor_name/LICENSE | 19 | ||||
-rw-r--r-- | vendor/github.com/shurcooL/sanitized_anchor_name/main.go | 29 |
9 files changed, 5262 insertions, 0 deletions
diff --git a/vendor/github.com/russross/blackfriday/LICENSE.txt b/vendor/github.com/russross/blackfriday/LICENSE.txt new file mode 100644 index 00000000..2885af36 --- /dev/null +++ b/vendor/github.com/russross/blackfriday/LICENSE.txt @@ -0,0 +1,29 @@ +Blackfriday is distributed under the Simplified BSD License: + +> Copyright © 2011 Russ Ross +> All rights reserved. +> +> Redistribution and use in source and binary forms, with or without +> modification, are permitted provided that the following conditions +> are met: +> +> 1. Redistributions of source code must retain the above copyright +> notice, this list of conditions and the following disclaimer. +> +> 2. Redistributions in binary form must reproduce the above +> copyright notice, this list of conditions and the following +> disclaimer in the documentation and/or other materials provided with +> the distribution. +> +> THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +> "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +> LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +> FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +> COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +> INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +> BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +> LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +> CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +> LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +> ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +> POSSIBILITY OF SUCH DAMAGE. 
diff --git a/vendor/github.com/russross/blackfriday/block.go b/vendor/github.com/russross/blackfriday/block.go new file mode 100644 index 00000000..9cf451f0 --- /dev/null +++ b/vendor/github.com/russross/blackfriday/block.go @@ -0,0 +1,1430 @@ +// +// Blackfriday Markdown Processor +// Available at http://github.com/russross/blackfriday +// +// Copyright © 2011 Russ Ross <russ@russross.com>. +// Distributed under the Simplified BSD License. +// See README.md for details. +// + +// +// Functions to parse block-level elements. +// + +package blackfriday + +import ( + "bytes" + + "github.com/shurcooL/sanitized_anchor_name" +) + +// Parse block-level data. +// Note: this function and many that it calls assume that +// the input buffer ends with a newline. +func (p *parser) block(out *bytes.Buffer, data []byte) { + if len(data) == 0 || data[len(data)-1] != '\n' { + panic("block input is missing terminating newline") + } + + // this is called recursively: enforce a maximum depth + if p.nesting >= p.maxNesting { + return + } + p.nesting++ + + // parse out one block-level construct at a time + for len(data) > 0 { + // prefixed header: + // + // # Header 1 + // ## Header 2 + // ... + // ###### Header 6 + if p.isPrefixHeader(data) { + data = data[p.prefixHeader(out, data):] + continue + } + + // block of preformatted HTML: + // + // <div> + // ... + // </div> + if data[0] == '<' { + if i := p.html(out, data, true); i > 0 { + data = data[i:] + continue + } + } + + // title block + // + // % stuff + // % more stuff + // % even more stuff + if p.flags&EXTENSION_TITLEBLOCK != 0 { + if data[0] == '%' { + if i := p.titleBlock(out, data, true); i > 0 { + data = data[i:] + continue + } + } + } + + // blank lines. 
note: returns the # of bytes to skip + if i := p.isEmpty(data); i > 0 { + data = data[i:] + continue + } + + // indented code block: + // + // func max(a, b int) int { + // if a > b { + // return a + // } + // return b + // } + if p.codePrefix(data) > 0 { + data = data[p.code(out, data):] + continue + } + + // fenced code block: + // + // ``` go + // func fact(n int) int { + // if n <= 1 { + // return n + // } + // return n * fact(n-1) + // } + // ``` + if p.flags&EXTENSION_FENCED_CODE != 0 { + if i := p.fencedCodeBlock(out, data, true); i > 0 { + data = data[i:] + continue + } + } + + // horizontal rule: + // + // ------ + // or + // ****** + // or + // ______ + if p.isHRule(data) { + p.r.HRule(out) + var i int + for i = 0; data[i] != '\n'; i++ { + } + data = data[i:] + continue + } + + // block quote: + // + // > A big quote I found somewhere + // > on the web + if p.quotePrefix(data) > 0 { + data = data[p.quote(out, data):] + continue + } + + // table: + // + // Name | Age | Phone + // ------|-----|--------- + // Bob | 31 | 555-1234 + // Alice | 27 | 555-4321 + if p.flags&EXTENSION_TABLES != 0 { + if i := p.table(out, data); i > 0 { + data = data[i:] + continue + } + } + + // an itemized/unordered list: + // + // * Item 1 + // * Item 2 + // + // also works with + or - + if p.uliPrefix(data) > 0 { + data = data[p.list(out, data, 0):] + continue + } + + // a numbered/ordered list: + // + // 1. Item 1 + // 2. 
Item 2 + if p.oliPrefix(data) > 0 { + data = data[p.list(out, data, LIST_TYPE_ORDERED):] + continue + } + + // definition lists: + // + // Term 1 + // : Definition a + // : Definition b + // + // Term 2 + // : Definition c + if p.flags&EXTENSION_DEFINITION_LISTS != 0 { + if p.dliPrefix(data) > 0 { + data = data[p.list(out, data, LIST_TYPE_DEFINITION):] + continue + } + } + + // anything else must look like a normal paragraph + // note: this finds underlined headers, too + data = data[p.paragraph(out, data):] + } + + p.nesting-- +} + +func (p *parser) isPrefixHeader(data []byte) bool { + if data[0] != '#' { + return false + } + + if p.flags&EXTENSION_SPACE_HEADERS != 0 { + level := 0 + for level < 6 && data[level] == '#' { + level++ + } + if data[level] != ' ' { + return false + } + } + return true +} + +func (p *parser) prefixHeader(out *bytes.Buffer, data []byte) int { + level := 0 + for level < 6 && data[level] == '#' { + level++ + } + i := skipChar(data, level, ' ') + end := skipUntilChar(data, i, '\n') + skip := end + id := "" + if p.flags&EXTENSION_HEADER_IDS != 0 { + j, k := 0, 0 + // find start/end of header id + for j = i; j < end-1 && (data[j] != '{' || data[j+1] != '#'); j++ { + } + for k = j + 1; k < end && data[k] != '}'; k++ { + } + // extract header id iff found + if j < end && k < end { + id = string(data[j+2 : k]) + end = j + skip = k + 1 + for end > 0 && data[end-1] == ' ' { + end-- + } + } + } + for end > 0 && data[end-1] == '#' { + if isBackslashEscaped(data, end-1) { + break + } + end-- + } + for end > 0 && data[end-1] == ' ' { + end-- + } + if end > i { + if id == "" && p.flags&EXTENSION_AUTO_HEADER_IDS != 0 { + id = sanitized_anchor_name.Create(string(data[i:end])) + } + work := func() bool { + p.inline(out, data[i:end]) + return true + } + p.r.Header(out, work, level, id) + } + return skip +} + +func (p *parser) isUnderlinedHeader(data []byte) int { + // test of level 1 header + if data[0] == '=' { + i := skipChar(data, 1, '=') + i = 
skipChar(data, i, ' ') + if data[i] == '\n' { + return 1 + } else { + return 0 + } + } + + // test of level 2 header + if data[0] == '-' { + i := skipChar(data, 1, '-') + i = skipChar(data, i, ' ') + if data[i] == '\n' { + return 2 + } else { + return 0 + } + } + + return 0 +} + +func (p *parser) titleBlock(out *bytes.Buffer, data []byte, doRender bool) int { + if data[0] != '%' { + return 0 + } + splitData := bytes.Split(data, []byte("\n")) + var i int + for idx, b := range splitData { + if !bytes.HasPrefix(b, []byte("%")) { + i = idx // - 1 + break + } + } + + data = bytes.Join(splitData[0:i], []byte("\n")) + p.r.TitleBlock(out, data) + + return len(data) +} + +func (p *parser) html(out *bytes.Buffer, data []byte, doRender bool) int { + var i, j int + + // identify the opening tag + if data[0] != '<' { + return 0 + } + curtag, tagfound := p.htmlFindTag(data[1:]) + + // handle special cases + if !tagfound { + // check for an HTML comment + if size := p.htmlComment(out, data, doRender); size > 0 { + return size + } + + // check for an <hr> tag + if size := p.htmlHr(out, data, doRender); size > 0 { + return size + } + + // check for HTML CDATA + if size := p.htmlCDATA(out, data, doRender); size > 0 { + return size + } + + // no special case recognized + return 0 + } + + // look for an unindented matching closing tag + // followed by a blank line + found := false + /* + closetag := []byte("\n</" + curtag + ">") + j = len(curtag) + 1 + for !found { + // scan for a closing tag at the beginning of a line + if skip := bytes.Index(data[j:], closetag); skip >= 0 { + j += skip + len(closetag) + } else { + break + } + + // see if it is the only thing on the line + if skip := p.isEmpty(data[j:]); skip > 0 { + // see if it is followed by a blank line/eof + j += skip + if j >= len(data) { + found = true + i = j + } else { + if skip := p.isEmpty(data[j:]); skip > 0 { + j += skip + found = true + i = j + } + } + } + } + */ + + // if not found, try a second pass looking for 
indented match + // but not if tag is "ins" or "del" (following original Markdown.pl) + if !found && curtag != "ins" && curtag != "del" { + i = 1 + for i < len(data) { + i++ + for i < len(data) && !(data[i-1] == '<' && data[i] == '/') { + i++ + } + + if i+2+len(curtag) >= len(data) { + break + } + + j = p.htmlFindEnd(curtag, data[i-1:]) + + if j > 0 { + i += j - 1 + found = true + break + } + } + } + + if !found { + return 0 + } + + // the end of the block has been found + if doRender { + // trim newlines + end := i + for end > 0 && data[end-1] == '\n' { + end-- + } + p.r.BlockHtml(out, data[:end]) + } + + return i +} + +func (p *parser) renderHTMLBlock(out *bytes.Buffer, data []byte, start int, doRender bool) int { + // html block needs to end with a blank line + if i := p.isEmpty(data[start:]); i > 0 { + size := start + i + if doRender { + // trim trailing newlines + end := size + for end > 0 && data[end-1] == '\n' { + end-- + } + p.r.BlockHtml(out, data[:end]) + } + return size + } + return 0 +} + +// HTML comment, lax form +func (p *parser) htmlComment(out *bytes.Buffer, data []byte, doRender bool) int { + i := p.inlineHTMLComment(out, data) + return p.renderHTMLBlock(out, data, i, doRender) +} + +// HTML CDATA section +func (p *parser) htmlCDATA(out *bytes.Buffer, data []byte, doRender bool) int { + const cdataTag = "<![cdata[" + const cdataTagLen = len(cdataTag) + if len(data) < cdataTagLen+1 { + return 0 + } + if !bytes.Equal(bytes.ToLower(data[:cdataTagLen]), []byte(cdataTag)) { + return 0 + } + i := cdataTagLen + // scan for an end-of-comment marker, across lines if necessary + for i < len(data) && !(data[i-2] == ']' && data[i-1] == ']' && data[i] == '>') { + i++ + } + i++ + // no end-of-comment marker + if i >= len(data) { + return 0 + } + return p.renderHTMLBlock(out, data, i, doRender) +} + +// HR, which is the only self-closing block tag considered +func (p *parser) htmlHr(out *bytes.Buffer, data []byte, doRender bool) int { + if data[0] != '<' || 
(data[1] != 'h' && data[1] != 'H') || (data[2] != 'r' && data[2] != 'R') { + return 0 + } + if data[3] != ' ' && data[3] != '/' && data[3] != '>' { + // not an <hr> tag after all; at least not a valid one + return 0 + } + + i := 3 + for data[i] != '>' && data[i] != '\n' { + i++ + } + + if data[i] == '>' { + return p.renderHTMLBlock(out, data, i+1, doRender) + } + + return 0 +} + +func (p *parser) htmlFindTag(data []byte) (string, bool) { + i := 0 + for isalnum(data[i]) { + i++ + } + key := string(data[:i]) + if _, ok := blockTags[key]; ok { + return key, true + } + return "", false +} + +func (p *parser) htmlFindEnd(tag string, data []byte) int { + // assume data[0] == '<' && data[1] == '/' already tested + + // check if tag is a match + closetag := []byte("</" + tag + ">") + if !bytes.HasPrefix(data, closetag) { + return 0 + } + i := len(closetag) + + // check that the rest of the line is blank + skip := 0 + if skip = p.isEmpty(data[i:]); skip == 0 { + return 0 + } + i += skip + skip = 0 + + if i >= len(data) { + return i + } + + if p.flags&EXTENSION_LAX_HTML_BLOCKS != 0 { + return i + } + if skip = p.isEmpty(data[i:]); skip == 0 { + // following line must be blank + return 0 + } + + return i + skip +} + +func (*parser) isEmpty(data []byte) int { + // it is okay to call isEmpty on an empty buffer + if len(data) == 0 { + return 0 + } + + var i int + for i = 0; i < len(data) && data[i] != '\n'; i++ { + if data[i] != ' ' && data[i] != '\t' { + return 0 + } + } + return i + 1 +} + +func (*parser) isHRule(data []byte) bool { + i := 0 + + // skip up to three spaces + for i < 3 && data[i] == ' ' { + i++ + } + + // look at the hrule char + if data[i] != '*' && data[i] != '-' && data[i] != '_' { + return false + } + c := data[i] + + // the whole line must be the char or whitespace + n := 0 + for data[i] != '\n' { + switch { + case data[i] == c: + n++ + case data[i] != ' ': + return false + } + i++ + } + + return n >= 3 +} + +// isFenceLine checks if there's a fence line 
(e.g., ``` or ``` go) at the beginning of data, +// and returns the end index if so, or 0 otherwise. It also returns the marker found. +// If syntax is not nil, it gets set to the syntax specified in the fence line. +// A final newline is mandatory to recognize the fence line, unless newlineOptional is true. +func isFenceLine(data []byte, syntax *string, oldmarker string, newlineOptional bool) (end int, marker string) { + i, size := 0, 0 + + // skip up to three spaces + for i < len(data) && i < 3 && data[i] == ' ' { + i++ + } + + // check for the marker characters: ~ or ` + if i >= len(data) { + return 0, "" + } + if data[i] != '~' && data[i] != '`' { + return 0, "" + } + + c := data[i] + + // the whole line must be the same char or whitespace + for i < len(data) && data[i] == c { + size++ + i++ + } + + // the marker char must occur at least 3 times + if size < 3 { + return 0, "" + } + marker = string(data[i-size : i]) + + // if this is the end marker, it must match the beginning marker + if oldmarker != "" && marker != oldmarker { + return 0, "" + } + + // TODO(shurcooL): It's probably a good idea to simplify the 2 code paths here + // into one, always get the syntax, and discard it if the caller doesn't care. 
+ if syntax != nil { + syn := 0 + i = skipChar(data, i, ' ') + + if i >= len(data) { + if newlineOptional && i == len(data) { + return i, marker + } + return 0, "" + } + + syntaxStart := i + + if data[i] == '{' { + i++ + syntaxStart++ + + for i < len(data) && data[i] != '}' && data[i] != '\n' { + syn++ + i++ + } + + if i >= len(data) || data[i] != '}' { + return 0, "" + } + + // strip all whitespace at the beginning and the end + // of the {} block + for syn > 0 && isspace(data[syntaxStart]) { + syntaxStart++ + syn-- + } + + for syn > 0 && isspace(data[syntaxStart+syn-1]) { + syn-- + } + + i++ + } else { + for i < len(data) && !isspace(data[i]) { + syn++ + i++ + } + } + + *syntax = string(data[syntaxStart : syntaxStart+syn]) + } + + i = skipChar(data, i, ' ') + if i >= len(data) || data[i] != '\n' { + if newlineOptional && i == len(data) { + return i, marker + } + return 0, "" + } + + return i + 1, marker // Take newline into account. +} + +// fencedCodeBlock returns the end index if data contains a fenced code block at the beginning, +// or 0 otherwise. It writes to out if doRender is true, otherwise it has no side effects. +// If doRender is true, a final newline is mandatory to recognize the fenced code block. +func (p *parser) fencedCodeBlock(out *bytes.Buffer, data []byte, doRender bool) int { + var syntax string + beg, marker := isFenceLine(data, &syntax, "", false) + if beg == 0 || beg >= len(data) { + return 0 + } + + var work bytes.Buffer + + for { + // safe to assume beg < len(data) + + // check for the end of the code block + newlineOptional := !doRender + fenceEnd, _ := isFenceLine(data[beg:], nil, marker, newlineOptional) + if fenceEnd != 0 { + beg += fenceEnd + break + } + + // copy the current line + end := skipUntilChar(data, beg, '\n') + 1 + + // did we reach the end of the buffer without a closing marker? 
+ if end >= len(data) { + return 0 + } + + // verbatim copy to the working buffer + if doRender { + work.Write(data[beg:end]) + } + beg = end + } + + if doRender { + p.r.BlockCode(out, work.Bytes(), syntax) + } + + return beg +} + +func (p *parser) table(out *bytes.Buffer, data []byte) int { + var header bytes.Buffer + i, columns := p.tableHeader(&header, data) + if i == 0 { + return 0 + } + + var body bytes.Buffer + + for i < len(data) { + pipes, rowStart := 0, i + for ; data[i] != '\n'; i++ { + if data[i] == '|' { + pipes++ + } + } + + if pipes == 0 { + i = rowStart + break + } + + // include the newline in data sent to tableRow + i++ + p.tableRow(&body, data[rowStart:i], columns, false) + } + + p.r.Table(out, header.Bytes(), body.Bytes(), columns) + + return i +} + +// check if the specified position is preceded by an odd number of backslashes +func isBackslashEscaped(data []byte, i int) bool { + backslashes := 0 + for i-backslashes-1 >= 0 && data[i-backslashes-1] == '\\' { + backslashes++ + } + return backslashes&1 == 1 +} + +func (p *parser) tableHeader(out *bytes.Buffer, data []byte) (size int, columns []int) { + i := 0 + colCount := 1 + for i = 0; data[i] != '\n'; i++ { + if data[i] == '|' && !isBackslashEscaped(data, i) { + colCount++ + } + } + + // doesn't look like a table header + if colCount == 1 { + return + } + + // include the newline in the data sent to tableRow + header := data[:i+1] + + // column count ignores pipes at beginning or end of line + if data[0] == '|' { + colCount-- + } + if i > 2 && data[i-1] == '|' && !isBackslashEscaped(data, i-1) { + colCount-- + } + + columns = make([]int, colCount) + + // move on to the header underline + i++ + if i >= len(data) { + return + } + + if data[i] == '|' && !isBackslashEscaped(data, i) { + i++ + } + i = skipChar(data, i, ' ') + + // each column header is of form: / *:?-+:? 
*|/ with # dashes + # colons >= 3 + // and trailing | optional on last column + col := 0 + for data[i] != '\n' { + dashes := 0 + + if data[i] == ':' { + i++ + columns[col] |= TABLE_ALIGNMENT_LEFT + dashes++ + } + for data[i] == '-' { + i++ + dashes++ + } + if data[i] == ':' { + i++ + columns[col] |= TABLE_ALIGNMENT_RIGHT + dashes++ + } + for data[i] == ' ' { + i++ + } + + // end of column test is messy + switch { + case dashes < 3: + // not a valid column + return + + case data[i] == '|' && !isBackslashEscaped(data, i): + // marker found, now skip past trailing whitespace + col++ + i++ + for data[i] == ' ' { + i++ + } + + // trailing junk found after last column + if col >= colCount && data[i] != '\n' { + return + } + + case (data[i] != '|' || isBackslashEscaped(data, i)) && col+1 < colCount: + // something else found where marker was required + return + + case data[i] == '\n': + // marker is optional for the last column + col++ + + default: + // trailing junk found after last column + return + } + } + if col != colCount { + return + } + + p.tableRow(out, header, columns, true) + size = i + 1 + return +} + +func (p *parser) tableRow(out *bytes.Buffer, data []byte, columns []int, header bool) { + i, col := 0, 0 + var rowWork bytes.Buffer + + if data[i] == '|' && !isBackslashEscaped(data, i) { + i++ + } + + for col = 0; col < len(columns) && i < len(data); col++ { + for data[i] == ' ' { + i++ + } + + cellStart := i + + for (data[i] != '|' || isBackslashEscaped(data, i)) && data[i] != '\n' { + i++ + } + + cellEnd := i + + // skip the end-of-cell marker, possibly taking us past end of buffer + i++ + + for cellEnd > cellStart && data[cellEnd-1] == ' ' { + cellEnd-- + } + + var cellWork bytes.Buffer + p.inline(&cellWork, data[cellStart:cellEnd]) + + if header { + p.r.TableHeaderCell(&rowWork, cellWork.Bytes(), columns[col]) + } else { + p.r.TableCell(&rowWork, cellWork.Bytes(), columns[col]) + } + } + + // pad it out with empty columns to get the right number + for ; col 
< len(columns); col++ { + if header { + p.r.TableHeaderCell(&rowWork, nil, columns[col]) + } else { + p.r.TableCell(&rowWork, nil, columns[col]) + } + } + + // silently ignore rows with too many cells + + p.r.TableRow(out, rowWork.Bytes()) +} + +// returns blockquote prefix length +func (p *parser) quotePrefix(data []byte) int { + i := 0 + for i < 3 && data[i] == ' ' { + i++ + } + if data[i] == '>' { + if data[i+1] == ' ' { + return i + 2 + } + return i + 1 + } + return 0 +} + +// blockquote ends with at least one blank line +// followed by something without a blockquote prefix +func (p *parser) terminateBlockquote(data []byte, beg, end int) bool { + if p.isEmpty(data[beg:]) <= 0 { + return false + } + if end >= len(data) { + return true + } + return p.quotePrefix(data[end:]) == 0 && p.isEmpty(data[end:]) == 0 +} + +// parse a blockquote fragment +func (p *parser) quote(out *bytes.Buffer, data []byte) int { + var raw bytes.Buffer + beg, end := 0, 0 + for beg < len(data) { + end = beg + // Step over whole lines, collecting them. 
While doing that, check for + // fenced code and if one's found, incorporate it altogether, + // irregardless of any contents inside it + for data[end] != '\n' { + if p.flags&EXTENSION_FENCED_CODE != 0 { + if i := p.fencedCodeBlock(out, data[end:], false); i > 0 { + // -1 to compensate for the extra end++ after the loop: + end += i - 1 + break + } + } + end++ + } + end++ + + if pre := p.quotePrefix(data[beg:]); pre > 0 { + // skip the prefix + beg += pre + } else if p.terminateBlockquote(data, beg, end) { + break + } + + // this line is part of the blockquote + raw.Write(data[beg:end]) + beg = end + } + + var cooked bytes.Buffer + p.block(&cooked, raw.Bytes()) + p.r.BlockQuote(out, cooked.Bytes()) + return end +} + +// returns prefix length for block code +func (p *parser) codePrefix(data []byte) int { + if data[0] == ' ' && data[1] == ' ' && data[2] == ' ' && data[3] == ' ' { + return 4 + } + return 0 +} + +func (p *parser) code(out *bytes.Buffer, data []byte) int { + var work bytes.Buffer + + i := 0 + for i < len(data) { + beg := i + for data[i] != '\n' { + i++ + } + i++ + + blankline := p.isEmpty(data[beg:i]) > 0 + if pre := p.codePrefix(data[beg:i]); pre > 0 { + beg += pre + } else if !blankline { + // non-empty, non-prefixed line breaks the pre + i = beg + break + } + + // verbatim copy to the working buffeu + if blankline { + work.WriteByte('\n') + } else { + work.Write(data[beg:i]) + } + } + + // trim all the \n off the end of work + workbytes := work.Bytes() + eol := len(workbytes) + for eol > 0 && workbytes[eol-1] == '\n' { + eol-- + } + if eol != len(workbytes) { + work.Truncate(eol) + } + + work.WriteByte('\n') + + p.r.BlockCode(out, work.Bytes(), "") + + return i +} + +// returns unordered list item prefix +func (p *parser) uliPrefix(data []byte) int { + i := 0 + + // start with up to 3 spaces + for i < 3 && data[i] == ' ' { + i++ + } + + // need a *, +, or - followed by a space + if (data[i] != '*' && data[i] != '+' && data[i] != '-') || + data[i+1] != 
' ' { + return 0 + } + return i + 2 +} + +// returns ordered list item prefix +func (p *parser) oliPrefix(data []byte) int { + i := 0 + + // start with up to 3 spaces + for i < 3 && data[i] == ' ' { + i++ + } + + // count the digits + start := i + for data[i] >= '0' && data[i] <= '9' { + i++ + } + + // we need >= 1 digits followed by a dot and a space + if start == i || data[i] != '.' || data[i+1] != ' ' { + return 0 + } + return i + 2 +} + +// returns definition list item prefix +func (p *parser) dliPrefix(data []byte) int { + i := 0 + + // need a : followed by a spaces + if data[i] != ':' || data[i+1] != ' ' { + return 0 + } + for data[i] == ' ' { + i++ + } + return i + 2 +} + +// parse ordered or unordered list block +func (p *parser) list(out *bytes.Buffer, data []byte, flags int) int { + i := 0 + flags |= LIST_ITEM_BEGINNING_OF_LIST + work := func() bool { + for i < len(data) { + skip := p.listItem(out, data[i:], &flags) + i += skip + + if skip == 0 || flags&LIST_ITEM_END_OF_LIST != 0 { + break + } + flags &= ^LIST_ITEM_BEGINNING_OF_LIST + } + return true + } + + p.r.List(out, work, flags) + return i +} + +// Parse a single list item. +// Assumes initial prefix is already removed if this is a sublist. 
+func (p *parser) listItem(out *bytes.Buffer, data []byte, flags *int) int { + // keep track of the indentation of the first line + itemIndent := 0 + for itemIndent < 3 && data[itemIndent] == ' ' { + itemIndent++ + } + + i := p.uliPrefix(data) + if i == 0 { + i = p.oliPrefix(data) + } + if i == 0 { + i = p.dliPrefix(data) + // reset definition term flag + if i > 0 { + *flags &= ^LIST_TYPE_TERM + } + } + if i == 0 { + // if in defnition list, set term flag and continue + if *flags&LIST_TYPE_DEFINITION != 0 { + *flags |= LIST_TYPE_TERM + } else { + return 0 + } + } + + // skip leading whitespace on first line + for data[i] == ' ' { + i++ + } + + // find the end of the line + line := i + for i > 0 && data[i-1] != '\n' { + i++ + } + + // get working buffer + var raw bytes.Buffer + + // put the first line into the working buffer + raw.Write(data[line:i]) + line = i + + // process the following lines + containsBlankLine := false + sublist := 0 + +gatherlines: + for line < len(data) { + i++ + + // find the end of this line + for data[i-1] != '\n' { + i++ + } + + // if it is an empty line, guess that it is part of this item + // and move on to the next line + if p.isEmpty(data[line:i]) > 0 { + containsBlankLine = true + raw.Write(data[line:i]) + line = i + continue + } + + // calculate the indentation + indent := 0 + for indent < 4 && line+indent < i && data[line+indent] == ' ' { + indent++ + } + + chunk := data[line+indent : i] + + // evaluate how this line fits in + switch { + // is this a nested list item? 
+ case (p.uliPrefix(chunk) > 0 && !p.isHRule(chunk)) || + p.oliPrefix(chunk) > 0 || + p.dliPrefix(chunk) > 0: + + if containsBlankLine { + // end the list if the type changed after a blank line + if indent <= itemIndent && + ((*flags&LIST_TYPE_ORDERED != 0 && p.uliPrefix(chunk) > 0) || + (*flags&LIST_TYPE_ORDERED == 0 && p.oliPrefix(chunk) > 0)) { + + *flags |= LIST_ITEM_END_OF_LIST + break gatherlines + } + *flags |= LIST_ITEM_CONTAINS_BLOCK + } + + // to be a nested list, it must be indented more + // if not, it is the next item in the same list + if indent <= itemIndent { + break gatherlines + } + + // is this the first item in the nested list? + if sublist == 0 { + sublist = raw.Len() + } + + // is this a nested prefix header? + case p.isPrefixHeader(chunk): + // if the header is not indented, it is not nested in the list + // and thus ends the list + if containsBlankLine && indent < 4 { + *flags |= LIST_ITEM_END_OF_LIST + break gatherlines + } + *flags |= LIST_ITEM_CONTAINS_BLOCK + + // anything following an empty line is only part + // of this item if it is indented 4 spaces + // (regardless of the indentation of the beginning of the item) + case containsBlankLine && indent < 4: + if *flags&LIST_TYPE_DEFINITION != 0 && i < len(data)-1 { + // is the next item still a part of this list? + next := i + for data[next] != '\n' { + next++ + } + for next < len(data)-1 && data[next] == '\n' { + next++ + } + if i < len(data)-1 && data[i] != ':' && data[next] != ':' { + *flags |= LIST_ITEM_END_OF_LIST + } + } else { + *flags |= LIST_ITEM_END_OF_LIST + } + break gatherlines + + // a blank line means this should be parsed as a block + case containsBlankLine: + *flags |= LIST_ITEM_CONTAINS_BLOCK + } + + containsBlankLine = false + + // add the line into the working buffer without prefix + raw.Write(data[line+indent : i]) + + line = i + } + + // If reached end of data, the Renderer.ListItem call we're going to make below + // is definitely the last in the list. 
+ if line >= len(data) { + *flags |= LIST_ITEM_END_OF_LIST + } + + rawBytes := raw.Bytes() + + // render the contents of the list item + var cooked bytes.Buffer + if *flags&LIST_ITEM_CONTAINS_BLOCK != 0 && *flags&LIST_TYPE_TERM == 0 { + // intermediate render of block item, except for definition term + if sublist > 0 { + p.block(&cooked, rawBytes[:sublist]) + p.block(&cooked, rawBytes[sublist:]) + } else { + p.block(&cooked, rawBytes) + } + } else { + // intermediate render of inline item + if sublist > 0 { + p.inline(&cooked, rawBytes[:sublist]) + p.block(&cooked, rawBytes[sublist:]) + } else { + p.inline(&cooked, rawBytes) + } + } + + // render the actual list item + cookedBytes := cooked.Bytes() + parsedEnd := len(cookedBytes) + + // strip trailing newlines + for parsedEnd > 0 && cookedBytes[parsedEnd-1] == '\n' { + parsedEnd-- + } + p.r.ListItem(out, cookedBytes[:parsedEnd], *flags) + + return line +} + +// render a single paragraph that has already been parsed out +func (p *parser) renderParagraph(out *bytes.Buffer, data []byte) { + if len(data) == 0 { + return + } + + // trim leading spaces + beg := 0 + for data[beg] == ' ' { + beg++ + } + + // trim trailing newline + end := len(data) - 1 + + // trim trailing spaces + for end > beg && data[end-1] == ' ' { + end-- + } + + work := func() bool { + p.inline(out, data[beg:end]) + return true + } + p.r.Paragraph(out, work) +} + +func (p *parser) paragraph(out *bytes.Buffer, data []byte) int { + // prev: index of 1st char of previous line + // line: index of 1st char of current line + // i: index of cursor/end of current line + var prev, line, i int + + // keep going until we find something to mark the end of the paragraph + for i < len(data) { + // mark the beginning of the current line + prev = line + current := data[i:] + line = i + + // did we find a blank line marking the end of the paragraph? + if n := p.isEmpty(current); n > 0 { + // did this blank line followed by a definition list item? 
+ if p.flags&EXTENSION_DEFINITION_LISTS != 0 { + if i < len(data)-1 && data[i+1] == ':' { + return p.list(out, data[prev:], LIST_TYPE_DEFINITION) + } + } + + p.renderParagraph(out, data[:i]) + return i + n + } + + // an underline under some text marks a header, so our paragraph ended on prev line + if i > 0 { + if level := p.isUnderlinedHeader(current); level > 0 { + // render the paragraph + p.renderParagraph(out, data[:prev]) + + // ignore leading and trailing whitespace + eol := i - 1 + for prev < eol && data[prev] == ' ' { + prev++ + } + for eol > prev && data[eol-1] == ' ' { + eol-- + } + + // render the header + // this ugly double closure avoids forcing variables onto the heap + work := func(o *bytes.Buffer, pp *parser, d []byte) func() bool { + return func() bool { + pp.inline(o, d) + return true + } + }(out, p, data[prev:eol]) + + id := "" + if p.flags&EXTENSION_AUTO_HEADER_IDS != 0 { + id = sanitized_anchor_name.Create(string(data[prev:eol])) + } + + p.r.Header(out, work, level, id) + + // find the end of the underline + for data[i] != '\n' { + i++ + } + return i + } + } + + // if the next line starts a block of HTML, then the paragraph ends here + if p.flags&EXTENSION_LAX_HTML_BLOCKS != 0 { + if data[i] == '<' && p.html(out, current, false) > 0 { + // rewind to before the HTML block + p.renderParagraph(out, data[:i]) + return i + } + } + + // if there's a prefixed header or a horizontal rule after this, paragraph is over + if p.isPrefixHeader(current) || p.isHRule(current) { + p.renderParagraph(out, data[:i]) + return i + } + + // if there's a fenced code block, paragraph is over + if p.flags&EXTENSION_FENCED_CODE != 0 { + if p.fencedCodeBlock(out, current, false) > 0 { + p.renderParagraph(out, data[:i]) + return i + } + } + + // if there's a definition list item, prev line is a definition term + if p.flags&EXTENSION_DEFINITION_LISTS != 0 { + if p.dliPrefix(current) != 0 { + return p.list(out, data[prev:], LIST_TYPE_DEFINITION) + } + } + + // if there's 
a list after this, paragraph is over + if p.flags&EXTENSION_NO_EMPTY_LINE_BEFORE_BLOCK != 0 { + if p.uliPrefix(current) != 0 || + p.oliPrefix(current) != 0 || + p.quotePrefix(current) != 0 || + p.codePrefix(current) != 0 { + p.renderParagraph(out, data[:i]) + return i + } + } + + // otherwise, scan to the beginning of the next line + for data[i] != '\n' { + i++ + } + i++ + } + + p.renderParagraph(out, data[:i]) + return i +} diff --git a/vendor/github.com/russross/blackfriday/html.go b/vendor/github.com/russross/blackfriday/html.go new file mode 100644 index 00000000..74e67ee8 --- /dev/null +++ b/vendor/github.com/russross/blackfriday/html.go @@ -0,0 +1,949 @@ +// +// Blackfriday Markdown Processor +// Available at http://github.com/russross/blackfriday +// +// Copyright © 2011 Russ Ross <russ@russross.com>. +// Distributed under the Simplified BSD License. +// See README.md for details. +// + +// +// +// HTML rendering backend +// +// + +package blackfriday + +import ( + "bytes" + "fmt" + "regexp" + "strconv" + "strings" +) + +// Html renderer configuration options. 
+const ( + HTML_SKIP_HTML = 1 << iota // skip preformatted HTML blocks + HTML_SKIP_STYLE // skip embedded <style> elements + HTML_SKIP_IMAGES // skip embedded images + HTML_SKIP_LINKS // skip all links + HTML_SAFELINK // only link to trusted protocols + HTML_NOFOLLOW_LINKS // only link with rel="nofollow" + HTML_NOREFERRER_LINKS // only link with rel="noreferrer" + HTML_HREF_TARGET_BLANK // add a blank target + HTML_TOC // generate a table of contents + HTML_OMIT_CONTENTS // skip the main contents (for a standalone table of contents) + HTML_COMPLETE_PAGE // generate a complete HTML page + HTML_USE_XHTML // generate XHTML output instead of HTML + HTML_USE_SMARTYPANTS // enable smart punctuation substitutions + HTML_SMARTYPANTS_FRACTIONS // enable smart fractions (with HTML_USE_SMARTYPANTS) + HTML_SMARTYPANTS_DASHES // enable smart dashes (with HTML_USE_SMARTYPANTS) + HTML_SMARTYPANTS_LATEX_DASHES // enable LaTeX-style dashes (with HTML_USE_SMARTYPANTS and HTML_SMARTYPANTS_DASHES) + HTML_SMARTYPANTS_ANGLED_QUOTES // enable angled double quotes (with HTML_USE_SMARTYPANTS) for double quotes rendering + HTML_FOOTNOTE_RETURN_LINKS // generate a link at the end of a footnote to return to the source +) + +var ( + alignments = []string{ + "left", + "right", + "center", + } + + // TODO: improve this regexp to catch all possible entities: + htmlEntity = regexp.MustCompile(`&[a-z]{2,5};`) +) + +type HtmlRendererParameters struct { + // Prepend this text to each relative URL. + AbsolutePrefix string + // Add this text to each footnote anchor, to ensure uniqueness. + FootnoteAnchorPrefix string + // Show this text inside the <a> tag for a footnote return link, if the + // HTML_FOOTNOTE_RETURN_LINKS flag is enabled. If blank, the string + // <sup>[return]</sup> is used. + FootnoteReturnLinkContents string + // If set, add this text to the front of each Header ID, to ensure + // uniqueness. 
+ HeaderIDPrefix string + // If set, add this text to the back of each Header ID, to ensure uniqueness. + HeaderIDSuffix string +} + +// Html is a type that implements the Renderer interface for HTML output. +// +// Do not create this directly, instead use the HtmlRenderer function. +type Html struct { + flags int // HTML_* options + closeTag string // how to end singleton tags: either " />" or ">" + title string // document title + css string // optional css file url (used with HTML_COMPLETE_PAGE) + + parameters HtmlRendererParameters + + // table of contents data + tocMarker int + headerCount int + currentLevel int + toc *bytes.Buffer + + // Track header IDs to prevent ID collision in a single generation. + headerIDs map[string]int + + smartypants *smartypantsRenderer +} + +const ( + xhtmlClose = " />" + htmlClose = ">" +) + +// HtmlRenderer creates and configures an Html object, which +// satisfies the Renderer interface. +// +// flags is a set of HTML_* options ORed together. +// title is the title of the document, and css is a URL for the document's +// stylesheet. +// title and css are only used when HTML_COMPLETE_PAGE is selected. 
+func HtmlRenderer(flags int, title string, css string) Renderer { + return HtmlRendererWithParameters(flags, title, css, HtmlRendererParameters{}) +} + +func HtmlRendererWithParameters(flags int, title string, + css string, renderParameters HtmlRendererParameters) Renderer { + // configure the rendering engine + closeTag := htmlClose + if flags&HTML_USE_XHTML != 0 { + closeTag = xhtmlClose + } + + if renderParameters.FootnoteReturnLinkContents == "" { + renderParameters.FootnoteReturnLinkContents = `<sup>[return]</sup>` + } + + return &Html{ + flags: flags, + closeTag: closeTag, + title: title, + css: css, + parameters: renderParameters, + + headerCount: 0, + currentLevel: 0, + toc: new(bytes.Buffer), + + headerIDs: make(map[string]int), + + smartypants: smartypants(flags), + } +} + +// Using if statements is a bit faster than a switch statement. As the compiler +// improves, this should be unnecessary this is only worthwhile because +// attrEscape is the single largest CPU user in normal use. +// Also tried using map, but that gave a ~3x slowdown. 
+func escapeSingleChar(char byte) (string, bool) { + if char == '"' { + return """, true + } + if char == '&' { + return "&", true + } + if char == '<' { + return "<", true + } + if char == '>' { + return ">", true + } + return "", false +} + +func attrEscape(out *bytes.Buffer, src []byte) { + org := 0 + for i, ch := range src { + if entity, ok := escapeSingleChar(ch); ok { + if i > org { + // copy all the normal characters since the last escape + out.Write(src[org:i]) + } + org = i + 1 + out.WriteString(entity) + } + } + if org < len(src) { + out.Write(src[org:]) + } +} + +func entityEscapeWithSkip(out *bytes.Buffer, src []byte, skipRanges [][]int) { + end := 0 + for _, rang := range skipRanges { + attrEscape(out, src[end:rang[0]]) + out.Write(src[rang[0]:rang[1]]) + end = rang[1] + } + attrEscape(out, src[end:]) +} + +func (options *Html) GetFlags() int { + return options.flags +} + +func (options *Html) TitleBlock(out *bytes.Buffer, text []byte) { + text = bytes.TrimPrefix(text, []byte("% ")) + text = bytes.Replace(text, []byte("\n% "), []byte("\n"), -1) + out.WriteString("<h1 class=\"title\">") + out.Write(text) + out.WriteString("\n</h1>") +} + +func (options *Html) Header(out *bytes.Buffer, text func() bool, level int, id string) { + marker := out.Len() + doubleSpace(out) + + if id == "" && options.flags&HTML_TOC != 0 { + id = fmt.Sprintf("toc_%d", options.headerCount) + } + + if id != "" { + id = options.ensureUniqueHeaderID(id) + + if options.parameters.HeaderIDPrefix != "" { + id = options.parameters.HeaderIDPrefix + id + } + + if options.parameters.HeaderIDSuffix != "" { + id = id + options.parameters.HeaderIDSuffix + } + + out.WriteString(fmt.Sprintf("<h%d id=\"%s\">", level, id)) + } else { + out.WriteString(fmt.Sprintf("<h%d>", level)) + } + + tocMarker := out.Len() + if !text() { + out.Truncate(marker) + return + } + + // are we building a table of contents? 
+ if options.flags&HTML_TOC != 0 { + options.TocHeaderWithAnchor(out.Bytes()[tocMarker:], level, id) + } + + out.WriteString(fmt.Sprintf("</h%d>\n", level)) +} + +func (options *Html) BlockHtml(out *bytes.Buffer, text []byte) { + if options.flags&HTML_SKIP_HTML != 0 { + return + } + + doubleSpace(out) + out.Write(text) + out.WriteByte('\n') +} + +func (options *Html) HRule(out *bytes.Buffer) { + doubleSpace(out) + out.WriteString("<hr") + out.WriteString(options.closeTag) + out.WriteByte('\n') +} + +func (options *Html) BlockCode(out *bytes.Buffer, text []byte, lang string) { + doubleSpace(out) + + // parse out the language names/classes + count := 0 + for _, elt := range strings.Fields(lang) { + if elt[0] == '.' { + elt = elt[1:] + } + if len(elt) == 0 { + continue + } + if count == 0 { + out.WriteString("<pre><code class=\"language-") + } else { + out.WriteByte(' ') + } + attrEscape(out, []byte(elt)) + count++ + } + + if count == 0 { + out.WriteString("<pre><code>") + } else { + out.WriteString("\">") + } + + attrEscape(out, text) + out.WriteString("</code></pre>\n") +} + +func (options *Html) BlockQuote(out *bytes.Buffer, text []byte) { + doubleSpace(out) + out.WriteString("<blockquote>\n") + out.Write(text) + out.WriteString("</blockquote>\n") +} + +func (options *Html) Table(out *bytes.Buffer, header []byte, body []byte, columnData []int) { + doubleSpace(out) + out.WriteString("<table>\n<thead>\n") + out.Write(header) + out.WriteString("</thead>\n\n<tbody>\n") + out.Write(body) + out.WriteString("</tbody>\n</table>\n") +} + +func (options *Html) TableRow(out *bytes.Buffer, text []byte) { + doubleSpace(out) + out.WriteString("<tr>\n") + out.Write(text) + out.WriteString("\n</tr>\n") +} + +func (options *Html) TableHeaderCell(out *bytes.Buffer, text []byte, align int) { + doubleSpace(out) + switch align { + case TABLE_ALIGNMENT_LEFT: + out.WriteString("<th align=\"left\">") + case TABLE_ALIGNMENT_RIGHT: + out.WriteString("<th align=\"right\">") + case 
TABLE_ALIGNMENT_CENTER: + out.WriteString("<th align=\"center\">") + default: + out.WriteString("<th>") + } + + out.Write(text) + out.WriteString("</th>") +} + +func (options *Html) TableCell(out *bytes.Buffer, text []byte, align int) { + doubleSpace(out) + switch align { + case TABLE_ALIGNMENT_LEFT: + out.WriteString("<td align=\"left\">") + case TABLE_ALIGNMENT_RIGHT: + out.WriteString("<td align=\"right\">") + case TABLE_ALIGNMENT_CENTER: + out.WriteString("<td align=\"center\">") + default: + out.WriteString("<td>") + } + + out.Write(text) + out.WriteString("</td>") +} + +func (options *Html) Footnotes(out *bytes.Buffer, text func() bool) { + out.WriteString("<div class=\"footnotes\">\n") + options.HRule(out) + options.List(out, text, LIST_TYPE_ORDERED) + out.WriteString("</div>\n") +} + +func (options *Html) FootnoteItem(out *bytes.Buffer, name, text []byte, flags int) { + if flags&LIST_ITEM_CONTAINS_BLOCK != 0 || flags&LIST_ITEM_BEGINNING_OF_LIST != 0 { + doubleSpace(out) + } + slug := slugify(name) + out.WriteString(`<li id="`) + out.WriteString(`fn:`) + out.WriteString(options.parameters.FootnoteAnchorPrefix) + out.Write(slug) + out.WriteString(`">`) + out.Write(text) + if options.flags&HTML_FOOTNOTE_RETURN_LINKS != 0 { + out.WriteString(` <a class="footnote-return" href="#`) + out.WriteString(`fnref:`) + out.WriteString(options.parameters.FootnoteAnchorPrefix) + out.Write(slug) + out.WriteString(`">`) + out.WriteString(options.parameters.FootnoteReturnLinkContents) + out.WriteString(`</a>`) + } + out.WriteString("</li>\n") +} + +func (options *Html) List(out *bytes.Buffer, text func() bool, flags int) { + marker := out.Len() + doubleSpace(out) + + if flags&LIST_TYPE_DEFINITION != 0 { + out.WriteString("<dl>") + } else if flags&LIST_TYPE_ORDERED != 0 { + out.WriteString("<ol>") + } else { + out.WriteString("<ul>") + } + if !text() { + out.Truncate(marker) + return + } + if flags&LIST_TYPE_DEFINITION != 0 { + out.WriteString("</dl>\n") + } else if 
flags&LIST_TYPE_ORDERED != 0 { + out.WriteString("</ol>\n") + } else { + out.WriteString("</ul>\n") + } +} + +func (options *Html) ListItem(out *bytes.Buffer, text []byte, flags int) { + if (flags&LIST_ITEM_CONTAINS_BLOCK != 0 && flags&LIST_TYPE_DEFINITION == 0) || + flags&LIST_ITEM_BEGINNING_OF_LIST != 0 { + doubleSpace(out) + } + if flags&LIST_TYPE_TERM != 0 { + out.WriteString("<dt>") + } else if flags&LIST_TYPE_DEFINITION != 0 { + out.WriteString("<dd>") + } else { + out.WriteString("<li>") + } + out.Write(text) + if flags&LIST_TYPE_TERM != 0 { + out.WriteString("</dt>\n") + } else if flags&LIST_TYPE_DEFINITION != 0 { + out.WriteString("</dd>\n") + } else { + out.WriteString("</li>\n") + } +} + +func (options *Html) Paragraph(out *bytes.Buffer, text func() bool) { + marker := out.Len() + doubleSpace(out) + + out.WriteString("<p>") + if !text() { + out.Truncate(marker) + return + } + out.WriteString("</p>\n") +} + +func (options *Html) AutoLink(out *bytes.Buffer, link []byte, kind int) { + skipRanges := htmlEntity.FindAllIndex(link, -1) + if options.flags&HTML_SAFELINK != 0 && !isSafeLink(link) && kind != LINK_TYPE_EMAIL { + // mark it but don't link it if it is not a safe link: no smartypants + out.WriteString("<tt>") + entityEscapeWithSkip(out, link, skipRanges) + out.WriteString("</tt>") + return + } + + out.WriteString("<a href=\"") + if kind == LINK_TYPE_EMAIL { + out.WriteString("mailto:") + } else { + options.maybeWriteAbsolutePrefix(out, link) + } + + entityEscapeWithSkip(out, link, skipRanges) + + var relAttrs []string + if options.flags&HTML_NOFOLLOW_LINKS != 0 && !isRelativeLink(link) { + relAttrs = append(relAttrs, "nofollow") + } + if options.flags&HTML_NOREFERRER_LINKS != 0 && !isRelativeLink(link) { + relAttrs = append(relAttrs, "noreferrer") + } + if len(relAttrs) > 0 { + out.WriteString(fmt.Sprintf("\" rel=\"%s", strings.Join(relAttrs, " "))) + } + + // blank target only add to external link + if options.flags&HTML_HREF_TARGET_BLANK != 0 && 
!isRelativeLink(link) { + out.WriteString("\" target=\"_blank") + } + + out.WriteString("\">") + + // Pretty print: if we get an email address as + // an actual URI, e.g. `mailto:foo@bar.com`, we don't + // want to print the `mailto:` prefix + switch { + case bytes.HasPrefix(link, []byte("mailto://")): + attrEscape(out, link[len("mailto://"):]) + case bytes.HasPrefix(link, []byte("mailto:")): + attrEscape(out, link[len("mailto:"):]) + default: + entityEscapeWithSkip(out, link, skipRanges) + } + + out.WriteString("</a>") +} + +func (options *Html) CodeSpan(out *bytes.Buffer, text []byte) { + out.WriteString("<code>") + attrEscape(out, text) + out.WriteString("</code>") +} + +func (options *Html) DoubleEmphasis(out *bytes.Buffer, text []byte) { + out.WriteString("<strong>") + out.Write(text) + out.WriteString("</strong>") +} + +func (options *Html) Emphasis(out *bytes.Buffer, text []byte) { + if len(text) == 0 { + return + } + out.WriteString("<em>") + out.Write(text) + out.WriteString("</em>") +} + +func (options *Html) maybeWriteAbsolutePrefix(out *bytes.Buffer, link []byte) { + if options.parameters.AbsolutePrefix != "" && isRelativeLink(link) && link[0] != '.' 
{ + out.WriteString(options.parameters.AbsolutePrefix) + if link[0] != '/' { + out.WriteByte('/') + } + } +} + +func (options *Html) Image(out *bytes.Buffer, link []byte, title []byte, alt []byte) { + if options.flags&HTML_SKIP_IMAGES != 0 { + return + } + + out.WriteString("<img src=\"") + options.maybeWriteAbsolutePrefix(out, link) + attrEscape(out, link) + out.WriteString("\" alt=\"") + if len(alt) > 0 { + attrEscape(out, alt) + } + if len(title) > 0 { + out.WriteString("\" title=\"") + attrEscape(out, title) + } + + out.WriteByte('"') + out.WriteString(options.closeTag) +} + +func (options *Html) LineBreak(out *bytes.Buffer) { + out.WriteString("<br") + out.WriteString(options.closeTag) + out.WriteByte('\n') +} + +func (options *Html) Link(out *bytes.Buffer, link []byte, title []byte, content []byte) { + if options.flags&HTML_SKIP_LINKS != 0 { + // write the link text out but don't link it, just mark it with typewriter font + out.WriteString("<tt>") + attrEscape(out, content) + out.WriteString("</tt>") + return + } + + if options.flags&HTML_SAFELINK != 0 && !isSafeLink(link) { + // write the link text out but don't link it, just mark it with typewriter font + out.WriteString("<tt>") + attrEscape(out, content) + out.WriteString("</tt>") + return + } + + out.WriteString("<a href=\"") + options.maybeWriteAbsolutePrefix(out, link) + attrEscape(out, link) + if len(title) > 0 { + out.WriteString("\" title=\"") + attrEscape(out, title) + } + var relAttrs []string + if options.flags&HTML_NOFOLLOW_LINKS != 0 && !isRelativeLink(link) { + relAttrs = append(relAttrs, "nofollow") + } + if options.flags&HTML_NOREFERRER_LINKS != 0 && !isRelativeLink(link) { + relAttrs = append(relAttrs, "noreferrer") + } + if len(relAttrs) > 0 { + out.WriteString(fmt.Sprintf("\" rel=\"%s", strings.Join(relAttrs, " "))) + } + + // blank target only add to external link + if options.flags&HTML_HREF_TARGET_BLANK != 0 && !isRelativeLink(link) { + out.WriteString("\" target=\"_blank") + } + + 
out.WriteString("\">") + out.Write(content) + out.WriteString("</a>") + return +} + +func (options *Html) RawHtmlTag(out *bytes.Buffer, text []byte) { + if options.flags&HTML_SKIP_HTML != 0 { + return + } + if options.flags&HTML_SKIP_STYLE != 0 && isHtmlTag(text, "style") { + return + } + if options.flags&HTML_SKIP_LINKS != 0 && isHtmlTag(text, "a") { + return + } + if options.flags&HTML_SKIP_IMAGES != 0 && isHtmlTag(text, "img") { + return + } + out.Write(text) +} + +func (options *Html) TripleEmphasis(out *bytes.Buffer, text []byte) { + out.WriteString("<strong><em>") + out.Write(text) + out.WriteString("</em></strong>") +} + +func (options *Html) StrikeThrough(out *bytes.Buffer, text []byte) { + out.WriteString("<del>") + out.Write(text) + out.WriteString("</del>") +} + +func (options *Html) FootnoteRef(out *bytes.Buffer, ref []byte, id int) { + slug := slugify(ref) + out.WriteString(`<sup class="footnote-ref" id="`) + out.WriteString(`fnref:`) + out.WriteString(options.parameters.FootnoteAnchorPrefix) + out.Write(slug) + out.WriteString(`"><a rel="footnote" href="#`) + out.WriteString(`fn:`) + out.WriteString(options.parameters.FootnoteAnchorPrefix) + out.Write(slug) + out.WriteString(`">`) + out.WriteString(strconv.Itoa(id)) + out.WriteString(`</a></sup>`) +} + +func (options *Html) Entity(out *bytes.Buffer, entity []byte) { + out.Write(entity) +} + +func (options *Html) NormalText(out *bytes.Buffer, text []byte) { + if options.flags&HTML_USE_SMARTYPANTS != 0 { + options.Smartypants(out, text) + } else { + attrEscape(out, text) + } +} + +func (options *Html) Smartypants(out *bytes.Buffer, text []byte) { + smrt := smartypantsData{false, false} + + // first do normal entity escaping + var escaped bytes.Buffer + attrEscape(&escaped, text) + text = escaped.Bytes() + + mark := 0 + for i := 0; i < len(text); i++ { + if action := options.smartypants[text[i]]; action != nil { + if i > mark { + out.Write(text[mark:i]) + } + + previousChar := byte(0) + if i > 0 { + 
previousChar = text[i-1] + } + i += action(out, &smrt, previousChar, text[i:]) + mark = i + 1 + } + } + + if mark < len(text) { + out.Write(text[mark:]) + } +} + +func (options *Html) DocumentHeader(out *bytes.Buffer) { + if options.flags&HTML_COMPLETE_PAGE == 0 { + return + } + + ending := "" + if options.flags&HTML_USE_XHTML != 0 { + out.WriteString("<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" ") + out.WriteString("\"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n") + out.WriteString("<html xmlns=\"http://www.w3.org/1999/xhtml\">\n") + ending = " /" + } else { + out.WriteString("<!DOCTYPE html>\n") + out.WriteString("<html>\n") + } + out.WriteString("<head>\n") + out.WriteString(" <title>") + options.NormalText(out, []byte(options.title)) + out.WriteString("</title>\n") + out.WriteString(" <meta name=\"GENERATOR\" content=\"Blackfriday Markdown Processor v") + out.WriteString(VERSION) + out.WriteString("\"") + out.WriteString(ending) + out.WriteString(">\n") + out.WriteString(" <meta charset=\"utf-8\"") + out.WriteString(ending) + out.WriteString(">\n") + if options.css != "" { + out.WriteString(" <link rel=\"stylesheet\" type=\"text/css\" href=\"") + attrEscape(out, []byte(options.css)) + out.WriteString("\"") + out.WriteString(ending) + out.WriteString(">\n") + } + out.WriteString("</head>\n") + out.WriteString("<body>\n") + + options.tocMarker = out.Len() +} + +func (options *Html) DocumentFooter(out *bytes.Buffer) { + // finalize and insert the table of contents + if options.flags&HTML_TOC != 0 { + options.TocFinalize() + + // now we have to insert the table of contents into the document + var temp bytes.Buffer + + // start by making a copy of everything after the document header + temp.Write(out.Bytes()[options.tocMarker:]) + + // now clear the copied material from the main output buffer + out.Truncate(options.tocMarker) + + // corner case spacing issue + if options.flags&HTML_COMPLETE_PAGE != 0 { + out.WriteByte('\n') + } + 
+ // insert the table of contents + out.WriteString("<nav>\n") + out.Write(options.toc.Bytes()) + out.WriteString("</nav>\n") + + // corner case spacing issue + if options.flags&HTML_COMPLETE_PAGE == 0 && options.flags&HTML_OMIT_CONTENTS == 0 { + out.WriteByte('\n') + } + + // write out everything that came after it + if options.flags&HTML_OMIT_CONTENTS == 0 { + out.Write(temp.Bytes()) + } + } + + if options.flags&HTML_COMPLETE_PAGE != 0 { + out.WriteString("\n</body>\n") + out.WriteString("</html>\n") + } + +} + +func (options *Html) TocHeaderWithAnchor(text []byte, level int, anchor string) { + for level > options.currentLevel { + switch { + case bytes.HasSuffix(options.toc.Bytes(), []byte("</li>\n")): + // this sublist can nest underneath a header + size := options.toc.Len() + options.toc.Truncate(size - len("</li>\n")) + + case options.currentLevel > 0: + options.toc.WriteString("<li>") + } + if options.toc.Len() > 0 { + options.toc.WriteByte('\n') + } + options.toc.WriteString("<ul>\n") + options.currentLevel++ + } + + for level < options.currentLevel { + options.toc.WriteString("</ul>") + if options.currentLevel > 1 { + options.toc.WriteString("</li>\n") + } + options.currentLevel-- + } + + options.toc.WriteString("<li><a href=\"#") + if anchor != "" { + options.toc.WriteString(anchor) + } else { + options.toc.WriteString("toc_") + options.toc.WriteString(strconv.Itoa(options.headerCount)) + } + options.toc.WriteString("\">") + options.headerCount++ + + options.toc.Write(text) + + options.toc.WriteString("</a></li>\n") +} + +func (options *Html) TocHeader(text []byte, level int) { + options.TocHeaderWithAnchor(text, level, "") +} + +func (options *Html) TocFinalize() { + for options.currentLevel > 1 { + options.toc.WriteString("</ul></li>\n") + options.currentLevel-- + } + + if options.currentLevel > 0 { + options.toc.WriteString("</ul>\n") + } +} + +func isHtmlTag(tag []byte, tagname string) bool { + found, _ := findHtmlTagPos(tag, tagname) + return found +} 
+ +// Look for a character, but ignore it when it's in any kind of quotes, it +// might be JavaScript +func skipUntilCharIgnoreQuotes(html []byte, start int, char byte) int { + inSingleQuote := false + inDoubleQuote := false + inGraveQuote := false + i := start + for i < len(html) { + switch { + case html[i] == char && !inSingleQuote && !inDoubleQuote && !inGraveQuote: + return i + case html[i] == '\'': + inSingleQuote = !inSingleQuote + case html[i] == '"': + inDoubleQuote = !inDoubleQuote + case html[i] == '`': + inGraveQuote = !inGraveQuote + } + i++ + } + return start +} + +func findHtmlTagPos(tag []byte, tagname string) (bool, int) { + i := 0 + if i < len(tag) && tag[0] != '<' { + return false, -1 + } + i++ + i = skipSpace(tag, i) + + if i < len(tag) && tag[i] == '/' { + i++ + } + + i = skipSpace(tag, i) + j := 0 + for ; i < len(tag); i, j = i+1, j+1 { + if j >= len(tagname) { + break + } + + if strings.ToLower(string(tag[i]))[0] != tagname[j] { + return false, -1 + } + } + + if i == len(tag) { + return false, -1 + } + + rightAngle := skipUntilCharIgnoreQuotes(tag, i, '>') + if rightAngle > i { + return true, rightAngle + } + + return false, -1 +} + +func skipUntilChar(text []byte, start int, char byte) int { + i := start + for i < len(text) && text[i] != char { + i++ + } + return i +} + +func skipSpace(tag []byte, i int) int { + for i < len(tag) && isspace(tag[i]) { + i++ + } + return i +} + +func skipChar(data []byte, start int, char byte) int { + i := start + for i < len(data) && data[i] == char { + i++ + } + return i +} + +func doubleSpace(out *bytes.Buffer) { + if out.Len() > 0 { + out.WriteByte('\n') + } +} + +func isRelativeLink(link []byte) (yes bool) { + // a tag begin with '#' + if link[0] == '#' { + return true + } + + // link begin with '/' but not '//', the second maybe a protocol relative link + if len(link) >= 2 && link[0] == '/' && link[1] != '/' { + return true + } + + // only the root '/' + if len(link) == 1 && link[0] == '/' { + return true 
+ } + + // current directory : begin with "./" + if bytes.HasPrefix(link, []byte("./")) { + return true + } + + // parent directory : begin with "../" + if bytes.HasPrefix(link, []byte("../")) { + return true + } + + return false +} + +func (options *Html) ensureUniqueHeaderID(id string) string { + for count, found := options.headerIDs[id]; found; count, found = options.headerIDs[id] { + tmp := fmt.Sprintf("%s-%d", id, count+1) + + if _, tmpFound := options.headerIDs[tmp]; !tmpFound { + options.headerIDs[id] = count + 1 + id = tmp + } else { + id = id + "-1" + } + } + + if _, found := options.headerIDs[id]; !found { + options.headerIDs[id] = 0 + } + + return id +} diff --git a/vendor/github.com/russross/blackfriday/inline.go b/vendor/github.com/russross/blackfriday/inline.go new file mode 100644 index 00000000..cb00ed68 --- /dev/null +++ b/vendor/github.com/russross/blackfriday/inline.go @@ -0,0 +1,1148 @@ +// +// Blackfriday Markdown Processor +// Available at http://github.com/russross/blackfriday +// +// Copyright © 2011 Russ Ross <russ@russross.com>. +// Distributed under the Simplified BSD License. +// See README.md for details. +// + +// +// Functions to parse inline elements. 
+// + +package blackfriday + +import ( + "bytes" + "regexp" + "strconv" +) + +var ( + urlRe = `((https?|ftp):\/\/|\/)[-A-Za-z0-9+&@#\/%?=~_|!:,.;\(\)]+` + anchorRe = regexp.MustCompile(`^(<a\shref="` + urlRe + `"(\stitle="[^"<>]+")?\s?>` + urlRe + `<\/a>)`) +) + +// Functions to parse text within a block +// Each function returns the number of chars taken care of +// data is the complete block being rendered +// offset is the number of valid chars before the current cursor + +func (p *parser) inline(out *bytes.Buffer, data []byte) { + // this is called recursively: enforce a maximum depth + if p.nesting >= p.maxNesting { + return + } + p.nesting++ + + i, end := 0, 0 + for i < len(data) { + // copy inactive chars into the output + for end < len(data) && p.inlineCallback[data[end]] == nil { + end++ + } + + p.r.NormalText(out, data[i:end]) + + if end >= len(data) { + break + } + i = end + + // call the trigger + handler := p.inlineCallback[data[end]] + if consumed := handler(p, out, data, i); consumed == 0 { + // no action from the callback; buffer the byte for later + end = i + 1 + } else { + // skip past whatever the callback used + i += consumed + end = i + } + } + + p.nesting-- +} + +// single and double emphasis parsing +func emphasis(p *parser, out *bytes.Buffer, data []byte, offset int) int { + data = data[offset:] + c := data[0] + ret := 0 + + if len(data) > 2 && data[1] != c { + // whitespace cannot follow an opening emphasis; + // strikethrough only takes two characters '~~' + if c == '~' || isspace(data[1]) { + return 0 + } + if ret = helperEmphasis(p, out, data[1:], c); ret == 0 { + return 0 + } + + return ret + 1 + } + + if len(data) > 3 && data[1] == c && data[2] != c { + if isspace(data[2]) { + return 0 + } + if ret = helperDoubleEmphasis(p, out, data[2:], c); ret == 0 { + return 0 + } + + return ret + 2 + } + + if len(data) > 4 && data[1] == c && data[2] == c && data[3] != c { + if c == '~' || isspace(data[3]) { + return 0 + } + if ret = 
helperTripleEmphasis(p, out, data, 3, c); ret == 0 { + return 0 + } + + return ret + 3 + } + + return 0 +} + +func codeSpan(p *parser, out *bytes.Buffer, data []byte, offset int) int { + data = data[offset:] + + nb := 0 + + // count the number of backticks in the delimiter + for nb < len(data) && data[nb] == '`' { + nb++ + } + + // find the next delimiter + i, end := 0, 0 + for end = nb; end < len(data) && i < nb; end++ { + if data[end] == '`' { + i++ + } else { + i = 0 + } + } + + // no matching delimiter? + if i < nb && end >= len(data) { + return 0 + } + + // trim outside whitespace + fBegin := nb + for fBegin < end && data[fBegin] == ' ' { + fBegin++ + } + + fEnd := end - nb + for fEnd > fBegin && data[fEnd-1] == ' ' { + fEnd-- + } + + // render the code span + if fBegin != fEnd { + p.r.CodeSpan(out, data[fBegin:fEnd]) + } + + return end + +} + +// newline preceded by two spaces becomes <br> +// newline without two spaces works when EXTENSION_HARD_LINE_BREAK is enabled +func lineBreak(p *parser, out *bytes.Buffer, data []byte, offset int) int { + // remove trailing spaces from out + outBytes := out.Bytes() + end := len(outBytes) + eol := end + for eol > 0 && outBytes[eol-1] == ' ' { + eol-- + } + out.Truncate(eol) + + precededByTwoSpaces := offset >= 2 && data[offset-2] == ' ' && data[offset-1] == ' ' + precededByBackslash := offset >= 1 && data[offset-1] == '\\' // see http://spec.commonmark.org/0.18/#example-527 + precededByBackslash = precededByBackslash && p.flags&EXTENSION_BACKSLASH_LINE_BREAK != 0 + + // should there be a hard line break here? 
+ if p.flags&EXTENSION_HARD_LINE_BREAK == 0 && !precededByTwoSpaces && !precededByBackslash { + return 0 + } + + if precededByBackslash && eol > 0 { + out.Truncate(eol - 1) + } + p.r.LineBreak(out) + return 1 +} + +type linkType int + +const ( + linkNormal linkType = iota + linkImg + linkDeferredFootnote + linkInlineFootnote +) + +func isReferenceStyleLink(data []byte, pos int, t linkType) bool { + if t == linkDeferredFootnote { + return false + } + return pos < len(data)-1 && data[pos] == '[' && data[pos+1] != '^' +} + +// '[': parse a link or an image or a footnote +func link(p *parser, out *bytes.Buffer, data []byte, offset int) int { + // no links allowed inside regular links, footnote, and deferred footnotes + if p.insideLink && (offset > 0 && data[offset-1] == '[' || len(data)-1 > offset && data[offset+1] == '^') { + return 0 + } + + var t linkType + switch { + // special case: ![^text] == deferred footnote (that follows something with + // an exclamation point) + case p.flags&EXTENSION_FOOTNOTES != 0 && len(data)-1 > offset && data[offset+1] == '^': + t = linkDeferredFootnote + // ![alt] == image + case offset > 0 && data[offset-1] == '!': + t = linkImg + // ^[text] == inline footnote + // [^refId] == deferred footnote + case p.flags&EXTENSION_FOOTNOTES != 0: + if offset > 0 && data[offset-1] == '^' { + t = linkInlineFootnote + } else if len(data)-1 > offset && data[offset+1] == '^' { + t = linkDeferredFootnote + } + // [text] == regular link + default: + t = linkNormal + } + + data = data[offset:] + + var ( + i = 1 + noteId int + title, link, altContent []byte + textHasNl = false + ) + + if t == linkDeferredFootnote { + i++ + } + + brace := 0 + + // look for the matching closing bracket + for level := 1; level > 0 && i < len(data); i++ { + switch { + case data[i] == '\n': + textHasNl = true + + case data[i-1] == '\\': + continue + + case data[i] == '[': + level++ + + case data[i] == ']': + level-- + if level <= 0 { + i-- // compensate for extra i++ in for 
loop + } + } + } + + if i >= len(data) { + return 0 + } + + txtE := i + i++ + + // skip any amount of whitespace or newline + // (this is much more lax than original markdown syntax) + for i < len(data) && isspace(data[i]) { + i++ + } + + switch { + // inline style link + case i < len(data) && data[i] == '(': + // skip initial whitespace + i++ + + for i < len(data) && isspace(data[i]) { + i++ + } + + linkB := i + + // look for link end: ' " ), check for new opening braces and take this + // into account, this may lead for overshooting and probably will require + // some fine-tuning. + findlinkend: + for i < len(data) { + switch { + case data[i] == '\\': + i += 2 + + case data[i] == '(': + brace++ + i++ + + case data[i] == ')': + if brace <= 0 { + break findlinkend + } + brace-- + i++ + + case data[i] == '\'' || data[i] == '"': + break findlinkend + + default: + i++ + } + } + + if i >= len(data) { + return 0 + } + linkE := i + + // look for title end if present + titleB, titleE := 0, 0 + if data[i] == '\'' || data[i] == '"' { + i++ + titleB = i + + findtitleend: + for i < len(data) { + switch { + case data[i] == '\\': + i += 2 + + case data[i] == ')': + break findtitleend + + default: + i++ + } + } + + if i >= len(data) { + return 0 + } + + // skip whitespace after title + titleE = i - 1 + for titleE > titleB && isspace(data[titleE]) { + titleE-- + } + + // check for closing quote presence + if data[titleE] != '\'' && data[titleE] != '"' { + titleB, titleE = 0, 0 + linkE = i + } + } + + // remove whitespace at the end of the link + for linkE > linkB && isspace(data[linkE-1]) { + linkE-- + } + + // remove optional angle brackets around the link + if data[linkB] == '<' { + linkB++ + } + if data[linkE-1] == '>' { + linkE-- + } + + // build escaped link and title + if linkE > linkB { + link = data[linkB:linkE] + } + + if titleE > titleB { + title = data[titleB:titleE] + } + + i++ + + // reference style link + case isReferenceStyleLink(data, i, t): + var id []byte + 
altContentConsidered := false + + // look for the id + i++ + linkB := i + for i < len(data) && data[i] != ']' { + i++ + } + if i >= len(data) { + return 0 + } + linkE := i + + // find the reference + if linkB == linkE { + if textHasNl { + var b bytes.Buffer + + for j := 1; j < txtE; j++ { + switch { + case data[j] != '\n': + b.WriteByte(data[j]) + case data[j-1] != ' ': + b.WriteByte(' ') + } + } + + id = b.Bytes() + } else { + id = data[1:txtE] + altContentConsidered = true + } + } else { + id = data[linkB:linkE] + } + + // find the reference with matching id + lr, ok := p.getRef(string(id)) + if !ok { + return 0 + } + + // keep link and title from reference + link = lr.link + title = lr.title + if altContentConsidered { + altContent = lr.text + } + i++ + + // shortcut reference style link or reference or inline footnote + default: + var id []byte + + // craft the id + if textHasNl { + var b bytes.Buffer + + for j := 1; j < txtE; j++ { + switch { + case data[j] != '\n': + b.WriteByte(data[j]) + case data[j-1] != ' ': + b.WriteByte(' ') + } + } + + id = b.Bytes() + } else { + if t == linkDeferredFootnote { + id = data[2:txtE] // get rid of the ^ + } else { + id = data[1:txtE] + } + } + + if t == linkInlineFootnote { + // create a new reference + noteId = len(p.notes) + 1 + + var fragment []byte + if len(id) > 0 { + if len(id) < 16 { + fragment = make([]byte, len(id)) + } else { + fragment = make([]byte, 16) + } + copy(fragment, slugify(id)) + } else { + fragment = append([]byte("footnote-"), []byte(strconv.Itoa(noteId))...) 
+ } + + ref := &reference{ + noteId: noteId, + hasBlock: false, + link: fragment, + title: id, + } + + p.notes = append(p.notes, ref) + + link = ref.link + title = ref.title + } else { + // find the reference with matching id + lr, ok := p.getRef(string(id)) + if !ok { + return 0 + } + + if t == linkDeferredFootnote { + lr.noteId = len(p.notes) + 1 + p.notes = append(p.notes, lr) + } + + // keep link and title from reference + link = lr.link + // if inline footnote, title == footnote contents + title = lr.title + noteId = lr.noteId + } + + // rewind the whitespace + i = txtE + 1 + } + + // build content: img alt is escaped, link content is parsed + var content bytes.Buffer + if txtE > 1 { + if t == linkImg { + content.Write(data[1:txtE]) + } else { + // links cannot contain other links, so turn off link parsing temporarily + insideLink := p.insideLink + p.insideLink = true + p.inline(&content, data[1:txtE]) + p.insideLink = insideLink + } + } + + var uLink []byte + if t == linkNormal || t == linkImg { + if len(link) > 0 { + var uLinkBuf bytes.Buffer + unescapeText(&uLinkBuf, link) + uLink = uLinkBuf.Bytes() + } + + // links need something to click on and somewhere to go + if len(uLink) == 0 || (t == linkNormal && content.Len() == 0) { + return 0 + } + } + + // call the relevant rendering function + switch t { + case linkNormal: + if len(altContent) > 0 { + p.r.Link(out, uLink, title, altContent) + } else { + p.r.Link(out, uLink, title, content.Bytes()) + } + + case linkImg: + outSize := out.Len() + outBytes := out.Bytes() + if outSize > 0 && outBytes[outSize-1] == '!' 
{ + out.Truncate(outSize - 1) + } + + p.r.Image(out, uLink, title, content.Bytes()) + + case linkInlineFootnote: + outSize := out.Len() + outBytes := out.Bytes() + if outSize > 0 && outBytes[outSize-1] == '^' { + out.Truncate(outSize - 1) + } + + p.r.FootnoteRef(out, link, noteId) + + case linkDeferredFootnote: + p.r.FootnoteRef(out, link, noteId) + + default: + return 0 + } + + return i +} + +func (p *parser) inlineHTMLComment(out *bytes.Buffer, data []byte) int { + if len(data) < 5 { + return 0 + } + if data[0] != '<' || data[1] != '!' || data[2] != '-' || data[3] != '-' { + return 0 + } + i := 5 + // scan for an end-of-comment marker, across lines if necessary + for i < len(data) && !(data[i-2] == '-' && data[i-1] == '-' && data[i] == '>') { + i++ + } + // no end-of-comment marker + if i >= len(data) { + return 0 + } + return i + 1 +} + +// '<' when tags or autolinks are allowed +func leftAngle(p *parser, out *bytes.Buffer, data []byte, offset int) int { + data = data[offset:] + altype := LINK_TYPE_NOT_AUTOLINK + end := tagLength(data, &altype) + if size := p.inlineHTMLComment(out, data); size > 0 { + end = size + } + if end > 2 { + if altype != LINK_TYPE_NOT_AUTOLINK { + var uLink bytes.Buffer + unescapeText(&uLink, data[1:end+1-2]) + if uLink.Len() > 0 { + p.r.AutoLink(out, uLink.Bytes(), altype) + } + } else { + p.r.RawHtmlTag(out, data[:end]) + } + } + + return end +} + +// '\\' backslash escape +var escapeChars = []byte("\\`*_{}[]()#+-.!:|&<>~") + +func escape(p *parser, out *bytes.Buffer, data []byte, offset int) int { + data = data[offset:] + + if len(data) > 1 { + if bytes.IndexByte(escapeChars, data[1]) < 0 { + return 0 + } + + p.r.NormalText(out, data[1:2]) + } + + return 2 +} + +func unescapeText(ob *bytes.Buffer, src []byte) { + i := 0 + for i < len(src) { + org := i + for i < len(src) && src[i] != '\\' { + i++ + } + + if i > org { + ob.Write(src[org:i]) + } + + if i+1 >= len(src) { + break + } + + ob.WriteByte(src[i+1]) + i += 2 + } +} + +// '&' 
escaped when it doesn't belong to an entity +// valid entities are assumed to be anything matching &#?[A-Za-z0-9]+; +func entity(p *parser, out *bytes.Buffer, data []byte, offset int) int { + data = data[offset:] + + end := 1 + + if end < len(data) && data[end] == '#' { + end++ + } + + for end < len(data) && isalnum(data[end]) { + end++ + } + + if end < len(data) && data[end] == ';' { + end++ // real entity + } else { + return 0 // lone '&' + } + + p.r.Entity(out, data[:end]) + + return end +} + +func linkEndsWithEntity(data []byte, linkEnd int) bool { + entityRanges := htmlEntity.FindAllIndex(data[:linkEnd], -1) + return entityRanges != nil && entityRanges[len(entityRanges)-1][1] == linkEnd +} + +func autoLink(p *parser, out *bytes.Buffer, data []byte, offset int) int { + // quick check to rule out most false hits on ':' + if p.insideLink || len(data) < offset+3 || data[offset+1] != '/' || data[offset+2] != '/' { + return 0 + } + + // Now a more expensive check to see if we're not inside an anchor element + anchorStart := offset + offsetFromAnchor := 0 + for anchorStart > 0 && data[anchorStart] != '<' { + anchorStart-- + offsetFromAnchor++ + } + + anchorStr := anchorRe.Find(data[anchorStart:]) + if anchorStr != nil { + out.Write(anchorStr[offsetFromAnchor:]) + return len(anchorStr) - offsetFromAnchor + } + + // scan backward for a word boundary + rewind := 0 + for offset-rewind > 0 && rewind <= 7 && isletter(data[offset-rewind-1]) { + rewind++ + } + if rewind > 6 { // longest supported protocol is "mailto" which has 6 letters + return 0 + } + + origData := data + data = data[offset-rewind:] + + if !isSafeLink(data) { + return 0 + } + + linkEnd := 0 + for linkEnd < len(data) && !isEndOfLink(data[linkEnd]) { + linkEnd++ + } + + // Skip punctuation at the end of the link + if (data[linkEnd-1] == '.' 
|| data[linkEnd-1] == ',') && data[linkEnd-2] != '\\' { + linkEnd-- + } + + // But don't skip semicolon if it's a part of escaped entity: + if data[linkEnd-1] == ';' && data[linkEnd-2] != '\\' && !linkEndsWithEntity(data, linkEnd) { + linkEnd-- + } + + // See if the link finishes with a punctuation sign that can be closed. + var copen byte + switch data[linkEnd-1] { + case '"': + copen = '"' + case '\'': + copen = '\'' + case ')': + copen = '(' + case ']': + copen = '[' + case '}': + copen = '{' + default: + copen = 0 + } + + if copen != 0 { + bufEnd := offset - rewind + linkEnd - 2 + + openDelim := 1 + + /* Try to close the final punctuation sign in this same line; + * if we managed to close it outside of the URL, that means that it's + * not part of the URL. If it closes inside the URL, that means it + * is part of the URL. + * + * Examples: + * + * foo http://www.pokemon.com/Pikachu_(Electric) bar + * => http://www.pokemon.com/Pikachu_(Electric) + * + * foo (http://www.pokemon.com/Pikachu_(Electric)) bar + * => http://www.pokemon.com/Pikachu_(Electric) + * + * foo http://www.pokemon.com/Pikachu_(Electric)) bar + * => http://www.pokemon.com/Pikachu_(Electric)) + * + * (foo http://www.pokemon.com/Pikachu_(Electric)) bar + * => foo http://www.pokemon.com/Pikachu_(Electric) + */ + + for bufEnd >= 0 && origData[bufEnd] != '\n' && openDelim != 0 { + if origData[bufEnd] == data[linkEnd-1] { + openDelim++ + } + + if origData[bufEnd] == copen { + openDelim-- + } + + bufEnd-- + } + + if openDelim == 0 { + linkEnd-- + } + } + + // we were triggered on the ':', so we need to rewind the output a bit + if out.Len() >= rewind { + out.Truncate(len(out.Bytes()) - rewind) + } + + var uLink bytes.Buffer + unescapeText(&uLink, data[:linkEnd]) + + if uLink.Len() > 0 { + p.r.AutoLink(out, uLink.Bytes(), LINK_TYPE_NORMAL) + } + + return linkEnd - rewind +} + +func isEndOfLink(char byte) bool { + return isspace(char) || char == '<' +} + +var validUris = [][]byte{[]byte("http://"), 
[]byte("https://"), []byte("ftp://"), []byte("mailto://")} +var validPaths = [][]byte{[]byte("/"), []byte("./"), []byte("../")} + +func isSafeLink(link []byte) bool { + for _, path := range validPaths { + if len(link) >= len(path) && bytes.Equal(link[:len(path)], path) { + if len(link) == len(path) { + return true + } else if isalnum(link[len(path)]) { + return true + } + } + } + + for _, prefix := range validUris { + // TODO: handle unicode here + // case-insensitive prefix test + if len(link) > len(prefix) && bytes.Equal(bytes.ToLower(link[:len(prefix)]), prefix) && isalnum(link[len(prefix)]) { + return true + } + } + + return false +} + +// return the length of the given tag, or 0 is it's not valid +func tagLength(data []byte, autolink *int) int { + var i, j int + + // a valid tag can't be shorter than 3 chars + if len(data) < 3 { + return 0 + } + + // begins with a '<' optionally followed by '/', followed by letter or number + if data[0] != '<' { + return 0 + } + if data[1] == '/' { + i = 2 + } else { + i = 1 + } + + if !isalnum(data[i]) { + return 0 + } + + // scheme test + *autolink = LINK_TYPE_NOT_AUTOLINK + + // try to find the beginning of an URI + for i < len(data) && (isalnum(data[i]) || data[i] == '.' 
|| data[i] == '+' || data[i] == '-') { + i++ + } + + if i > 1 && i < len(data) && data[i] == '@' { + if j = isMailtoAutoLink(data[i:]); j != 0 { + *autolink = LINK_TYPE_EMAIL + return i + j + } + } + + if i > 2 && i < len(data) && data[i] == ':' { + *autolink = LINK_TYPE_NORMAL + i++ + } + + // complete autolink test: no whitespace or ' or " + switch { + case i >= len(data): + *autolink = LINK_TYPE_NOT_AUTOLINK + case *autolink != 0: + j = i + + for i < len(data) { + if data[i] == '\\' { + i += 2 + } else if data[i] == '>' || data[i] == '\'' || data[i] == '"' || isspace(data[i]) { + break + } else { + i++ + } + + } + + if i >= len(data) { + return 0 + } + if i > j && data[i] == '>' { + return i + 1 + } + + // one of the forbidden chars has been found + *autolink = LINK_TYPE_NOT_AUTOLINK + } + + // look for something looking like a tag end + for i < len(data) && data[i] != '>' { + i++ + } + if i >= len(data) { + return 0 + } + return i + 1 +} + +// look for the address part of a mail autolink and '>' +// this is less strict than the original markdown e-mail address matching +func isMailtoAutoLink(data []byte) int { + nb := 0 + + // address is assumed to be: [-@._a-zA-Z0-9]+ with exactly one '@' + for i := 0; i < len(data); i++ { + if isalnum(data[i]) { + continue + } + + switch data[i] { + case '@': + nb++ + + case '-', '.', '_': + // Do nothing. 
+ + case '>': + if nb == 1 { + return i + 1 + } else { + return 0 + } + default: + return 0 + } + } + + return 0 +} + +// look for the next emph char, skipping other constructs +func helperFindEmphChar(data []byte, c byte) int { + i := 0 + + for i < len(data) { + for i < len(data) && data[i] != c && data[i] != '`' && data[i] != '[' { + i++ + } + if i >= len(data) { + return 0 + } + // do not count escaped chars + if i != 0 && data[i-1] == '\\' { + i++ + continue + } + if data[i] == c { + return i + } + + if data[i] == '`' { + // skip a code span + tmpI := 0 + i++ + for i < len(data) && data[i] != '`' { + if tmpI == 0 && data[i] == c { + tmpI = i + } + i++ + } + if i >= len(data) { + return tmpI + } + i++ + } else if data[i] == '[' { + // skip a link + tmpI := 0 + i++ + for i < len(data) && data[i] != ']' { + if tmpI == 0 && data[i] == c { + tmpI = i + } + i++ + } + i++ + for i < len(data) && (data[i] == ' ' || data[i] == '\n') { + i++ + } + if i >= len(data) { + return tmpI + } + if data[i] != '[' && data[i] != '(' { // not a link + if tmpI > 0 { + return tmpI + } else { + continue + } + } + cc := data[i] + i++ + for i < len(data) && data[i] != cc { + if tmpI == 0 && data[i] == c { + return i + } + i++ + } + if i >= len(data) { + return tmpI + } + i++ + } + } + return 0 +} + +func helperEmphasis(p *parser, out *bytes.Buffer, data []byte, c byte) int { + i := 0 + + // skip one symbol if coming from emph3 + if len(data) > 1 && data[0] == c && data[1] == c { + i = 1 + } + + for i < len(data) { + length := helperFindEmphChar(data[i:], c) + if length == 0 { + return 0 + } + i += length + if i >= len(data) { + return 0 + } + + if i+1 < len(data) && data[i+1] == c { + i++ + continue + } + + if data[i] == c && !isspace(data[i-1]) { + + if p.flags&EXTENSION_NO_INTRA_EMPHASIS != 0 { + if !(i+1 == len(data) || isspace(data[i+1]) || ispunct(data[i+1])) { + continue + } + } + + var work bytes.Buffer + p.inline(&work, data[:i]) + p.r.Emphasis(out, work.Bytes()) + return i + 1 + 
} + } + + return 0 +} + +func helperDoubleEmphasis(p *parser, out *bytes.Buffer, data []byte, c byte) int { + i := 0 + + for i < len(data) { + length := helperFindEmphChar(data[i:], c) + if length == 0 { + return 0 + } + i += length + + if i+1 < len(data) && data[i] == c && data[i+1] == c && i > 0 && !isspace(data[i-1]) { + var work bytes.Buffer + p.inline(&work, data[:i]) + + if work.Len() > 0 { + // pick the right renderer + if c == '~' { + p.r.StrikeThrough(out, work.Bytes()) + } else { + p.r.DoubleEmphasis(out, work.Bytes()) + } + } + return i + 2 + } + i++ + } + return 0 +} + +func helperTripleEmphasis(p *parser, out *bytes.Buffer, data []byte, offset int, c byte) int { + i := 0 + origData := data + data = data[offset:] + + for i < len(data) { + length := helperFindEmphChar(data[i:], c) + if length == 0 { + return 0 + } + i += length + + // skip whitespace preceded symbols + if data[i] != c || isspace(data[i-1]) { + continue + } + + switch { + case i+2 < len(data) && data[i+1] == c && data[i+2] == c: + // triple symbol found + var work bytes.Buffer + + p.inline(&work, data[:i]) + if work.Len() > 0 { + p.r.TripleEmphasis(out, work.Bytes()) + } + return i + 3 + case (i+1 < len(data) && data[i+1] == c): + // double symbol found, hand over to emph1 + length = helperEmphasis(p, out, origData[offset-2:], c) + if length == 0 { + return 0 + } else { + return length - 2 + } + default: + // single symbol found, hand over to emph2 + length = helperDoubleEmphasis(p, out, origData[offset-1:], c) + if length == 0 { + return 0 + } else { + return length - 1 + } + } + } + return 0 +} diff --git a/vendor/github.com/russross/blackfriday/latex.go b/vendor/github.com/russross/blackfriday/latex.go new file mode 100644 index 00000000..70705aa9 --- /dev/null +++ b/vendor/github.com/russross/blackfriday/latex.go @@ -0,0 +1,332 @@ +// +// Blackfriday Markdown Processor +// Available at http://github.com/russross/blackfriday +// +// Copyright © 2011 Russ Ross <russ@russross.com>. 
+// Distributed under the Simplified BSD License. +// See README.md for details. +// + +// +// +// LaTeX rendering backend +// +// + +package blackfriday + +import ( + "bytes" +) + +// Latex is a type that implements the Renderer interface for LaTeX output. +// +// Do not create this directly, instead use the LatexRenderer function. +type Latex struct { +} + +// LatexRenderer creates and configures a Latex object, which +// satisfies the Renderer interface. +// +// flags is a set of LATEX_* options ORed together (currently no such options +// are defined). +func LatexRenderer(flags int) Renderer { + return &Latex{} +} + +func (options *Latex) GetFlags() int { + return 0 +} + +// render code chunks using verbatim, or listings if we have a language +func (options *Latex) BlockCode(out *bytes.Buffer, text []byte, lang string) { + if lang == "" { + out.WriteString("\n\\begin{verbatim}\n") + } else { + out.WriteString("\n\\begin{lstlisting}[language=") + out.WriteString(lang) + out.WriteString("]\n") + } + out.Write(text) + if lang == "" { + out.WriteString("\n\\end{verbatim}\n") + } else { + out.WriteString("\n\\end{lstlisting}\n") + } +} + +func (options *Latex) TitleBlock(out *bytes.Buffer, text []byte) { + +} + +func (options *Latex) BlockQuote(out *bytes.Buffer, text []byte) { + out.WriteString("\n\\begin{quotation}\n") + out.Write(text) + out.WriteString("\n\\end{quotation}\n") +} + +func (options *Latex) BlockHtml(out *bytes.Buffer, text []byte) { + // a pretty lame thing to do... 
+ out.WriteString("\n\\begin{verbatim}\n") + out.Write(text) + out.WriteString("\n\\end{verbatim}\n") +} + +func (options *Latex) Header(out *bytes.Buffer, text func() bool, level int, id string) { + marker := out.Len() + + switch level { + case 1: + out.WriteString("\n\\section{") + case 2: + out.WriteString("\n\\subsection{") + case 3: + out.WriteString("\n\\subsubsection{") + case 4: + out.WriteString("\n\\paragraph{") + case 5: + out.WriteString("\n\\subparagraph{") + case 6: + out.WriteString("\n\\textbf{") + } + if !text() { + out.Truncate(marker) + return + } + out.WriteString("}\n") +} + +func (options *Latex) HRule(out *bytes.Buffer) { + out.WriteString("\n\\HRule\n") +} + +func (options *Latex) List(out *bytes.Buffer, text func() bool, flags int) { + marker := out.Len() + if flags&LIST_TYPE_ORDERED != 0 { + out.WriteString("\n\\begin{enumerate}\n") + } else { + out.WriteString("\n\\begin{itemize}\n") + } + if !text() { + out.Truncate(marker) + return + } + if flags&LIST_TYPE_ORDERED != 0 { + out.WriteString("\n\\end{enumerate}\n") + } else { + out.WriteString("\n\\end{itemize}\n") + } +} + +func (options *Latex) ListItem(out *bytes.Buffer, text []byte, flags int) { + out.WriteString("\n\\item ") + out.Write(text) +} + +func (options *Latex) Paragraph(out *bytes.Buffer, text func() bool) { + marker := out.Len() + out.WriteString("\n") + if !text() { + out.Truncate(marker) + return + } + out.WriteString("\n") +} + +func (options *Latex) Table(out *bytes.Buffer, header []byte, body []byte, columnData []int) { + out.WriteString("\n\\begin{tabular}{") + for _, elt := range columnData { + switch elt { + case TABLE_ALIGNMENT_LEFT: + out.WriteByte('l') + case TABLE_ALIGNMENT_RIGHT: + out.WriteByte('r') + default: + out.WriteByte('c') + } + } + out.WriteString("}\n") + out.Write(header) + out.WriteString(" \\\\\n\\hline\n") + out.Write(body) + out.WriteString("\n\\end{tabular}\n") +} + +func (options *Latex) TableRow(out *bytes.Buffer, text []byte) { + if 
out.Len() > 0 { + out.WriteString(" \\\\\n") + } + out.Write(text) +} + +func (options *Latex) TableHeaderCell(out *bytes.Buffer, text []byte, align int) { + if out.Len() > 0 { + out.WriteString(" & ") + } + out.Write(text) +} + +func (options *Latex) TableCell(out *bytes.Buffer, text []byte, align int) { + if out.Len() > 0 { + out.WriteString(" & ") + } + out.Write(text) +} + +// TODO: this +func (options *Latex) Footnotes(out *bytes.Buffer, text func() bool) { + +} + +func (options *Latex) FootnoteItem(out *bytes.Buffer, name, text []byte, flags int) { + +} + +func (options *Latex) AutoLink(out *bytes.Buffer, link []byte, kind int) { + out.WriteString("\\href{") + if kind == LINK_TYPE_EMAIL { + out.WriteString("mailto:") + } + out.Write(link) + out.WriteString("}{") + out.Write(link) + out.WriteString("}") +} + +func (options *Latex) CodeSpan(out *bytes.Buffer, text []byte) { + out.WriteString("\\texttt{") + escapeSpecialChars(out, text) + out.WriteString("}") +} + +func (options *Latex) DoubleEmphasis(out *bytes.Buffer, text []byte) { + out.WriteString("\\textbf{") + out.Write(text) + out.WriteString("}") +} + +func (options *Latex) Emphasis(out *bytes.Buffer, text []byte) { + out.WriteString("\\textit{") + out.Write(text) + out.WriteString("}") +} + +func (options *Latex) Image(out *bytes.Buffer, link []byte, title []byte, alt []byte) { + if bytes.HasPrefix(link, []byte("http://")) || bytes.HasPrefix(link, []byte("https://")) { + // treat it like a link + out.WriteString("\\href{") + out.Write(link) + out.WriteString("}{") + out.Write(alt) + out.WriteString("}") + } else { + out.WriteString("\\includegraphics{") + out.Write(link) + out.WriteString("}") + } +} + +func (options *Latex) LineBreak(out *bytes.Buffer) { + out.WriteString(" \\\\\n") +} + +func (options *Latex) Link(out *bytes.Buffer, link []byte, title []byte, content []byte) { + out.WriteString("\\href{") + out.Write(link) + out.WriteString("}{") + out.Write(content) + out.WriteString("}") +} + +func 
(options *Latex) RawHtmlTag(out *bytes.Buffer, tag []byte) { +} + +func (options *Latex) TripleEmphasis(out *bytes.Buffer, text []byte) { + out.WriteString("\\textbf{\\textit{") + out.Write(text) + out.WriteString("}}") +} + +func (options *Latex) StrikeThrough(out *bytes.Buffer, text []byte) { + out.WriteString("\\sout{") + out.Write(text) + out.WriteString("}") +} + +// TODO: this +func (options *Latex) FootnoteRef(out *bytes.Buffer, ref []byte, id int) { + +} + +func needsBackslash(c byte) bool { + for _, r := range []byte("_{}%$&\\~#") { + if c == r { + return true + } + } + return false +} + +func escapeSpecialChars(out *bytes.Buffer, text []byte) { + for i := 0; i < len(text); i++ { + // directly copy normal characters + org := i + + for i < len(text) && !needsBackslash(text[i]) { + i++ + } + if i > org { + out.Write(text[org:i]) + } + + // escape a character + if i >= len(text) { + break + } + out.WriteByte('\\') + out.WriteByte(text[i]) + } +} + +func (options *Latex) Entity(out *bytes.Buffer, entity []byte) { + // TODO: convert this into a unicode character or something + out.Write(entity) +} + +func (options *Latex) NormalText(out *bytes.Buffer, text []byte) { + escapeSpecialChars(out, text) +} + +// header and footer +func (options *Latex) DocumentHeader(out *bytes.Buffer) { + out.WriteString("\\documentclass{article}\n") + out.WriteString("\n") + out.WriteString("\\usepackage{graphicx}\n") + out.WriteString("\\usepackage{listings}\n") + out.WriteString("\\usepackage[margin=1in]{geometry}\n") + out.WriteString("\\usepackage[utf8]{inputenc}\n") + out.WriteString("\\usepackage{verbatim}\n") + out.WriteString("\\usepackage[normalem]{ulem}\n") + out.WriteString("\\usepackage{hyperref}\n") + out.WriteString("\n") + out.WriteString("\\hypersetup{colorlinks,%\n") + out.WriteString(" citecolor=black,%\n") + out.WriteString(" filecolor=black,%\n") + out.WriteString(" linkcolor=black,%\n") + out.WriteString(" urlcolor=black,%\n") + out.WriteString(" 
pdfstartview=FitH,%\n") + out.WriteString(" breaklinks=true,%\n") + out.WriteString(" pdfauthor={Blackfriday Markdown Processor v") + out.WriteString(VERSION) + out.WriteString("}}\n") + out.WriteString("\n") + out.WriteString("\\newcommand{\\HRule}{\\rule{\\linewidth}{0.5mm}}\n") + out.WriteString("\\addtolength{\\parskip}{0.5\\baselineskip}\n") + out.WriteString("\\parindent=0pt\n") + out.WriteString("\n") + out.WriteString("\\begin{document}\n") +} + +func (options *Latex) DocumentFooter(out *bytes.Buffer) { + out.WriteString("\n\\end{document}\n") +} diff --git a/vendor/github.com/russross/blackfriday/markdown.go b/vendor/github.com/russross/blackfriday/markdown.go new file mode 100644 index 00000000..58ba68de --- /dev/null +++ b/vendor/github.com/russross/blackfriday/markdown.go @@ -0,0 +1,926 @@ +// +// Blackfriday Markdown Processor +// Available at http://github.com/russross/blackfriday +// +// Copyright © 2011 Russ Ross <russ@russross.com>. +// Distributed under the Simplified BSD License. +// See README.md for details. +// + +// +// +// Markdown parsing and processing +// +// + +// Blackfriday markdown processor. +// +// Translates plain text with simple formatting rules into HTML or LaTeX. +package blackfriday + +import ( + "bytes" + "fmt" + "strings" + "unicode/utf8" +) + +const VERSION = "1.5" + +// These are the supported markdown parsing extensions. +// OR these values together to select multiple extensions. 
+const ( + EXTENSION_NO_INTRA_EMPHASIS = 1 << iota // ignore emphasis markers inside words + EXTENSION_TABLES // render tables + EXTENSION_FENCED_CODE // render fenced code blocks + EXTENSION_AUTOLINK // detect embedded URLs that are not explicitly marked + EXTENSION_STRIKETHROUGH // strikethrough text using ~~test~~ + EXTENSION_LAX_HTML_BLOCKS // loosen up HTML block parsing rules + EXTENSION_SPACE_HEADERS // be strict about prefix header rules + EXTENSION_HARD_LINE_BREAK // translate newlines into line breaks + EXTENSION_TAB_SIZE_EIGHT // expand tabs to eight spaces instead of four + EXTENSION_FOOTNOTES // Pandoc-style footnotes + EXTENSION_NO_EMPTY_LINE_BEFORE_BLOCK // No need to insert an empty line to start a (code, quote, ordered list, unordered list) block + EXTENSION_HEADER_IDS // specify header IDs with {#id} + EXTENSION_TITLEBLOCK // Titleblock ala pandoc + EXTENSION_AUTO_HEADER_IDS // Create the header ID from the text + EXTENSION_BACKSLASH_LINE_BREAK // translate trailing backslashes into line breaks + EXTENSION_DEFINITION_LISTS // render definition lists + + commonHtmlFlags = 0 | + HTML_USE_XHTML | + HTML_USE_SMARTYPANTS | + HTML_SMARTYPANTS_FRACTIONS | + HTML_SMARTYPANTS_DASHES | + HTML_SMARTYPANTS_LATEX_DASHES + + commonExtensions = 0 | + EXTENSION_NO_INTRA_EMPHASIS | + EXTENSION_TABLES | + EXTENSION_FENCED_CODE | + EXTENSION_AUTOLINK | + EXTENSION_STRIKETHROUGH | + EXTENSION_SPACE_HEADERS | + EXTENSION_HEADER_IDS | + EXTENSION_BACKSLASH_LINE_BREAK | + EXTENSION_DEFINITION_LISTS +) + +// These are the possible flag values for the link renderer. +// Only a single one of these values will be used; they are not ORed together. +// These are mostly of interest if you are writing a new output format. +const ( + LINK_TYPE_NOT_AUTOLINK = iota + LINK_TYPE_NORMAL + LINK_TYPE_EMAIL +) + +// These are the possible flag values for the ListItem renderer. +// Multiple flag values may be ORed together. 
+// These are mostly of interest if you are writing a new output format. +const ( + LIST_TYPE_ORDERED = 1 << iota + LIST_TYPE_DEFINITION + LIST_TYPE_TERM + LIST_ITEM_CONTAINS_BLOCK + LIST_ITEM_BEGINNING_OF_LIST + LIST_ITEM_END_OF_LIST +) + +// These are the possible flag values for the table cell renderer. +// Only a single one of these values will be used; they are not ORed together. +// These are mostly of interest if you are writing a new output format. +const ( + TABLE_ALIGNMENT_LEFT = 1 << iota + TABLE_ALIGNMENT_RIGHT + TABLE_ALIGNMENT_CENTER = (TABLE_ALIGNMENT_LEFT | TABLE_ALIGNMENT_RIGHT) +) + +// The size of a tab stop. +const ( + TAB_SIZE_DEFAULT = 4 + TAB_SIZE_EIGHT = 8 +) + +// blockTags is a set of tags that are recognized as HTML block tags. +// Any of these can be included in markdown text without special escaping. +var blockTags = map[string]struct{}{ + "blockquote": {}, + "del": {}, + "div": {}, + "dl": {}, + "fieldset": {}, + "form": {}, + "h1": {}, + "h2": {}, + "h3": {}, + "h4": {}, + "h5": {}, + "h6": {}, + "iframe": {}, + "ins": {}, + "math": {}, + "noscript": {}, + "ol": {}, + "pre": {}, + "p": {}, + "script": {}, + "style": {}, + "table": {}, + "ul": {}, + + // HTML5 + "address": {}, + "article": {}, + "aside": {}, + "canvas": {}, + "figcaption": {}, + "figure": {}, + "footer": {}, + "header": {}, + "hgroup": {}, + "main": {}, + "nav": {}, + "output": {}, + "progress": {}, + "section": {}, + "video": {}, +} + +// Renderer is the rendering interface. +// This is mostly of interest if you are implementing a new rendering format. +// +// When a byte slice is provided, it contains the (rendered) contents of the +// element. +// +// When a callback is provided instead, it will write the contents of the +// respective element directly to the output buffer and return true on success. +// If the callback returns false, the rendering function should reset the +// output buffer as though it had never been called. 
+// +// Currently Html and Latex implementations are provided +type Renderer interface { + // block-level callbacks + BlockCode(out *bytes.Buffer, text []byte, lang string) + BlockQuote(out *bytes.Buffer, text []byte) + BlockHtml(out *bytes.Buffer, text []byte) + Header(out *bytes.Buffer, text func() bool, level int, id string) + HRule(out *bytes.Buffer) + List(out *bytes.Buffer, text func() bool, flags int) + ListItem(out *bytes.Buffer, text []byte, flags int) + Paragraph(out *bytes.Buffer, text func() bool) + Table(out *bytes.Buffer, header []byte, body []byte, columnData []int) + TableRow(out *bytes.Buffer, text []byte) + TableHeaderCell(out *bytes.Buffer, text []byte, flags int) + TableCell(out *bytes.Buffer, text []byte, flags int) + Footnotes(out *bytes.Buffer, text func() bool) + FootnoteItem(out *bytes.Buffer, name, text []byte, flags int) + TitleBlock(out *bytes.Buffer, text []byte) + + // Span-level callbacks + AutoLink(out *bytes.Buffer, link []byte, kind int) + CodeSpan(out *bytes.Buffer, text []byte) + DoubleEmphasis(out *bytes.Buffer, text []byte) + Emphasis(out *bytes.Buffer, text []byte) + Image(out *bytes.Buffer, link []byte, title []byte, alt []byte) + LineBreak(out *bytes.Buffer) + Link(out *bytes.Buffer, link []byte, title []byte, content []byte) + RawHtmlTag(out *bytes.Buffer, tag []byte) + TripleEmphasis(out *bytes.Buffer, text []byte) + StrikeThrough(out *bytes.Buffer, text []byte) + FootnoteRef(out *bytes.Buffer, ref []byte, id int) + + // Low-level callbacks + Entity(out *bytes.Buffer, entity []byte) + NormalText(out *bytes.Buffer, text []byte) + + // Header and footer + DocumentHeader(out *bytes.Buffer) + DocumentFooter(out *bytes.Buffer) + + GetFlags() int +} + +// Callback functions for inline parsing. One such function is defined +// for each character that triggers a response when parsing inline data. +type inlineParser func(p *parser, out *bytes.Buffer, data []byte, offset int) int + +// Parser holds runtime state used by the parser. 
+// This is constructed by the Markdown function. +type parser struct { + r Renderer + refOverride ReferenceOverrideFunc + refs map[string]*reference + inlineCallback [256]inlineParser + flags int + nesting int + maxNesting int + insideLink bool + + // Footnotes need to be ordered as well as available to quickly check for + // presence. If a ref is also a footnote, it's stored both in refs and here + // in notes. Slice is nil if footnotes not enabled. + notes []*reference +} + +func (p *parser) getRef(refid string) (ref *reference, found bool) { + if p.refOverride != nil { + r, overridden := p.refOverride(refid) + if overridden { + if r == nil { + return nil, false + } + return &reference{ + link: []byte(r.Link), + title: []byte(r.Title), + noteId: 0, + hasBlock: false, + text: []byte(r.Text)}, true + } + } + // refs are case insensitive + ref, found = p.refs[strings.ToLower(refid)] + return ref, found +} + +// +// +// Public interface +// +// + +// Reference represents the details of a link. +// See the documentation in Options for more details on use-case. +type Reference struct { + // Link is usually the URL the reference points to. + Link string + // Title is the alternate text describing the link in more detail. + Title string + // Text is the optional text to override the ref with if the syntax used was + // [refid][] + Text string +} + +// ReferenceOverrideFunc is expected to be called with a reference string and +// return either a valid Reference type that the reference string maps to or +// nil. If overridden is false, the default reference logic will be executed. +// See the documentation in Options for more details on use-case. +type ReferenceOverrideFunc func(reference string) (ref *Reference, overridden bool) + +// Options represents configurable overrides and callbacks (in addition to the +// extension flag set) for configuring a Markdown parse. +type Options struct { + // Extensions is a flag set of bit-wise ORed extension bits. 
See the + // EXTENSION_* flags defined in this package. + Extensions int + + // ReferenceOverride is an optional function callback that is called every + // time a reference is resolved. + // + // In Markdown, the link reference syntax can be made to resolve a link to + // a reference instead of an inline URL, in one of the following ways: + // + // * [link text][refid] + // * [refid][] + // + // Usually, the refid is defined at the bottom of the Markdown document. If + // this override function is provided, the refid is passed to the override + // function first, before consulting the defined refids at the bottom. If + // the override function indicates an override did not occur, the refids at + // the bottom will be used to fill in the link details. + ReferenceOverride ReferenceOverrideFunc +} + +// MarkdownBasic is a convenience function for simple rendering. +// It processes markdown input with no extensions enabled. +func MarkdownBasic(input []byte) []byte { + // set up the HTML renderer + htmlFlags := HTML_USE_XHTML + renderer := HtmlRenderer(htmlFlags, "", "") + + // set up the parser + return MarkdownOptions(input, renderer, Options{Extensions: 0}) +} + +// Call Markdown with most useful extensions enabled +// MarkdownCommon is a convenience function for simple rendering. +// It processes markdown input with common extensions enabled, including: +// +// * Smartypants processing with smart fractions and LaTeX dashes +// +// * Intra-word emphasis suppression +// +// * Tables +// +// * Fenced code blocks +// +// * Autolinking +// +// * Strikethrough support +// +// * Strict header parsing +// +// * Custom Header IDs +func MarkdownCommon(input []byte) []byte { + // set up the HTML renderer + renderer := HtmlRenderer(commonHtmlFlags, "", "") + return MarkdownOptions(input, renderer, Options{ + Extensions: commonExtensions}) +} + +// Markdown is the main rendering function. +// It parses and renders a block of markdown-encoded text. 
+// The supplied Renderer is used to format the output, and extensions dictates +// which non-standard extensions are enabled. +// +// To use the supplied Html or LaTeX renderers, see HtmlRenderer and +// LatexRenderer, respectively. +func Markdown(input []byte, renderer Renderer, extensions int) []byte { + return MarkdownOptions(input, renderer, Options{ + Extensions: extensions}) +} + +// MarkdownOptions is just like Markdown but takes additional options through +// the Options struct. +func MarkdownOptions(input []byte, renderer Renderer, opts Options) []byte { + // no point in parsing if we can't render + if renderer == nil { + return nil + } + + extensions := opts.Extensions + + // fill in the render structure + p := new(parser) + p.r = renderer + p.flags = extensions + p.refOverride = opts.ReferenceOverride + p.refs = make(map[string]*reference) + p.maxNesting = 16 + p.insideLink = false + + // register inline parsers + p.inlineCallback['*'] = emphasis + p.inlineCallback['_'] = emphasis + if extensions&EXTENSION_STRIKETHROUGH != 0 { + p.inlineCallback['~'] = emphasis + } + p.inlineCallback['`'] = codeSpan + p.inlineCallback['\n'] = lineBreak + p.inlineCallback['['] = link + p.inlineCallback['<'] = leftAngle + p.inlineCallback['\\'] = escape + p.inlineCallback['&'] = entity + + if extensions&EXTENSION_AUTOLINK != 0 { + p.inlineCallback[':'] = autoLink + } + + if extensions&EXTENSION_FOOTNOTES != 0 { + p.notes = make([]*reference, 0) + } + + first := firstPass(p, input) + second := secondPass(p, first) + return second +} + +// first pass: +// - normalize newlines +// - extract references (outside of fenced code blocks) +// - expand tabs (outside of fenced code blocks) +// - copy everything else +func firstPass(p *parser, input []byte) []byte { + var out bytes.Buffer + tabSize := TAB_SIZE_DEFAULT + if p.flags&EXTENSION_TAB_SIZE_EIGHT != 0 { + tabSize = TAB_SIZE_EIGHT + } + beg := 0 + lastFencedCodeBlockEnd := 0 + for beg < len(input) { + // Find end of this 
line, then process the line. + end := beg + for end < len(input) && input[end] != '\n' && input[end] != '\r' { + end++ + } + + if p.flags&EXTENSION_FENCED_CODE != 0 { + // track fenced code block boundaries to suppress tab expansion + // and reference extraction inside them: + if beg >= lastFencedCodeBlockEnd { + if i := p.fencedCodeBlock(&out, input[beg:], false); i > 0 { + lastFencedCodeBlockEnd = beg + i + } + } + } + + // add the line body if present + if end > beg { + if end < lastFencedCodeBlockEnd { // Do not expand tabs while inside fenced code blocks. + out.Write(input[beg:end]) + } else if refEnd := isReference(p, input[beg:], tabSize); refEnd > 0 { + beg += refEnd + continue + } else { + expandTabs(&out, input[beg:end], tabSize) + } + } + + if end < len(input) && input[end] == '\r' { + end++ + } + if end < len(input) && input[end] == '\n' { + end++ + } + out.WriteByte('\n') + + beg = end + } + + // empty input? + if out.Len() == 0 { + out.WriteByte('\n') + } + + return out.Bytes() +} + +// second pass: actual rendering +func secondPass(p *parser, input []byte) []byte { + var output bytes.Buffer + + p.r.DocumentHeader(&output) + p.block(&output, input) + + if p.flags&EXTENSION_FOOTNOTES != 0 && len(p.notes) > 0 { + p.r.Footnotes(&output, func() bool { + flags := LIST_ITEM_BEGINNING_OF_LIST + for i := 0; i < len(p.notes); i += 1 { + ref := p.notes[i] + var buf bytes.Buffer + if ref.hasBlock { + flags |= LIST_ITEM_CONTAINS_BLOCK + p.block(&buf, ref.title) + } else { + p.inline(&buf, ref.title) + } + p.r.FootnoteItem(&output, ref.link, buf.Bytes(), flags) + flags &^= LIST_ITEM_BEGINNING_OF_LIST | LIST_ITEM_CONTAINS_BLOCK + } + + return true + }) + } + + p.r.DocumentFooter(&output) + + if p.nesting != 0 { + panic("Nesting level did not end at zero") + } + + return output.Bytes() +} + +// +// Link references +// +// This section implements support for references that (usually) appear +// as footnotes in a document, and can be referenced anywhere in the 
document. +// The basic format is: +// +// [1]: http://www.google.com/ "Google" +// [2]: http://www.github.com/ "Github" +// +// Anywhere in the document, the reference can be linked by referring to its +// label, i.e., 1 and 2 in this example, as in: +// +// This library is hosted on [Github][2], a git hosting site. +// +// Actual footnotes as specified in Pandoc and supported by some other Markdown +// libraries such as php-markdown are also taken care of. They look like this: +// +// This sentence needs a bit of further explanation.[^note] +// +// [^note]: This is the explanation. +// +// Footnotes should be placed at the end of the document in an ordered list. +// Inline footnotes such as: +// +// Inline footnotes^[Not supported.] also exist. +// +// are not yet supported. + +// References are parsed and stored in this struct. +type reference struct { + link []byte + title []byte + noteId int // 0 if not a footnote ref + hasBlock bool + text []byte +} + +func (r *reference) String() string { + return fmt.Sprintf("{link: %q, title: %q, text: %q, noteId: %d, hasBlock: %v}", + r.link, r.title, r.text, r.noteId, r.hasBlock) +} + +// Check whether or not data starts with a reference link. +// If so, it is parsed and stored in the list of references +// (in the render struct). +// Returns the number of bytes to skip to move past it, +// or zero if the first line is not a reference. +func isReference(p *parser, data []byte, tabSize int) int { + // up to 3 optional leading spaces + if len(data) < 4 { + return 0 + } + i := 0 + for i < 3 && data[i] == ' ' { + i++ + } + + noteId := 0 + + // id part: anything but a newline between brackets + if data[i] != '[' { + return 0 + } + i++ + if p.flags&EXTENSION_FOOTNOTES != 0 { + if i < len(data) && data[i] == '^' { + // we can set it to anything here because the proper noteIds will + // be assigned later during the second pass. 
It just has to be != 0 + noteId = 1 + i++ + } + } + idOffset := i + for i < len(data) && data[i] != '\n' && data[i] != '\r' && data[i] != ']' { + i++ + } + if i >= len(data) || data[i] != ']' { + return 0 + } + idEnd := i + + // spacer: colon (space | tab)* newline? (space | tab)* + i++ + if i >= len(data) || data[i] != ':' { + return 0 + } + i++ + for i < len(data) && (data[i] == ' ' || data[i] == '\t') { + i++ + } + if i < len(data) && (data[i] == '\n' || data[i] == '\r') { + i++ + if i < len(data) && data[i] == '\n' && data[i-1] == '\r' { + i++ + } + } + for i < len(data) && (data[i] == ' ' || data[i] == '\t') { + i++ + } + if i >= len(data) { + return 0 + } + + var ( + linkOffset, linkEnd int + titleOffset, titleEnd int + lineEnd int + raw []byte + hasBlock bool + ) + + if p.flags&EXTENSION_FOOTNOTES != 0 && noteId != 0 { + linkOffset, linkEnd, raw, hasBlock = scanFootnote(p, data, i, tabSize) + lineEnd = linkEnd + } else { + linkOffset, linkEnd, titleOffset, titleEnd, lineEnd = scanLinkRef(p, data, i) + } + if lineEnd == 0 { + return 0 + } + + // a valid ref has been found + + ref := &reference{ + noteId: noteId, + hasBlock: hasBlock, + } + + if noteId > 0 { + // reusing the link field for the id since footnotes don't have links + ref.link = data[idOffset:idEnd] + // if footnote, it's not really a title, it's the contained text + ref.title = raw + } else { + ref.link = data[linkOffset:linkEnd] + ref.title = data[titleOffset:titleEnd] + } + + // id matches are case-insensitive + id := string(bytes.ToLower(data[idOffset:idEnd])) + + p.refs[id] = ref + + return lineEnd +} + +func scanLinkRef(p *parser, data []byte, i int) (linkOffset, linkEnd, titleOffset, titleEnd, lineEnd int) { + // link: whitespace-free sequence, optionally between angle brackets + if data[i] == '<' { + i++ + } + linkOffset = i + if i == len(data) { + return + } + for i < len(data) && data[i] != ' ' && data[i] != '\t' && data[i] != '\n' && data[i] != '\r' { + i++ + } + linkEnd = i + if 
data[linkOffset] == '<' && data[linkEnd-1] == '>' { + linkOffset++ + linkEnd-- + } + + // optional spacer: (space | tab)* (newline | '\'' | '"' | '(' ) + for i < len(data) && (data[i] == ' ' || data[i] == '\t') { + i++ + } + if i < len(data) && data[i] != '\n' && data[i] != '\r' && data[i] != '\'' && data[i] != '"' && data[i] != '(' { + return + } + + // compute end-of-line + if i >= len(data) || data[i] == '\r' || data[i] == '\n' { + lineEnd = i + } + if i+1 < len(data) && data[i] == '\r' && data[i+1] == '\n' { + lineEnd++ + } + + // optional (space|tab)* spacer after a newline + if lineEnd > 0 { + i = lineEnd + 1 + for i < len(data) && (data[i] == ' ' || data[i] == '\t') { + i++ + } + } + + // optional title: any non-newline sequence enclosed in '"() alone on its line + if i+1 < len(data) && (data[i] == '\'' || data[i] == '"' || data[i] == '(') { + i++ + titleOffset = i + + // look for EOL + for i < len(data) && data[i] != '\n' && data[i] != '\r' { + i++ + } + if i+1 < len(data) && data[i] == '\n' && data[i+1] == '\r' { + titleEnd = i + 1 + } else { + titleEnd = i + } + + // step back + i-- + for i > titleOffset && (data[i] == ' ' || data[i] == '\t') { + i-- + } + if i > titleOffset && (data[i] == '\'' || data[i] == '"' || data[i] == ')') { + lineEnd = titleEnd + titleEnd = i + } + } + + return +} + +// The first bit of this logic is the same as (*parser).listItem, but the rest +// is much simpler. This function simply finds the entire block and shifts it +// over by one tab if it is indeed a block (just returns the line if it's not). +// blockEnd is the end of the section in the input buffer, and contents is the +// extracted text that was shifted over one tab. It will need to be rendered at +// the end of the document. 
+func scanFootnote(p *parser, data []byte, i, indentSize int) (blockStart, blockEnd int, contents []byte, hasBlock bool) { + if i == 0 || len(data) == 0 { + return + } + + // skip leading whitespace on first line + for i < len(data) && data[i] == ' ' { + i++ + } + + blockStart = i + + // find the end of the line + blockEnd = i + for i < len(data) && data[i-1] != '\n' { + i++ + } + + // get working buffer + var raw bytes.Buffer + + // put the first line into the working buffer + raw.Write(data[blockEnd:i]) + blockEnd = i + + // process the following lines + containsBlankLine := false + +gatherLines: + for blockEnd < len(data) { + i++ + + // find the end of this line + for i < len(data) && data[i-1] != '\n' { + i++ + } + + // if it is an empty line, guess that it is part of this item + // and move on to the next line + if p.isEmpty(data[blockEnd:i]) > 0 { + containsBlankLine = true + blockEnd = i + continue + } + + n := 0 + if n = isIndented(data[blockEnd:i], indentSize); n == 0 { + // this is the end of the block. + // we don't want to include this last line in the index. + break gatherLines + } + + // if there were blank lines before this one, insert a new one now + if containsBlankLine { + raw.WriteByte('\n') + containsBlankLine = false + } + + // get rid of that first tab, write to buffer + raw.Write(data[blockEnd+n : i]) + hasBlock = true + + blockEnd = i + } + + if data[blockEnd-1] != '\n' { + raw.WriteByte('\n') + } + + contents = raw.Bytes() + + return +} + +// +// +// Miscellaneous helper functions +// +// + +// Test if a character is a punctuation symbol. +// Taken from a private function in regexp in the stdlib. +func ispunct(c byte) bool { + for _, r := range []byte("!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~") { + if c == r { + return true + } + } + return false +} + +// Test if a character is a whitespace character. 
func isspace(c byte) bool {
	switch c {
	case ' ', '\t', '\n', '\r', '\f', '\v':
		return true
	}
	return false
}

// Test if a character is letter.
// Only the ASCII ranges a-z and A-Z count.
func isletter(c byte) bool {
	lower := 'a' <= c && c <= 'z'
	upper := 'A' <= c && c <= 'Z'
	return lower || upper
}

// Test if a character is a letter or a digit.
// TODO: check when this is looking for ASCII alnum and when it should use unicode
func isalnum(c byte) bool {
	if '0' <= c && c <= '9' {
		return true
	}
	return isletter(c)
}

// Replace tab characters with spaces, aligning to the next TAB_SIZE column.
//
// The expanded line is appended to out; nothing else is written, in
// particular no line terminator. Columns are counted in runes, not bytes.
func expandTabs(out *bytes.Buffer, line []byte, tabSize int) {
	// count the tabs that open the line
	leading := 0
	for leading < len(line) && line[leading] == '\t' {
		leading++
	}

	// common case: no further tab after the leading run, so every tab sits
	// on a tab stop and expands to exactly tabSize spaces without any need
	// to decode runes
	if bytes.IndexByte(line[leading:], '\t') < 0 {
		for pad := leading * tabSize; pad > 0; pad-- {
			out.WriteByte(' ')
		}
		out.Write(line[leading:])
		return
	}

	// general case: walk the line rune by rune so the column is known
	// whenever a tab has to be padded to the next tab stop
	column := 0
	for pos := 0; pos < len(line); {
		if line[pos] != '\t' {
			_, width := utf8.DecodeRune(line[pos:])
			out.Write(line[pos : pos+width])
			pos += width
			column++
			continue
		}

		// a tab always produces at least one space
		for {
			out.WriteByte(' ')
			column++
			if column%tabSize == 0 {
				break
			}
		}
		pos++
	}
}

// Find if a line counts as indented or not.
// Returns number of characters the indent is (0 = not indented).
+func isIndented(data []byte, indentSize int) int { + if len(data) == 0 { + return 0 + } + if data[0] == '\t' { + return 1 + } + if len(data) < indentSize { + return 0 + } + for i := 0; i < indentSize; i++ { + if data[i] != ' ' { + return 0 + } + } + return indentSize +} + +// Create a url-safe slug for fragments +func slugify(in []byte) []byte { + if len(in) == 0 { + return in + } + out := make([]byte, 0, len(in)) + sym := false + + for _, ch := range in { + if isalnum(ch) { + sym = false + out = append(out, ch) + } else if sym { + continue + } else { + out = append(out, '-') + sym = true + } + } + var a, b int + var ch byte + for a, ch = range out { + if ch != '-' { + break + } + } + for b = len(out) - 1; b > 0; b-- { + if out[b] != '-' { + break + } + } + return out[a : b+1] +} diff --git a/vendor/github.com/russross/blackfriday/smartypants.go b/vendor/github.com/russross/blackfriday/smartypants.go new file mode 100644 index 00000000..eeffa5e1 --- /dev/null +++ b/vendor/github.com/russross/blackfriday/smartypants.go @@ -0,0 +1,400 @@ +// +// Blackfriday Markdown Processor +// Available at http://github.com/russross/blackfriday +// +// Copyright © 2011 Russ Ross <russ@russross.com>. +// Distributed under the Simplified BSD License. +// See README.md for details. 
+// + +// +// +// SmartyPants rendering +// +// + +package blackfriday + +import ( + "bytes" +) + +type smartypantsData struct { + inSingleQuote bool + inDoubleQuote bool +} + +func wordBoundary(c byte) bool { + return c == 0 || isspace(c) || ispunct(c) +} + +func tolower(c byte) byte { + if c >= 'A' && c <= 'Z' { + return c - 'A' + 'a' + } + return c +} + +func isdigit(c byte) bool { + return c >= '0' && c <= '9' +} + +func smartQuoteHelper(out *bytes.Buffer, previousChar byte, nextChar byte, quote byte, isOpen *bool) bool { + // edge of the buffer is likely to be a tag that we don't get to see, + // so we treat it like text sometimes + + // enumerate all sixteen possibilities for (previousChar, nextChar) + // each can be one of {0, space, punct, other} + switch { + case previousChar == 0 && nextChar == 0: + // context is not any help here, so toggle + *isOpen = !*isOpen + case isspace(previousChar) && nextChar == 0: + // [ "] might be [ "<code>foo...] + *isOpen = true + case ispunct(previousChar) && nextChar == 0: + // [!"] hmm... could be [Run!"] or [("<code>...] + *isOpen = false + case /* isnormal(previousChar) && */ nextChar == 0: + // [a"] is probably a close + *isOpen = false + case previousChar == 0 && isspace(nextChar): + // [" ] might be [...foo</code>" ] + *isOpen = false + case isspace(previousChar) && isspace(nextChar): + // [ " ] context is not any help here, so toggle + *isOpen = !*isOpen + case ispunct(previousChar) && isspace(nextChar): + // [!" ] is probably a close + *isOpen = false + case /* isnormal(previousChar) && */ isspace(nextChar): + // [a" ] this is one of the easy cases + *isOpen = false + case previousChar == 0 && ispunct(nextChar): + // ["!] hmm... could be ["$1.95] or [</code>"!...] + *isOpen = false + case isspace(previousChar) && ispunct(nextChar): + // [ "!] looks more like [ "$1.95] + *isOpen = true + case ispunct(previousChar) && ispunct(nextChar): + // [!"!] 
context is not any help here, so toggle + *isOpen = !*isOpen + case /* isnormal(previousChar) && */ ispunct(nextChar): + // [a"!] is probably a close + *isOpen = false + case previousChar == 0 /* && isnormal(nextChar) */ : + // ["a] is probably an open + *isOpen = true + case isspace(previousChar) /* && isnormal(nextChar) */ : + // [ "a] this is one of the easy cases + *isOpen = true + case ispunct(previousChar) /* && isnormal(nextChar) */ : + // [!"a] is probably an open + *isOpen = true + default: + // [a'b] maybe a contraction? + *isOpen = false + } + + out.WriteByte('&') + if *isOpen { + out.WriteByte('l') + } else { + out.WriteByte('r') + } + out.WriteByte(quote) + out.WriteString("quo;") + return true +} + +func smartSingleQuote(out *bytes.Buffer, smrt *smartypantsData, previousChar byte, text []byte) int { + if len(text) >= 2 { + t1 := tolower(text[1]) + + if t1 == '\'' { + nextChar := byte(0) + if len(text) >= 3 { + nextChar = text[2] + } + if smartQuoteHelper(out, previousChar, nextChar, 'd', &smrt.inDoubleQuote) { + return 1 + } + } + + if (t1 == 's' || t1 == 't' || t1 == 'm' || t1 == 'd') && (len(text) < 3 || wordBoundary(text[2])) { + out.WriteString("’") + return 0 + } + + if len(text) >= 3 { + t2 := tolower(text[2]) + + if ((t1 == 'r' && t2 == 'e') || (t1 == 'l' && t2 == 'l') || (t1 == 'v' && t2 == 'e')) && + (len(text) < 4 || wordBoundary(text[3])) { + out.WriteString("’") + return 0 + } + } + } + + nextChar := byte(0) + if len(text) > 1 { + nextChar = text[1] + } + if smartQuoteHelper(out, previousChar, nextChar, 's', &smrt.inSingleQuote) { + return 0 + } + + out.WriteByte(text[0]) + return 0 +} + +func smartParens(out *bytes.Buffer, smrt *smartypantsData, previousChar byte, text []byte) int { + if len(text) >= 3 { + t1 := tolower(text[1]) + t2 := tolower(text[2]) + + if t1 == 'c' && t2 == ')' { + out.WriteString("©") + return 2 + } + + if t1 == 'r' && t2 == ')' { + out.WriteString("®") + return 2 + } + + if len(text) >= 4 && t1 == 't' && t2 == 'm' 
&& text[3] == ')' { + out.WriteString("™") + return 3 + } + } + + out.WriteByte(text[0]) + return 0 +} + +func smartDash(out *bytes.Buffer, smrt *smartypantsData, previousChar byte, text []byte) int { + if len(text) >= 2 { + if text[1] == '-' { + out.WriteString("—") + return 1 + } + + if wordBoundary(previousChar) && wordBoundary(text[1]) { + out.WriteString("–") + return 0 + } + } + + out.WriteByte(text[0]) + return 0 +} + +func smartDashLatex(out *bytes.Buffer, smrt *smartypantsData, previousChar byte, text []byte) int { + if len(text) >= 3 && text[1] == '-' && text[2] == '-' { + out.WriteString("—") + return 2 + } + if len(text) >= 2 && text[1] == '-' { + out.WriteString("–") + return 1 + } + + out.WriteByte(text[0]) + return 0 +} + +func smartAmpVariant(out *bytes.Buffer, smrt *smartypantsData, previousChar byte, text []byte, quote byte) int { + if bytes.HasPrefix(text, []byte(""")) { + nextChar := byte(0) + if len(text) >= 7 { + nextChar = text[6] + } + if smartQuoteHelper(out, previousChar, nextChar, quote, &smrt.inDoubleQuote) { + return 5 + } + } + + if bytes.HasPrefix(text, []byte("�")) { + return 3 + } + + out.WriteByte('&') + return 0 +} + +func smartAmp(out *bytes.Buffer, smrt *smartypantsData, previousChar byte, text []byte) int { + return smartAmpVariant(out, smrt, previousChar, text, 'd') +} + +func smartAmpAngledQuote(out *bytes.Buffer, smrt *smartypantsData, previousChar byte, text []byte) int { + return smartAmpVariant(out, smrt, previousChar, text, 'a') +} + +func smartPeriod(out *bytes.Buffer, smrt *smartypantsData, previousChar byte, text []byte) int { + if len(text) >= 3 && text[1] == '.' && text[2] == '.' { + out.WriteString("…") + return 2 + } + + if len(text) >= 5 && text[1] == ' ' && text[2] == '.' && text[3] == ' ' && text[4] == '.' 
{ + out.WriteString("…") + return 4 + } + + out.WriteByte(text[0]) + return 0 +} + +func smartBacktick(out *bytes.Buffer, smrt *smartypantsData, previousChar byte, text []byte) int { + if len(text) >= 2 && text[1] == '`' { + nextChar := byte(0) + if len(text) >= 3 { + nextChar = text[2] + } + if smartQuoteHelper(out, previousChar, nextChar, 'd', &smrt.inDoubleQuote) { + return 1 + } + } + + out.WriteByte(text[0]) + return 0 +} + +func smartNumberGeneric(out *bytes.Buffer, smrt *smartypantsData, previousChar byte, text []byte) int { + if wordBoundary(previousChar) && previousChar != '/' && len(text) >= 3 { + // is it of the form digits/digits(word boundary)?, i.e., \d+/\d+\b + // note: check for regular slash (/) or fraction slash (⁄, 0x2044, or 0xe2 81 84 in utf-8) + // and avoid changing dates like 1/23/2005 into fractions. + numEnd := 0 + for len(text) > numEnd && isdigit(text[numEnd]) { + numEnd++ + } + if numEnd == 0 { + out.WriteByte(text[0]) + return 0 + } + denStart := numEnd + 1 + if len(text) > numEnd+3 && text[numEnd] == 0xe2 && text[numEnd+1] == 0x81 && text[numEnd+2] == 0x84 { + denStart = numEnd + 3 + } else if len(text) < numEnd+2 || text[numEnd] != '/' { + out.WriteByte(text[0]) + return 0 + } + denEnd := denStart + for len(text) > denEnd && isdigit(text[denEnd]) { + denEnd++ + } + if denEnd == denStart { + out.WriteByte(text[0]) + return 0 + } + if len(text) == denEnd || wordBoundary(text[denEnd]) && text[denEnd] != '/' { + out.WriteString("<sup>") + out.Write(text[:numEnd]) + out.WriteString("</sup>⁄<sub>") + out.Write(text[denStart:denEnd]) + out.WriteString("</sub>") + return denEnd - 1 + } + } + + out.WriteByte(text[0]) + return 0 +} + +func smartNumber(out *bytes.Buffer, smrt *smartypantsData, previousChar byte, text []byte) int { + if wordBoundary(previousChar) && previousChar != '/' && len(text) >= 3 { + if text[0] == '1' && text[1] == '/' && text[2] == '2' { + if len(text) < 4 || wordBoundary(text[3]) && text[3] != '/' { + 
out.WriteString("½") + return 2 + } + } + + if text[0] == '1' && text[1] == '/' && text[2] == '4' { + if len(text) < 4 || wordBoundary(text[3]) && text[3] != '/' || (len(text) >= 5 && tolower(text[3]) == 't' && tolower(text[4]) == 'h') { + out.WriteString("¼") + return 2 + } + } + + if text[0] == '3' && text[1] == '/' && text[2] == '4' { + if len(text) < 4 || wordBoundary(text[3]) && text[3] != '/' || (len(text) >= 6 && tolower(text[3]) == 't' && tolower(text[4]) == 'h' && tolower(text[5]) == 's') { + out.WriteString("¾") + return 2 + } + } + } + + out.WriteByte(text[0]) + return 0 +} + +func smartDoubleQuoteVariant(out *bytes.Buffer, smrt *smartypantsData, previousChar byte, text []byte, quote byte) int { + nextChar := byte(0) + if len(text) > 1 { + nextChar = text[1] + } + if !smartQuoteHelper(out, previousChar, nextChar, quote, &smrt.inDoubleQuote) { + out.WriteString(""") + } + + return 0 +} + +func smartDoubleQuote(out *bytes.Buffer, smrt *smartypantsData, previousChar byte, text []byte) int { + return smartDoubleQuoteVariant(out, smrt, previousChar, text, 'd') +} + +func smartAngledDoubleQuote(out *bytes.Buffer, smrt *smartypantsData, previousChar byte, text []byte) int { + return smartDoubleQuoteVariant(out, smrt, previousChar, text, 'a') +} + +func smartLeftAngle(out *bytes.Buffer, smrt *smartypantsData, previousChar byte, text []byte) int { + i := 0 + + for i < len(text) && text[i] != '>' { + i++ + } + + out.Write(text[:i+1]) + return i +} + +type smartCallback func(out *bytes.Buffer, smrt *smartypantsData, previousChar byte, text []byte) int + +type smartypantsRenderer [256]smartCallback + +func smartypants(flags int) *smartypantsRenderer { + r := new(smartypantsRenderer) + if flags&HTML_SMARTYPANTS_ANGLED_QUOTES == 0 { + r['"'] = smartDoubleQuote + r['&'] = smartAmp + } else { + r['"'] = smartAngledDoubleQuote + r['&'] = smartAmpAngledQuote + } + r['\''] = smartSingleQuote + r['('] = smartParens + if flags&HTML_SMARTYPANTS_DASHES != 0 { + if 
flags&HTML_SMARTYPANTS_LATEX_DASHES == 0 { + r['-'] = smartDash + } else { + r['-'] = smartDashLatex + } + } + r['.'] = smartPeriod + if flags&HTML_SMARTYPANTS_FRACTIONS == 0 { + r['1'] = smartNumber + r['3'] = smartNumber + } else { + for ch := '1'; ch <= '9'; ch++ { + r[ch] = smartNumberGeneric + } + } + r['<'] = smartLeftAngle + r['`'] = smartBacktick + return r +} diff --git a/vendor/github.com/shurcooL/sanitized_anchor_name/LICENSE b/vendor/github.com/shurcooL/sanitized_anchor_name/LICENSE new file mode 100644 index 00000000..5f4e3ed5 --- /dev/null +++ b/vendor/github.com/shurcooL/sanitized_anchor_name/LICENSE @@ -0,0 +1,19 @@ +Copyright (c) 2015 Dmitri Shuralyov + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
diff --git a/vendor/github.com/shurcooL/sanitized_anchor_name/main.go b/vendor/github.com/shurcooL/sanitized_anchor_name/main.go new file mode 100644 index 00000000..72a87535 --- /dev/null +++ b/vendor/github.com/shurcooL/sanitized_anchor_name/main.go @@ -0,0 +1,29 @@ +// Package sanitized_anchor_name provides a func to create sanitized anchor names. +// +// Its logic can be reused by multiple packages to create interoperable anchor names +// and links to those anchors. +// +// At this time, it does not try to ensure that generated anchor names +// are unique, that responsibility falls on the caller. +package sanitized_anchor_name // import "github.com/shurcooL/sanitized_anchor_name" + +import "unicode" + +// Create returns a sanitized anchor name for the given text. +func Create(text string) string { + var anchorName []rune + var futureDash = false + for _, r := range []rune(text) { + switch { + case unicode.IsLetter(r) || unicode.IsNumber(r): + if futureDash && len(anchorName) > 0 { + anchorName = append(anchorName, '-') + } + futureDash = false + anchorName = append(anchorName, unicode.ToLower(r)) + default: + futureDash = true + } + } + return string(anchorName) +} |