diff options
Diffstat (limited to 'vendor/gitlab.com/golang-commonmark/html/html.go')
-rw-r--r-- | vendor/gitlab.com/golang-commonmark/html/html.go | 211 |
1 files changed, 211 insertions, 0 deletions
diff --git a/vendor/gitlab.com/golang-commonmark/html/html.go b/vendor/gitlab.com/golang-commonmark/html/html.go new file mode 100644 index 00000000..25bf7d82 --- /dev/null +++ b/vendor/gitlab.com/golang-commonmark/html/html.go @@ -0,0 +1,211 @@ +// Copyright 2015 The Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package html provides functions for escaping/unescaping HTML text and for parsing HTML entities. +package html + +import ( + "io" + "strconv" + "strings" + "unicode/utf8" +) + +const BadEntity = string(utf8.RuneError) + +var htmlEscapeReplacer = strings.NewReplacer( + "&", "&", + "<", "<", + ">", ">", + `"`, """, +) + +func EscapeString(s string) string { + return htmlEscapeReplacer.Replace(s) +} + +func WriteEscapedString(w io.Writer, s string) error { + _, err := htmlEscapeReplacer.WriteString(w, s) + return err +} + +func isValidEntityCode(c int64) bool { + switch { + case !utf8.ValidRune(rune(c)): + return false + + // never used + case c >= 0xfdd0 && c <= 0xfdef: + return false + case c&0xffff == 0xffff || c&0xffff == 0xfffe: + return false + // control codes + case c >= 0x00 && c <= 0x08: + return false + case c == 0x0b: + return false + case c >= 0x0e && c <= 0x1f: + return false + case c >= 0x7f && c <= 0x9f: + return false + } + + return true +} + +func letter(b byte) bool { return b >= 'a' && b <= 'z' || b >= 'A' && b <= 'Z' } + +func digit(b byte) bool { return b >= '0' && b <= '9' } + +func alphanum(b byte) bool { return letter(b) || digit(b) } + +func hexDigit(b byte) bool { + return digit(b) || b >= 'a' && b <= 'f' || b >= 'A' && b <= 'F' +} + +func ParseEntity(s string) (string, int) { + st := 0 + var n int + + for i := 1; i < len(s); i++ { + b := s[i] + + switch st { + case 0: // initial state + switch { + case b == '#': + st = 1 + case letter(b): + n = 1 + st = 2 + default: + return "", 0 + } + + case 1: // &# + switch { + case b == 'x' || b == 'X': + st = 3 + case digit(b): + n = 1 + st = 4 + default: + return "", 0 + } + + case 2: // &q + switch { + case alphanum(b): + n++ + if n > 31 { + return "", 0 + } + case b == ';': + if e, ok := entities[s[i-n:i]]; ok { + return e, i + 1 + } + return "", 0 + default: + return "", 0 + } + + case 3: // &#x + switch { + case hexDigit(b): + n = 1 + st = 5 + default: + return "", 0 + } + + case 4: // � + switch { + case digit(b): + n++ + if n > 8 { + return "", 0 + } + case b == ';': + c, _ := strconv.ParseInt(s[i-n:i], 10, 32) + if !isValidEntityCode(c) { + return BadEntity, i + 1 + } + return string(rune(c)), i + 1 + default: + return "", 0 + } + + case 5: // � + switch { + case hexDigit(b): + n++ + if n > 8 { + return "", 0 + } + case b == ';': + c, err := strconv.ParseInt(s[i-n:i], 16, 32) + if err != nil { + return BadEntity, i + 1 + } + if !isValidEntityCode(c) { + return BadEntity, i + 1 + } + return string(rune(c)), i + 1 + default: + return "", 0 + } + } + } + + return "", 0 +} + +func UnescapeString(s string) string { + i := strings.IndexByte(s, '&') + if i < 0 { + return s + } + + anyChanges := false + var entityStr string + var entityLen int + for i < len(s) { + if s[i] == '&' { + entityStr, entityLen = ParseEntity(s[i:]) + if entityLen > 0 { + anyChanges = true + break + } + } + i++ + } + + if !anyChanges { + return s + } + + buf := make([]byte, len(s)-entityLen+len(entityStr)) + copy(buf[:i], s) + n := copy(buf[i:], entityStr) + j := i + n + i += entityLen + for i < len(s) { + b := s[i] + if b == '&' { + entityStr, entityLen = ParseEntity(s[i:]) + if entityLen > 0 { + n = copy(buf[j:], entityStr) + j += n + i += entityLen + continue + } + } + + buf[j] = b + j++ + i++ + } + + return string(buf[:j]) +} |