// Copyright 2015 The Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package html provides functions for escaping/unescaping HTML text and for parsing HTML entities.
package html
import (
"io"
"strconv"
"strings"
"unicode/utf8"
)
const BadEntity = string(utf8.RuneError)
var htmlEscapeReplacer = strings.NewReplacer(
"&", "&",
"<", "<",
">", ">",
`"`, """,
)
func EscapeString(s string) string {
return htmlEscapeReplacer.Replace(s)
}
func WriteEscapedString(w io.Writer, s string) error {
_, err := htmlEscapeReplacer.WriteString(w, s)
return err
}
func isValidEntityCode(c int64) bool {
switch {
case !utf8.ValidRune(rune(c)):
return false
// never used
case c >= 0xfdd0 && c <= 0xfdef:
return false
case c&0xffff == 0xffff || c&0xffff == 0xfffe:
return false
// control codes
case c >= 0x00 && c <= 0x08:
return false
case c == 0x0b:
return false
case c >= 0x0e && c <= 0x1f:
return false
case c >= 0x7f && c <= 0x9f:
return false
}
return true
}
func letter(b byte) bool { return b >= 'a' && b <= 'z' || b >= 'A' && b <= 'Z' }
func digit(b byte) bool { return b >= '0' && b <= '9' }
func alphanum(b byte) bool { return letter(b) || digit(b) }
func hexDigit(b byte) bool {
return digit(b) || b >= 'a' && b <= 'f' || b >= 'A' && b <= 'F'
}
func ParseEntity(s string) (string, int) {
st := 0
var n int
for i := 1; i < len(s); i++ {
b := s[i]
switch st {
case 0: // initial state
switch {
case b == '#':
st = 1
case letter(b):
n = 1
st = 2
default:
return "", 0
}
case 1: // &#
switch {
case b == 'x' || b == 'X':
st = 3
case digit(b):
n = 1
st = 4
default:
return "", 0
}
case 2: // &q
switch {
case alphanum(b):
n++
if n > 31 {
return "", 0
}
case b == ';':
if e, ok := entities[s[i-n:i]]; ok {
return e, i + 1
}
return "", 0
default:
return "", 0
}
case 3: // &#x
switch {
case hexDigit(b):
n = 1
st = 5
default:
return "", 0
}
case 4: // �
switch {
case digit(b):
n++
if n > 8 {
return "", 0
}
case b == ';':
c, _ := strconv.ParseInt(s[i-n:i], 10, 32)
if !isValidEntityCode(c) {
return BadEntity, i + 1
}
return string(rune(c)), i + 1
default:
return "", 0
}
case 5: // �
switch {
case hexDigit(b):
n++
if n > 8 {
return "", 0
}
case b == ';':
c, err := strconv.ParseInt(s[i-n:i], 16, 32)
if err != nil {
return BadEntity, i + 1
}
if !isValidEntityCode(c) {
return BadEntity, i + 1
}
return string(rune(c)), i + 1
default:
return "", 0
}
}
}
return "", 0
}
func UnescapeString(s string) string {
i := strings.IndexByte(s, '&')
if i < 0 {
return s
}
anyChanges := false
var entityStr string
var entityLen int
for i < len(s) {
if s[i] == '&' {
entityStr, entityLen = ParseEntity(s[i:])
if entityLen > 0 {
anyChanges = true
break
}
}
i++
}
if !anyChanges {
return s
}
buf := make([]byte, len(s)-entityLen+len(entityStr))
copy(buf[:i], s)
n := copy(buf[i:], entityStr)
j := i + n
i += entityLen
for i < len(s) {
b := s[i]
if b == '&' {
entityStr, entityLen = ParseEntity(s[i:])
if entityLen > 0 {
n = copy(buf[j:], entityStr)
j += n
i += entityLen
continue
}
}
buf[j] = b
j++
i++
}
return string(buf[:j])
}