123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260 |
- // Copyright 2011 The Go Authors. All rights reserved.
- // Use of this source code is governed by a BSD-style
- // license that can be found in the LICENSE file.
- package template
- import (
- "bytes"
- "fmt"
- "strings"
- "unicode/utf8"
- )
- // htmlNospaceEscaper escapes for inclusion in unquoted attribute values.
- func htmlNospaceEscaper(args ...interface{}) string {
- s, t := stringify(args...)
- if t == contentTypeHTML {
- return htmlReplacer(stripTags(s), htmlNospaceNormReplacementTable, false)
- }
- return htmlReplacer(s, htmlNospaceReplacementTable, false)
- }
- // attrEscaper escapes for inclusion in quoted attribute values.
- func attrEscaper(args ...interface{}) string {
- s, t := stringify(args...)
- if t == contentTypeHTML {
- return htmlReplacer(stripTags(s), htmlNormReplacementTable, true)
- }
- return htmlReplacer(s, htmlReplacementTable, true)
- }
- // rcdataEscaper escapes for inclusion in an RCDATA element body.
- func rcdataEscaper(args ...interface{}) string {
- s, t := stringify(args...)
- if t == contentTypeHTML {
- return htmlReplacer(s, htmlNormReplacementTable, true)
- }
- return htmlReplacer(s, htmlReplacementTable, true)
- }
- // htmlEscaper escapes for inclusion in HTML text.
- func htmlEscaper(args ...interface{}) string {
- s, t := stringify(args...)
- if t == contentTypeHTML {
- return s
- }
- return htmlReplacer(s, htmlReplacementTable, true)
- }
// htmlReplacementTable contains the runes that need to be escaped
// inside a quoted attribute value or in a text node.
//
// The slice is indexed by rune value; an empty entry means the rune is
// passed through unchanged by htmlReplacer.
var htmlReplacementTable = []string{
	// http://www.w3.org/TR/html5/syntax.html#attribute-value-(unquoted)-state
	// "U+0000 NULL Parse error. Append a U+FFFD REPLACEMENT
	// CHARACTER character to the current attribute's value."
	// and similarly
	// http://www.w3.org/TR/html5/syntax.html#before-attribute-value-state
	0:    "\uFFFD",
	'"':  "&#34;",
	'&':  "&amp;",
	'\'': "&#39;",
	'+':  "&#43;",
	'<':  "&lt;",
	'>':  "&gt;",
}
// htmlNormReplacementTable is like htmlReplacementTable but without '&' to
// avoid over-encoding existing entities.
//
// The slice is indexed by rune value; an empty entry means the rune is
// passed through unchanged by htmlReplacer.
var htmlNormReplacementTable = []string{
	0:    "\uFFFD",
	'"':  "&#34;",
	'\'': "&#39;",
	'+':  "&#43;",
	'<':  "&lt;",
	'>':  "&gt;",
}
// htmlNospaceReplacementTable contains the runes that need to be escaped
// inside an unquoted attribute value.
// The set of runes escaped is the union of the HTML specials and
// those determined by running the JS below in browsers:
// <div id=d></div>
// <script>(function () {
// var a = [], d = document.getElementById("d"), i, c, s;
// for (i = 0; i < 0x10000; ++i) {
//   c = String.fromCharCode(i);
//   d.innerHTML = "<span title=" + c + "lt" + c + "></span>"
//   s = d.getElementsByTagName("SPAN")[0];
//   if (!s || s.title !== c + "lt" + c) { a.push(i.toString(16)); }
// }
// document.write(a.join(", "));
// })()</script>
//
// The slice is indexed by rune value; an empty entry means the rune is
// passed through unchanged by htmlReplacer. Whitespace is written as numeric
// character references because a literal space, tab or line break would end
// an unquoted value.
var htmlNospaceReplacementTable = []string{
	0:    "&#xfffd;",
	'\t': "&#9;",
	'\n': "&#10;",
	'\v': "&#11;",
	'\f': "&#12;",
	'\r': "&#13;",
	' ':  "&#32;",
	'"':  "&#34;",
	'&':  "&amp;",
	'\'': "&#39;",
	'+':  "&#43;",
	'<':  "&lt;",
	'=':  "&#61;",
	'>':  "&gt;",
	// A parse error in the attribute value (unquoted) and
	// before attribute value states.
	// Treated as a quoting character by IE.
	'`': "&#96;",
}
// htmlNospaceNormReplacementTable is like htmlNospaceReplacementTable but
// without '&' to avoid over-encoding existing entities.
//
// The slice is indexed by rune value; an empty entry means the rune is
// passed through unchanged by htmlReplacer.
var htmlNospaceNormReplacementTable = []string{
	0:    "&#xfffd;",
	'\t': "&#9;",
	'\n': "&#10;",
	'\v': "&#11;",
	'\f': "&#12;",
	'\r': "&#13;",
	' ':  "&#32;",
	'"':  "&#34;",
	'\'': "&#39;",
	'+':  "&#43;",
	'<':  "&lt;",
	'=':  "&#61;",
	'>':  "&gt;",
	// A parse error in the attribute value (unquoted) and
	// before attribute value states.
	// Treated as a quoting character by IE.
	'`': "&#96;",
}
// htmlReplacer returns s with runes replaced according to replacementTable.
//
// replacementTable is indexed by rune value; a non-empty entry is written in
// place of the rune, an empty entry leaves the rune as is. Runes beyond the
// end of the table are copied through, except that when badRunes is false the
// ranges U+FDD0..U+FDEF and U+FFF0..U+FFFF are written as hexadecimal
// character references. When badRunes is true those runes are allowed
// through unescaped.
//
// If nothing needs replacing, s itself is returned and no buffer is grown.
func htmlReplacer(s string, replacementTable []string, badRunes bool) string {
	var b strings.Builder
	written := 0 // s[:written] has already been copied (or replaced) into b
	for i := 0; i < len(s); {
		// Cannot use 'for range s' because we need to preserve the width
		// of the runes in the input. On a decoding error the rune is
		// utf8.RuneError but only one byte was consumed, so utf8.RuneLen(r)
		// would not match the input width.
		r, w := utf8.DecodeRuneInString(s[i:])
		switch {
		case int(r) < len(replacementTable):
			if repl := replacementTable[r]; repl != "" {
				if written == 0 {
					// First replacement: the result is at least
					// roughly as long as the input.
					b.Grow(len(s))
				}
				b.WriteString(s[written:i])
				b.WriteString(repl)
				written = i + w
			}
		case badRunes:
			// Caller accepts these runes verbatim.
		case 0xfdd0 <= r && r <= 0xfdef || 0xfff0 <= r && r <= 0xffff:
			// IE does not allow these ranges in unquoted attrs.
			if written == 0 {
				b.Grow(len(s))
			}
			fmt.Fprintf(&b, "%s&#x%x;", s[written:i], r)
			written = i + w
		}
		i += w
	}
	if written == 0 {
		return s
	}
	b.WriteString(s[written:])
	return b.String()
}
- // stripTags takes a snippet of HTML and returns only the text content.
- // For example, `<b>¡Hi!</b> <script>...</script>` -> `¡Hi! `.
- func stripTags(html string) string {
- var b bytes.Buffer
- s, c, i, allText := []byte(html), context{}, 0, true
- // Using the transition funcs helps us avoid mangling
- // `<div title="1>2">` or `I <3 Ponies!`.
- for i != len(s) {
- if c.delim == delimNone {
- st := c.state
- // Use RCDATA instead of parsing into JS or CSS styles.
- if c.element != elementNone && !isInTag(st) {
- st = stateRCDATA
- }
- d, nread := transitionFunc[st](c, s[i:])
- i1 := i + nread
- if c.state == stateText || c.state == stateRCDATA {
- // Emit text up to the start of the tag or comment.
- j := i1
- if d.state != c.state {
- for j1 := j - 1; j1 >= i; j1-- {
- if s[j1] == '<' {
- j = j1
- break
- }
- }
- }
- b.Write(s[i:j])
- } else {
- allText = false
- }
- c, i = d, i1
- continue
- }
- i1 := i + bytes.IndexAny(s[i:], delimEnds[c.delim])
- if i1 < i {
- break
- }
- if c.delim != delimSpaceOrTagEnd {
- // Consume any quote.
- i1++
- }
- c, i = context{state: stateTag, element: c.element}, i1
- }
- if allText {
- return html
- } else if c.state == stateText || c.state == stateRCDATA {
- b.Write(s[i:])
- }
- return b.String()
- }
- // htmlNameFilter accepts valid parts of an HTML attribute or tag name or
- // a known-safe HTML attribute.
- func htmlNameFilter(args ...interface{}) string {
- s, t := stringify(args...)
- if t == contentTypeHTMLAttr {
- return s
- }
- if len(s) == 0 {
- // Avoid violation of structure preservation.
- // <input checked {{.K}}={{.V}}>.
- // Without this, if .K is empty then .V is the value of
- // checked, but otherwise .V is the value of the attribute
- // named .K.
- return filterFailsafe
- }
- s = strings.ToLower(s)
- if t := attrType(s); t != contentTypePlain {
- // TODO: Split attr and element name part filters so we can whitelist
- // attributes.
- return filterFailsafe
- }
- for _, r := range s {
- switch {
- case '0' <= r && r <= '9':
- case 'a' <= r && r <= 'z':
- default:
- return filterFailsafe
- }
- }
- return s
- }
// commentEscaper returns the empty string regardless of input.
//
// Comment content does not correspond to any parsed structure or
// human-readable content, so the simplest and most secure policy is to drop
// whatever is interpolated into a comment. That policy holds equally whether
// or not static comment content is removed from the template.
func commentEscaper(args ...interface{}) string {
	// args is intentionally never inspected.
	const dropped = ""
	return dropped
}
|