html.go 7.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260
  1. // Copyright 2011 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package template
  5. import (
  6. "bytes"
  7. "fmt"
  8. "strings"
  9. "unicode/utf8"
  10. )
  11. // htmlNospaceEscaper escapes for inclusion in unquoted attribute values.
  12. func htmlNospaceEscaper(args ...interface{}) string {
  13. s, t := stringify(args...)
  14. if t == contentTypeHTML {
  15. return htmlReplacer(stripTags(s), htmlNospaceNormReplacementTable, false)
  16. }
  17. return htmlReplacer(s, htmlNospaceReplacementTable, false)
  18. }
  19. // attrEscaper escapes for inclusion in quoted attribute values.
  20. func attrEscaper(args ...interface{}) string {
  21. s, t := stringify(args...)
  22. if t == contentTypeHTML {
  23. return htmlReplacer(stripTags(s), htmlNormReplacementTable, true)
  24. }
  25. return htmlReplacer(s, htmlReplacementTable, true)
  26. }
  27. // rcdataEscaper escapes for inclusion in an RCDATA element body.
  28. func rcdataEscaper(args ...interface{}) string {
  29. s, t := stringify(args...)
  30. if t == contentTypeHTML {
  31. return htmlReplacer(s, htmlNormReplacementTable, true)
  32. }
  33. return htmlReplacer(s, htmlReplacementTable, true)
  34. }
  35. // htmlEscaper escapes for inclusion in HTML text.
  36. func htmlEscaper(args ...interface{}) string {
  37. s, t := stringify(args...)
  38. if t == contentTypeHTML {
  39. return s
  40. }
  41. return htmlReplacer(s, htmlReplacementTable, true)
  42. }
  43. // htmlReplacementTable contains the runes that need to be escaped
  44. // inside a quoted attribute value or in a text node.
  45. var htmlReplacementTable = []string{
  46. // http://www.w3.org/TR/html5/syntax.html#attribute-value-(unquoted)-state
  47. // U+0000 NULL Parse error. Append a U+FFFD REPLACEMENT
  48. // CHARACTER character to the current attribute's value.
  49. // "
  50. // and similarly
  51. // http://www.w3.org/TR/html5/syntax.html#before-attribute-value-state
  52. 0: "\uFFFD",
  53. '"': """,
  54. '&': "&",
  55. '\'': "'",
  56. '+': "+",
  57. '<': "&lt;",
  58. '>': "&gt;",
  59. }
  60. // htmlNormReplacementTable is like htmlReplacementTable but without '&' to
  61. // avoid over-encoding existing entities.
  62. var htmlNormReplacementTable = []string{
  63. 0: "\uFFFD",
  64. '"': "&#34;",
  65. '\'': "&#39;",
  66. '+': "&#43;",
  67. '<': "&lt;",
  68. '>': "&gt;",
  69. }
  70. // htmlNospaceReplacementTable contains the runes that need to be escaped
  71. // inside an unquoted attribute value.
  72. // The set of runes escaped is the union of the HTML specials and
  73. // those determined by running the JS below in browsers:
  74. // <div id=d></div>
  75. // <script>(function () {
  76. // var a = [], d = document.getElementById("d"), i, c, s;
  77. // for (i = 0; i < 0x10000; ++i) {
  78. // c = String.fromCharCode(i);
  79. // d.innerHTML = "<span title=" + c + "lt" + c + "></span>"
  80. // s = d.getElementsByTagName("SPAN")[0];
  81. // if (!s || s.title !== c + "lt" + c) { a.push(i.toString(16)); }
  82. // }
  83. // document.write(a.join(", "));
  84. // })()</script>
  85. var htmlNospaceReplacementTable = []string{
  86. 0: "&#xfffd;",
  87. '\t': "&#9;",
  88. '\n': "&#10;",
  89. '\v': "&#11;",
  90. '\f': "&#12;",
  91. '\r': "&#13;",
  92. ' ': "&#32;",
  93. '"': "&#34;",
  94. '&': "&amp;",
  95. '\'': "&#39;",
  96. '+': "&#43;",
  97. '<': "&lt;",
  98. '=': "&#61;",
  99. '>': "&gt;",
  100. // A parse error in the attribute value (unquoted) and
  101. // before attribute value states.
  102. // Treated as a quoting character by IE.
  103. '`': "&#96;",
  104. }
  105. // htmlNospaceNormReplacementTable is like htmlNospaceReplacementTable but
  106. // without '&' to avoid over-encoding existing entities.
  107. var htmlNospaceNormReplacementTable = []string{
  108. 0: "&#xfffd;",
  109. '\t': "&#9;",
  110. '\n': "&#10;",
  111. '\v': "&#11;",
  112. '\f': "&#12;",
  113. '\r': "&#13;",
  114. ' ': "&#32;",
  115. '"': "&#34;",
  116. '\'': "&#39;",
  117. '+': "&#43;",
  118. '<': "&lt;",
  119. '=': "&#61;",
  120. '>': "&gt;",
  121. // A parse error in the attribute value (unquoted) and
  122. // before attribute value states.
  123. // Treated as a quoting character by IE.
  124. '`': "&#96;",
  125. }
  126. // htmlReplacer returns s with runes replaced according to replacementTable
  127. // and when badRunes is true, certain bad runes are allowed through unescaped.
  128. func htmlReplacer(s string, replacementTable []string, badRunes bool) string {
  129. written, b := 0, new(bytes.Buffer)
  130. r, w := rune(0), 0
  131. for i := 0; i < len(s); i += w {
  132. // Cannot use 'for range s' because we need to preserve the width
  133. // of the runes in the input. If we see a decoding error, the input
  134. // width will not be utf8.Runelen(r) and we will overrun the buffer.
  135. r, w = utf8.DecodeRuneInString(s[i:])
  136. if int(r) < len(replacementTable) {
  137. if repl := replacementTable[r]; len(repl) != 0 {
  138. b.WriteString(s[written:i])
  139. b.WriteString(repl)
  140. written = i + w
  141. }
  142. } else if badRunes {
  143. // No-op.
  144. // IE does not allow these ranges in unquoted attrs.
  145. } else if 0xfdd0 <= r && r <= 0xfdef || 0xfff0 <= r && r <= 0xffff {
  146. fmt.Fprintf(b, "%s&#x%x;", s[written:i], r)
  147. written = i + w
  148. }
  149. }
  150. if written == 0 {
  151. return s
  152. }
  153. b.WriteString(s[written:])
  154. return b.String()
  155. }
  156. // stripTags takes a snippet of HTML and returns only the text content.
  157. // For example, `<b>&iexcl;Hi!</b> <script>...</script>` -> `&iexcl;Hi! `.
  158. func stripTags(html string) string {
  159. var b bytes.Buffer
  160. s, c, i, allText := []byte(html), context{}, 0, true
  161. // Using the transition funcs helps us avoid mangling
  162. // `<div title="1>2">` or `I <3 Ponies!`.
  163. for i != len(s) {
  164. if c.delim == delimNone {
  165. st := c.state
  166. // Use RCDATA instead of parsing into JS or CSS styles.
  167. if c.element != elementNone && !isInTag(st) {
  168. st = stateRCDATA
  169. }
  170. d, nread := transitionFunc[st](c, s[i:])
  171. i1 := i + nread
  172. if c.state == stateText || c.state == stateRCDATA {
  173. // Emit text up to the start of the tag or comment.
  174. j := i1
  175. if d.state != c.state {
  176. for j1 := j - 1; j1 >= i; j1-- {
  177. if s[j1] == '<' {
  178. j = j1
  179. break
  180. }
  181. }
  182. }
  183. b.Write(s[i:j])
  184. } else {
  185. allText = false
  186. }
  187. c, i = d, i1
  188. continue
  189. }
  190. i1 := i + bytes.IndexAny(s[i:], delimEnds[c.delim])
  191. if i1 < i {
  192. break
  193. }
  194. if c.delim != delimSpaceOrTagEnd {
  195. // Consume any quote.
  196. i1++
  197. }
  198. c, i = context{state: stateTag, element: c.element}, i1
  199. }
  200. if allText {
  201. return html
  202. } else if c.state == stateText || c.state == stateRCDATA {
  203. b.Write(s[i:])
  204. }
  205. return b.String()
  206. }
  207. // htmlNameFilter accepts valid parts of an HTML attribute or tag name or
  208. // a known-safe HTML attribute.
  209. func htmlNameFilter(args ...interface{}) string {
  210. s, t := stringify(args...)
  211. if t == contentTypeHTMLAttr {
  212. return s
  213. }
  214. if len(s) == 0 {
  215. // Avoid violation of structure preservation.
  216. // <input checked {{.K}}={{.V}}>.
  217. // Without this, if .K is empty then .V is the value of
  218. // checked, but otherwise .V is the value of the attribute
  219. // named .K.
  220. return filterFailsafe
  221. }
  222. s = strings.ToLower(s)
  223. if t := attrType(s); t != contentTypePlain {
  224. // TODO: Split attr and element name part filters so we can whitelist
  225. // attributes.
  226. return filterFailsafe
  227. }
  228. for _, r := range s {
  229. switch {
  230. case '0' <= r && r <= '9':
  231. case 'a' <= r && r <= 'z':
  232. default:
  233. return filterFailsafe
  234. }
  235. }
  236. return s
  237. }
  238. // commentEscaper returns the empty string regardless of input.
  239. // Comment content does not correspond to any parsed structure or
  240. // human-readable content, so the simplest and most secure policy is to drop
  241. // content interpolated into comments.
  242. // This approach is equally valid whether or not static comment content is
  243. // removed from the template.
  244. func commentEscaper(args ...interface{}) string {
  245. return ""
  246. }