escape.go 28 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881
  1. // Copyright 2011 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package template
  5. import (
  6. "bytes"
  7. "fmt"
  8. "html"
  9. "io"
  10. "text/template"
  11. "text/template/parse"
  12. )
  13. // escapeTemplate rewrites the named template, which must be
  14. // associated with t, to guarantee that the output of any of the named
  15. // templates is properly escaped. If no error is returned, then the named templates have
  16. // been modified. Otherwise the named templates have been rendered
  17. // unusable.
  18. func escapeTemplate(tmpl *Template, node parse.Node, name string) error {
  19. c, _ := tmpl.esc.escapeTree(context{}, node, name, 0)
  20. var err error
  21. if c.err != nil {
  22. err, c.err.Name = c.err, name
  23. } else if c.state != stateText {
  24. err = &Error{ErrEndContext, nil, name, 0, fmt.Sprintf("ends in a non-text context: %v", c)}
  25. }
  26. if err != nil {
  27. // Prevent execution of unsafe templates.
  28. if t := tmpl.set[name]; t != nil {
  29. t.escapeErr = err
  30. t.text.Tree = nil
  31. t.Tree = nil
  32. }
  33. return err
  34. }
  35. tmpl.esc.commit()
  36. if t := tmpl.set[name]; t != nil {
  37. t.escapeErr = escapeOK
  38. t.Tree = t.text.Tree
  39. }
  40. return nil
  41. }
  42. // evalArgs formats the list of arguments into a string. It is equivalent to
  43. // fmt.Sprint(args...), except that it deferences all pointers.
  44. func evalArgs(args ...interface{}) string {
  45. // Optimization for simple common case of a single string argument.
  46. if len(args) == 1 {
  47. if s, ok := args[0].(string); ok {
  48. return s
  49. }
  50. }
  51. for i, arg := range args {
  52. args[i] = indirectToStringerOrError(arg)
  53. }
  54. return fmt.Sprint(args...)
  55. }
// funcMap maps command names to functions that render their inputs safe.
// The keys are the identifiers that the escaper appends to action pipelines
// (see escapeAction and ensurePipelineContains); commit installs this map on
// every escaped template via Funcs.
var funcMap = template.FuncMap{
	"_html_template_attrescaper":     attrEscaper,
	"_html_template_commentescaper":  commentEscaper,
	"_html_template_cssescaper":      cssEscaper,
	"_html_template_cssvaluefilter":  cssValueFilter,
	"_html_template_htmlnamefilter":  htmlNameFilter,
	"_html_template_htmlescaper":     htmlEscaper,
	"_html_template_jsregexpescaper": jsRegexpEscaper,
	"_html_template_jsstrescaper":    jsStrEscaper,
	"_html_template_jsvalescaper":    jsValEscaper,
	"_html_template_nospaceescaper":  htmlNospaceEscaper,
	"_html_template_rcdataescaper":   rcdataEscaper,
	"_html_template_urlescaper":      urlEscaper,
	"_html_template_urlfilter":       urlFilter,
	"_html_template_urlnormalizer":   urlNormalizer,
	// Helper inserted by ensurePipelineContains when a predefined escaper
	// is called with several arguments.
	"_eval_args_": evalArgs,
}
// escaper collects type inferences about templates and changes needed to make
// templates injection safe.
type escaper struct {
	// ns is the nameSpace that this escaper is associated with.
	ns *nameSpace
	// output[templateName] is the output context for a templateName that
	// has been mangled to include its input context.
	output map[string]context
	// derived[c.mangle(name)] maps to a template derived from the template
	// named name for the start context c.
	derived map[string]*template.Template
	// called[templateName] is a set of called mangled template names.
	called map[string]bool
	// xxxNodeEdits are the accumulated edits to apply during commit.
	// Such edits are not applied immediately in case a template set
	// executes a given template in different escaping contexts.
	actionNodeEdits   map[*parse.ActionNode][]string
	templateNodeEdits map[*parse.TemplateNode]string
	textNodeEdits     map[*parse.TextNode][]byte
}
  94. // makeEscaper creates a blank escaper for the given set.
  95. func makeEscaper(n *nameSpace) escaper {
  96. return escaper{
  97. n,
  98. map[string]context{},
  99. map[string]*template.Template{},
  100. map[string]bool{},
  101. map[*parse.ActionNode][]string{},
  102. map[*parse.TemplateNode]string{},
  103. map[*parse.TextNode][]byte{},
  104. }
  105. }
// filterFailsafe is an innocuous word that is emitted in place of unsafe values
// by sanitizer functions. It is not a keyword in any programming language,
// contains no special characters, is not empty, and when it appears in output
// it is distinct enough that a developer can find the source of the problem
// via a search engine.
const filterFailsafe = "ZgotmplZ"
  112. // escape escapes a template node.
  113. func (e *escaper) escape(c context, n parse.Node) context {
  114. switch n := n.(type) {
  115. case *parse.ActionNode:
  116. return e.escapeAction(c, n)
  117. case *parse.IfNode:
  118. return e.escapeBranch(c, &n.BranchNode, "if")
  119. case *parse.ListNode:
  120. return e.escapeList(c, n)
  121. case *parse.RangeNode:
  122. return e.escapeBranch(c, &n.BranchNode, "range")
  123. case *parse.TemplateNode:
  124. return e.escapeTemplate(c, n)
  125. case *parse.TextNode:
  126. return e.escapeText(c, n)
  127. case *parse.WithNode:
  128. return e.escapeBranch(c, &n.BranchNode, "with")
  129. }
  130. panic("escaping " + n.String() + " is unimplemented")
  131. }
// escapeAction escapes an action template node.
//
// It works out which escaper functions the action's pipeline needs for the
// context c, records them as a pending edit on n (applied later by commit),
// and returns the context that follows the action. An error context is
// returned when a predefined escaper is used in a disallowed position or when
// the action sits in an ambiguous part of a URL.
func (e *escaper) escapeAction(c context, n *parse.ActionNode) context {
	if len(n.Pipe.Decl) != 0 {
		// A local variable assignment, not an interpolation.
		return c
	}
	// Resolve any pending empty-string transition (e.g. before an
	// attribute value) so that c.state reflects where the value lands.
	c = nudge(c)
	// Check for disallowed use of predefined escapers in the pipeline.
	for pos, idNode := range n.Pipe.Cmds {
		node, ok := idNode.Args[0].(*parse.IdentifierNode)
		if !ok {
			// A predefined escaper "esc" will never be found as an identifier in a
			// Chain or Field node, since:
			// - "esc.x ..." is invalid, since predefined escapers return strings, and
			//   strings do not have methods, keys or fields.
			// - "... .esc" is invalid, since predefined escapers are global functions,
			//   not methods or fields of any types.
			// Therefore, it is safe to ignore these two node types.
			continue
		}
		ident := node.Ident
		if _, ok := predefinedEscapers[ident]; ok {
			// A predefined escaper is rejected unless it is the last
			// command, and "html" is additionally rejected inside an
			// unquoted attribute value.
			if pos < len(n.Pipe.Cmds)-1 ||
				c.state == stateAttr && c.delim == delimSpaceOrTagEnd && ident == "html" {
				return context{
					state: stateError,
					err:   errorf(ErrPredefinedEscaper, n, n.Line, "predefined escaper %q disallowed in template", ident),
				}
			}
		}
	}
	// s accumulates, in application order, the escaper names to append.
	s := make([]string, 0, 3)
	switch c.state {
	case stateError:
		return c
	case stateURL, stateCSSDqStr, stateCSSSqStr, stateCSSDqURL, stateCSSSqURL, stateCSSURL:
		switch c.urlPart {
		case urlPartNone:
			// At the start of a URL the filter is added first, followed
			// by whatever the pre-query case below selects.
			s = append(s, "_html_template_urlfilter")
			fallthrough
		case urlPartPreQuery:
			switch c.state {
			case stateCSSDqStr, stateCSSSqStr:
				s = append(s, "_html_template_cssescaper")
			default:
				s = append(s, "_html_template_urlnormalizer")
			}
		case urlPartQueryOrFrag:
			s = append(s, "_html_template_urlescaper")
		case urlPartUnknown:
			return context{
				state: stateError,
				err:   errorf(ErrAmbigContext, n, n.Line, "%s appears in an ambiguous context within a URL", n),
			}
		default:
			panic(c.urlPart.String())
		}
	case stateJS:
		s = append(s, "_html_template_jsvalescaper")
		// A slash after a value starts a div operator.
		c.jsCtx = jsCtxDivOp
	case stateJSDqStr, stateJSSqStr:
		s = append(s, "_html_template_jsstrescaper")
	case stateJSRegexp:
		s = append(s, "_html_template_jsregexpescaper")
	case stateCSS:
		s = append(s, "_html_template_cssvaluefilter")
	case stateText:
		s = append(s, "_html_template_htmlescaper")
	case stateRCDATA:
		s = append(s, "_html_template_rcdataescaper")
	case stateAttr:
		// Handled below in delim check.
	case stateAttrName, stateTag:
		c.state = stateAttrName
		s = append(s, "_html_template_htmlnamefilter")
	default:
		if isComment(c.state) {
			s = append(s, "_html_template_commentescaper")
		} else {
			panic("unexpected state " + c.state.String())
		}
	}
	// Inside an attribute value an additional escaper is appended that
	// depends on how the value is delimited.
	switch c.delim {
	case delimNone:
		// No extra-escaping needed for raw text content.
	case delimSpaceOrTagEnd:
		s = append(s, "_html_template_nospaceescaper")
	default:
		s = append(s, "_html_template_attrescaper")
	}
	e.editActionNode(n, s)
	return c
}
// ensurePipelineContains ensures that the pipeline ends with the commands with
// the identifiers in s in order. If the pipeline ends with a predefined escaper
// (i.e. "html" or "urlquery"), merge it with the identifiers in s.
//
// Both arguments are modified: p.Cmds is replaced by the rewritten command
// list, and entries of s that are equivalent to the trailing predefined
// escaper are overwritten with that escaper's name.
func ensurePipelineContains(p *parse.PipeNode, s []string) {
	if len(s) == 0 {
		// Do not rewrite pipeline if we have no escapers to insert.
		return
	}
	// Precondition: p.Cmds contains at most one predefined escaper and the
	// escaper will be present at p.Cmds[len(p.Cmds)-1]. This precondition is
	// always true because of the checks in escapeAction.
	//
	// pipelineLen is the number of leading commands of p.Cmds that are
	// copied verbatim into the rewritten pipeline.
	pipelineLen := len(p.Cmds)
	if pipelineLen > 0 {
		lastCmd := p.Cmds[pipelineLen-1]
		if idNode, ok := lastCmd.Args[0].(*parse.IdentifierNode); ok {
			if esc := idNode.Ident; predefinedEscapers[esc] {
				// Pipeline ends with a predefined escaper.
				if len(p.Cmds) == 1 && len(lastCmd.Args) > 1 {
					// Special case: pipeline is of the form {{ esc arg1 arg2 ... argN }},
					// where esc is the predefined escaper, and arg1...argN are its arguments.
					// Convert this into the equivalent form
					// {{ _eval_args_ arg1 arg2 ... argN | esc }}, so that esc can be easily
					// merged with the escapers in s.
					lastCmd.Args[0] = parse.NewIdentifier("_eval_args_").SetTree(nil).SetPos(lastCmd.Args[0].Position())
					p.Cmds = appendCmd(p.Cmds, newIdentCmd(esc, p.Position()))
					pipelineLen++
				}
				// If any of the commands in s that we are about to insert is equivalent
				// to the predefined escaper, use the predefined escaper instead.
				dup := false
				for i, escaper := range s {
					if escFnsEq(esc, escaper) {
						s[i] = idNode.Ident
						dup = true
					}
				}
				if dup {
					// The predefined escaper will already be inserted along with the
					// escapers in s, so do not copy it to the rewritten pipeline.
					pipelineLen--
				}
			}
		}
	}
	// Rewrite the pipeline, creating the escapers in s at the end of the pipeline.
	newCmds := make([]*parse.CommandNode, pipelineLen, pipelineLen+len(s))
	copy(newCmds, p.Cmds)
	for _, name := range s {
		// appendCmd skips a command that is redundant with its predecessor.
		newCmds = appendCmd(newCmds, newIdentCmd(name, p.Position()))
	}
	p.Cmds = newCmds
}
// predefinedEscapers contains template predefined escapers that are equivalent
// to some contextual escapers. Keep in sync with equivEscapers.
// It is used as a set: a name is a predefined escaper if it maps to true.
var predefinedEscapers = map[string]bool{
	"html":     true,
	"urlquery": true,
}
// equivEscapers matches contextual escapers to equivalent predefined
// template escapers.
var equivEscapers = map[string]string{
	// The following pairs of HTML escapers provide equivalent security
	// guarantees, since they all escape '\000', '\'', '"', '&', '<', and '>'.
	"_html_template_attrescaper":   "html",
	"_html_template_htmlescaper":   "html",
	"_html_template_rcdataescaper": "html",
	// These two URL escapers produce URLs safe for embedding in a URL query by
	// percent-encoding all the reserved characters specified in RFC 3986 Section
	// 2.2
	"_html_template_urlescaper": "urlquery",
	// These two functions are not actually equivalent; urlquery is stricter as it
	// escapes reserved characters (e.g. '#'), while _html_template_urlnormalizer
	// does not. It is therefore only safe to replace _html_template_urlnormalizer
	// with urlquery (this happens in ensurePipelineContains), but not the other
	// way around. We keep this entry around to preserve the behavior of templates
	// written before Go 1.9, which might depend on this substitution taking place.
	"_html_template_urlnormalizer": "urlquery",
}
  304. // escFnsEq reports whether the two escaping functions are equivalent.
  305. func escFnsEq(a, b string) bool {
  306. if e := equivEscapers[a]; e != "" {
  307. a = e
  308. }
  309. if e := equivEscapers[b]; e != "" {
  310. b = e
  311. }
  312. return a == b
  313. }
// redundantFuncs[a][b] implies that funcMap[b](funcMap[a](x)) == funcMap[a](x)
// for all x.
// appendCmd consults this table to avoid appending escaper b directly after
// escaper a.
var redundantFuncs = map[string]map[string]bool{
	"_html_template_commentescaper": {
		"_html_template_attrescaper":    true,
		"_html_template_nospaceescaper": true,
		"_html_template_htmlescaper":    true,
	},
	"_html_template_cssescaper": {
		"_html_template_attrescaper": true,
	},
	"_html_template_jsregexpescaper": {
		"_html_template_attrescaper": true,
	},
	"_html_template_jsstrescaper": {
		"_html_template_attrescaper": true,
	},
	"_html_template_urlescaper": {
		"_html_template_urlnormalizer": true,
	},
}
  335. // appendCmd appends the given command to the end of the command pipeline
  336. // unless it is redundant with the last command.
  337. func appendCmd(cmds []*parse.CommandNode, cmd *parse.CommandNode) []*parse.CommandNode {
  338. if n := len(cmds); n != 0 {
  339. last, okLast := cmds[n-1].Args[0].(*parse.IdentifierNode)
  340. next, okNext := cmd.Args[0].(*parse.IdentifierNode)
  341. if okLast && okNext && redundantFuncs[last.Ident][next.Ident] {
  342. return cmds
  343. }
  344. }
  345. return append(cmds, cmd)
  346. }
  347. // indexOfStr is the first i such that eq(s, strs[i]) or -1 if s was not found.
  348. func indexOfStr(s string, strs []string, eq func(a, b string) bool) int {
  349. for i, t := range strs {
  350. if eq(s, t) {
  351. return i
  352. }
  353. }
  354. return -1
  355. }
  356. // newIdentCmd produces a command containing a single identifier node.
  357. func newIdentCmd(identifier string, pos parse.Pos) *parse.CommandNode {
  358. return &parse.CommandNode{
  359. NodeType: parse.NodeCommand,
  360. Args: []parse.Node{parse.NewIdentifier(identifier).SetTree(nil).SetPos(pos)}, // TODO: SetTree.
  361. }
  362. }
  363. // nudge returns the context that would result from following empty string
  364. // transitions from the input context.
  365. // For example, parsing:
  366. // `<a href=`
  367. // will end in context{stateBeforeValue, attrURL}, but parsing one extra rune:
  368. // `<a href=x`
  369. // will end in context{stateURL, delimSpaceOrTagEnd, ...}.
  370. // There are two transitions that happen when the 'x' is seen:
  371. // (1) Transition from a before-value state to a start-of-value state without
  372. // consuming any character.
  373. // (2) Consume 'x' and transition past the first value character.
  374. // In this case, nudging produces the context after (1) happens.
  375. func nudge(c context) context {
  376. switch c.state {
  377. case stateTag:
  378. // In `<foo {{.}}`, the action should emit an attribute.
  379. c.state = stateAttrName
  380. case stateBeforeValue:
  381. // In `<foo bar={{.}}`, the action is an undelimited value.
  382. c.state, c.delim, c.attr = attrStartStates[c.attr], delimSpaceOrTagEnd, attrNone
  383. case stateAfterName:
  384. // In `<foo bar {{.}}`, the action is an attribute name.
  385. c.state, c.attr = stateAttrName, attrNone
  386. }
  387. return c
  388. }
  389. // join joins the two contexts of a branch template node. The result is an
  390. // error context if either of the input contexts are error contexts, or if the
  391. // the input contexts differ.
  392. func join(a, b context, node parse.Node, nodeName string) context {
  393. if a.state == stateError {
  394. return a
  395. }
  396. if b.state == stateError {
  397. return b
  398. }
  399. if a.eq(b) {
  400. return a
  401. }
  402. c := a
  403. c.urlPart = b.urlPart
  404. if c.eq(b) {
  405. // The contexts differ only by urlPart.
  406. c.urlPart = urlPartUnknown
  407. return c
  408. }
  409. c = a
  410. c.jsCtx = b.jsCtx
  411. if c.eq(b) {
  412. // The contexts differ only by jsCtx.
  413. c.jsCtx = jsCtxUnknown
  414. return c
  415. }
  416. // Allow a nudged context to join with an unnudged one.
  417. // This means that
  418. // <p title={{if .C}}{{.}}{{end}}
  419. // ends in an unquoted value state even though the else branch
  420. // ends in stateBeforeValue.
  421. if c, d := nudge(a), nudge(b); !(c.eq(a) && d.eq(b)) {
  422. if e := join(c, d, node, nodeName); e.state != stateError {
  423. return e
  424. }
  425. }
  426. return context{
  427. state: stateError,
  428. err: errorf(ErrBranchEnd, node, 0, "{{%s}} branches end in different contexts: %v, %v", nodeName, a, b),
  429. }
  430. }
  431. // escapeBranch escapes a branch template node: "if", "range" and "with".
  432. func (e *escaper) escapeBranch(c context, n *parse.BranchNode, nodeName string) context {
  433. c0 := e.escapeList(c, n.List)
  434. if nodeName == "range" && c0.state != stateError {
  435. // The "true" branch of a "range" node can execute multiple times.
  436. // We check that executing n.List once results in the same context
  437. // as executing n.List twice.
  438. c1, _ := e.escapeListConditionally(c0, n.List, nil)
  439. c0 = join(c0, c1, n, nodeName)
  440. if c0.state == stateError {
  441. // Make clear that this is a problem on loop re-entry
  442. // since developers tend to overlook that branch when
  443. // debugging templates.
  444. c0.err.Line = n.Line
  445. c0.err.Description = "on range loop re-entry: " + c0.err.Description
  446. return c0
  447. }
  448. }
  449. c1 := e.escapeList(c, n.ElseList)
  450. return join(c0, c1, n, nodeName)
  451. }
  452. // escapeList escapes a list template node.
  453. func (e *escaper) escapeList(c context, n *parse.ListNode) context {
  454. if n == nil {
  455. return c
  456. }
  457. for _, m := range n.Nodes {
  458. c = e.escape(c, m)
  459. }
  460. return c
  461. }
  462. // escapeListConditionally escapes a list node but only preserves edits and
  463. // inferences in e if the inferences and output context satisfy filter.
  464. // It returns the best guess at an output context, and the result of the filter
  465. // which is the same as whether e was updated.
  466. func (e *escaper) escapeListConditionally(c context, n *parse.ListNode, filter func(*escaper, context) bool) (context, bool) {
  467. e1 := makeEscaper(e.ns)
  468. // Make type inferences available to f.
  469. for k, v := range e.output {
  470. e1.output[k] = v
  471. }
  472. c = e1.escapeList(c, n)
  473. ok := filter != nil && filter(&e1, c)
  474. if ok {
  475. // Copy inferences and edits from e1 back into e.
  476. for k, v := range e1.output {
  477. e.output[k] = v
  478. }
  479. for k, v := range e1.derived {
  480. e.derived[k] = v
  481. }
  482. for k, v := range e1.called {
  483. e.called[k] = v
  484. }
  485. for k, v := range e1.actionNodeEdits {
  486. e.editActionNode(k, v)
  487. }
  488. for k, v := range e1.templateNodeEdits {
  489. e.editTemplateNode(k, v)
  490. }
  491. for k, v := range e1.textNodeEdits {
  492. e.editTextNode(k, v)
  493. }
  494. }
  495. return c, ok
  496. }
  497. // escapeTemplate escapes a {{template}} call node.
  498. func (e *escaper) escapeTemplate(c context, n *parse.TemplateNode) context {
  499. c, name := e.escapeTree(c, n, n.Name, n.Line)
  500. if name != n.Name {
  501. e.editTemplateNode(n, name)
  502. }
  503. return c
  504. }
  505. // escapeTree escapes the named template starting in the given context as
  506. // necessary and returns its output context.
  507. func (e *escaper) escapeTree(c context, node parse.Node, name string, line int) (context, string) {
  508. // Mangle the template name with the input context to produce a reliable
  509. // identifier.
  510. dname := c.mangle(name)
  511. e.called[dname] = true
  512. if out, ok := e.output[dname]; ok {
  513. // Already escaped.
  514. return out, dname
  515. }
  516. t := e.template(name)
  517. if t == nil {
  518. // Two cases: The template exists but is empty, or has never been mentioned at
  519. // all. Distinguish the cases in the error messages.
  520. if e.ns.set[name] != nil {
  521. return context{
  522. state: stateError,
  523. err: errorf(ErrNoSuchTemplate, node, line, "%q is an incomplete or empty template", name),
  524. }, dname
  525. }
  526. return context{
  527. state: stateError,
  528. err: errorf(ErrNoSuchTemplate, node, line, "no such template %q", name),
  529. }, dname
  530. }
  531. if dname != name {
  532. // Use any template derived during an earlier call to escapeTemplate
  533. // with different top level templates, or clone if necessary.
  534. dt := e.template(dname)
  535. if dt == nil {
  536. dt = template.New(dname)
  537. dt.Tree = &parse.Tree{Name: dname, Root: t.Root.CopyList()}
  538. e.derived[dname] = dt
  539. }
  540. t = dt
  541. }
  542. return e.computeOutCtx(c, t), dname
  543. }
  544. // computeOutCtx takes a template and its start context and computes the output
  545. // context while storing any inferences in e.
  546. func (e *escaper) computeOutCtx(c context, t *template.Template) context {
  547. // Propagate context over the body.
  548. c1, ok := e.escapeTemplateBody(c, t)
  549. if !ok {
  550. // Look for a fixed point by assuming c1 as the output context.
  551. if c2, ok2 := e.escapeTemplateBody(c1, t); ok2 {
  552. c1, ok = c2, true
  553. }
  554. // Use c1 as the error context if neither assumption worked.
  555. }
  556. if !ok && c1.state != stateError {
  557. return context{
  558. state: stateError,
  559. err: errorf(ErrOutputContext, t.Tree.Root, 0, "cannot compute output context for template %s", t.Name()),
  560. }
  561. }
  562. return c1
  563. }
  564. // escapeTemplateBody escapes the given template assuming the given output
  565. // context, and returns the best guess at the output context and whether the
  566. // assumption was correct.
  567. func (e *escaper) escapeTemplateBody(c context, t *template.Template) (context, bool) {
  568. filter := func(e1 *escaper, c1 context) bool {
  569. if c1.state == stateError {
  570. // Do not update the input escaper, e.
  571. return false
  572. }
  573. if !e1.called[t.Name()] {
  574. // If t is not recursively called, then c1 is an
  575. // accurate output context.
  576. return true
  577. }
  578. // c1 is accurate if it matches our assumed output context.
  579. return c.eq(c1)
  580. }
  581. // We need to assume an output context so that recursive template calls
  582. // take the fast path out of escapeTree instead of infinitely recursing.
  583. // Naively assuming that the input context is the same as the output
  584. // works >90% of the time.
  585. e.output[t.Name()] = c
  586. return e.escapeListConditionally(c, t.Tree.Root, filter)
  587. }
// delimEnds maps each delim to a string of characters that terminate it.
// contextAfterText passes the entry for the current delimiter to
// bytes.IndexAny to find the end of an attribute value.
var delimEnds = [...]string{
	delimDoubleQuote: `"`,
	delimSingleQuote: "'",
	// Determined empirically by running the below in various browsers.
	// var div = document.createElement("DIV");
	// for (var i = 0; i < 0x10000; ++i) {
	//   div.innerHTML = "<span title=x" + String.fromCharCode(i) + "-bar>";
	//   if (div.getElementsByTagName("SPAN")[0].title.indexOf("bar") < 0)
	//     document.write("<p>U+" + i.toString(16));
	// }
	delimSpaceOrTagEnd: " \t\n\f\r>",
}
// doctypeBytes is the upper-case prefix that escapeText compares against,
// case-insensitively, to recognize a '<' that starts a DOCTYPE declaration;
// such a '<' is left unescaped.
var doctypeBytes = []byte("<!DOCTYPE")
  602. // escapeText escapes a text template node.
  603. func (e *escaper) escapeText(c context, n *parse.TextNode) context {
  604. s, written, i, b := n.Text, 0, 0, new(bytes.Buffer)
  605. for i != len(s) {
  606. c1, nread := contextAfterText(c, s[i:])
  607. i1 := i + nread
  608. if c.state == stateText || c.state == stateRCDATA {
  609. end := i1
  610. if c1.state != c.state {
  611. for j := end - 1; j >= i; j-- {
  612. if s[j] == '<' {
  613. end = j
  614. break
  615. }
  616. }
  617. }
  618. for j := i; j < end; j++ {
  619. if s[j] == '<' && !bytes.HasPrefix(bytes.ToUpper(s[j:]), doctypeBytes) {
  620. b.Write(s[written:j])
  621. b.WriteString("&lt;")
  622. written = j + 1
  623. }
  624. }
  625. } else if isComment(c.state) && c.delim == delimNone {
  626. switch c.state {
  627. case stateJSBlockCmt:
  628. // http://es5.github.com/#x7.4:
  629. // "Comments behave like white space and are
  630. // discarded except that, if a MultiLineComment
  631. // contains a line terminator character, then
  632. // the entire comment is considered to be a
  633. // LineTerminator for purposes of parsing by
  634. // the syntactic grammar."
  635. if bytes.ContainsAny(s[written:i1], "\n\r\u2028\u2029") {
  636. b.WriteByte('\n')
  637. } else {
  638. b.WriteByte(' ')
  639. }
  640. case stateCSSBlockCmt:
  641. b.WriteByte(' ')
  642. }
  643. written = i1
  644. }
  645. if c.state != c1.state && isComment(c1.state) && c1.delim == delimNone {
  646. // Preserve the portion between written and the comment start.
  647. cs := i1 - 2
  648. if c1.state == stateHTMLCmt {
  649. // "<!--" instead of "/*" or "//"
  650. cs -= 2
  651. }
  652. b.Write(s[written:cs])
  653. written = i1
  654. }
  655. if i == i1 && c.state == c1.state {
  656. panic(fmt.Sprintf("infinite loop from %v to %v on %q..%q", c, c1, s[:i], s[i:]))
  657. }
  658. c, i = c1, i1
  659. }
  660. if written != 0 && c.state != stateError {
  661. if !isComment(c.state) || c.delim != delimNone {
  662. b.Write(n.Text[written:])
  663. }
  664. e.editTextNode(n, b.Bytes())
  665. }
  666. return c
  667. }
// contextAfterText starts in context c, consumes some tokens from the front of
// s, then returns the context after those tokens and an int that the caller
// (escapeText) adds to its read offset, i.e. how much of s was consumed.
func contextAfterText(c context, s []byte) (context, int) {
	if c.delim == delimNone {
		c1, i := tSpecialTagEnd(c, s)
		if i == 0 {
			// A special end tag (`</script>`) has been seen and
			// all content preceding it has been consumed.
			return c1, 0
		}
		// Consider all content up to any end tag.
		return transitionFunc[c.state](c, s[:i])
	}
	// We are at the beginning of an attribute value.
	// i is the index of the delimiter that ends the value, or len(s) when
	// the value continues past the end of s.
	i := bytes.IndexAny(s, delimEnds[c.delim])
	if i == -1 {
		i = len(s)
	}
	if c.delim == delimSpaceOrTagEnd {
		// http://www.w3.org/TR/html5/syntax.html#attribute-value-(unquoted)-state
		// lists the runes below as error characters.
		// Error out because HTML parsers may differ on whether
		// "<a id= onclick=f("     ends inside id's or onclick's value,
		// "<a class=`foo "        ends inside a value,
		// "<a style=font:'Arial'" needs open-quote fixup.
		// IE treats '`' as a quotation character.
		if j := bytes.IndexAny(s[:i], "\"'<=`"); j >= 0 {
			return context{
				state: stateError,
				err:   errorf(ErrBadHTML, nil, 0, "%q in unquoted attr: %q", s[j:j+1], s[:i]),
			}, len(s)
		}
	}
	if i == len(s) {
		// Remain inside the attribute.
		// Decode the value so non-HTML rules can easily handle
		//     <button onclick="alert(&quot;Hi!&quot;)">
		// without having to entity decode token boundaries.
		for u := []byte(html.UnescapeString(string(s))); len(u) != 0; {
			c1, i1 := transitionFunc[c.state](c, u)
			c, u = c1, u[i1:]
		}
		return c, len(s)
	}
	element := c.element
	// If this is a non-JS "type" attribute inside "script" tag, do not treat the contents as JS.
	if c.state == stateAttr && c.element == elementScript && c.attr == attrScriptType && !isJSType(string(s[:i])) {
		element = elementNone
	}
	if c.delim != delimSpaceOrTagEnd {
		// Consume any quote.
		i++
	}
	// On exiting an attribute, we discard all state information
	// except the state and element.
	return context{state: stateTag, element: element}, i
}
  725. // editActionNode records a change to an action pipeline for later commit.
  726. func (e *escaper) editActionNode(n *parse.ActionNode, cmds []string) {
  727. if _, ok := e.actionNodeEdits[n]; ok {
  728. panic(fmt.Sprintf("node %s shared between templates", n))
  729. }
  730. e.actionNodeEdits[n] = cmds
  731. }
  732. // editTemplateNode records a change to a {{template}} callee for later commit.
  733. func (e *escaper) editTemplateNode(n *parse.TemplateNode, callee string) {
  734. if _, ok := e.templateNodeEdits[n]; ok {
  735. panic(fmt.Sprintf("node %s shared between templates", n))
  736. }
  737. e.templateNodeEdits[n] = callee
  738. }
  739. // editTextNode records a change to a text node for later commit.
  740. func (e *escaper) editTextNode(n *parse.TextNode, text []byte) {
  741. if _, ok := e.textNodeEdits[n]; ok {
  742. panic(fmt.Sprintf("node %s shared between templates", n))
  743. }
  744. e.textNodeEdits[n] = text
  745. }
  746. // commit applies changes to actions and template calls needed to contextually
  747. // autoescape content and adds any derived templates to the set.
  748. func (e *escaper) commit() {
  749. for name := range e.output {
  750. e.template(name).Funcs(funcMap)
  751. }
  752. // Any template from the name space associated with this escaper can be used
  753. // to add derived templates to the underlying text/template name space.
  754. tmpl := e.arbitraryTemplate()
  755. for _, t := range e.derived {
  756. if _, err := tmpl.text.AddParseTree(t.Name(), t.Tree); err != nil {
  757. panic("error adding derived template")
  758. }
  759. }
  760. for n, s := range e.actionNodeEdits {
  761. ensurePipelineContains(n.Pipe, s)
  762. }
  763. for n, name := range e.templateNodeEdits {
  764. n.Name = name
  765. }
  766. for n, s := range e.textNodeEdits {
  767. n.Text = s
  768. }
  769. // Reset state that is specific to this commit so that the same changes are
  770. // not re-applied to the template on subsequent calls to commit.
  771. e.called = make(map[string]bool)
  772. e.actionNodeEdits = make(map[*parse.ActionNode][]string)
  773. e.templateNodeEdits = make(map[*parse.TemplateNode]string)
  774. e.textNodeEdits = make(map[*parse.TextNode][]byte)
  775. }
  776. // template returns the named template given a mangled template name.
  777. func (e *escaper) template(name string) *template.Template {
  778. // Any template from the name space associated with this escaper can be used
  779. // to look up templates in the underlying text/template name space.
  780. t := e.arbitraryTemplate().text.Lookup(name)
  781. if t == nil {
  782. t = e.derived[name]
  783. }
  784. return t
  785. }
  786. // arbitraryTemplate returns an arbitrary template from the name space
  787. // associated with e and panics if no templates are found.
  788. func (e *escaper) arbitraryTemplate() *Template {
  789. for _, t := range e.ns.set {
  790. return t
  791. }
  792. panic("no templates in name space")
  793. }
  794. // Forwarding functions so that clients need only import this package
  795. // to reach the general escaping functions of text/template.
// HTMLEscape writes to w the escaped HTML equivalent of the plain text data b.
// It forwards to text/template's HTMLEscape.
func HTMLEscape(w io.Writer, b []byte) {
	template.HTMLEscape(w, b)
}
// HTMLEscapeString returns the escaped HTML equivalent of the plain text data s.
// It forwards to text/template's HTMLEscapeString.
func HTMLEscapeString(s string) string {
	return template.HTMLEscapeString(s)
}
// HTMLEscaper returns the escaped HTML equivalent of the textual
// representation of its arguments.
// It forwards to text/template's HTMLEscaper.
func HTMLEscaper(args ...interface{}) string {
	return template.HTMLEscaper(args...)
}
// JSEscape writes to w the escaped JavaScript equivalent of the plain text data b.
// It forwards to text/template's JSEscape.
func JSEscape(w io.Writer, b []byte) {
	template.JSEscape(w, b)
}
// JSEscapeString returns the escaped JavaScript equivalent of the plain text data s.
// It forwards to text/template's JSEscapeString.
func JSEscapeString(s string) string {
	return template.JSEscapeString(s)
}
// JSEscaper returns the escaped JavaScript equivalent of the textual
// representation of its arguments.
// It forwards to text/template's JSEscaper.
func JSEscaper(args ...interface{}) string {
	return template.JSEscaper(args...)
}
// URLQueryEscaper returns the escaped value of the textual representation of
// its arguments in a form suitable for embedding in a URL query.
// It forwards to text/template's URLQueryEscaper.
func URLQueryEscaper(args ...interface{}) string {
	return template.URLQueryEscaper(args...)
}