transition.go 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
  4. package template
  5. import (
  6. "bytes"
  7. "strings"
  8. )
  9. // transitionFunc is the array of context transition functions for text nodes.
  10. // A transition function takes a context and template text input, and returns
  11. // the updated context and the number of bytes consumed from the front of the
  12. // input.
  13. var transitionFunc = [...]func(context, []byte) (context, int){
  14. stateText: tText,
  15. stateTag: tTag,
  16. stateAttrName: tAttrName,
  17. stateAfterName: tAfterName,
  18. stateBeforeValue: tBeforeValue,
  19. stateHTMLCmt: tHTMLCmt,
  20. stateRCDATA: tSpecialTagEnd,
  21. stateAttr: tAttr,
  22. stateURL: tURL,
  23. stateJS: tJS,
  24. stateJSDqStr: tJSDelimited,
  25. stateJSSqStr: tJSDelimited,
  26. stateJSRegexp: tJSDelimited,
  27. stateJSBlockCmt: tBlockCmt,
  28. stateJSLineCmt: tLineCmt,
  29. stateCSS: tCSS,
  30. stateCSSDqStr: tCSSStr,
  31. stateCSSSqStr: tCSSStr,
  32. stateCSSDqURL: tCSSStr,
  33. stateCSSSqURL: tCSSStr,
  34. stateCSSURL: tCSSStr,
  35. stateCSSBlockCmt: tBlockCmt,
  36. stateCSSLineCmt: tLineCmt,
  37. stateError: tError,
  38. }
  39. var commentStart = []byte("<!--")
  40. var commentEnd = []byte("-->")
  41. // tText is the context transition function for the text state.
  42. func tText(c context, s []byte) (context, int) {
  43. k := 0
  44. for {
  45. i := k + bytes.IndexByte(s[k:], '<')
  46. if i < k || i+1 == len(s) {
  47. return c, len(s)
  48. } else if i+4 <= len(s) && bytes.Equal(commentStart, s[i:i+4]) {
  49. return context{state: stateHTMLCmt}, i + 4
  50. }
  51. i++
  52. end := false
  53. if s[i] == '/' {
  54. if i+1 == len(s) {
  55. return c, len(s)
  56. }
  57. end, i = true, i+1
  58. }
  59. j, e := eatTagName(s, i)
  60. if j != i {
  61. if end {
  62. e = elementNone
  63. }
  64. // We've found an HTML tag.
  65. return context{state: stateTag, element: e}, j
  66. }
  67. k = j
  68. }
  69. }
  70. var elementContentType = [...]state{
  71. elementNone: stateText,
  72. elementScript: stateJS,
  73. elementStyle: stateCSS,
  74. elementTextarea: stateRCDATA,
  75. elementTitle: stateRCDATA,
  76. }
  77. // tTag is the context transition function for the tag state.
  78. func tTag(c context, s []byte) (context, int) {
  79. // Find the attribute name.
  80. i := eatWhiteSpace(s, 0)
  81. if i == len(s) {
  82. return c, len(s)
  83. }
  84. if s[i] == '>' {
  85. return context{
  86. state: elementContentType[c.element],
  87. element: c.element,
  88. }, i + 1
  89. }
  90. j, err := eatAttrName(s, i)
  91. if err != nil {
  92. return context{state: stateError, err: err}, len(s)
  93. }
  94. state, attr := stateTag, attrNone
  95. if i == j {
  96. return context{
  97. state: stateError,
  98. err: errorf(ErrBadHTML, nil, 0, "expected space, attr name, or end of tag, but got %q", s[i:]),
  99. }, len(s)
  100. }
  101. attrName := strings.ToLower(string(s[i:j]))
  102. if c.element == elementScript && attrName == "type" {
  103. attr = attrScriptType
  104. } else {
  105. switch attrType(attrName) {
  106. case contentTypeURL:
  107. attr = attrURL
  108. case contentTypeCSS:
  109. attr = attrStyle
  110. case contentTypeJS:
  111. attr = attrScript
  112. }
  113. }
  114. if j == len(s) {
  115. state = stateAttrName
  116. } else {
  117. state = stateAfterName
  118. }
  119. return context{state: state, element: c.element, attr: attr}, j
  120. }
  121. // tAttrName is the context transition function for stateAttrName.
  122. func tAttrName(c context, s []byte) (context, int) {
  123. i, err := eatAttrName(s, 0)
  124. if err != nil {
  125. return context{state: stateError, err: err}, len(s)
  126. } else if i != len(s) {
  127. c.state = stateAfterName
  128. }
  129. return c, i
  130. }
  131. // tAfterName is the context transition function for stateAfterName.
  132. func tAfterName(c context, s []byte) (context, int) {
  133. // Look for the start of the value.
  134. i := eatWhiteSpace(s, 0)
  135. if i == len(s) {
  136. return c, len(s)
  137. } else if s[i] != '=' {
  138. // Occurs due to tag ending '>', and valueless attribute.
  139. c.state = stateTag
  140. return c, i
  141. }
  142. c.state = stateBeforeValue
  143. // Consume the "=".
  144. return c, i + 1
  145. }
  146. var attrStartStates = [...]state{
  147. attrNone: stateAttr,
  148. attrScript: stateJS,
  149. attrScriptType: stateAttr,
  150. attrStyle: stateCSS,
  151. attrURL: stateURL,
  152. }
  153. // tBeforeValue is the context transition function for stateBeforeValue.
  154. func tBeforeValue(c context, s []byte) (context, int) {
  155. i := eatWhiteSpace(s, 0)
  156. if i == len(s) {
  157. return c, len(s)
  158. }
  159. // Find the attribute delimiter.
  160. delim := delimSpaceOrTagEnd
  161. switch s[i] {
  162. case '\'':
  163. delim, i = delimSingleQuote, i+1
  164. case '"':
  165. delim, i = delimDoubleQuote, i+1
  166. }
  167. c.state, c.delim = attrStartStates[c.attr], delim
  168. return c, i
  169. }
  170. // tHTMLCmt is the context transition function for stateHTMLCmt.
  171. func tHTMLCmt(c context, s []byte) (context, int) {
  172. if i := bytes.Index(s, commentEnd); i != -1 {
  173. return context{}, i + 3
  174. }
  175. return c, len(s)
  176. }
  177. // specialTagEndMarkers maps element types to the character sequence that
  178. // case-insensitively signals the end of the special tag body.
  179. var specialTagEndMarkers = [...][]byte{
  180. elementScript: []byte("script"),
  181. elementStyle: []byte("style"),
  182. elementTextarea: []byte("textarea"),
  183. elementTitle: []byte("title"),
  184. }
  185. var (
  186. specialTagEndPrefix = []byte("</")
  187. tagEndSeparators = []byte("> \t\n\f/")
  188. )
  189. // tSpecialTagEnd is the context transition function for raw text and RCDATA
  190. // element states.
  191. func tSpecialTagEnd(c context, s []byte) (context, int) {
  192. if c.element != elementNone {
  193. if i := indexTagEnd(s, specialTagEndMarkers[c.element]); i != -1 {
  194. return context{}, i
  195. }
  196. }
  197. return c, len(s)
  198. }
  199. // indexTagEnd finds the index of a special tag end in a case insensitive way, or returns -1
  200. func indexTagEnd(s []byte, tag []byte) int {
  201. res := 0
  202. plen := len(specialTagEndPrefix)
  203. for len(s) > 0 {
  204. // Try to find the tag end prefix first
  205. i := bytes.Index(s, specialTagEndPrefix)
  206. if i == -1 {
  207. return i
  208. }
  209. s = s[i+plen:]
  210. // Try to match the actual tag if there is still space for it
  211. if len(tag) <= len(s) && bytes.EqualFold(tag, s[:len(tag)]) {
  212. s = s[len(tag):]
  213. // Check the tag is followed by a proper separator
  214. if len(s) > 0 && bytes.IndexByte(tagEndSeparators, s[0]) != -1 {
  215. return res + i
  216. }
  217. res += len(tag)
  218. }
  219. res += i + plen
  220. }
  221. return -1
  222. }
  223. // tAttr is the context transition function for the attribute state.
  224. func tAttr(c context, s []byte) (context, int) {
  225. return c, len(s)
  226. }
  227. // tURL is the context transition function for the URL state.
  228. func tURL(c context, s []byte) (context, int) {
  229. if bytes.ContainsAny(s, "#?") {
  230. c.urlPart = urlPartQueryOrFrag
  231. } else if len(s) != eatWhiteSpace(s, 0) && c.urlPart == urlPartNone {
  232. // HTML5 uses "Valid URL potentially surrounded by spaces" for
  233. // attrs: http://www.w3.org/TR/html5/index.html#attributes-1
  234. c.urlPart = urlPartPreQuery
  235. }
  236. return c, len(s)
  237. }
  238. // tJS is the context transition function for the JS state.
  239. func tJS(c context, s []byte) (context, int) {
  240. i := bytes.IndexAny(s, `"'/`)
  241. if i == -1 {
  242. // Entire input is non string, comment, regexp tokens.
  243. c.jsCtx = nextJSCtx(s, c.jsCtx)
  244. return c, len(s)
  245. }
  246. c.jsCtx = nextJSCtx(s[:i], c.jsCtx)
  247. switch s[i] {
  248. case '"':
  249. c.state, c.jsCtx = stateJSDqStr, jsCtxRegexp
  250. case '\'':
  251. c.state, c.jsCtx = stateJSSqStr, jsCtxRegexp
  252. case '/':
  253. switch {
  254. case i+1 < len(s) && s[i+1] == '/':
  255. c.state, i = stateJSLineCmt, i+1
  256. case i+1 < len(s) && s[i+1] == '*':
  257. c.state, i = stateJSBlockCmt, i+1
  258. case c.jsCtx == jsCtxRegexp:
  259. c.state = stateJSRegexp
  260. case c.jsCtx == jsCtxDivOp:
  261. c.jsCtx = jsCtxRegexp
  262. default:
  263. return context{
  264. state: stateError,
  265. err: errorf(ErrSlashAmbig, nil, 0, "'/' could start a division or regexp: %.32q", s[i:]),
  266. }, len(s)
  267. }
  268. default:
  269. panic("unreachable")
  270. }
  271. return c, i + 1
  272. }
  273. // tJSDelimited is the context transition function for the JS string and regexp
  274. // states.
  275. func tJSDelimited(c context, s []byte) (context, int) {
  276. specials := `\"`
  277. switch c.state {
  278. case stateJSSqStr:
  279. specials = `\'`
  280. case stateJSRegexp:
  281. specials = `\/[]`
  282. }
  283. k, inCharset := 0, false
  284. for {
  285. i := k + bytes.IndexAny(s[k:], specials)
  286. if i < k {
  287. break
  288. }
  289. switch s[i] {
  290. case '\\':
  291. i++
  292. if i == len(s) {
  293. return context{
  294. state: stateError,
  295. err: errorf(ErrPartialEscape, nil, 0, "unfinished escape sequence in JS string: %q", s),
  296. }, len(s)
  297. }
  298. case '[':
  299. inCharset = true
  300. case ']':
  301. inCharset = false
  302. default:
  303. // end delimiter
  304. if !inCharset {
  305. c.state, c.jsCtx = stateJS, jsCtxDivOp
  306. return c, i + 1
  307. }
  308. }
  309. k = i + 1
  310. }
  311. if inCharset {
  312. // This can be fixed by making context richer if interpolation
  313. // into charsets is desired.
  314. return context{
  315. state: stateError,
  316. err: errorf(ErrPartialCharset, nil, 0, "unfinished JS regexp charset: %q", s),
  317. }, len(s)
  318. }
  319. return c, len(s)
  320. }
  321. var blockCommentEnd = []byte("*/")
  322. // tBlockCmt is the context transition function for /*comment*/ states.
  323. func tBlockCmt(c context, s []byte) (context, int) {
  324. i := bytes.Index(s, blockCommentEnd)
  325. if i == -1 {
  326. return c, len(s)
  327. }
  328. switch c.state {
  329. case stateJSBlockCmt:
  330. c.state = stateJS
  331. case stateCSSBlockCmt:
  332. c.state = stateCSS
  333. default:
  334. panic(c.state.String())
  335. }
  336. return c, i + 2
  337. }
  338. // tLineCmt is the context transition function for //comment states.
  339. func tLineCmt(c context, s []byte) (context, int) {
  340. var lineTerminators string
  341. var endState state
  342. switch c.state {
  343. case stateJSLineCmt:
  344. lineTerminators, endState = "\n\r\u2028\u2029", stateJS
  345. case stateCSSLineCmt:
  346. lineTerminators, endState = "\n\f\r", stateCSS
  347. // Line comments are not part of any published CSS standard but
  348. // are supported by the 4 major browsers.
  349. // This defines line comments as
  350. // LINECOMMENT ::= "//" [^\n\f\d]*
  351. // since http://www.w3.org/TR/css3-syntax/#SUBTOK-nl defines
  352. // newlines:
  353. // nl ::= #xA | #xD #xA | #xD | #xC
  354. default:
  355. panic(c.state.String())
  356. }
  357. i := bytes.IndexAny(s, lineTerminators)
  358. if i == -1 {
  359. return c, len(s)
  360. }
  361. c.state = endState
  362. // Per section 7.4 of EcmaScript 5 : http://es5.github.com/#x7.4
  363. // "However, the LineTerminator at the end of the line is not
  364. // considered to be part of the single-line comment; it is
  365. // recognized separately by the lexical grammar and becomes part
  366. // of the stream of input elements for the syntactic grammar."
  367. return c, i
  368. }
  369. // tCSS is the context transition function for the CSS state.
  370. func tCSS(c context, s []byte) (context, int) {
  371. // CSS quoted strings are almost never used except for:
  372. // (1) URLs as in background: "/foo.png"
  373. // (2) Multiword font-names as in font-family: "Times New Roman"
  374. // (3) List separators in content values as in inline-lists:
  375. // <style>
  376. // ul.inlineList { list-style: none; padding:0 }
  377. // ul.inlineList > li { display: inline }
  378. // ul.inlineList > li:before { content: ", " }
  379. // ul.inlineList > li:first-child:before { content: "" }
  380. // </style>
  381. // <ul class=inlineList><li>One<li>Two<li>Three</ul>
  382. // (4) Attribute value selectors as in a[href="http://example.com/"]
  383. //
  384. // We conservatively treat all strings as URLs, but make some
  385. // allowances to avoid confusion.
  386. //
  387. // In (1), our conservative assumption is justified.
  388. // In (2), valid font names do not contain ':', '?', or '#', so our
  389. // conservative assumption is fine since we will never transition past
  390. // urlPartPreQuery.
  391. // In (3), our protocol heuristic should not be tripped, and there
  392. // should not be non-space content after a '?' or '#', so as long as
  393. // we only %-encode RFC 3986 reserved characters we are ok.
  394. // In (4), we should URL escape for URL attributes, and for others we
  395. // have the attribute name available if our conservative assumption
  396. // proves problematic for real code.
  397. k := 0
  398. for {
  399. i := k + bytes.IndexAny(s[k:], `("'/`)
  400. if i < k {
  401. return c, len(s)
  402. }
  403. switch s[i] {
  404. case '(':
  405. // Look for url to the left.
  406. p := bytes.TrimRight(s[:i], "\t\n\f\r ")
  407. if endsWithCSSKeyword(p, "url") {
  408. j := len(s) - len(bytes.TrimLeft(s[i+1:], "\t\n\f\r "))
  409. switch {
  410. case j != len(s) && s[j] == '"':
  411. c.state, j = stateCSSDqURL, j+1
  412. case j != len(s) && s[j] == '\'':
  413. c.state, j = stateCSSSqURL, j+1
  414. default:
  415. c.state = stateCSSURL
  416. }
  417. return c, j
  418. }
  419. case '/':
  420. if i+1 < len(s) {
  421. switch s[i+1] {
  422. case '/':
  423. c.state = stateCSSLineCmt
  424. return c, i + 2
  425. case '*':
  426. c.state = stateCSSBlockCmt
  427. return c, i + 2
  428. }
  429. }
  430. case '"':
  431. c.state = stateCSSDqStr
  432. return c, i + 1
  433. case '\'':
  434. c.state = stateCSSSqStr
  435. return c, i + 1
  436. }
  437. k = i + 1
  438. }
  439. }
  440. // tCSSStr is the context transition function for the CSS string and URL states.
  441. func tCSSStr(c context, s []byte) (context, int) {
  442. var endAndEsc string
  443. switch c.state {
  444. case stateCSSDqStr, stateCSSDqURL:
  445. endAndEsc = `\"`
  446. case stateCSSSqStr, stateCSSSqURL:
  447. endAndEsc = `\'`
  448. case stateCSSURL:
  449. // Unquoted URLs end with a newline or close parenthesis.
  450. // The below includes the wc (whitespace character) and nl.
  451. endAndEsc = "\\\t\n\f\r )"
  452. default:
  453. panic(c.state.String())
  454. }
  455. k := 0
  456. for {
  457. i := k + bytes.IndexAny(s[k:], endAndEsc)
  458. if i < k {
  459. c, nread := tURL(c, decodeCSS(s[k:]))
  460. return c, k + nread
  461. }
  462. if s[i] == '\\' {
  463. i++
  464. if i == len(s) {
  465. return context{
  466. state: stateError,
  467. err: errorf(ErrPartialEscape, nil, 0, "unfinished escape sequence in CSS string: %q", s),
  468. }, len(s)
  469. }
  470. } else {
  471. c.state = stateCSS
  472. return c, i + 1
  473. }
  474. c, _ = tURL(c, decodeCSS(s[:i+1]))
  475. k = i + 1
  476. }
  477. }
  478. // tError is the context transition function for the error state.
  479. func tError(c context, s []byte) (context, int) {
  480. return c, len(s)
  481. }
  482. // eatAttrName returns the largest j such that s[i:j] is an attribute name.
  483. // It returns an error if s[i:] does not look like it begins with an
  484. // attribute name, such as encountering a quote mark without a preceding
  485. // equals sign.
  486. func eatAttrName(s []byte, i int) (int, *Error) {
  487. for j := i; j < len(s); j++ {
  488. switch s[j] {
  489. case ' ', '\t', '\n', '\f', '\r', '=', '>':
  490. return j, nil
  491. case '\'', '"', '<':
  492. // These result in a parse warning in HTML5 and are
  493. // indicative of serious problems if seen in an attr
  494. // name in a template.
  495. return -1, errorf(ErrBadHTML, nil, 0, "%q in attribute name: %.32q", s[j:j+1], s)
  496. default:
  497. // No-op.
  498. }
  499. }
  500. return len(s), nil
  501. }
  502. var elementNameMap = map[string]element{
  503. "script": elementScript,
  504. "style": elementStyle,
  505. "textarea": elementTextarea,
  506. "title": elementTitle,
  507. }
  508. // asciiAlpha reports whether c is an ASCII letter.
  509. func asciiAlpha(c byte) bool {
  510. return 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z'
  511. }
  512. // asciiAlphaNum reports whether c is an ASCII letter or digit.
  513. func asciiAlphaNum(c byte) bool {
  514. return asciiAlpha(c) || '0' <= c && c <= '9'
  515. }
  516. // eatTagName returns the largest j such that s[i:j] is a tag name and the tag type.
  517. func eatTagName(s []byte, i int) (int, element) {
  518. if i == len(s) || !asciiAlpha(s[i]) {
  519. return i, elementNone
  520. }
  521. j := i + 1
  522. for j < len(s) {
  523. x := s[j]
  524. if asciiAlphaNum(x) {
  525. j++
  526. continue
  527. }
  528. // Allow "x-y" or "x:y" but not "x-", "-y", or "x--y".
  529. if (x == ':' || x == '-') && j+1 < len(s) && asciiAlphaNum(s[j+1]) {
  530. j += 2
  531. continue
  532. }
  533. break
  534. }
  535. return j, elementNameMap[strings.ToLower(string(s[i:j]))]
  536. }
  537. // eatWhiteSpace returns the largest j such that s[i:j] is white space.
  538. func eatWhiteSpace(s []byte, i int) int {
  539. for j := i; j < len(s); j++ {
  540. switch s[j] {
  541. case ' ', '\t', '\n', '\f', '\r':
  542. // No-op.
  543. default:
  544. return j
  545. }
  546. }
  547. return len(s)
  548. }