js.go 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404
  1. // Copyright 2011 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package template
  5. import (
  6. "bytes"
  7. "encoding/json"
  8. "fmt"
  9. "reflect"
  10. "strings"
  11. "unicode/utf8"
  12. )
  13. // nextJSCtx returns the context that determines whether a slash after the
  14. // given run of tokens starts a regular expression instead of a division
  15. // operator: / or /=.
  16. //
  17. // This assumes that the token run does not include any string tokens, comment
  18. // tokens, regular expression literal tokens, or division operators.
  19. //
  20. // This fails on some valid but nonsensical JavaScript programs like
  21. // "x = ++/foo/i" which is quite different than "x++/foo/i", but is not known to
  22. // fail on any known useful programs. It is based on the draft
  23. // JavaScript 2.0 lexical grammar and requires one token of lookbehind:
  24. // http://www.mozilla.org/js/language/js20-2000-07/rationale/syntax.html
  25. func nextJSCtx(s []byte, preceding jsCtx) jsCtx {
  26. s = bytes.TrimRight(s, "\t\n\f\r \u2028\u2029")
  27. if len(s) == 0 {
  28. return preceding
  29. }
  30. // All cases below are in the single-byte UTF-8 group.
  31. switch c, n := s[len(s)-1], len(s); c {
  32. case '+', '-':
  33. // ++ and -- are not regexp preceders, but + and - are whether
  34. // they are used as infix or prefix operators.
  35. start := n - 1
  36. // Count the number of adjacent dashes or pluses.
  37. for start > 0 && s[start-1] == c {
  38. start--
  39. }
  40. if (n-start)&1 == 1 {
  41. // Reached for trailing minus signs since "---" is the
  42. // same as "-- -".
  43. return jsCtxRegexp
  44. }
  45. return jsCtxDivOp
  46. case '.':
  47. // Handle "42."
  48. if n != 1 && '0' <= s[n-2] && s[n-2] <= '9' {
  49. return jsCtxDivOp
  50. }
  51. return jsCtxRegexp
  52. // Suffixes for all punctuators from section 7.7 of the language spec
  53. // that only end binary operators not handled above.
  54. case ',', '<', '>', '=', '*', '%', '&', '|', '^', '?':
  55. return jsCtxRegexp
  56. // Suffixes for all punctuators from section 7.7 of the language spec
  57. // that are prefix operators not handled above.
  58. case '!', '~':
  59. return jsCtxRegexp
  60. // Matches all the punctuators from section 7.7 of the language spec
  61. // that are open brackets not handled above.
  62. case '(', '[':
  63. return jsCtxRegexp
  64. // Matches all the punctuators from section 7.7 of the language spec
  65. // that precede expression starts.
  66. case ':', ';', '{':
  67. return jsCtxRegexp
  68. // CAVEAT: the close punctuators ('}', ']', ')') precede div ops and
  69. // are handled in the default except for '}' which can precede a
  70. // division op as in
  71. // ({ valueOf: function () { return 42 } } / 2
  72. // which is valid, but, in practice, developers don't divide object
  73. // literals, so our heuristic works well for code like
  74. // function () { ... } /foo/.test(x) && sideEffect();
  75. // The ')' punctuator can precede a regular expression as in
  76. // if (b) /foo/.test(x) && ...
  77. // but this is much less likely than
  78. // (a + b) / c
  79. case '}':
  80. return jsCtxRegexp
  81. default:
  82. // Look for an IdentifierName and see if it is a keyword that
  83. // can precede a regular expression.
  84. j := n
  85. for j > 0 && isJSIdentPart(rune(s[j-1])) {
  86. j--
  87. }
  88. if regexpPrecederKeywords[string(s[j:])] {
  89. return jsCtxRegexp
  90. }
  91. }
  92. // Otherwise is a punctuator not listed above, or
  93. // a string which precedes a div op, or an identifier
  94. // which precedes a div op.
  95. return jsCtxDivOp
  96. }
  97. // regexpPrecederKeywords is a set of reserved JS keywords that can precede a
  98. // regular expression in JS source.
  99. var regexpPrecederKeywords = map[string]bool{
  100. "break": true,
  101. "case": true,
  102. "continue": true,
  103. "delete": true,
  104. "do": true,
  105. "else": true,
  106. "finally": true,
  107. "in": true,
  108. "instanceof": true,
  109. "return": true,
  110. "throw": true,
  111. "try": true,
  112. "typeof": true,
  113. "void": true,
  114. }
  115. var jsonMarshalType = reflect.TypeOf((*json.Marshaler)(nil)).Elem()
  116. // indirectToJSONMarshaler returns the value, after dereferencing as many times
  117. // as necessary to reach the base type (or nil) or an implementation of json.Marshal.
  118. func indirectToJSONMarshaler(a interface{}) interface{} {
  119. v := reflect.ValueOf(a)
  120. for !v.Type().Implements(jsonMarshalType) && v.Kind() == reflect.Ptr && !v.IsNil() {
  121. v = v.Elem()
  122. }
  123. return v.Interface()
  124. }
  125. // jsValEscaper escapes its inputs to a JS Expression (section 11.14) that has
  126. // neither side-effects nor free variables outside (NaN, Infinity).
  127. func jsValEscaper(args ...interface{}) string {
  128. var a interface{}
  129. if len(args) == 1 {
  130. a = indirectToJSONMarshaler(args[0])
  131. switch t := a.(type) {
  132. case JS:
  133. return string(t)
  134. case JSStr:
  135. // TODO: normalize quotes.
  136. return `"` + string(t) + `"`
  137. case json.Marshaler:
  138. // Do not treat as a Stringer.
  139. case fmt.Stringer:
  140. a = t.String()
  141. }
  142. } else {
  143. for i, arg := range args {
  144. args[i] = indirectToJSONMarshaler(arg)
  145. }
  146. a = fmt.Sprint(args...)
  147. }
  148. // TODO: detect cycles before calling Marshal which loops infinitely on
  149. // cyclic data. This may be an unacceptable DoS risk.
  150. b, err := json.Marshal(a)
  151. if err != nil {
  152. // Put a space before comment so that if it is flush against
  153. // a division operator it is not turned into a line comment:
  154. // x/{{y}}
  155. // turning into
  156. // x//* error marshaling y:
  157. // second line of error message */null
  158. return fmt.Sprintf(" /* %s */null ", strings.Replace(err.Error(), "*/", "* /", -1))
  159. }
  160. // TODO: maybe post-process output to prevent it from containing
  161. // "<!--", "-->", "<![CDATA[", "]]>", or "</script"
  162. // in case custom marshalers produce output containing those.
  163. // TODO: Maybe abbreviate \u00ab to \xab to produce more compact output.
  164. if len(b) == 0 {
  165. // In, `x=y/{{.}}*z` a json.Marshaler that produces "" should
  166. // not cause the output `x=y/*z`.
  167. return " null "
  168. }
  169. first, _ := utf8.DecodeRune(b)
  170. last, _ := utf8.DecodeLastRune(b)
  171. var buf bytes.Buffer
  172. // Prevent IdentifierNames and NumericLiterals from running into
  173. // keywords: in, instanceof, typeof, void
  174. pad := isJSIdentPart(first) || isJSIdentPart(last)
  175. if pad {
  176. buf.WriteByte(' ')
  177. }
  178. written := 0
  179. // Make sure that json.Marshal escapes codepoints U+2028 & U+2029
  180. // so it falls within the subset of JSON which is valid JS.
  181. for i := 0; i < len(b); {
  182. rune, n := utf8.DecodeRune(b[i:])
  183. repl := ""
  184. if rune == 0x2028 {
  185. repl = `\u2028`
  186. } else if rune == 0x2029 {
  187. repl = `\u2029`
  188. }
  189. if repl != "" {
  190. buf.Write(b[written:i])
  191. buf.WriteString(repl)
  192. written = i + n
  193. }
  194. i += n
  195. }
  196. if buf.Len() != 0 {
  197. buf.Write(b[written:])
  198. if pad {
  199. buf.WriteByte(' ')
  200. }
  201. b = buf.Bytes()
  202. }
  203. return string(b)
  204. }
  205. // jsStrEscaper produces a string that can be included between quotes in
  206. // JavaScript source, in JavaScript embedded in an HTML5 <script> element,
  207. // or in an HTML5 event handler attribute such as onclick.
  208. func jsStrEscaper(args ...interface{}) string {
  209. s, t := stringify(args...)
  210. if t == contentTypeJSStr {
  211. return replace(s, jsStrNormReplacementTable)
  212. }
  213. return replace(s, jsStrReplacementTable)
  214. }
  215. // jsRegexpEscaper behaves like jsStrEscaper but escapes regular expression
  216. // specials so the result is treated literally when included in a regular
  217. // expression literal. /foo{{.X}}bar/ matches the string "foo" followed by
  218. // the literal text of {{.X}} followed by the string "bar".
  219. func jsRegexpEscaper(args ...interface{}) string {
  220. s, _ := stringify(args...)
  221. s = replace(s, jsRegexpReplacementTable)
  222. if s == "" {
  223. // /{{.X}}/ should not produce a line comment when .X == "".
  224. return "(?:)"
  225. }
  226. return s
  227. }
  228. // replace replaces each rune r of s with replacementTable[r], provided that
  229. // r < len(replacementTable). If replacementTable[r] is the empty string then
  230. // no replacement is made.
  231. // It also replaces runes U+2028 and U+2029 with the raw strings `\u2028` and
  232. // `\u2029`.
  233. func replace(s string, replacementTable []string) string {
  234. var b bytes.Buffer
  235. r, w, written := rune(0), 0, 0
  236. for i := 0; i < len(s); i += w {
  237. // See comment in htmlEscaper.
  238. r, w = utf8.DecodeRuneInString(s[i:])
  239. var repl string
  240. switch {
  241. case int(r) < len(replacementTable) && replacementTable[r] != "":
  242. repl = replacementTable[r]
  243. case r == '\u2028':
  244. repl = `\u2028`
  245. case r == '\u2029':
  246. repl = `\u2029`
  247. default:
  248. continue
  249. }
  250. b.WriteString(s[written:i])
  251. b.WriteString(repl)
  252. written = i + w
  253. }
  254. if written == 0 {
  255. return s
  256. }
  257. b.WriteString(s[written:])
  258. return b.String()
  259. }
  260. var jsStrReplacementTable = []string{
  261. 0: `\0`,
  262. '\t': `\t`,
  263. '\n': `\n`,
  264. '\v': `\x0b`, // "\v" == "v" on IE 6.
  265. '\f': `\f`,
  266. '\r': `\r`,
  267. // Encode HTML specials as hex so the output can be embedded
  268. // in HTML attributes without further encoding.
  269. '"': `\x22`,
  270. '&': `\x26`,
  271. '\'': `\x27`,
  272. '+': `\x2b`,
  273. '/': `\/`,
  274. '<': `\x3c`,
  275. '>': `\x3e`,
  276. '\\': `\\`,
  277. }
  278. // jsStrNormReplacementTable is like jsStrReplacementTable but does not
  279. // overencode existing escapes since this table has no entry for `\`.
  280. var jsStrNormReplacementTable = []string{
  281. 0: `\0`,
  282. '\t': `\t`,
  283. '\n': `\n`,
  284. '\v': `\x0b`, // "\v" == "v" on IE 6.
  285. '\f': `\f`,
  286. '\r': `\r`,
  287. // Encode HTML specials as hex so the output can be embedded
  288. // in HTML attributes without further encoding.
  289. '"': `\x22`,
  290. '&': `\x26`,
  291. '\'': `\x27`,
  292. '+': `\x2b`,
  293. '/': `\/`,
  294. '<': `\x3c`,
  295. '>': `\x3e`,
  296. }
  297. var jsRegexpReplacementTable = []string{
  298. 0: `\0`,
  299. '\t': `\t`,
  300. '\n': `\n`,
  301. '\v': `\x0b`, // "\v" == "v" on IE 6.
  302. '\f': `\f`,
  303. '\r': `\r`,
  304. // Encode HTML specials as hex so the output can be embedded
  305. // in HTML attributes without further encoding.
  306. '"': `\x22`,
  307. '$': `\$`,
  308. '&': `\x26`,
  309. '\'': `\x27`,
  310. '(': `\(`,
  311. ')': `\)`,
  312. '*': `\*`,
  313. '+': `\x2b`,
  314. '-': `\-`,
  315. '.': `\.`,
  316. '/': `\/`,
  317. '<': `\x3c`,
  318. '>': `\x3e`,
  319. '?': `\?`,
  320. '[': `\[`,
  321. '\\': `\\`,
  322. ']': `\]`,
  323. '^': `\^`,
  324. '{': `\{`,
  325. '|': `\|`,
  326. '}': `\}`,
  327. }
  328. // isJSIdentPart reports whether the given rune is a JS identifier part.
  329. // It does not handle all the non-Latin letters, joiners, and combining marks,
  330. // but it does handle every codepoint that can occur in a numeric literal or
  331. // a keyword.
  332. func isJSIdentPart(r rune) bool {
  333. switch {
  334. case r == '$':
  335. return true
  336. case '0' <= r && r <= '9':
  337. return true
  338. case 'A' <= r && r <= 'Z':
  339. return true
  340. case r == '_':
  341. return true
  342. case 'a' <= r && r <= 'z':
  343. return true
  344. }
  345. return false
  346. }
  347. // isJSType returns true if the given MIME type should be considered JavaScript.
  348. //
  349. // It is used to determine whether a script tag with a type attribute is a javascript container.
  350. func isJSType(mimeType string) bool {
  351. // per
  352. // https://www.w3.org/TR/html5/scripting-1.html#attr-script-type
  353. // https://tools.ietf.org/html/rfc7231#section-3.1.1
  354. // https://tools.ietf.org/html/rfc4329#section-3
  355. // https://www.ietf.org/rfc/rfc4627.txt
  356. mimeType = strings.ToLower(mimeType)
  357. // discard parameters
  358. if i := strings.Index(mimeType, ";"); i >= 0 {
  359. mimeType = mimeType[:i]
  360. }
  361. mimeType = strings.TrimSpace(mimeType)
  362. switch mimeType {
  363. case
  364. "application/ecmascript",
  365. "application/javascript",
  366. "application/json",
  367. "application/x-ecmascript",
  368. "application/x-javascript",
  369. "text/ecmascript",
  370. "text/javascript",
  371. "text/javascript1.0",
  372. "text/javascript1.1",
  373. "text/javascript1.2",
  374. "text/javascript1.3",
  375. "text/javascript1.4",
  376. "text/javascript1.5",
  377. "text/jscript",
  378. "text/livescript",
  379. "text/x-ecmascript",
  380. "text/x-javascript":
  381. return true
  382. default:
  383. return false
  384. }
  385. }