123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256 |
- // Copyright 2011 The Go Authors. All rights reserved.
- // Use of this source code is governed by a BSD-style
- // license that can be found in the LICENSE file.
- package template
- import (
- "bytes"
- "fmt"
- "unicode"
- "unicode/utf8"
- )
- // endsWithCSSKeyword reports whether b ends with an ident that
- // case-insensitively matches the lower-case kw.
- func endsWithCSSKeyword(b []byte, kw string) bool {
- i := len(b) - len(kw)
- if i < 0 {
- // Too short.
- return false
- }
- if i != 0 {
- r, _ := utf8.DecodeLastRune(b[:i])
- if isCSSNmchar(r) {
- // Too long.
- return false
- }
- }
- // Many CSS keywords, such as "!important" can have characters encoded,
- // but the URI production does not allow that according to
- // http://www.w3.org/TR/css3-syntax/#TOK-URI
- // This does not attempt to recognize encoded keywords. For example,
- // given "\75\72\6c" and "url" this return false.
- return string(bytes.ToLower(b[i:])) == kw
- }
- // isCSSNmchar reports whether rune is allowed anywhere in a CSS identifier.
- func isCSSNmchar(r rune) bool {
- // Based on the CSS3 nmchar production but ignores multi-rune escape
- // sequences.
- // http://www.w3.org/TR/css3-syntax/#SUBTOK-nmchar
- return 'a' <= r && r <= 'z' ||
- 'A' <= r && r <= 'Z' ||
- '0' <= r && r <= '9' ||
- r == '-' ||
- r == '_' ||
- // Non-ASCII cases below.
- 0x80 <= r && r <= 0xd7ff ||
- 0xe000 <= r && r <= 0xfffd ||
- 0x10000 <= r && r <= 0x10ffff
- }
- // decodeCSS decodes CSS3 escapes given a sequence of stringchars.
- // If there is no change, it returns the input, otherwise it returns a slice
- // backed by a new array.
- // http://www.w3.org/TR/css3-syntax/#SUBTOK-stringchar defines stringchar.
- func decodeCSS(s []byte) []byte {
- i := bytes.IndexByte(s, '\\')
- if i == -1 {
- return s
- }
- // The UTF-8 sequence for a codepoint is never longer than 1 + the
- // number hex digits need to represent that codepoint, so len(s) is an
- // upper bound on the output length.
- b := make([]byte, 0, len(s))
- for len(s) != 0 {
- i := bytes.IndexByte(s, '\\')
- if i == -1 {
- i = len(s)
- }
- b, s = append(b, s[:i]...), s[i:]
- if len(s) < 2 {
- break
- }
- // http://www.w3.org/TR/css3-syntax/#SUBTOK-escape
- // escape ::= unicode | '\' [#x20-#x7E#x80-#xD7FF#xE000-#xFFFD#x10000-#x10FFFF]
- if isHex(s[1]) {
- // http://www.w3.org/TR/css3-syntax/#SUBTOK-unicode
- // unicode ::= '\' [0-9a-fA-F]{1,6} wc?
- j := 2
- for j < len(s) && j < 7 && isHex(s[j]) {
- j++
- }
- r := hexDecode(s[1:j])
- if r > unicode.MaxRune {
- r, j = r/16, j-1
- }
- n := utf8.EncodeRune(b[len(b):cap(b)], r)
- // The optional space at the end allows a hex
- // sequence to be followed by a literal hex.
- // string(decodeCSS([]byte(`\A B`))) == "\nB"
- b, s = b[:len(b)+n], skipCSSSpace(s[j:])
- } else {
- // `\\` decodes to `\` and `\"` to `"`.
- _, n := utf8.DecodeRune(s[1:])
- b, s = append(b, s[1:1+n]...), s[1+n:]
- }
- }
- return b
- }
- // isHex reports whether the given character is a hex digit.
- func isHex(c byte) bool {
- return '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F'
- }
- // hexDecode decodes a short hex digit sequence: "10" -> 16.
- func hexDecode(s []byte) rune {
- n := '\x00'
- for _, c := range s {
- n <<= 4
- switch {
- case '0' <= c && c <= '9':
- n |= rune(c - '0')
- case 'a' <= c && c <= 'f':
- n |= rune(c-'a') + 10
- case 'A' <= c && c <= 'F':
- n |= rune(c-'A') + 10
- default:
- panic(fmt.Sprintf("Bad hex digit in %q", s))
- }
- }
- return n
- }
- // skipCSSSpace returns a suffix of c, skipping over a single space.
- func skipCSSSpace(c []byte) []byte {
- if len(c) == 0 {
- return c
- }
- // wc ::= #x9 | #xA | #xC | #xD | #x20
- switch c[0] {
- case '\t', '\n', '\f', ' ':
- return c[1:]
- case '\r':
- // This differs from CSS3's wc production because it contains a
- // probable spec error whereby wc contains all the single byte
- // sequences in nl (newline) but not CRLF.
- if len(c) >= 2 && c[1] == '\n' {
- return c[2:]
- }
- return c[1:]
- }
- return c
- }
- // isCSSSpace reports whether b is a CSS space char as defined in wc.
- func isCSSSpace(b byte) bool {
- switch b {
- case '\t', '\n', '\f', '\r', ' ':
- return true
- }
- return false
- }
- // cssEscaper escapes HTML and CSS special characters using \<hex>+ escapes.
- func cssEscaper(args ...interface{}) string {
- s, _ := stringify(args...)
- var b bytes.Buffer
- r, w, written := rune(0), 0, 0
- for i := 0; i < len(s); i += w {
- // See comment in htmlEscaper.
- r, w = utf8.DecodeRuneInString(s[i:])
- var repl string
- switch {
- case int(r) < len(cssReplacementTable) && cssReplacementTable[r] != "":
- repl = cssReplacementTable[r]
- default:
- continue
- }
- b.WriteString(s[written:i])
- b.WriteString(repl)
- written = i + w
- if repl != `\\` && (written == len(s) || isHex(s[written]) || isCSSSpace(s[written])) {
- b.WriteByte(' ')
- }
- }
- if written == 0 {
- return s
- }
- b.WriteString(s[written:])
- return b.String()
- }
- var cssReplacementTable = []string{
- 0: `\0`,
- '\t': `\9`,
- '\n': `\a`,
- '\f': `\c`,
- '\r': `\d`,
- // Encode HTML specials as hex so the output can be embedded
- // in HTML attributes without further encoding.
- '"': `\22`,
- '&': `\26`,
- '\'': `\27`,
- '(': `\28`,
- ')': `\29`,
- '+': `\2b`,
- '/': `\2f`,
- ':': `\3a`,
- ';': `\3b`,
- '<': `\3c`,
- '>': `\3e`,
- '\\': `\\`,
- '{': `\7b`,
- '}': `\7d`,
- }
- var expressionBytes = []byte("expression")
- var mozBindingBytes = []byte("mozbinding")
- // cssValueFilter allows innocuous CSS values in the output including CSS
- // quantities (10px or 25%), ID or class literals (#foo, .bar), keyword values
- // (inherit, blue), and colors (#888).
- // It filters out unsafe values, such as those that affect token boundaries,
- // and anything that might execute scripts.
- func cssValueFilter(args ...interface{}) string {
- s, t := stringify(args...)
- if t == contentTypeCSS {
- return s
- }
- b, id := decodeCSS([]byte(s)), make([]byte, 0, 64)
- // CSS3 error handling is specified as honoring string boundaries per
- // http://www.w3.org/TR/css3-syntax/#error-handling :
- // Malformed declarations. User agents must handle unexpected
- // tokens encountered while parsing a declaration by reading until
- // the end of the declaration, while observing the rules for
- // matching pairs of (), [], {}, "", and '', and correctly handling
- // escapes. For example, a malformed declaration may be missing a
- // property, colon (:) or value.
- // So we need to make sure that values do not have mismatched bracket
- // or quote characters to prevent the browser from restarting parsing
- // inside a string that might embed JavaScript source.
- for i, c := range b {
- switch c {
- case 0, '"', '\'', '(', ')', '/', ';', '@', '[', '\\', ']', '`', '{', '}':
- return filterFailsafe
- case '-':
- // Disallow <!-- or -->.
- // -- should not appear in valid identifiers.
- if i != 0 && b[i-1] == '-' {
- return filterFailsafe
- }
- default:
- if c < utf8.RuneSelf && isCSSNmchar(rune(c)) {
- id = append(id, c)
- }
- }
- }
- id = bytes.ToLower(id)
- if bytes.Contains(id, expressionBytes) || bytes.Contains(id, mozBindingBytes) {
- return filterFailsafe
- }
- return string(b)
- }
|