123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588 |
- // Copyright 2011 The Go Authors. All rights reserved.
- // Use of this source code is governed by a BSD-style
- // license that can be found in the LICENSE file.
- package template
- import (
- "bytes"
- "strings"
- )
- // transitionFunc is the array of context transition functions for text nodes.
- // A transition function takes a context and template text input, and returns
- // the updated context and the number of bytes consumed from the front of the
- // input.
- var transitionFunc = [...]func(context, []byte) (context, int){
- stateText: tText,
- stateTag: tTag,
- stateAttrName: tAttrName,
- stateAfterName: tAfterName,
- stateBeforeValue: tBeforeValue,
- stateHTMLCmt: tHTMLCmt,
- stateRCDATA: tSpecialTagEnd,
- stateAttr: tAttr,
- stateURL: tURL,
- stateJS: tJS,
- stateJSDqStr: tJSDelimited,
- stateJSSqStr: tJSDelimited,
- stateJSRegexp: tJSDelimited,
- stateJSBlockCmt: tBlockCmt,
- stateJSLineCmt: tLineCmt,
- stateCSS: tCSS,
- stateCSSDqStr: tCSSStr,
- stateCSSSqStr: tCSSStr,
- stateCSSDqURL: tCSSStr,
- stateCSSSqURL: tCSSStr,
- stateCSSURL: tCSSStr,
- stateCSSBlockCmt: tBlockCmt,
- stateCSSLineCmt: tLineCmt,
- stateError: tError,
- }
// Byte sequences that open and close an HTML comment.
var (
	commentStart = []byte("<!--")
	commentEnd   = []byte("-->")
)
- // tText is the context transition function for the text state.
- func tText(c context, s []byte) (context, int) {
- k := 0
- for {
- i := k + bytes.IndexByte(s[k:], '<')
- if i < k || i+1 == len(s) {
- return c, len(s)
- } else if i+4 <= len(s) && bytes.Equal(commentStart, s[i:i+4]) {
- return context{state: stateHTMLCmt}, i + 4
- }
- i++
- end := false
- if s[i] == '/' {
- if i+1 == len(s) {
- return c, len(s)
- }
- end, i = true, i+1
- }
- j, e := eatTagName(s, i)
- if j != i {
- if end {
- e = elementNone
- }
- // We've found an HTML tag.
- return context{state: stateTag, element: e}, j
- }
- k = j
- }
- }
- var elementContentType = [...]state{
- elementNone: stateText,
- elementScript: stateJS,
- elementStyle: stateCSS,
- elementTextarea: stateRCDATA,
- elementTitle: stateRCDATA,
- }
- // tTag is the context transition function for the tag state.
- func tTag(c context, s []byte) (context, int) {
- // Find the attribute name.
- i := eatWhiteSpace(s, 0)
- if i == len(s) {
- return c, len(s)
- }
- if s[i] == '>' {
- return context{
- state: elementContentType[c.element],
- element: c.element,
- }, i + 1
- }
- j, err := eatAttrName(s, i)
- if err != nil {
- return context{state: stateError, err: err}, len(s)
- }
- state, attr := stateTag, attrNone
- if i == j {
- return context{
- state: stateError,
- err: errorf(ErrBadHTML, nil, 0, "expected space, attr name, or end of tag, but got %q", s[i:]),
- }, len(s)
- }
- attrName := strings.ToLower(string(s[i:j]))
- if c.element == elementScript && attrName == "type" {
- attr = attrScriptType
- } else {
- switch attrType(attrName) {
- case contentTypeURL:
- attr = attrURL
- case contentTypeCSS:
- attr = attrStyle
- case contentTypeJS:
- attr = attrScript
- }
- }
- if j == len(s) {
- state = stateAttrName
- } else {
- state = stateAfterName
- }
- return context{state: state, element: c.element, attr: attr}, j
- }
- // tAttrName is the context transition function for stateAttrName.
- func tAttrName(c context, s []byte) (context, int) {
- i, err := eatAttrName(s, 0)
- if err != nil {
- return context{state: stateError, err: err}, len(s)
- } else if i != len(s) {
- c.state = stateAfterName
- }
- return c, i
- }
- // tAfterName is the context transition function for stateAfterName.
- func tAfterName(c context, s []byte) (context, int) {
- // Look for the start of the value.
- i := eatWhiteSpace(s, 0)
- if i == len(s) {
- return c, len(s)
- } else if s[i] != '=' {
- // Occurs due to tag ending '>', and valueless attribute.
- c.state = stateTag
- return c, i
- }
- c.state = stateBeforeValue
- // Consume the "=".
- return c, i + 1
- }
- var attrStartStates = [...]state{
- attrNone: stateAttr,
- attrScript: stateJS,
- attrScriptType: stateAttr,
- attrStyle: stateCSS,
- attrURL: stateURL,
- }
- // tBeforeValue is the context transition function for stateBeforeValue.
- func tBeforeValue(c context, s []byte) (context, int) {
- i := eatWhiteSpace(s, 0)
- if i == len(s) {
- return c, len(s)
- }
- // Find the attribute delimiter.
- delim := delimSpaceOrTagEnd
- switch s[i] {
- case '\'':
- delim, i = delimSingleQuote, i+1
- case '"':
- delim, i = delimDoubleQuote, i+1
- }
- c.state, c.delim = attrStartStates[c.attr], delim
- return c, i
- }
- // tHTMLCmt is the context transition function for stateHTMLCmt.
- func tHTMLCmt(c context, s []byte) (context, int) {
- if i := bytes.Index(s, commentEnd); i != -1 {
- return context{}, i + 3
- }
- return c, len(s)
- }
- // specialTagEndMarkers maps element types to the character sequence that
- // case-insensitively signals the end of the special tag body.
- var specialTagEndMarkers = [...][]byte{
- elementScript: []byte("script"),
- elementStyle: []byte("style"),
- elementTextarea: []byte("textarea"),
- elementTitle: []byte("title"),
- }
// specialTagEndPrefix is the byte sequence that introduces an end tag.
var specialTagEndPrefix = []byte("</")

// tagEndSeparators lists the bytes that may directly follow the name in an
// end tag for it to count as a match in indexTagEnd.
var tagEndSeparators = []byte("> \t\n\f/")
- // tSpecialTagEnd is the context transition function for raw text and RCDATA
- // element states.
- func tSpecialTagEnd(c context, s []byte) (context, int) {
- if c.element != elementNone {
- if i := indexTagEnd(s, specialTagEndMarkers[c.element]); i != -1 {
- return context{}, i
- }
- }
- return c, len(s)
- }
- // indexTagEnd finds the index of a special tag end in a case insensitive way, or returns -1
- func indexTagEnd(s []byte, tag []byte) int {
- res := 0
- plen := len(specialTagEndPrefix)
- for len(s) > 0 {
- // Try to find the tag end prefix first
- i := bytes.Index(s, specialTagEndPrefix)
- if i == -1 {
- return i
- }
- s = s[i+plen:]
- // Try to match the actual tag if there is still space for it
- if len(tag) <= len(s) && bytes.EqualFold(tag, s[:len(tag)]) {
- s = s[len(tag):]
- // Check the tag is followed by a proper separator
- if len(s) > 0 && bytes.IndexByte(tagEndSeparators, s[0]) != -1 {
- return res + i
- }
- res += len(tag)
- }
- res += i + plen
- }
- return -1
- }
- // tAttr is the context transition function for the attribute state.
- func tAttr(c context, s []byte) (context, int) {
- return c, len(s)
- }
- // tURL is the context transition function for the URL state.
- func tURL(c context, s []byte) (context, int) {
- if bytes.ContainsAny(s, "#?") {
- c.urlPart = urlPartQueryOrFrag
- } else if len(s) != eatWhiteSpace(s, 0) && c.urlPart == urlPartNone {
- // HTML5 uses "Valid URL potentially surrounded by spaces" for
- // attrs: http://www.w3.org/TR/html5/index.html#attributes-1
- c.urlPart = urlPartPreQuery
- }
- return c, len(s)
- }
- // tJS is the context transition function for the JS state.
- func tJS(c context, s []byte) (context, int) {
- i := bytes.IndexAny(s, `"'/`)
- if i == -1 {
- // Entire input is non string, comment, regexp tokens.
- c.jsCtx = nextJSCtx(s, c.jsCtx)
- return c, len(s)
- }
- c.jsCtx = nextJSCtx(s[:i], c.jsCtx)
- switch s[i] {
- case '"':
- c.state, c.jsCtx = stateJSDqStr, jsCtxRegexp
- case '\'':
- c.state, c.jsCtx = stateJSSqStr, jsCtxRegexp
- case '/':
- switch {
- case i+1 < len(s) && s[i+1] == '/':
- c.state, i = stateJSLineCmt, i+1
- case i+1 < len(s) && s[i+1] == '*':
- c.state, i = stateJSBlockCmt, i+1
- case c.jsCtx == jsCtxRegexp:
- c.state = stateJSRegexp
- case c.jsCtx == jsCtxDivOp:
- c.jsCtx = jsCtxRegexp
- default:
- return context{
- state: stateError,
- err: errorf(ErrSlashAmbig, nil, 0, "'/' could start a division or regexp: %.32q", s[i:]),
- }, len(s)
- }
- default:
- panic("unreachable")
- }
- return c, i + 1
- }
- // tJSDelimited is the context transition function for the JS string and regexp
- // states.
- func tJSDelimited(c context, s []byte) (context, int) {
- specials := `\"`
- switch c.state {
- case stateJSSqStr:
- specials = `\'`
- case stateJSRegexp:
- specials = `\/[]`
- }
- k, inCharset := 0, false
- for {
- i := k + bytes.IndexAny(s[k:], specials)
- if i < k {
- break
- }
- switch s[i] {
- case '\\':
- i++
- if i == len(s) {
- return context{
- state: stateError,
- err: errorf(ErrPartialEscape, nil, 0, "unfinished escape sequence in JS string: %q", s),
- }, len(s)
- }
- case '[':
- inCharset = true
- case ']':
- inCharset = false
- default:
- // end delimiter
- if !inCharset {
- c.state, c.jsCtx = stateJS, jsCtxDivOp
- return c, i + 1
- }
- }
- k = i + 1
- }
- if inCharset {
- // This can be fixed by making context richer if interpolation
- // into charsets is desired.
- return context{
- state: stateError,
- err: errorf(ErrPartialCharset, nil, 0, "unfinished JS regexp charset: %q", s),
- }, len(s)
- }
- return c, len(s)
- }
- var blockCommentEnd = []byte("*/")
- // tBlockCmt is the context transition function for /*comment*/ states.
- func tBlockCmt(c context, s []byte) (context, int) {
- i := bytes.Index(s, blockCommentEnd)
- if i == -1 {
- return c, len(s)
- }
- switch c.state {
- case stateJSBlockCmt:
- c.state = stateJS
- case stateCSSBlockCmt:
- c.state = stateCSS
- default:
- panic(c.state.String())
- }
- return c, i + 2
- }
- // tLineCmt is the context transition function for //comment states.
- func tLineCmt(c context, s []byte) (context, int) {
- var lineTerminators string
- var endState state
- switch c.state {
- case stateJSLineCmt:
- lineTerminators, endState = "\n\r\u2028\u2029", stateJS
- case stateCSSLineCmt:
- lineTerminators, endState = "\n\f\r", stateCSS
- // Line comments are not part of any published CSS standard but
- // are supported by the 4 major browsers.
- // This defines line comments as
- // LINECOMMENT ::= "//" [^\n\f\d]*
- // since http://www.w3.org/TR/css3-syntax/#SUBTOK-nl defines
- // newlines:
- // nl ::= #xA | #xD #xA | #xD | #xC
- default:
- panic(c.state.String())
- }
- i := bytes.IndexAny(s, lineTerminators)
- if i == -1 {
- return c, len(s)
- }
- c.state = endState
- // Per section 7.4 of EcmaScript 5 : http://es5.github.com/#x7.4
- // "However, the LineTerminator at the end of the line is not
- // considered to be part of the single-line comment; it is
- // recognized separately by the lexical grammar and becomes part
- // of the stream of input elements for the syntactic grammar."
- return c, i
- }
- // tCSS is the context transition function for the CSS state.
- func tCSS(c context, s []byte) (context, int) {
- // CSS quoted strings are almost never used except for:
- // (1) URLs as in background: "/foo.png"
- // (2) Multiword font-names as in font-family: "Times New Roman"
- // (3) List separators in content values as in inline-lists:
- // <style>
- // ul.inlineList { list-style: none; padding:0 }
- // ul.inlineList > li { display: inline }
- // ul.inlineList > li:before { content: ", " }
- // ul.inlineList > li:first-child:before { content: "" }
- // </style>
- // <ul class=inlineList><li>One<li>Two<li>Three</ul>
- // (4) Attribute value selectors as in a[href="http://example.com/"]
- //
- // We conservatively treat all strings as URLs, but make some
- // allowances to avoid confusion.
- //
- // In (1), our conservative assumption is justified.
- // In (2), valid font names do not contain ':', '?', or '#', so our
- // conservative assumption is fine since we will never transition past
- // urlPartPreQuery.
- // In (3), our protocol heuristic should not be tripped, and there
- // should not be non-space content after a '?' or '#', so as long as
- // we only %-encode RFC 3986 reserved characters we are ok.
- // In (4), we should URL escape for URL attributes, and for others we
- // have the attribute name available if our conservative assumption
- // proves problematic for real code.
- k := 0
- for {
- i := k + bytes.IndexAny(s[k:], `("'/`)
- if i < k {
- return c, len(s)
- }
- switch s[i] {
- case '(':
- // Look for url to the left.
- p := bytes.TrimRight(s[:i], "\t\n\f\r ")
- if endsWithCSSKeyword(p, "url") {
- j := len(s) - len(bytes.TrimLeft(s[i+1:], "\t\n\f\r "))
- switch {
- case j != len(s) && s[j] == '"':
- c.state, j = stateCSSDqURL, j+1
- case j != len(s) && s[j] == '\'':
- c.state, j = stateCSSSqURL, j+1
- default:
- c.state = stateCSSURL
- }
- return c, j
- }
- case '/':
- if i+1 < len(s) {
- switch s[i+1] {
- case '/':
- c.state = stateCSSLineCmt
- return c, i + 2
- case '*':
- c.state = stateCSSBlockCmt
- return c, i + 2
- }
- }
- case '"':
- c.state = stateCSSDqStr
- return c, i + 1
- case '\'':
- c.state = stateCSSSqStr
- return c, i + 1
- }
- k = i + 1
- }
- }
- // tCSSStr is the context transition function for the CSS string and URL states.
- func tCSSStr(c context, s []byte) (context, int) {
- var endAndEsc string
- switch c.state {
- case stateCSSDqStr, stateCSSDqURL:
- endAndEsc = `\"`
- case stateCSSSqStr, stateCSSSqURL:
- endAndEsc = `\'`
- case stateCSSURL:
- // Unquoted URLs end with a newline or close parenthesis.
- // The below includes the wc (whitespace character) and nl.
- endAndEsc = "\\\t\n\f\r )"
- default:
- panic(c.state.String())
- }
- k := 0
- for {
- i := k + bytes.IndexAny(s[k:], endAndEsc)
- if i < k {
- c, nread := tURL(c, decodeCSS(s[k:]))
- return c, k + nread
- }
- if s[i] == '\\' {
- i++
- if i == len(s) {
- return context{
- state: stateError,
- err: errorf(ErrPartialEscape, nil, 0, "unfinished escape sequence in CSS string: %q", s),
- }, len(s)
- }
- } else {
- c.state = stateCSS
- return c, i + 1
- }
- c, _ = tURL(c, decodeCSS(s[:i+1]))
- k = i + 1
- }
- }
- // tError is the context transition function for the error state.
- func tError(c context, s []byte) (context, int) {
- return c, len(s)
- }
- // eatAttrName returns the largest j such that s[i:j] is an attribute name.
- // It returns an error if s[i:] does not look like it begins with an
- // attribute name, such as encountering a quote mark without a preceding
- // equals sign.
- func eatAttrName(s []byte, i int) (int, *Error) {
- for j := i; j < len(s); j++ {
- switch s[j] {
- case ' ', '\t', '\n', '\f', '\r', '=', '>':
- return j, nil
- case '\'', '"', '<':
- // These result in a parse warning in HTML5 and are
- // indicative of serious problems if seen in an attr
- // name in a template.
- return -1, errorf(ErrBadHTML, nil, 0, "%q in attribute name: %.32q", s[j:j+1], s)
- default:
- // No-op.
- }
- }
- return len(s), nil
- }
- var elementNameMap = map[string]element{
- "script": elementScript,
- "style": elementStyle,
- "textarea": elementTextarea,
- "title": elementTitle,
- }
// asciiAlpha reports whether c is an ASCII letter.
func asciiAlpha(c byte) bool {
	// Setting bit 0x20 maps 'A'..'Z' onto 'a'..'z' and leaves 'a'..'z'
	// unchanged; no other byte lands in that range.
	folded := c | 0x20
	return 'a' <= folded && folded <= 'z'
}
- // asciiAlphaNum reports whether c is an ASCII letter or digit.
- func asciiAlphaNum(c byte) bool {
- return asciiAlpha(c) || '0' <= c && c <= '9'
- }
- // eatTagName returns the largest j such that s[i:j] is a tag name and the tag type.
- func eatTagName(s []byte, i int) (int, element) {
- if i == len(s) || !asciiAlpha(s[i]) {
- return i, elementNone
- }
- j := i + 1
- for j < len(s) {
- x := s[j]
- if asciiAlphaNum(x) {
- j++
- continue
- }
- // Allow "x-y" or "x:y" but not "x-", "-y", or "x--y".
- if (x == ':' || x == '-') && j+1 < len(s) && asciiAlphaNum(s[j+1]) {
- j += 2
- continue
- }
- break
- }
- return j, elementNameMap[strings.ToLower(string(s[i:j]))]
- }
// eatWhiteSpace returns the largest j such that s[i:j] is white space, where
// white space means space, tab, newline, form feed or carriage return.
func eatWhiteSpace(s []byte, i int) int {
	for j := i; j < len(s); j++ {
		if b := s[j]; b != ' ' && b != '\t' && b != '\n' && b != '\f' && b != '\r' {
			return j
		}
	}
	return len(s)
}
|