url.go

// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package template

import (
	"bytes"
	"fmt"
	"strings"
)

// urlFilter returns its input unless it contains an unsafe protocol in which
// case it defangs the entire URL.
func urlFilter(args ...interface{}) string {
	s, t := stringify(args...)
	if t == contentTypeURL {
		return s
	}
	if i := strings.IndexRune(s, ':'); i >= 0 && !strings.ContainsRune(s[:i], '/') {
		protocol := strings.ToLower(s[:i])
		if protocol != "http" && protocol != "https" && protocol != "mailto" {
			return "#" + filterFailsafe
		}
	}
	return s
}
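// Illustrative sketch (not part of the original url.go): assuming
// filterFailsafe is the package's usual "ZgotmplZ" sentinel, urlFilter keeps
// http, https, and mailto URLs and defangs everything else:
//
//	urlFilter("javascript:alert(1)") // "#ZgotmplZ"
//	urlFilter("HTTPS://golang.org/") // unchanged; the scheme check is case-insensitive
//	urlFilter("/relative/path?q=1")  // unchanged; no scheme before the first '/'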
// urlEscaper produces an output that can be embedded in a URL query.
// The output can be embedded in an HTML attribute without further escaping.
func urlEscaper(args ...interface{}) string {
	return urlProcessor(false, args...)
}

// urlNormalizer normalizes URL content so it can be embedded in a quote-delimited
// string or parenthesis delimited url(...).
// The normalizer does not encode all HTML specials. Specifically, it does not
// encode '&' so correct embedding in an HTML attribute requires escaping of
// '&' to '&amp;'.
func urlNormalizer(args ...interface{}) string {
	return urlProcessor(true, args...)
}
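// Illustrative sketch (not part of the original url.go) contrasting the two
// wrappers on the same input: urlEscaper percent-encodes reserved characters,
// while urlNormalizer preserves them and only encodes what is unsafe inside a
// quoted attribute or CSS url(...):
//
//	urlEscaper("a&b=c d")    // "a%26b%3dc%20d"
//	urlNormalizer("a&b=c d") // "a&b=c%20d"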
// urlProcessor normalizes (when norm is true) or escapes its input to produce
// a valid hierarchical or opaque URL part.
func urlProcessor(norm bool, args ...interface{}) string {
	s, t := stringify(args...)
	if t == contentTypeURL {
		norm = true
	}
	var b bytes.Buffer
	written := 0
	// The byte loop below assumes that all URLs use UTF-8 as the
	// content-encoding. This is similar to the URI to IRI encoding scheme
	// defined in section 3.1 of RFC 3987, and behaves the same as the
	// EcmaScript builtin encodeURIComponent.
	// It should not cause any misencoding of URLs in pages with
	// Content-type: text/html;charset=UTF-8.
	for i, n := 0, len(s); i < n; i++ {
		c := s[i]
		switch c {
		// Single quote and parens are sub-delims in RFC 3986, but we
		// escape them so the output can be embedded in single
		// quoted attributes and unquoted CSS url(...) constructs.
		// Single quotes are reserved in URLs, but are only used in
		// the obsolete "mark" rule in an appendix in RFC 3986
		// so can be safely encoded.
		case '!', '#', '$', '&', '*', '+', ',', '/', ':', ';', '=', '?', '@', '[', ']':
			if norm {
				continue
			}
		// Unreserved according to RFC 3986 sec 2.3
		// "For consistency, percent-encoded octets in the ranges of
		// ALPHA (%41-%5A and %61-%7A), DIGIT (%30-%39), hyphen (%2D),
		// period (%2E), underscore (%5F), or tilde (%7E) should not be
		// created by URI producers
		case '-', '.', '_', '~':
			continue
		case '%':
			// When normalizing do not re-encode valid escapes.
			if norm && i+2 < len(s) && isHex(s[i+1]) && isHex(s[i+2]) {
				continue
			}
		default:
			// Unreserved according to RFC 3986 sec 2.3
			if 'a' <= c && c <= 'z' {
				continue
			}
			if 'A' <= c && c <= 'Z' {
				continue
			}
			if '0' <= c && c <= '9' {
				continue
			}
		}
		b.WriteString(s[written:i])
		fmt.Fprintf(&b, "%%%02x", c)
		written = i + 1
	}
	if written == 0 {
		return s
	}
	b.WriteString(s[written:])
	return b.String()
}
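// Illustrative sketch (not part of the original url.go) of the '%' handling
// above: when normalizing, an existing valid escape is preserved; when
// escaping, the '%' itself is re-encoded:
//
//	urlNormalizer("%7e/") // "%7e/" (valid escape and '/' kept as-is)
//	urlEscaper("%7e/")    // "%257e%2f"
//	urlNormalizer("100%") // "100%25" (a trailing '%' is not a valid escape)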