fold.go 3.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143
  1. // Copyright 2013 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package json
  5. import (
  6. "bytes"
  7. "unicode/utf8"
  8. )
  9. const (
  10. caseMask = ^byte(0x20) // Mask to ignore case in ASCII.
  11. kelvin = '\u212a'
  12. smallLongEss = '\u017f'
  13. )
  14. // foldFunc returns one of four different case folding equivalence
  15. // functions, from most general (and slow) to fastest:
  16. //
  17. // 1) bytes.EqualFold, if the key s contains any non-ASCII UTF-8
  18. // 2) equalFoldRight, if s contains special folding ASCII ('k', 'K', 's', 'S')
  19. // 3) asciiEqualFold, no special, but includes non-letters (including _)
  20. // 4) simpleLetterEqualFold, no specials, no non-letters.
  21. //
  22. // The letters S and K are special because they map to 3 runes, not just 2:
  23. // * S maps to s and to U+017F 'ſ' Latin small letter long s
  24. // * k maps to K and to U+212A 'K' Kelvin sign
  25. // See http://play.golang.org/p/tTxjOc0OGo
  26. //
  27. // The returned function is specialized for matching against s and
  28. // should only be given s. It's not curried for performance reasons.
  29. func foldFunc(s []byte) func(s, t []byte) bool {
  30. nonLetter := false
  31. special := false // special letter
  32. for _, b := range s {
  33. if b >= utf8.RuneSelf {
  34. return bytes.EqualFold
  35. }
  36. upper := b & caseMask
  37. if upper < 'A' || upper > 'Z' {
  38. nonLetter = true
  39. } else if upper == 'K' || upper == 'S' {
  40. // See above for why these letters are special.
  41. special = true
  42. }
  43. }
  44. if special {
  45. return equalFoldRight
  46. }
  47. if nonLetter {
  48. return asciiEqualFold
  49. }
  50. return simpleLetterEqualFold
  51. }
  52. // equalFoldRight is a specialization of bytes.EqualFold when s is
  53. // known to be all ASCII (including punctuation), but contains an 's',
  54. // 'S', 'k', or 'K', requiring a Unicode fold on the bytes in t.
  55. // See comments on foldFunc.
  56. func equalFoldRight(s, t []byte) bool {
  57. for _, sb := range s {
  58. if len(t) == 0 {
  59. return false
  60. }
  61. tb := t[0]
  62. if tb < utf8.RuneSelf {
  63. if sb != tb {
  64. sbUpper := sb & caseMask
  65. if 'A' <= sbUpper && sbUpper <= 'Z' {
  66. if sbUpper != tb&caseMask {
  67. return false
  68. }
  69. } else {
  70. return false
  71. }
  72. }
  73. t = t[1:]
  74. continue
  75. }
  76. // sb is ASCII and t is not. t must be either kelvin
  77. // sign or long s; sb must be s, S, k, or K.
  78. tr, size := utf8.DecodeRune(t)
  79. switch sb {
  80. case 's', 'S':
  81. if tr != smallLongEss {
  82. return false
  83. }
  84. case 'k', 'K':
  85. if tr != kelvin {
  86. return false
  87. }
  88. default:
  89. return false
  90. }
  91. t = t[size:]
  92. }
  93. if len(t) > 0 {
  94. return false
  95. }
  96. return true
  97. }
  98. // asciiEqualFold is a specialization of bytes.EqualFold for use when
  99. // s is all ASCII (but may contain non-letters) and contains no
  100. // special-folding letters.
  101. // See comments on foldFunc.
  102. func asciiEqualFold(s, t []byte) bool {
  103. if len(s) != len(t) {
  104. return false
  105. }
  106. for i, sb := range s {
  107. tb := t[i]
  108. if sb == tb {
  109. continue
  110. }
  111. if ('a' <= sb && sb <= 'z') || ('A' <= sb && sb <= 'Z') {
  112. if sb&caseMask != tb&caseMask {
  113. return false
  114. }
  115. } else {
  116. return false
  117. }
  118. }
  119. return true
  120. }
  121. // simpleLetterEqualFold is a specialization of bytes.EqualFold for
  122. // use when s is all ASCII letters (no underscores, etc) and also
  123. // doesn't contain 'k', 'K', 's', or 'S'.
  124. // See comments on foldFunc.
  125. func simpleLetterEqualFold(s, t []byte) bool {
  126. if len(s) != len(t) {
  127. return false
  128. }
  129. for i, b := range s {
  130. if b&caseMask != t[i]&caseMask {
  131. return false
  132. }
  133. }
  134. return true
  135. }