search.go 9.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378
  1. // Copyright (c) 2014 Couchbase, Inc.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package search
  15. import (
  16. "fmt"
  17. "reflect"
  18. "sort"
  19. "github.com/blevesearch/bleve/index"
  20. "github.com/blevesearch/bleve/size"
  21. )
  22. var reflectStaticSizeDocumentMatch int
  23. var reflectStaticSizeSearchContext int
  24. var reflectStaticSizeLocation int
  25. func init() {
  26. var dm DocumentMatch
  27. reflectStaticSizeDocumentMatch = int(reflect.TypeOf(dm).Size())
  28. var sc SearchContext
  29. reflectStaticSizeSearchContext = int(reflect.TypeOf(sc).Size())
  30. var l Location
  31. reflectStaticSizeLocation = int(reflect.TypeOf(l).Size())
  32. }
  33. type ArrayPositions []uint64
  34. func (ap ArrayPositions) Equals(other ArrayPositions) bool {
  35. if len(ap) != len(other) {
  36. return false
  37. }
  38. for i := range ap {
  39. if ap[i] != other[i] {
  40. return false
  41. }
  42. }
  43. return true
  44. }
  45. func (ap ArrayPositions) Compare(other ArrayPositions) int {
  46. for i, p := range ap {
  47. if i >= len(other) {
  48. return 1
  49. }
  50. if p < other[i] {
  51. return -1
  52. }
  53. if p > other[i] {
  54. return 1
  55. }
  56. }
  57. if len(ap) < len(other) {
  58. return -1
  59. }
  60. return 0
  61. }
  62. type Location struct {
  63. // Pos is the position of the term within the field, starting at 1
  64. Pos uint64 `json:"pos"`
  65. // Start and End are the byte offsets of the term in the field
  66. Start uint64 `json:"start"`
  67. End uint64 `json:"end"`
  68. // ArrayPositions contains the positions of the term within any elements.
  69. ArrayPositions ArrayPositions `json:"array_positions"`
  70. }
  71. func (l *Location) Size() int {
  72. return reflectStaticSizeLocation + size.SizeOfPtr +
  73. len(l.ArrayPositions)*size.SizeOfUint64
  74. }
  75. type Locations []*Location
  76. func (p Locations) Len() int { return len(p) }
  77. func (p Locations) Swap(i, j int) { p[i], p[j] = p[j], p[i] }
  78. func (p Locations) Less(i, j int) bool {
  79. c := p[i].ArrayPositions.Compare(p[j].ArrayPositions)
  80. if c < 0 {
  81. return true
  82. }
  83. if c > 0 {
  84. return false
  85. }
  86. return p[i].Pos < p[j].Pos
  87. }
  88. func (p Locations) Dedupe() Locations { // destructive!
  89. if len(p) <= 1 {
  90. return p
  91. }
  92. sort.Sort(p)
  93. slow := 0
  94. for _, pfast := range p {
  95. pslow := p[slow]
  96. if pslow.Pos == pfast.Pos &&
  97. pslow.Start == pfast.Start &&
  98. pslow.End == pfast.End &&
  99. pslow.ArrayPositions.Equals(pfast.ArrayPositions) {
  100. continue // duplicate, so only move fast ahead
  101. }
  102. slow++
  103. p[slow] = pfast
  104. }
  105. return p[:slow+1]
  106. }
  107. type TermLocationMap map[string]Locations
  108. func (t TermLocationMap) AddLocation(term string, location *Location) {
  109. t[term] = append(t[term], location)
  110. }
  111. type FieldTermLocationMap map[string]TermLocationMap
  112. type FieldTermLocation struct {
  113. Field string
  114. Term string
  115. Location Location
  116. }
  117. type FieldFragmentMap map[string][]string
  118. type DocumentMatch struct {
  119. Index string `json:"index,omitempty"`
  120. ID string `json:"id"`
  121. IndexInternalID index.IndexInternalID `json:"-"`
  122. Score float64 `json:"score"`
  123. Expl *Explanation `json:"explanation,omitempty"`
  124. Locations FieldTermLocationMap `json:"locations,omitempty"`
  125. Fragments FieldFragmentMap `json:"fragments,omitempty"`
  126. Sort []string `json:"sort,omitempty"`
  127. // Fields contains the values for document fields listed in
  128. // SearchRequest.Fields. Text fields are returned as strings, numeric
  129. // fields as float64s and date fields as time.RFC3339 formatted strings.
  130. Fields map[string]interface{} `json:"fields,omitempty"`
  131. // used to maintain natural index order
  132. HitNumber uint64 `json:"-"`
  133. // used to temporarily hold field term location information during
  134. // search processing in an efficient, recycle-friendly manner, to
  135. // be later incorporated into the Locations map when search
  136. // results are completed
  137. FieldTermLocations []FieldTermLocation `json:"-"`
  138. }
  139. func (dm *DocumentMatch) AddFieldValue(name string, value interface{}) {
  140. if dm.Fields == nil {
  141. dm.Fields = make(map[string]interface{})
  142. }
  143. existingVal, ok := dm.Fields[name]
  144. if !ok {
  145. dm.Fields[name] = value
  146. return
  147. }
  148. valSlice, ok := existingVal.([]interface{})
  149. if ok {
  150. // already a slice, append to it
  151. valSlice = append(valSlice, value)
  152. } else {
  153. // create a slice
  154. valSlice = []interface{}{existingVal, value}
  155. }
  156. dm.Fields[name] = valSlice
  157. }
  158. // Reset allows an already allocated DocumentMatch to be reused
  159. func (dm *DocumentMatch) Reset() *DocumentMatch {
  160. // remember the []byte used for the IndexInternalID
  161. indexInternalID := dm.IndexInternalID
  162. // remember the []interface{} used for sort
  163. sort := dm.Sort
  164. // remember the FieldTermLocations backing array
  165. ftls := dm.FieldTermLocations
  166. for i := range ftls { // recycle the ArrayPositions of each location
  167. ftls[i].Location.ArrayPositions = ftls[i].Location.ArrayPositions[:0]
  168. }
  169. // idiom to copy over from empty DocumentMatch (0 allocations)
  170. *dm = DocumentMatch{}
  171. // reuse the []byte already allocated (and reset len to 0)
  172. dm.IndexInternalID = indexInternalID[:0]
  173. // reuse the []interface{} already allocated (and reset len to 0)
  174. dm.Sort = sort[:0]
  175. // reuse the FieldTermLocations already allocated (and reset len to 0)
  176. dm.FieldTermLocations = ftls[:0]
  177. return dm
  178. }
  179. func (dm *DocumentMatch) Size() int {
  180. sizeInBytes := reflectStaticSizeDocumentMatch + size.SizeOfPtr +
  181. len(dm.Index) +
  182. len(dm.ID) +
  183. len(dm.IndexInternalID)
  184. if dm.Expl != nil {
  185. sizeInBytes += dm.Expl.Size()
  186. }
  187. for k, v := range dm.Locations {
  188. sizeInBytes += size.SizeOfString + len(k)
  189. for k1, v1 := range v {
  190. sizeInBytes += size.SizeOfString + len(k1) +
  191. size.SizeOfSlice
  192. for _, entry := range v1 {
  193. sizeInBytes += entry.Size()
  194. }
  195. }
  196. }
  197. for k, v := range dm.Fragments {
  198. sizeInBytes += size.SizeOfString + len(k) +
  199. size.SizeOfSlice
  200. for _, entry := range v {
  201. sizeInBytes += size.SizeOfString + len(entry)
  202. }
  203. }
  204. for _, entry := range dm.Sort {
  205. sizeInBytes += size.SizeOfString + len(entry)
  206. }
  207. for k, _ := range dm.Fields {
  208. sizeInBytes += size.SizeOfString + len(k) +
  209. size.SizeOfPtr
  210. }
  211. return sizeInBytes
  212. }
  213. // Complete performs final preparation & transformation of the
  214. // DocumentMatch at the end of search processing, also allowing the
  215. // caller to provide an optional preallocated locations slice
  216. func (dm *DocumentMatch) Complete(prealloc []Location) []Location {
  217. // transform the FieldTermLocations slice into the Locations map
  218. nlocs := len(dm.FieldTermLocations)
  219. if nlocs > 0 {
  220. if cap(prealloc) < nlocs {
  221. prealloc = make([]Location, nlocs)
  222. }
  223. prealloc = prealloc[:nlocs]
  224. var lastField string
  225. var tlm TermLocationMap
  226. var needsDedupe bool
  227. for i, ftl := range dm.FieldTermLocations {
  228. if lastField != ftl.Field {
  229. lastField = ftl.Field
  230. if dm.Locations == nil {
  231. dm.Locations = make(FieldTermLocationMap)
  232. }
  233. tlm = dm.Locations[ftl.Field]
  234. if tlm == nil {
  235. tlm = make(TermLocationMap)
  236. dm.Locations[ftl.Field] = tlm
  237. }
  238. }
  239. loc := &prealloc[i]
  240. *loc = ftl.Location
  241. if len(loc.ArrayPositions) > 0 { // copy
  242. loc.ArrayPositions = append(ArrayPositions(nil), loc.ArrayPositions...)
  243. }
  244. locs := tlm[ftl.Term]
  245. // if the loc is before or at the last location, then there
  246. // might be duplicates that need to be deduplicated
  247. if !needsDedupe && len(locs) > 0 {
  248. last := locs[len(locs)-1]
  249. cmp := loc.ArrayPositions.Compare(last.ArrayPositions)
  250. if cmp < 0 || (cmp == 0 && loc.Pos <= last.Pos) {
  251. needsDedupe = true
  252. }
  253. }
  254. tlm[ftl.Term] = append(locs, loc)
  255. dm.FieldTermLocations[i] = FieldTermLocation{ // recycle
  256. Location: Location{
  257. ArrayPositions: ftl.Location.ArrayPositions[:0],
  258. },
  259. }
  260. }
  261. if needsDedupe {
  262. for _, tlm := range dm.Locations {
  263. for term, locs := range tlm {
  264. tlm[term] = locs.Dedupe()
  265. }
  266. }
  267. }
  268. }
  269. dm.FieldTermLocations = dm.FieldTermLocations[:0] // recycle
  270. return prealloc
  271. }
  272. func (dm *DocumentMatch) String() string {
  273. return fmt.Sprintf("[%s-%f]", string(dm.IndexInternalID), dm.Score)
  274. }
  275. type DocumentMatchCollection []*DocumentMatch
  276. func (c DocumentMatchCollection) Len() int { return len(c) }
  277. func (c DocumentMatchCollection) Swap(i, j int) { c[i], c[j] = c[j], c[i] }
  278. func (c DocumentMatchCollection) Less(i, j int) bool { return c[i].Score > c[j].Score }
  279. type Searcher interface {
  280. Next(ctx *SearchContext) (*DocumentMatch, error)
  281. Advance(ctx *SearchContext, ID index.IndexInternalID) (*DocumentMatch, error)
  282. Close() error
  283. Weight() float64
  284. SetQueryNorm(float64)
  285. Count() uint64
  286. Min() int
  287. Size() int
  288. DocumentMatchPoolSize() int
  289. }
  290. type SearcherOptions struct {
  291. Explain bool
  292. IncludeTermVectors bool
  293. Score string
  294. }
  295. // SearchContext represents the context around a single search
  296. type SearchContext struct {
  297. DocumentMatchPool *DocumentMatchPool
  298. Collector Collector
  299. IndexReader index.IndexReader
  300. }
  301. func (sc *SearchContext) Size() int {
  302. sizeInBytes := reflectStaticSizeSearchContext + size.SizeOfPtr +
  303. reflectStaticSizeDocumentMatchPool + size.SizeOfPtr
  304. if sc.DocumentMatchPool != nil {
  305. for _, entry := range sc.DocumentMatchPool.avail {
  306. if entry != nil {
  307. sizeInBytes += entry.Size()
  308. }
  309. }
  310. }
  311. return sizeInBytes
  312. }