123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378 |
- // Copyright (c) 2014 Couchbase, Inc.
- //
- // Licensed under the Apache License, Version 2.0 (the "License");
- // you may not use this file except in compliance with the License.
- // You may obtain a copy of the License at
- //
- // http://www.apache.org/licenses/LICENSE-2.0
- //
- // Unless required by applicable law or agreed to in writing, software
- // distributed under the License is distributed on an "AS IS" BASIS,
- // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- // See the License for the specific language governing permissions and
- // limitations under the License.
- package search
- import (
- "fmt"
- "reflect"
- "sort"
- "github.com/blevesearch/bleve/index"
- "github.com/blevesearch/bleve/size"
- )
// Static sizes, as reported by reflect, of the DocumentMatch,
// SearchContext and Location structs. They are filled in by init()
// and used as the fixed part of the Size() estimates in this file.
var (
	reflectStaticSizeDocumentMatch int
	reflectStaticSizeSearchContext int
	reflectStaticSizeLocation      int
)
- func init() {
- var dm DocumentMatch
- reflectStaticSizeDocumentMatch = int(reflect.TypeOf(dm).Size())
- var sc SearchContext
- reflectStaticSizeSearchContext = int(reflect.TypeOf(sc).Size())
- var l Location
- reflectStaticSizeLocation = int(reflect.TypeOf(l).Size())
- }
// ArrayPositions is an ordered list of array positions.
type ArrayPositions []uint64

// Equals reports whether ap and other have the same length and the
// same value at every index. A nil and an empty slice are equal.
func (ap ArrayPositions) Equals(other ArrayPositions) bool {
	n := len(ap)
	if n != len(other) {
		return false
	}
	for i := 0; i < n; i++ {
		if ap[i] != other[i] {
			return false
		}
	}
	return true
}

// Compare orders ap against other lexicographically. It returns -1 if
// ap sorts before other, 1 if it sorts after, and 0 if they are equal.
// When one is a strict prefix of the other, the shorter one sorts first.
func (ap ArrayPositions) Compare(other ArrayPositions) int {
	shared := len(ap)
	if len(other) < shared {
		shared = len(other)
	}

	// The first differing element within the shared prefix decides.
	for i := 0; i < shared; i++ {
		switch a, b := ap[i], other[i]; {
		case a < b:
			return -1
		case a > b:
			return 1
		}
	}

	// The shared prefix is identical, so the lengths decide.
	switch {
	case len(ap) > len(other):
		return 1
	case len(ap) < len(other):
		return -1
	}
	return 0
}
- type Location struct {
- // Pos is the position of the term within the field, starting at 1
- Pos uint64 `json:"pos"`
- // Start and End are the byte offsets of the term in the field
- Start uint64 `json:"start"`
- End uint64 `json:"end"`
- // ArrayPositions contains the positions of the term within any elements.
- ArrayPositions ArrayPositions `json:"array_positions"`
- }
- func (l *Location) Size() int {
- return reflectStaticSizeLocation + size.SizeOfPtr +
- len(l.ArrayPositions)*size.SizeOfUint64
- }
- type Locations []*Location
- func (p Locations) Len() int { return len(p) }
- func (p Locations) Swap(i, j int) { p[i], p[j] = p[j], p[i] }
- func (p Locations) Less(i, j int) bool {
- c := p[i].ArrayPositions.Compare(p[j].ArrayPositions)
- if c < 0 {
- return true
- }
- if c > 0 {
- return false
- }
- return p[i].Pos < p[j].Pos
- }
- func (p Locations) Dedupe() Locations { // destructive!
- if len(p) <= 1 {
- return p
- }
- sort.Sort(p)
- slow := 0
- for _, pfast := range p {
- pslow := p[slow]
- if pslow.Pos == pfast.Pos &&
- pslow.Start == pfast.Start &&
- pslow.End == pfast.End &&
- pslow.ArrayPositions.Equals(pfast.ArrayPositions) {
- continue // duplicate, so only move fast ahead
- }
- slow++
- p[slow] = pfast
- }
- return p[:slow+1]
- }
- type TermLocationMap map[string]Locations
- func (t TermLocationMap) AddLocation(term string, location *Location) {
- t[term] = append(t[term], location)
- }
- type FieldTermLocationMap map[string]TermLocationMap
- type FieldTermLocation struct {
- Field string
- Term string
- Location Location
- }
- type FieldFragmentMap map[string][]string
- type DocumentMatch struct {
- Index string `json:"index,omitempty"`
- ID string `json:"id"`
- IndexInternalID index.IndexInternalID `json:"-"`
- Score float64 `json:"score"`
- Expl *Explanation `json:"explanation,omitempty"`
- Locations FieldTermLocationMap `json:"locations,omitempty"`
- Fragments FieldFragmentMap `json:"fragments,omitempty"`
- Sort []string `json:"sort,omitempty"`
- // Fields contains the values for document fields listed in
- // SearchRequest.Fields. Text fields are returned as strings, numeric
- // fields as float64s and date fields as time.RFC3339 formatted strings.
- Fields map[string]interface{} `json:"fields,omitempty"`
- // used to maintain natural index order
- HitNumber uint64 `json:"-"`
- // used to temporarily hold field term location information during
- // search processing in an efficient, recycle-friendly manner, to
- // be later incorporated into the Locations map when search
- // results are completed
- FieldTermLocations []FieldTermLocation `json:"-"`
- }
- func (dm *DocumentMatch) AddFieldValue(name string, value interface{}) {
- if dm.Fields == nil {
- dm.Fields = make(map[string]interface{})
- }
- existingVal, ok := dm.Fields[name]
- if !ok {
- dm.Fields[name] = value
- return
- }
- valSlice, ok := existingVal.([]interface{})
- if ok {
- // already a slice, append to it
- valSlice = append(valSlice, value)
- } else {
- // create a slice
- valSlice = []interface{}{existingVal, value}
- }
- dm.Fields[name] = valSlice
- }
- // Reset allows an already allocated DocumentMatch to be reused
- func (dm *DocumentMatch) Reset() *DocumentMatch {
- // remember the []byte used for the IndexInternalID
- indexInternalID := dm.IndexInternalID
- // remember the []interface{} used for sort
- sort := dm.Sort
- // remember the FieldTermLocations backing array
- ftls := dm.FieldTermLocations
- for i := range ftls { // recycle the ArrayPositions of each location
- ftls[i].Location.ArrayPositions = ftls[i].Location.ArrayPositions[:0]
- }
- // idiom to copy over from empty DocumentMatch (0 allocations)
- *dm = DocumentMatch{}
- // reuse the []byte already allocated (and reset len to 0)
- dm.IndexInternalID = indexInternalID[:0]
- // reuse the []interface{} already allocated (and reset len to 0)
- dm.Sort = sort[:0]
- // reuse the FieldTermLocations already allocated (and reset len to 0)
- dm.FieldTermLocations = ftls[:0]
- return dm
- }
- func (dm *DocumentMatch) Size() int {
- sizeInBytes := reflectStaticSizeDocumentMatch + size.SizeOfPtr +
- len(dm.Index) +
- len(dm.ID) +
- len(dm.IndexInternalID)
- if dm.Expl != nil {
- sizeInBytes += dm.Expl.Size()
- }
- for k, v := range dm.Locations {
- sizeInBytes += size.SizeOfString + len(k)
- for k1, v1 := range v {
- sizeInBytes += size.SizeOfString + len(k1) +
- size.SizeOfSlice
- for _, entry := range v1 {
- sizeInBytes += entry.Size()
- }
- }
- }
- for k, v := range dm.Fragments {
- sizeInBytes += size.SizeOfString + len(k) +
- size.SizeOfSlice
- for _, entry := range v {
- sizeInBytes += size.SizeOfString + len(entry)
- }
- }
- for _, entry := range dm.Sort {
- sizeInBytes += size.SizeOfString + len(entry)
- }
- for k, _ := range dm.Fields {
- sizeInBytes += size.SizeOfString + len(k) +
- size.SizeOfPtr
- }
- return sizeInBytes
- }
- // Complete performs final preparation & transformation of the
- // DocumentMatch at the end of search processing, also allowing the
- // caller to provide an optional preallocated locations slice
- func (dm *DocumentMatch) Complete(prealloc []Location) []Location {
- // transform the FieldTermLocations slice into the Locations map
- nlocs := len(dm.FieldTermLocations)
- if nlocs > 0 {
- if cap(prealloc) < nlocs {
- prealloc = make([]Location, nlocs)
- }
- prealloc = prealloc[:nlocs]
- var lastField string
- var tlm TermLocationMap
- var needsDedupe bool
- for i, ftl := range dm.FieldTermLocations {
- if lastField != ftl.Field {
- lastField = ftl.Field
- if dm.Locations == nil {
- dm.Locations = make(FieldTermLocationMap)
- }
- tlm = dm.Locations[ftl.Field]
- if tlm == nil {
- tlm = make(TermLocationMap)
- dm.Locations[ftl.Field] = tlm
- }
- }
- loc := &prealloc[i]
- *loc = ftl.Location
- if len(loc.ArrayPositions) > 0 { // copy
- loc.ArrayPositions = append(ArrayPositions(nil), loc.ArrayPositions...)
- }
- locs := tlm[ftl.Term]
- // if the loc is before or at the last location, then there
- // might be duplicates that need to be deduplicated
- if !needsDedupe && len(locs) > 0 {
- last := locs[len(locs)-1]
- cmp := loc.ArrayPositions.Compare(last.ArrayPositions)
- if cmp < 0 || (cmp == 0 && loc.Pos <= last.Pos) {
- needsDedupe = true
- }
- }
- tlm[ftl.Term] = append(locs, loc)
- dm.FieldTermLocations[i] = FieldTermLocation{ // recycle
- Location: Location{
- ArrayPositions: ftl.Location.ArrayPositions[:0],
- },
- }
- }
- if needsDedupe {
- for _, tlm := range dm.Locations {
- for term, locs := range tlm {
- tlm[term] = locs.Dedupe()
- }
- }
- }
- }
- dm.FieldTermLocations = dm.FieldTermLocations[:0] // recycle
- return prealloc
- }
- func (dm *DocumentMatch) String() string {
- return fmt.Sprintf("[%s-%f]", string(dm.IndexInternalID), dm.Score)
- }
- type DocumentMatchCollection []*DocumentMatch
- func (c DocumentMatchCollection) Len() int { return len(c) }
- func (c DocumentMatchCollection) Swap(i, j int) { c[i], c[j] = c[j], c[i] }
- func (c DocumentMatchCollection) Less(i, j int) bool { return c[i].Score > c[j].Score }
- type Searcher interface {
- Next(ctx *SearchContext) (*DocumentMatch, error)
- Advance(ctx *SearchContext, ID index.IndexInternalID) (*DocumentMatch, error)
- Close() error
- Weight() float64
- SetQueryNorm(float64)
- Count() uint64
- Min() int
- Size() int
- DocumentMatchPoolSize() int
- }
// SearcherOptions bundles optional settings for searchers.
//
// NOTE(review): the meaning of each field is defined by the code that
// consumes these options, which is not visible in this file.
type SearcherOptions struct {
	Explain            bool
	IncludeTermVectors bool
	Score              string
}
- // SearchContext represents the context around a single search
- type SearchContext struct {
- DocumentMatchPool *DocumentMatchPool
- Collector Collector
- IndexReader index.IndexReader
- }
- func (sc *SearchContext) Size() int {
- sizeInBytes := reflectStaticSizeSearchContext + size.SizeOfPtr +
- reflectStaticSizeDocumentMatchPool + size.SizeOfPtr
- if sc.DocumentMatchPool != nil {
- for _, entry := range sc.DocumentMatchPool.avail {
- if entry != nil {
- sizeInBytes += entry.Size()
- }
- }
- }
- return sizeInBytes
- }
|