123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741 |
- // Copyright (c) 2014 Couchbase, Inc.
- //
- // Licensed under the Apache License, Version 2.0 (the "License");
- // you may not use this file except in compliance with the License.
- // You may obtain a copy of the License at
- //
- // http://www.apache.org/licenses/LICENSE-2.0
- //
- // Unless required by applicable law or agreed to in writing, software
- // distributed under the License is distributed on an "AS IS" BASIS,
- // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- // See the License for the specific language governing permissions and
- // limitations under the License.
- package search
- import (
- "bytes"
- "encoding/json"
- "fmt"
- "math"
- "sort"
- "strings"
- "github.com/blevesearch/bleve/geo"
- "github.com/blevesearch/bleve/numeric"
- )
- var HighTerm = strings.Repeat(string([]byte{0xff}), 10)
- var LowTerm = string([]byte{0x00})
- type SearchSort interface {
- UpdateVisitor(field string, term []byte)
- Value(a *DocumentMatch) string
- Descending() bool
- RequiresDocID() bool
- RequiresScoring() bool
- RequiresFields() []string
- Reverse()
- Copy() SearchSort
- }
- func ParseSearchSortObj(input map[string]interface{}) (SearchSort, error) {
- descending, ok := input["desc"].(bool)
- by, ok := input["by"].(string)
- if !ok {
- return nil, fmt.Errorf("search sort must specify by")
- }
- switch by {
- case "id":
- return &SortDocID{
- Desc: descending,
- }, nil
- case "score":
- return &SortScore{
- Desc: descending,
- }, nil
- case "geo_distance":
- field, ok := input["field"].(string)
- if !ok {
- return nil, fmt.Errorf("search sort mode geo_distance must specify field")
- }
- lon, lat, foundLocation := geo.ExtractGeoPoint(input["location"])
- if !foundLocation {
- return nil, fmt.Errorf("unable to parse geo_distance location")
- }
- rvd := &SortGeoDistance{
- Field: field,
- Desc: descending,
- Lon: lon,
- Lat: lat,
- unitMult: 1.0,
- }
- if distUnit, ok := input["unit"].(string); ok {
- var err error
- rvd.unitMult, err = geo.ParseDistanceUnit(distUnit)
- if err != nil {
- return nil, err
- }
- rvd.Unit = distUnit
- }
- return rvd, nil
- case "field":
- field, ok := input["field"].(string)
- if !ok {
- return nil, fmt.Errorf("search sort mode field must specify field")
- }
- rv := &SortField{
- Field: field,
- Desc: descending,
- }
- typ, ok := input["type"].(string)
- if ok {
- switch typ {
- case "auto":
- rv.Type = SortFieldAuto
- case "string":
- rv.Type = SortFieldAsString
- case "number":
- rv.Type = SortFieldAsNumber
- case "date":
- rv.Type = SortFieldAsDate
- default:
- return nil, fmt.Errorf("unknown sort field type: %s", typ)
- }
- }
- mode, ok := input["mode"].(string)
- if ok {
- switch mode {
- case "default":
- rv.Mode = SortFieldDefault
- case "min":
- rv.Mode = SortFieldMin
- case "max":
- rv.Mode = SortFieldMax
- default:
- return nil, fmt.Errorf("unknown sort field mode: %s", mode)
- }
- }
- missing, ok := input["missing"].(string)
- if ok {
- switch missing {
- case "first":
- rv.Missing = SortFieldMissingFirst
- case "last":
- rv.Missing = SortFieldMissingLast
- default:
- return nil, fmt.Errorf("unknown sort field missing: %s", missing)
- }
- }
- return rv, nil
- }
- return nil, fmt.Errorf("unknown search sort by: %s", by)
- }
- func ParseSearchSortString(input string) SearchSort {
- descending := false
- if strings.HasPrefix(input, "-") {
- descending = true
- input = input[1:]
- } else if strings.HasPrefix(input, "+") {
- input = input[1:]
- }
- if input == "_id" {
- return &SortDocID{
- Desc: descending,
- }
- } else if input == "_score" {
- return &SortScore{
- Desc: descending,
- }
- }
- return &SortField{
- Field: input,
- Desc: descending,
- }
- }
- func ParseSearchSortJSON(input json.RawMessage) (SearchSort, error) {
- // first try to parse it as string
- var sortString string
- err := json.Unmarshal(input, &sortString)
- if err != nil {
- var sortObj map[string]interface{}
- err = json.Unmarshal(input, &sortObj)
- if err != nil {
- return nil, err
- }
- return ParseSearchSortObj(sortObj)
- }
- return ParseSearchSortString(sortString), nil
- }
- func ParseSortOrderStrings(in []string) SortOrder {
- rv := make(SortOrder, 0, len(in))
- for _, i := range in {
- ss := ParseSearchSortString(i)
- rv = append(rv, ss)
- }
- return rv
- }
- func ParseSortOrderJSON(in []json.RawMessage) (SortOrder, error) {
- rv := make(SortOrder, 0, len(in))
- for _, i := range in {
- ss, err := ParseSearchSortJSON(i)
- if err != nil {
- return nil, err
- }
- rv = append(rv, ss)
- }
- return rv, nil
- }
- type SortOrder []SearchSort
- func (so SortOrder) Value(doc *DocumentMatch) {
- for _, soi := range so {
- doc.Sort = append(doc.Sort, soi.Value(doc))
- }
- }
- func (so SortOrder) UpdateVisitor(field string, term []byte) {
- for _, soi := range so {
- soi.UpdateVisitor(field, term)
- }
- }
- func (so SortOrder) Copy() SortOrder {
- rv := make(SortOrder, len(so))
- for i, soi := range so {
- rv[i] = soi.Copy()
- }
- return rv
- }
- // Compare will compare two document matches using the specified sort order
- // if both are numbers, we avoid converting back to term
- func (so SortOrder) Compare(cachedScoring, cachedDesc []bool, i, j *DocumentMatch) int {
- // compare the documents on all search sorts until a differences is found
- for x := range so {
- c := 0
- if cachedScoring[x] {
- if i.Score < j.Score {
- c = -1
- } else if i.Score > j.Score {
- c = 1
- }
- } else {
- iVal := i.Sort[x]
- jVal := j.Sort[x]
- c = strings.Compare(iVal, jVal)
- }
- if c == 0 {
- continue
- }
- if cachedDesc[x] {
- c = -c
- }
- return c
- }
- // if they are the same at this point, impose order based on index natural sort order
- if i.HitNumber == j.HitNumber {
- return 0
- } else if i.HitNumber > j.HitNumber {
- return 1
- }
- return -1
- }
- func (so SortOrder) RequiresScore() bool {
- for _, soi := range so {
- if soi.RequiresScoring() {
- return true
- }
- }
- return false
- }
- func (so SortOrder) RequiresDocID() bool {
- for _, soi := range so {
- if soi.RequiresDocID() {
- return true
- }
- }
- return false
- }
- func (so SortOrder) RequiredFields() []string {
- var rv []string
- for _, soi := range so {
- rv = append(rv, soi.RequiresFields()...)
- }
- return rv
- }
- func (so SortOrder) CacheIsScore() []bool {
- rv := make([]bool, 0, len(so))
- for _, soi := range so {
- rv = append(rv, soi.RequiresScoring())
- }
- return rv
- }
- func (so SortOrder) CacheDescending() []bool {
- rv := make([]bool, 0, len(so))
- for _, soi := range so {
- rv = append(rv, soi.Descending())
- }
- return rv
- }
- func (so SortOrder) Reverse() {
- for _, soi := range so {
- soi.Reverse()
- }
- }
- // SortFieldType lets you control some internal sort behavior
- // normally leaving this to the zero-value of SortFieldAuto is fine
- type SortFieldType int
- const (
- // SortFieldAuto applies heuristics attempt to automatically sort correctly
- SortFieldAuto SortFieldType = iota
- // SortFieldAsString forces sort as string (no prefix coded terms removed)
- SortFieldAsString
- // SortFieldAsNumber forces sort as string (prefix coded terms with shift > 0 removed)
- SortFieldAsNumber
- // SortFieldAsDate forces sort as string (prefix coded terms with shift > 0 removed)
- SortFieldAsDate
- )
- // SortFieldMode describes the behavior if the field has multiple values
- type SortFieldMode int
- const (
- // SortFieldDefault uses the first (or only) value, this is the default zero-value
- SortFieldDefault SortFieldMode = iota // FIXME name is confusing
- // SortFieldMin uses the minimum value
- SortFieldMin
- // SortFieldMax uses the maximum value
- SortFieldMax
- )
- // SortFieldMissing controls where documents missing a field value should be sorted
- type SortFieldMissing int
- const (
- // SortFieldMissingLast sorts documents missing a field at the end
- SortFieldMissingLast SortFieldMissing = iota
- // SortFieldMissingFirst sorts documents missing a field at the beginning
- SortFieldMissingFirst
- )
- // SortField will sort results by the value of a stored field
- // Field is the name of the field
- // Descending reverse the sort order (default false)
- // Type allows forcing of string/number/date behavior (default auto)
- // Mode controls behavior for multi-values fields (default first)
- // Missing controls behavior of missing values (default last)
- type SortField struct {
- Field string
- Desc bool
- Type SortFieldType
- Mode SortFieldMode
- Missing SortFieldMissing
- values [][]byte
- tmp [][]byte
- }
- // UpdateVisitor notifies this sort field that in this document
- // this field has the specified term
- func (s *SortField) UpdateVisitor(field string, term []byte) {
- if field == s.Field {
- s.values = append(s.values, term)
- }
- }
- // Value returns the sort value of the DocumentMatch
- // it also resets the state of this SortField for
- // processing the next document
- func (s *SortField) Value(i *DocumentMatch) string {
- iTerms := s.filterTermsByType(s.values)
- iTerm := s.filterTermsByMode(iTerms)
- s.values = s.values[:0]
- return iTerm
- }
- // Descending determines the order of the sort
- func (s *SortField) Descending() bool {
- return s.Desc
- }
- func (s *SortField) filterTermsByMode(terms [][]byte) string {
- if len(terms) == 1 || (len(terms) > 1 && s.Mode == SortFieldDefault) {
- return string(terms[0])
- } else if len(terms) > 1 {
- switch s.Mode {
- case SortFieldMin:
- sort.Sort(BytesSlice(terms))
- return string(terms[0])
- case SortFieldMax:
- sort.Sort(BytesSlice(terms))
- return string(terms[len(terms)-1])
- }
- }
- // handle missing terms
- if s.Missing == SortFieldMissingLast {
- if s.Desc {
- return LowTerm
- }
- return HighTerm
- }
- if s.Desc {
- return HighTerm
- }
- return LowTerm
- }
- // filterTermsByType attempts to make one pass on the terms
- // if we are in auto-mode AND all the terms look like prefix-coded numbers
- // return only the terms which had shift of 0
- // if we are in explicit number or date mode, return only valid
- // prefix coded numbers with shift of 0
- func (s *SortField) filterTermsByType(terms [][]byte) [][]byte {
- stype := s.Type
- if stype == SortFieldAuto {
- allTermsPrefixCoded := true
- termsWithShiftZero := s.tmp[:0]
- for _, term := range terms {
- valid, shift := numeric.ValidPrefixCodedTermBytes(term)
- if valid && shift == 0 {
- termsWithShiftZero = append(termsWithShiftZero, term)
- } else if !valid {
- allTermsPrefixCoded = false
- }
- }
- if allTermsPrefixCoded {
- terms = termsWithShiftZero
- s.tmp = termsWithShiftZero[:0]
- }
- } else if stype == SortFieldAsNumber || stype == SortFieldAsDate {
- termsWithShiftZero := s.tmp[:0]
- for _, term := range terms {
- valid, shift := numeric.ValidPrefixCodedTermBytes(term)
- if valid && shift == 0 {
- termsWithShiftZero = append(termsWithShiftZero, term)
- }
- }
- terms = termsWithShiftZero
- s.tmp = termsWithShiftZero[:0]
- }
- return terms
- }
- // RequiresDocID says this SearchSort does not require the DocID be loaded
- func (s *SortField) RequiresDocID() bool { return false }
- // RequiresScoring says this SearchStore does not require scoring
- func (s *SortField) RequiresScoring() bool { return false }
- // RequiresFields says this SearchStore requires the specified stored field
- func (s *SortField) RequiresFields() []string { return []string{s.Field} }
- func (s *SortField) MarshalJSON() ([]byte, error) {
- // see if simple format can be used
- if s.Missing == SortFieldMissingLast &&
- s.Mode == SortFieldDefault &&
- s.Type == SortFieldAuto {
- if s.Desc {
- return json.Marshal("-" + s.Field)
- }
- return json.Marshal(s.Field)
- }
- sfm := map[string]interface{}{
- "by": "field",
- "field": s.Field,
- }
- if s.Desc {
- sfm["desc"] = true
- }
- if s.Missing > SortFieldMissingLast {
- switch s.Missing {
- case SortFieldMissingFirst:
- sfm["missing"] = "first"
- }
- }
- if s.Mode > SortFieldDefault {
- switch s.Mode {
- case SortFieldMin:
- sfm["mode"] = "min"
- case SortFieldMax:
- sfm["mode"] = "max"
- }
- }
- if s.Type > SortFieldAuto {
- switch s.Type {
- case SortFieldAsString:
- sfm["type"] = "string"
- case SortFieldAsNumber:
- sfm["type"] = "number"
- case SortFieldAsDate:
- sfm["type"] = "date"
- }
- }
- return json.Marshal(sfm)
- }
- func (s *SortField) Copy() SearchSort {
- rv := *s
- return &rv
- }
- func (s *SortField) Reverse() {
- s.Desc = !s.Desc
- if s.Missing == SortFieldMissingFirst {
- s.Missing = SortFieldMissingLast
- } else {
- s.Missing = SortFieldMissingFirst
- }
- }
- // SortDocID will sort results by the document identifier
- type SortDocID struct {
- Desc bool
- }
- // UpdateVisitor is a no-op for SortDocID as it's value
- // is not dependent on any field terms
- func (s *SortDocID) UpdateVisitor(field string, term []byte) {
- }
- // Value returns the sort value of the DocumentMatch
- func (s *SortDocID) Value(i *DocumentMatch) string {
- return i.ID
- }
- // Descending determines the order of the sort
- func (s *SortDocID) Descending() bool {
- return s.Desc
- }
- // RequiresDocID says this SearchSort does require the DocID be loaded
- func (s *SortDocID) RequiresDocID() bool { return true }
- // RequiresScoring says this SearchStore does not require scoring
- func (s *SortDocID) RequiresScoring() bool { return false }
- // RequiresFields says this SearchStore does not require any stored fields
- func (s *SortDocID) RequiresFields() []string { return nil }
- func (s *SortDocID) MarshalJSON() ([]byte, error) {
- if s.Desc {
- return json.Marshal("-_id")
- }
- return json.Marshal("_id")
- }
- func (s *SortDocID) Copy() SearchSort {
- rv := *s
- return &rv
- }
- func (s *SortDocID) Reverse() {
- s.Desc = !s.Desc
- }
- // SortScore will sort results by the document match score
- type SortScore struct {
- Desc bool
- }
- // UpdateVisitor is a no-op for SortScore as it's value
- // is not dependent on any field terms
- func (s *SortScore) UpdateVisitor(field string, term []byte) {
- }
- // Value returns the sort value of the DocumentMatch
- func (s *SortScore) Value(i *DocumentMatch) string {
- return "_score"
- }
- // Descending determines the order of the sort
- func (s *SortScore) Descending() bool {
- return s.Desc
- }
- // RequiresDocID says this SearchSort does not require the DocID be loaded
- func (s *SortScore) RequiresDocID() bool { return false }
- // RequiresScoring says this SearchStore does require scoring
- func (s *SortScore) RequiresScoring() bool { return true }
- // RequiresFields says this SearchStore does not require any store fields
- func (s *SortScore) RequiresFields() []string { return nil }
- func (s *SortScore) MarshalJSON() ([]byte, error) {
- if s.Desc {
- return json.Marshal("-_score")
- }
- return json.Marshal("_score")
- }
- func (s *SortScore) Copy() SearchSort {
- rv := *s
- return &rv
- }
- func (s *SortScore) Reverse() {
- s.Desc = !s.Desc
- }
- var maxDistance = string(numeric.MustNewPrefixCodedInt64(math.MaxInt64, 0))
- // NewSortGeoDistance creates SearchSort instance for sorting documents by
- // their distance from the specified point.
- func NewSortGeoDistance(field, unit string, lon, lat float64, desc bool) (
- *SortGeoDistance, error) {
- rv := &SortGeoDistance{
- Field: field,
- Desc: desc,
- Unit: unit,
- Lon: lon,
- Lat: lat,
- }
- var err error
- rv.unitMult, err = geo.ParseDistanceUnit(unit)
- if err != nil {
- return nil, err
- }
- return rv, nil
- }
- // SortGeoDistance will sort results by the distance of an
- // indexed geo point, from the provided location.
- // Field is the name of the field
- // Descending reverse the sort order (default false)
- type SortGeoDistance struct {
- Field string
- Desc bool
- Unit string
- values []string
- Lon float64
- Lat float64
- unitMult float64
- }
- // UpdateVisitor notifies this sort field that in this document
- // this field has the specified term
- func (s *SortGeoDistance) UpdateVisitor(field string, term []byte) {
- if field == s.Field {
- s.values = append(s.values, string(term))
- }
- }
- // Value returns the sort value of the DocumentMatch
- // it also resets the state of this SortField for
- // processing the next document
- func (s *SortGeoDistance) Value(i *DocumentMatch) string {
- iTerms := s.filterTermsByType(s.values)
- iTerm := s.filterTermsByMode(iTerms)
- s.values = s.values[:0]
- if iTerm == "" {
- return maxDistance
- }
- i64, err := numeric.PrefixCoded(iTerm).Int64()
- if err != nil {
- return maxDistance
- }
- docLon := geo.MortonUnhashLon(uint64(i64))
- docLat := geo.MortonUnhashLat(uint64(i64))
- dist := geo.Haversin(s.Lon, s.Lat, docLon, docLat)
- // dist is returned in km, so convert to m
- dist *= 1000
- if s.unitMult != 0 {
- dist /= s.unitMult
- }
- distInt64 := numeric.Float64ToInt64(dist)
- return string(numeric.MustNewPrefixCodedInt64(distInt64, 0))
- }
- // Descending determines the order of the sort
- func (s *SortGeoDistance) Descending() bool {
- return s.Desc
- }
- func (s *SortGeoDistance) filterTermsByMode(terms []string) string {
- if len(terms) >= 1 {
- return terms[0]
- }
- return ""
- }
- // filterTermsByType attempts to make one pass on the terms
- // return only valid prefix coded numbers with shift of 0
- func (s *SortGeoDistance) filterTermsByType(terms []string) []string {
- var termsWithShiftZero []string
- for _, term := range terms {
- valid, shift := numeric.ValidPrefixCodedTerm(term)
- if valid && shift == 0 {
- termsWithShiftZero = append(termsWithShiftZero, term)
- }
- }
- return termsWithShiftZero
- }
- // RequiresDocID says this SearchSort does not require the DocID be loaded
- func (s *SortGeoDistance) RequiresDocID() bool { return false }
- // RequiresScoring says this SearchStore does not require scoring
- func (s *SortGeoDistance) RequiresScoring() bool { return false }
- // RequiresFields says this SearchStore requires the specified stored field
- func (s *SortGeoDistance) RequiresFields() []string { return []string{s.Field} }
- func (s *SortGeoDistance) MarshalJSON() ([]byte, error) {
- sfm := map[string]interface{}{
- "by": "geo_distance",
- "field": s.Field,
- "location": map[string]interface{}{
- "lon": s.Lon,
- "lat": s.Lat,
- },
- }
- if s.Unit != "" {
- sfm["unit"] = s.Unit
- }
- if s.Desc {
- sfm["desc"] = true
- }
- return json.Marshal(sfm)
- }
- func (s *SortGeoDistance) Copy() SearchSort {
- rv := *s
- return &rv
- }
- func (s *SortGeoDistance) Reverse() {
- s.Desc = !s.Desc
- }
- type BytesSlice [][]byte
- func (p BytesSlice) Len() int { return len(p) }
- func (p BytesSlice) Less(i, j int) bool { return bytes.Compare(p[i], p[j]) < 0 }
- func (p BytesSlice) Swap(i, j int) { p[i], p[j] = p[j], p[i] }
|