Add more accurate hashtag search (#11579)
* Add more accurate hashtag search. Using Elasticsearch to index hashtags with edge n-grams and score them by usage within the last 7 days since last activity. Only hashtags that have been reviewed and are listable can appear in searches, unless they match the query exactly.
* Fix search analyzer dropping non-ASCII characters.
This commit is contained in:
37
app/chewy/tags_index.rb
Normal file
37
app/chewy/tags_index.rb
Normal file
@ -0,0 +1,37 @@
|
||||
# frozen_string_literal: true
|
||||
|
||||
# Chewy index definition for hashtags, sourced from the `::Tag.listable` scope.
#
# Two custom analyzers are declared. Both apply the `lowercase`,
# `asciifolding` and `cjk_width` token filters and differ only in tokenizer:
#
# * `content`    - `keyword` tokenizer
# * `edge_ngram` - custom `edge_ngram` tokenizer producing 2..15 character grams
#
# Indexed fields: `name` (with an `edge_ngram` sub-field), `reviewed`,
# `usage` and `last_status_at`.
class TagsIndex < Chewy::Index
  # Builds one analyzer definition; a fresh filter array is created per call
  # so the two analyzers never share a mutable object.
  build_analyzer = lambda do |tokenizer_name|
    {
      tokenizer: tokenizer_name,
      filter: ['lowercase', 'asciifolding', 'cjk_width'],
    }
  end

  analysis_config = {
    analyzer: {
      content: build_analyzer.call('keyword'),
      edge_ngram: build_analyzer.call('edge_ngram'),
    },

    tokenizer: {
      edge_ngram: { type: 'edge_ngram', min_gram: 2, max_gram: 15 },
    },
  }

  settings(index: { refresh_interval: '15m' }, analysis: analysis_config)

  # Passed as `delete_if`: true for records that are destroyed or not listable.
  removable = ->(record) { record.destroyed? || !record.listable? }

  define_type(::Tag.listable, delete_if: removable) do
    root(date_detection: false) do
      # `name` is analyzed with `content`; the sub-field is indexed with the
      # `edge_ngram` analyzer but searched with `content`.
      field(:name, type: 'text', analyzer: 'content') do
        field(:edge_ngram, type: 'text', analyzer: 'edge_ngram', search_analyzer: 'content')
      end

      field(:reviewed, type: 'boolean', value: ->(record) { record.reviewed? })

      # Total of the `:accounts` values across all entries of the tag's history.
      field(
        :usage,
        type: 'long',
        value: ->(record) { record.history.sum { |entry| entry[:accounts].to_i } }
      )

      # Falls back to the creation time when the tag has no `last_status_at`.
      field(
        :last_status_at,
        type: 'date',
        value: ->(record) { record.last_status_at || record.created_at }
      )
    end
  end
end
|
Reference in New Issue
Block a user