Change language detection to include hashtags as words (#11341)
This commit is contained in:
parent
3a6fe657ba
commit
5bfe1e1f05
@ -69,7 +69,7 @@ class LanguageDetector
|
|||||||
new_text = remove_html(text)
|
new_text = remove_html(text)
|
||||||
new_text.gsub!(FetchLinkCardService::URL_PATTERN, '')
|
new_text.gsub!(FetchLinkCardService::URL_PATTERN, '')
|
||||||
new_text.gsub!(Account::MENTION_RE, '')
|
new_text.gsub!(Account::MENTION_RE, '')
|
||||||
new_text.gsub!(Tag::HASHTAG_RE, '')
|
new_text.gsub!(Tag::HASHTAG_RE) { |string| string.gsub(/[#_]/, '#' => '', '_' => ' ').gsub(/[a-z][A-Z]|[a-zA-Z][\d]/) { |s| s.insert(1, ' ') }.downcase }
|
||||||
new_text.gsub!(/:#{CustomEmoji::SHORTCODE_RE_FRAGMENT}:/, '')
|
new_text.gsub!(/:#{CustomEmoji::SHORTCODE_RE_FRAGMENT}:/, '')
|
||||||
new_text.gsub!(/\s+/, ' ')
|
new_text.gsub!(/\s+/, ' ')
|
||||||
new_text
|
new_text
|
||||||
|
@ -32,11 +32,11 @@ describe LanguageDetector do
|
|||||||
expect(result).to eq 'Our website is and also'
|
expect(result).to eq 'Our website is and also'
|
||||||
end
|
end
|
||||||
|
|
||||||
it 'strips #hashtags from strings before detection' do
|
it 'converts #hashtags back to normal text before detection' do
|
||||||
string = 'Hey look at all the #animals and #fish'
|
string = 'Hey look at all the #animals and #FishAndChips'
|
||||||
|
|
||||||
result = described_class.instance.send(:prepare_text, string)
|
result = described_class.instance.send(:prepare_text, string)
|
||||||
expect(result).to eq 'Hey look at all the and'
|
expect(result).to eq 'Hey look at all the animals and fish and chips'
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user