Files
.circleci
.github
app
chewy
controllers
helpers
javascript
fonts
images
mastodon
actions
components
containers
features
locales
middleware
reducers
selectors
service_worker
storage
store
utils
api.js
base_polyfills.js
common.js
compare_id.js
extra_polyfills.js
initial_state.js
is_mobile.js
load_keyboard_extensions.js
load_polyfills.js
main.js
performance.js
ready.js
rtl.js
scroll.js
settings.js
stream.js
test_setup.js
uuid.js
packs
styles
lib
mailers
models
policies
presenters
serializers
services
validators
views
workers
bin
chart
config
db
dist
lib
log
nanobox
public
spec
streaming
vendor
.buildpacks
.codeclimate.yml
.dockerignore
.editorconfig
.env.nanobox
.env.production.sample
.env.test
.env.vagrant
.eslintignore
.eslintrc.js
.foreman
.gitattributes
.gitignore
.haml-lint.yml
.nanoignore
.nvmrc
.profile
.rspec
.rubocop.yml
.ruby-version
.sass-lint.yml
.slugignore
.yarnclean
AUTHORS.md
Aptfile
CHANGELOG.md
CODE_OF_CONDUCT.md
CONTRIBUTING.md
Capfile
Dockerfile
Gemfile
Gemfile.lock
LICENSE
Procfile
Procfile.dev
README.md
Rakefile
SECURITY.md
Vagrantfile
app.json
babel.config.js
boxfile.yml
config.ru
crowdin.yml
docker-compose.yml
ide-helper.js
package.json
postcss.config.js
priv-config
scalingo.json
yarn.lock
hometown/app/javascript/mastodon/rtl.js
Mostafa Ahangarha e974d4923f Exclude URLs from text analysis ()
By the added regex, URLs, including the one without http or even www
like mysite.com will be removed from the toot's body so only the real
text of the toot will be analyzed for RTL detection
2019-09-04 22:30:49 +02:00

33 lines
939 B
JavaScript

// U+0590 to U+05FF - Hebrew
// U+0600 to U+06FF - Arabic
// U+0700 to U+074F - Syriac
// U+0750 to U+077F - Arabic Supplement
// U+0780 to U+07BF - Thaana
// U+07C0 to U+07FF - N'Ko
// U+0800 to U+083F - Samaritan
// U+08A0 to U+08FF - Arabic Extended-A
// U+FB1D to U+FB4F - Hebrew presentation forms
// U+FB50 to U+FDFF - Arabic presentation forms A
// U+FE70 to U+FEFF - Arabic presentation forms B
const rtlChars = /[\u0590-\u083F]|[\u08A0-\u08FF]|[\uFB1D-\uFDFF]|[\uFE70-\uFEFF]/mg;
export function isRtl(text) {
if (text.length === 0) {
return false;
}
text = text.replace(/(?:^|[^\/\w])@([a-z0-9_]+(@[a-z0-9\.\-]+)?)/ig, '');
text = text.replace(/(?:^|[^\/\w])#([\S]+)/ig, '');
text = text.replace(/\s+/g, '');
text = text.replace(/(\w\S+\.\w{2,}\S*)/g, '');
const matches = text.match(rtlChars);
if (!matches) {
return false;
}
return matches.length / text.length > 0.3;
};