From 3202bdd74416035f81170e978129bbffb3151ed2 Mon Sep 17 00:00:00 2001 From: Eugen Rochko Date: Wed, 22 Feb 2017 19:35:11 +0100 Subject: [PATCH] Fix #555 - Use a better URL parser --- Gemfile | 1 + Gemfile.lock | 3 +++ app/lib/formatter.rb | 10 ++++++---- spec/lib/formatter_spec.rb | 32 +++++++++++++++++++++++++++++++- 4 files changed, 41 insertions(+), 5 deletions(-) diff --git a/Gemfile b/Gemfile index 55c1de693..c97f80bde 100644 --- a/Gemfile +++ b/Gemfile @@ -35,6 +35,7 @@ gem 'devise-two-factor' gem 'doorkeeper' gem 'rabl' gem 'rqrcode' +gem 'twitter-text' gem 'oj' gem 'hiredis' gem 'redis', '~>3.2' diff --git a/Gemfile.lock b/Gemfile.lock index f50edaf95..19d61103d 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -422,6 +422,8 @@ GEM thread_safe (0.3.5) tilt (2.0.5) tins (1.12.0) + twitter-text (1.14.5) + unf (~> 0.1.0) tzinfo (1.2.2) thread_safe (~> 0.1) uglifier (3.0.1) @@ -514,6 +516,7 @@ DEPENDENCIES simple_form simplecov statsd-instrument + twitter-text uglifier (>= 1.3.0) webmock will_paginate diff --git a/app/lib/formatter.rb b/app/lib/formatter.rb index 044407a6c..073ab0784 100644 --- a/app/lib/formatter.rb +++ b/app/lib/formatter.rb @@ -9,6 +9,8 @@ class Formatter include ActionView::Helpers::TextHelper include ActionView::Helpers::SanitizeHelper + AUTOLINK_RE = /https?:\/\/([\S]+\.[!#$&-;=?-[\]_a-z~]|%[\w\d]{2}]+[\w])/i + def format(status) return reformat(status.content) unless status.local? 
@@ -44,9 +46,9 @@ class Formatter end def link_urls(html) - html.gsub(URI.regexp(%w(http https))) do |match| - link_html(match) - end + Twitter::Autolink.auto_link_urls(html, url_target: '_blank', + link_attribute_block: lambda { |_, a| a[:rel] << ' noopener' }, + link_text_block: lambda { |_, text| link_html(text) }) end def link_mentions(html, mentions) @@ -70,7 +72,7 @@ class Formatter suffix = url[prefix.length + 30..-1] cutoff = url[prefix.length..-1].length > 30 - "#{prefix}#{text}#{suffix}" + "#{prefix}#{text}#{suffix}" end def hashtag_html(match) diff --git a/spec/lib/formatter_spec.rb b/spec/lib/formatter_spec.rb index 0db1634e9..4b003b8e5 100644 --- a/spec/lib/formatter_spec.rb +++ b/spec/lib/formatter_spec.rb @@ -17,8 +17,38 @@ RSpec.describe Formatter do end it 'contains a link' do - expect(subject).to match('google.com') + expect(subject).to match('google.com') end + +=begin + it 'matches a stand-alone medium URL' do + expect(subject.match('https://hackernoon.com/the-power-to-build-communities-a-response-to-mark-zuckerberg-3f2cac9148a4')[0]).to eq 'https://hackernoon.com/the-power-to-build-communities-a-response-to-mark-zuckerberg-3f2cac9148a4' + end + + it 'matches a stand-alone google URL' do + expect(subject.match('http://google.com')[0]).to eq 'http://google.com' + end + + it 'matches a URL without trailing period' do + expect(subject.match('http://www.mcmansionhell.com/post/156408871451/50-states-of-mcmansion-hell-scottsdale-arizona. ')[0]).to eq 'http://www.mcmansionhell.com/post/156408871451/50-states-of-mcmansion-hell-scottsdale-arizona' + end + + it 'matches a URL without closing parenthesis' do + expect(subject.match('(http://google.com/)')[0]).to eq 'http://google.com' + end + + it 'matches a URL without exclamation point' do + expect(subject.match('http://www.google.com! 
')[0]).to eq 'http://www.google.com' + end + + it 'matches a URL with a query string' do + expect(subject.match('https://www.ruby-toolbox.com/search?utf8=%E2%9C%93&q=autolink')[0]).to eq 'https://www.ruby-toolbox.com/search?utf8=%E2%9C%93&q=autolink' + end + + it 'matches a URL with parenthesis in it' do + expect(subject.match('https://en.wikipedia.org/wiki/Diaspora_(software)')[0]).to eq 'https://en.wikipedia.org/wiki/Diaspora_(software)' + end +=end end describe '#reformat' do