git.openstreetmap.org Git - rails.git/blob - lib/rich_text.rb
Shorten matching urls in linkify
[rails.git] / lib / rich_text.rb
1 # frozen_string_literal: true
2
3 module RichText
4   SPAMMY_PHRASES = [
5     "Business Description:", "Additional Keywords:"
6   ].freeze
7
8   MAX_DESCRIPTION_LENGTH = 500
9
10   def self.new(format, text)
11     case format
12     when "html" then HTML.new(text || "")
13     when "markdown" then Markdown.new(text || "")
14     when "text" then Text.new(text || "")
15     end
16   end
17
18   class SimpleFormat
19     include ActionView::Helpers::TextHelper
20     include ActionView::Helpers::OutputSafetyHelper
21
22     def sanitize(text, _options = {})
23       Sanitize.clean(text, Sanitize::Config::OSM).html_safe
24     end
25   end
26
27   class Base < String
28     include ActionView::Helpers::TagHelper
29
30     def spam_score
31       link_count = 0
32       link_size = 0
33
34       doc = Nokogiri::HTML(to_html)
35
36       if doc.content.empty?
37         link_proportion = 0
38       else
39         doc.xpath("//a").each do |link|
40           link_count += 1
41           link_size += link.content.length
42         end
43
44         link_proportion = link_size.to_f / doc.content.length
45       end
46
47       spammy_phrases = SPAMMY_PHRASES.count do |phrase|
48         doc.content.include?(phrase)
49       end
50
51       ([link_proportion - 0.2, 0.0].max * 200) +
52         (link_count * 40) +
53         (spammy_phrases * 40)
54     end
55
56     def image
57       nil
58     end
59
60     def image_alt
61       nil
62     end
63
64     def description
65       nil
66     end
67
68     protected
69
70     def simple_format(text)
71       SimpleFormat.new.simple_format(text, :dir => "auto")
72     end
73
74     def sanitize(text)
75       Sanitize.clean(text, Sanitize::Config::OSM).html_safe
76     end
77
78     def linkify(text, mode = :urls)
79       link_attr = tag_builder.tag_options(:rel => "nofollow noopener noreferrer")
80       Rinku.auto_link(ERB::Util.html_escape(text), mode, link_attr) do |url|
81         %r{^https?://([^/]*)(.*)$}.match(url) do |m|
82           "#{Settings.linkify_hosts_replacement}#{m[2]}" if Settings.linkify_hosts_replacement &&
83                                                             Settings.linkify_hosts&.include?(m[1])
84         end || url
85       end.html_safe
86     end
87   end
88
89   class HTML < Base
90     def to_html
91       linkify(sanitize(simple_format(self)))
92     end
93
94     def to_text
95       to_s
96     end
97   end
98
99   class Markdown < Base
100     def to_html
101       linkify(sanitize(document.to_html), :all)
102     end
103
104     def to_text
105       to_s
106     end
107
108     def image
109       @image_element = first_image_element(document.root) unless defined? @image_element
110       @image_element.attr["src"] if @image_element
111     end
112
113     def image_alt
114       @image_element = first_image_element(document.root) unless defined? @image_element
115       @image_element.attr["alt"] if @image_element
116     end
117
118     def description
119       return @description if defined? @description
120
121       @description = first_truncated_text_content(document.root)
122     end
123
124     private
125
126     def document
127       @document ||= Kramdown::Document.new(self)
128     end
129
130     def first_image_element(element)
131       return element if image?(element) && element.attr["src"].present?
132
133       element.children.find do |child|
134         nested_image = first_image_element(child)
135         break nested_image if nested_image
136       end
137     end
138
139     def first_truncated_text_content(element)
140       if paragraph?(element)
141         truncated_text_content(element)
142       else
143         element.children.find do |child|
144           text = first_truncated_text_content(child)
145           break text unless text.nil?
146         end
147       end
148     end
149
150     def truncated_text_content(element)
151       text = +""
152
153       append_text = lambda do |child|
154         if child.type == :text
155           text << child.value
156         else
157           child.children.each do |c|
158             append_text.call(c)
159             break if text.length > MAX_DESCRIPTION_LENGTH
160           end
161         end
162       end
163       append_text.call(element)
164
165       return nil if text.blank?
166
167       text.truncate(MAX_DESCRIPTION_LENGTH)
168     end
169
170     def image?(element)
171       element.type == :img || (element.type == :html_element && element.value == "img")
172     end
173
174     def paragraph?(element)
175       element.type == :p || (element.type == :html_element && element.value == "p")
176     end
177   end
178
179   class Text < Base
180     def to_html
181       linkify(simple_format(ERB::Util.html_escape(self)))
182     end
183
184     def to_text
185       to_s
186     end
187   end
188 end