git.openstreetmap.org Git - rails.git/blob - lib/rich_text.rb
Localisation updates from https://translatewiki.net.
[rails.git] / lib / rich_text.rb
1 # frozen_string_literal: true
2
3 module RichText
4   SPAMMY_PHRASES = [
5     "Business Description:", "Additional Keywords:"
6   ].freeze
7
8   MAX_DESCRIPTION_LENGTH = 500
9
10   def self.new(format, text)
11     case format
12     when "html" then HTML.new(text || "")
13     when "markdown" then Markdown.new(text || "")
14     when "text" then Text.new(text || "")
15     end
16   end
17
18   class SimpleFormat
19     include ActionView::Helpers::TextHelper
20     include ActionView::Helpers::OutputSafetyHelper
21
22     def sanitize(text, _options = {})
23       Sanitize.clean(text, Sanitize::Config::OSM).html_safe
24     end
25   end
26
27   class Base < String
28     def spam_score
29       link_count = 0
30       link_size = 0
31
32       doc = Nokogiri::HTML(to_html)
33
34       if doc.content.empty?
35         link_proportion = 0
36       else
37         doc.xpath("//a").each do |link|
38           link_count += 1
39           link_size += link.content.length
40         end
41
42         link_proportion = link_size.to_f / doc.content.length
43       end
44
45       spammy_phrases = SPAMMY_PHRASES.count do |phrase|
46         doc.content.include?(phrase)
47       end
48
49       ([link_proportion - 0.2, 0.0].max * 200) +
50         (link_count * 40) +
51         (spammy_phrases * 40)
52     end
53
54     def image
55       nil
56     end
57
58     def image_alt
59       nil
60     end
61
62     def description
63       nil
64     end
65
66     def truncate_html(max_length = nil, img_length = 1000)
67       html_doc = to_html
68       return html_doc if max_length.nil?
69
70       doc = Nokogiri::HTML::DocumentFragment.parse(html_doc)
71       keep_or_discards = %w[p h1 h2 h3 h4 h5 h6 pre a table ul ol dl]
72       accumulated_length = 0
73       exceeded_node_parent = nil
74       truncated = false
75
76       doc.traverse do |node|
77         if accumulated_length >= max_length
78           if node == exceeded_node_parent
79             exceeded_node_parent = node.parent
80             node.remove if keep_or_discards.include?(node.name)
81           else
82             node.remove
83           end
84           next
85         end
86
87         next unless node.children.empty?
88
89         if node.text?
90           accumulated_length += node.text.length
91         elsif node.name == "img"
92           accumulated_length += img_length
93         end
94
95         if accumulated_length >= max_length
96           truncated = true
97           exceeded_node_parent = node.parent
98           node.remove
99         end
100       end
101
102       {
103         :truncated => truncated,
104         :html => doc.to_html.html_safe
105       }
106     end
107
108     protected
109
110     def simple_format(text)
111       SimpleFormat.new.simple_format(text, :dir => "auto")
112     end
113
114     def sanitize(text)
115       Sanitize.clean(text, Sanitize::Config::OSM).html_safe
116     end
117
118     def linkify(text, mode = :urls)
119       link_attr = 'rel="nofollow noopener noreferrer" dir="auto"'
120       Rinku.auto_link(ERB::Util.html_escape(text), mode, link_attr) do |url|
121         url = shorten_host(url, Settings.linkify_hosts, Settings.linkify_hosts_replacement)
122         shorten_host(url, Settings.linkify_wiki_hosts, Settings.linkify_wiki_hosts_replacement) do |path|
123           path.sub(Regexp.new(Settings.linkify_wiki_optional_path_prefix || ""), "")
124         end
125       end.html_safe
126     end
127
128     private
129
130     def shorten_host(url, hosts, hosts_replacement)
131       %r{^(https?://([^/]*))(.*)$}.match(url) do |m|
132         scheme_host, host, path = m.captures
133         if hosts&.include?(host)
134           path = yield(path) if block_given?
135           if hosts_replacement
136             "#{hosts_replacement}#{path}"
137           else
138             "#{scheme_host}#{path}"
139           end
140         end || url
141       end || url
142     end
143   end
144
145   class HTML < Base
146     def to_html
147       linkify(simple_format(self))
148     end
149
150     def to_text
151       to_s
152     end
153   end
154
155   class Markdown < Base
156     def to_html
157       linkify(sanitize(document.to_html), :all)
158     end
159
160     def to_text
161       to_s
162     end
163
164     def image
165       @image_element = first_image_element(document.root) unless defined? @image_element
166       @image_element.attr["src"] if @image_element
167     end
168
169     def image_alt
170       @image_element = first_image_element(document.root) unless defined? @image_element
171       @image_element.attr["alt"] if @image_element
172     end
173
174     def description
175       return @description if defined? @description
176
177       @description = first_truncated_text_content(document.root)
178     end
179
180     private
181
182     def document
183       return @document if @document
184
185       @document = Kramdown::Document.new(self)
186
187       should_get_dir_auto = lambda do |el|
188         dir_auto_types = [:p, :header, :codespan, :codeblock, :pre, :ul, :ol, :table, :dl, :math]
189         return true if dir_auto_types.include?(el.type)
190         return true if el.type == :a && el.children.length == 1 && el.children[0].type == :text && el.children[0].value == el.attr["href"]
191
192         false
193       end
194
195       add_dir = lambda do |element|
196         element.attr["dir"] ||= "auto" if should_get_dir_auto.call(element)
197         element.children.each(&add_dir)
198       end
199       add_dir.call(@document.root)
200
201       @document
202     end
203
204     def first_image_element(element)
205       return element if image?(element) && element.attr["src"].present?
206
207       element.children.find do |child|
208         nested_image = first_image_element(child)
209         break nested_image if nested_image
210       end
211     end
212
213     def first_truncated_text_content(element)
214       if paragraph?(element)
215         truncated_text_content(element)
216       else
217         element.children.find do |child|
218           text = first_truncated_text_content(child)
219           break text unless text.nil?
220         end
221       end
222     end
223
224     def truncated_text_content(element)
225       text = +""
226
227       append_text = lambda do |child|
228         if child.type == :text
229           text << child.value
230         else
231           child.children.each do |c|
232             append_text.call(c)
233             break if text.length > MAX_DESCRIPTION_LENGTH
234           end
235         end
236       end
237       append_text.call(element)
238
239       return nil if text.blank?
240
241       text.truncate(MAX_DESCRIPTION_LENGTH)
242     end
243
244     def image?(element)
245       element.type == :img || (element.type == :html_element && element.value == "img")
246     end
247
248     def paragraph?(element)
249       element.type == :p || (element.type == :html_element && element.value == "p")
250     end
251   end
252
253   class Text < Base
254     def to_html
255       linkify(simple_format(ERB::Util.html_escape(self)))
256     end
257
258     def to_text
259       to_s
260     end
261   end
262 end