git.openstreetmap.org Git - rails.git/blob - lib/rich_text.rb
Detect <p> as richtext paragraph
[rails.git] / lib / rich_text.rb
1 module RichText
2   SPAMMY_PHRASES = [
3     "Business Description:", "Additional Keywords:"
4   ].freeze
5
6   def self.new(format, text)
7     case format
8     when "html" then HTML.new(text || "")
9     when "markdown" then Markdown.new(text || "")
10     when "text" then Text.new(text || "")
11     end
12   end
13
14   class SimpleFormat
15     include ActionView::Helpers::TextHelper
16     include ActionView::Helpers::OutputSafetyHelper
17
18     def sanitize(text, _options = {})
19       Sanitize.clean(text, Sanitize::Config::OSM).html_safe
20     end
21   end
22
23   class Base < String
24     include ActionView::Helpers::TagHelper
25
26     def spam_score
27       link_count = 0
28       link_size = 0
29
30       doc = Nokogiri::HTML(to_html)
31
32       if doc.content.empty?
33         link_proportion = 0
34       else
35         doc.xpath("//a").each do |link|
36           link_count += 1
37           link_size += link.content.length
38         end
39
40         link_proportion = link_size.to_f / doc.content.length
41       end
42
43       spammy_phrases = SPAMMY_PHRASES.count do |phrase|
44         doc.content.include?(phrase)
45       end
46
47       ([link_proportion - 0.2, 0.0].max * 200) +
48         (link_count * 40) +
49         (spammy_phrases * 40)
50     end
51
52     def image
53       nil
54     end
55
56     def image_alt
57       nil
58     end
59
60     def description
61       nil
62     end
63
64     protected
65
66     def simple_format(text)
67       SimpleFormat.new.simple_format(text)
68     end
69
70     def sanitize(text)
71       Sanitize.clean(text, Sanitize::Config::OSM).html_safe
72     end
73
74     def linkify(text, mode = :urls)
75       if text.html_safe?
76         Rinku.auto_link(text, mode, tag_builder.tag_options(:rel => "nofollow noopener noreferrer")).html_safe
77       else
78         Rinku.auto_link(text, mode, tag_builder.tag_options(:rel => "nofollow noopener noreferrer"))
79       end
80     end
81   end
82
83   class HTML < Base
84     def to_html
85       linkify(sanitize(simple_format(self)))
86     end
87
88     def to_text
89       to_s
90     end
91   end
92
93   class Markdown < Base
94     def to_html
95       linkify(sanitize(document.to_html), :all)
96     end
97
98     def to_text
99       to_s
100     end
101
102     def image
103       @image_element = first_image_element(document.root) unless defined? @image_element
104       @image_element.attr["src"] if @image_element
105     end
106
107     def image_alt
108       @image_element = first_image_element(document.root) unless defined? @image_element
109       @image_element.attr["alt"] if @image_element
110     end
111
112     def description
113       @paragraph_element = first_paragraph_element(document.root) unless defined? @paragraph_element
114       text_content(@paragraph_element) if @paragraph_element
115     end
116
117     private
118
119     def document
120       @document ||= Kramdown::Document.new(self)
121     end
122
123     def first_image_element(element)
124       return element if image?(element) && element.attr["src"].present?
125
126       element.children.find do |child|
127         nested_image = first_image_element(child)
128         break nested_image if nested_image
129       end
130     end
131
132     def first_paragraph_element(element)
133       return element if paragraph?(element)
134
135       element.children.find do |child|
136         nested_paragraph = first_paragraph_element(child)
137         break nested_paragraph if nested_paragraph
138       end
139     end
140
141     def text_content(element)
142       text = ""
143
144       append_text = lambda do |child|
145         if child.type == :text
146           text << child.value
147         else
148           child.children.each { |c| append_text.call(c) }
149         end
150       end
151       append_text.call(element)
152
153       text
154     end
155
156     def image?(element)
157       element.type == :img || (element.type == :html_element && element.value == "img")
158     end
159
160     def paragraph?(element)
161       element.type == :p || (element.type == :html_element && element.value == "p")
162     end
163   end
164
165   class Text < Base
166     def to_html
167       linkify(simple_format(ERB::Util.html_escape(self)))
168     end
169
170     def to_text
171       to_s
172     end
173   end
174 end