git.openstreetmap.org Git - rails.git/blobdiff - lib/rich_text.rb
Merge pull request #4887 from AntonKhorev/full-encode-wikipedia-value
[rails.git] / lib / rich_text.rb
index bb2baddc81991832ef24e115b93183e3a1c4e865..f19d3d3a952ddd2c8240e2ea8da228c434523ffc 100644 (file)
@@ -1,4 +1,8 @@
 module RichText
 module RichText
+  SPAMMY_PHRASES = [
+    "Business Description:", "Additional Keywords:"
+  ].freeze
+
   def self.new(format, text)
     case format
     when "html" then HTML.new(text || "")
   def self.new(format, text)
     case format
     when "html" then HTML.new(text || "")
@@ -11,7 +15,7 @@ module RichText
     include ActionView::Helpers::TextHelper
     include ActionView::Helpers::OutputSafetyHelper
 
     include ActionView::Helpers::TextHelper
     include ActionView::Helpers::OutputSafetyHelper
 
-    def sanitize(text)
+    def sanitize(text, _options = {})
       Sanitize.clean(text, Sanitize::Config::OSM).html_safe
     end
   end
       Sanitize.clean(text, Sanitize::Config::OSM).html_safe
     end
   end
@@ -25,18 +29,28 @@ module RichText
 
       doc = Nokogiri::HTML(to_html)
 
 
       doc = Nokogiri::HTML(to_html)
 
-      if doc.content.length > 0
+      if doc.content.empty?
+        link_proportion = 0
+      else
         doc.xpath("//a").each do |link|
           link_count += 1
           link_size += link.content.length
         end
 
         doc.xpath("//a").each do |link|
           link_count += 1
           link_size += link.content.length
         end
 
-        link_proportion = link_size.to_f / doc.content.length.to_f
-      else
-        link_proportion = 0
+        link_proportion = link_size.to_f / doc.content.length
       end
 
       end
 
-      [link_proportion - 0.2, 0.0].max * 200 + link_count * 40
+      spammy_phrases = SPAMMY_PHRASES.count do |phrase|
+        doc.content.include?(phrase)
+      end
+
+      ([link_proportion - 0.2, 0.0].max * 200) +
+        (link_count * 40) +
+        (spammy_phrases * 40)
+    end
+
+    def image
+      nil
     end
 
     protected
     end
 
     protected
@@ -45,11 +59,15 @@ module RichText
       SimpleFormat.new.simple_format(text)
     end
 
       SimpleFormat.new.simple_format(text)
     end
 
-    def linkify(text)
+    def sanitize(text)
+      Sanitize.clean(text, Sanitize::Config::OSM).html_safe
+    end
+
+    def linkify(text, mode = :urls)
       if text.html_safe?
       if text.html_safe?
-        Rinku.auto_link(text, :urls, tag_options(:rel => "nofollow")).html_safe
+        Rinku.auto_link(text, mode, tag_builder.tag_options(:rel => "nofollow noopener noreferrer")).html_safe
       else
       else
-        Rinku.auto_link(text, :urls, tag_options(:rel => "nofollow"))
+        Rinku.auto_link(text, mode, tag_builder.tag_options(:rel => "nofollow noopener noreferrer"))
       end
     end
   end
       end
     end
   end
@@ -62,41 +80,35 @@ module RichText
     def to_text
       to_s
     end
     def to_text
       to_s
     end
-
-    private
-
-    def sanitize(text)
-      Sanitize.clean(text, Sanitize::Config::OSM).html_safe
-    end
   end
 
   class Markdown < Base
     def to_html
   end
 
   class Markdown < Base
     def to_html
-      html_parser.render(self).html_safe
+      linkify(sanitize(document.to_html), :all)
     end
 
     def to_text
       to_s
     end
 
     end
 
     def to_text
       to_s
     end
 
+    def image
+      return @image if defined? @image
+
+      @image = first_image_element(document.root)&.attr&.[]("src")
+    end
+
     private
 
     private
 
-    def html_parser
-      @@html_renderer ||= Renderer.new(:filter_html => true, :safe_links_only => true)
-      @@html_parser ||= Redcarpet::Markdown.new(@@html_renderer,         :no_intra_emphasis => true, :autolink => true, :space_after_headers => true)
+    def document
+      @document ||= Kramdown::Document.new(self)
     end
 
     end
 
-    class Renderer < Redcarpet::Render::XHTML
-      def link(link, _title, alt_text)
-        "<a rel=\"nofollow\" href=\"#{link}\">#{alt_text}</a>"
-      end
+    def first_image_element(element)
+      return element if element.type == :img
 
 
-      def autolink(link, link_type)
-        if link_type == :email
-          "<a rel=\"nofollow\" href=\"mailto:#{link}\">#{link}</a>"
-        else
-          "<a rel=\"nofollow\" href=\"#{link}\">#{link}</a>"
-        end
+      element.children.find do |child|
+        nested_image = first_image_element(child)
+        break nested_image if nested_image
       end
     end
   end
       end
     end
   end