X-Git-Url: https://git.openstreetmap.org./rails.git/blobdiff_plain/ca5de5b4e01297b07ce3a9db45e908f15f7cda11..67a5809c8a486b5e8cc92dabd08624afc1a12e6c:/lib/rich_text.rb?ds=sidebyside diff --git a/lib/rich_text.rb b/lib/rich_text.rb index 9e43ed29c..bdf9c37ca 100644 --- a/lib/rich_text.rb +++ b/lib/rich_text.rb @@ -1,4 +1,10 @@ module RichText + SPAMMY_PHRASES = [ + "Business Description:", "Additional Keywords:" + ].freeze + + MAX_DESCRIPTION_LENGTH = 500 + def self.new(format, text) case format when "html" then HTML.new(text || "") @@ -11,7 +17,7 @@ module RichText include ActionView::Helpers::TextHelper include ActionView::Helpers::OutputSafetyHelper - def sanitize(text) + def sanitize(text, _options = {}) Sanitize.clean(text, Sanitize::Config::OSM).html_safe end end @@ -33,10 +39,28 @@ module RichText link_size += link.content.length end - link_proportion = link_size.to_f / doc.content.length.to_f + link_proportion = link_size.to_f / doc.content.length end - [link_proportion - 0.2, 0.0].max * 200 + link_count * 40 + spammy_phrases = SPAMMY_PHRASES.count do |phrase| + doc.content.include?(phrase) + end + + ([link_proportion - 0.2, 0.0].max * 200) + + (link_count * 40) + + (spammy_phrases * 40) + end + + def image + nil + end + + def image_alt + nil + end + + def description + nil end protected @@ -45,11 +69,15 @@ module RichText SimpleFormat.new.simple_format(text) end - def linkify(text) + def sanitize(text) + Sanitize.clean(text, Sanitize::Config::OSM).html_safe + end + + def linkify(text, mode = :urls) if text.html_safe? - Rinku.auto_link(text, :urls, tag_options(:rel => "nofollow")).html_safe + Rinku.auto_link(text, mode, tag_builder.tag_options(:rel => "nofollow noopener noreferrer")).html_safe else - Rinku.auto_link(text, :urls, tag_options(:rel => "nofollow")) + Rinku.auto_link(text, mode, tag_builder.tag_options(:rel => "nofollow noopener noreferrer")) end end end @@ -62,43 +90,85 @@ module RichText def to_text to_s end - - private - - def sanitize(text) - Sanitize.clean(text, Sanitize::Config::OSM).html_safe - end end class Markdown < Base def to_html - Markdown.html_parser.render(self).html_safe + linkify(sanitize(document.to_html), :all) end def to_text to_s end - def self.html_renderer - @html_renderer ||= Renderer.new(:filter_html => true, :safe_links_only => true) + def image + @image_element = first_image_element(document.root) unless defined? @image_element + @image_element.attr["src"] if @image_element end - def self.html_parser - @html_parser ||= Redcarpet::Markdown.new(html_renderer, :no_intra_emphasis => true, :autolink => true, :space_after_headers => true) + def image_alt + @image_element = first_image_element(document.root) unless defined? @image_element + @image_element.attr["alt"] if @image_element end - class Renderer < Redcarpet::Render::XHTML - def link(link, _title, alt_text) - "#{alt_text}" + def description + return @description if defined? @description + + @description = first_truncated_text_content(document.root) + end + + private + + def document + @document ||= Kramdown::Document.new(self) + end + + def first_image_element(element) + return element if image?(element) && element.attr["src"].present? + + element.children.find do |child| + nested_image = first_image_element(child) + break nested_image if nested_image end + end - def autolink(link, link_type) - if link_type == :email - "#{link}" + def first_truncated_text_content(element) + if paragraph?(element) + truncated_text_content(element) + else + element.children.find do |child| + text = first_truncated_text_content(child) + break text unless text.nil? + end + end + end + + def truncated_text_content(element) + text = "" + + append_text = lambda do |child| + if child.type == :text + text << child.value else - "#{link}" + child.children.each do |c| + append_text.call(c) + break if text.length > MAX_DESCRIPTION_LENGTH + end end end + append_text.call(element) + + return nil if text.blank? + + text.truncate(MAX_DESCRIPTION_LENGTH) + end + + def image?(element) + element.type == :img || (element.type == :html_element && element.value == "img") + end + + def paragraph?(element) + element.type == :p || (element.type == :html_element && element.value == "p") end end