From: Tom Hughes
Date: Mon, 5 Mar 2012 22:46:28 +0000 (+0000)
Subject: Make spam scoring work with new rich text system
X-Git-Tag: live~6285
X-Git-Url: https://git.openstreetmap.org./rails.git/commitdiff_plain/8f4a9a4b870f7b15652eb125c675bf635c7b0484

Make spam scoring work with new rich text system
---

diff --git a/app/models/user.rb b/app/models/user.rb
index 3b55040c6..cebea9468 100644
--- a/app/models/user.rb
+++ b/app/models/user.rb
@@ -204,10 +204,10 @@ class User < ActiveRecord::Base
   def spam_score
     changeset_score = self.changesets.limit(10).length * 50
     trace_score = self.traces.limit(10).length * 50
-    diary_entry_score = self.diary_entries.inject(0) { |s,e| s += OSM.spam_score(e.body) }
-    diary_comment_score = self.diary_comments.inject(0) { |s,e| s += OSM.spam_score(e.body) }
+    diary_entry_score = self.diary_entries.inject(0) { |s,e| s += e.body.spam_score }
+    diary_comment_score = self.diary_comments.inject(0) { |s,c| s += c.body.spam_score }
 
-    score = OSM.spam_score(self.description)
+    score = self.description.spam_score
     score += diary_entry_score / self.diary_entries.length if self.diary_entries.length > 0
     score += diary_comment_score / self.diary_comments.length if self.diary_comments.length > 0
     score -= changeset_score
diff --git a/lib/osm.rb b/lib/osm.rb
index c85abc0f3..1a22af93a 100644
--- a/lib/osm.rb
+++ b/lib/osm.rb
@@ -6,7 +6,6 @@ module OSM
   require 'rexml/text'
   require 'xml/libxml'
   require 'digest/md5'
-  require 'nokogiri'
 
   if defined?(SystemTimer)
     Timer = SystemTimer
@@ -511,27 +510,6 @@ module OSM
            "AND #{prefix}longitude BETWEEN #{bbox.min_lon} AND #{bbox.max_lon}"
   end
 
-  # Return a spam score for a chunk of text
-  def self.spam_score(text)
-    link_count = 0
-    link_size = 0
-
-    doc = Nokogiri::HTML(Rinku.auto_link(text, :urls))
-
-    if doc.content.length > 0
-      doc.xpath("//a").each do |link|
-        link_count += 1
-        link_size += link.content.length
-      end
-
-      link_proportion = link_size.to_f / doc.content.length.to_f
-    else
-      link_proportion = 0
-    end
-
-    return [link_proportion - 0.2, 0.0].max * 200 + link_count * 20
-  end
-
   def self.legal_text_for_country(country_code)
     file_name = File.join(Rails.root, "config", "legales", country_code.to_s + ".yml")
     file_name = File.join(Rails.root, "config", "legales", DEFAULT_LEGALE + ".yml") unless File.exist? file_name
diff --git a/lib/rich_text.rb b/lib/rich_text.rb
index ec5e9e473..f2a558e6a 100644
--- a/lib/rich_text.rb
+++ b/lib/rich_text.rb
@@ -7,7 +7,29 @@ module RichText
     end
   end
 
-  class HTML < String
+  class Base < String
+    def spam_score
+      link_count = 0
+      link_size = 0
+
+      doc = Nokogiri::HTML(to_html)
+
+      if doc.content.length > 0
+        doc.xpath("//a").each do |link|
+          link_count += 1
+          link_size += link.content.length
+        end
+
+        link_proportion = link_size.to_f / doc.content.length.to_f
+      else
+        link_proportion = 0
+      end
+
+      return [link_proportion - 0.2, 0.0].max * 200 + link_count * 20
+    end
+  end
+
+  class HTML < Base
     include ActionView::Helpers::TextHelper
     include ActionView::Helpers::TagHelper
 
@@ -34,7 +56,7 @@ module RichText
     end
   end
 
-  class Markdown < String
+  class Markdown < Base
     def to_html
       html_parser.render(self).html_safe
     end