From 507b3a9778a21a0b8f2043b1558ba84f0eee5878 Mon Sep 17 00:00:00 2001 From: qw3rty Date: Wed, 5 May 2010 16:11:17 +0000 Subject: [PATCH] OSQA - 19 osqa now uses akismet to checks that all questions, answers and comments are not spam before they are posted. git-svn-id: http://svn.osqa.net/svnroot/osqa/trunk@132 0cfe37f9-358a-4d5e-be75-b63607b5c754 --- akismet.py | 376 ++++++++++++++++++++++++++++++++++++++ forum/models/node.py | 15 ++ forum/settings/extkeys.py | 18 +- forum/views/commands.py | 17 ++ forum/views/writers.py | 23 +++ 5 files changed, 447 insertions(+), 2 deletions(-) create mode 100644 akismet.py diff --git a/akismet.py b/akismet.py new file mode 100644 index 0000000..1433330 --- /dev/null +++ b/akismet.py @@ -0,0 +1,376 @@ +# Version 0.2.0 +# 2009/06/18 + +# Copyright Michael Foord 2005-2009 +# akismet.py +# Python interface to the akismet API +# E-mail fuzzyman@voidspace.org.uk + +# http://www.voidspace.org.uk/python/modules.shtml +# http://akismet.com + +# Released subject to the BSD License +# See http://www.voidspace.org.uk/python/license.shtml + + +""" +A python interface to the `Akismet `_ API. +This is a web service for blocking SPAM comments to blogs - or other online +services. + +You will need a Wordpress API key, from `wordpress.com `_. + +You should pass in the keyword argument 'agent' to the name of your program, +when you create an Akismet instance. This sets the ``user-agent`` to a useful +value. + +The default is : :: + + Python Interface by Fuzzyman | akismet.py/0.2.0 + +Whatever you pass in, will replace the *Python Interface by Fuzzyman* part. +**0.2.0** will change with the version of this interface. + +Usage example:: + + from akismet import Akismet + + api = Akismet(agent='Test Script') + # if apikey.txt is in place, + # the key will automatically be set + # or you can call api.setAPIKey() + # + if api.key is None: + print "No 'apikey.txt' file." + elif not api.verify_key(): + print "The API key is invalid." + else: + # data should be a dictionary of values + # They can all be filled in with defaults + # from a CGI environment + if api.comment_check(comment, data): + print 'This comment is spam.' + else: + print 'This comment is ham.' +""" + + +import os, sys +from urllib import urlencode +from django.conf import settings +from forum import settings + +import socket +if hasattr(socket, 'setdefaulttimeout'): + # Set the default timeout on sockets to 5 seconds + socket.setdefaulttimeout(5) + +__version__ = '0.2.0' + +__all__ = ( + '__version__', + 'Akismet', + 'AkismetError', + 'APIKeyError', + ) + +__author__ = 'Michael Foord ' + +__docformat__ = "restructuredtext en" + +user_agent = "%s | akismet.py/%s" +DEFAULTAGENT = 'Python Interface by Fuzzyman/%s' + +isfile = os.path.isfile + +urllib2 = None +try: + from google.appengine.api import urlfetch +except ImportError: + import urllib2 + +if urllib2 is None: + def _fetch_url(url, data, headers): + req = urlfetch.fetch(url=url, payload=data, method=urlfetch.POST, headers=headers) + if req.status_code == 200: + return req.content + raise Exception('Could not fetch Akismet URL: %s Response code: %s' % + (url, req.status_code)) +else: + def _fetch_url(url, data, headers): + req = urllib2.Request(url, data, headers) + h = urllib2.urlopen(req) + resp = h.read() + return resp + + +class AkismetError(Exception): + """Base class for all akismet exceptions.""" + +class APIKeyError(AkismetError): + """Invalid API key.""" + +class Akismet(object): + """A class for working with the akismet API""" + + baseurl = 'rest.akismet.com/1.1/' + + def __init__(self, key=None, blog_url=None, agent=None): + """Automatically calls ``setAPIKey``.""" + if agent is None: + agent = DEFAULTAGENT % __version__ + self.user_agent = user_agent % (agent, __version__) + self.key = settings.WORDPRESS_API_KEY + self.blog_url = settings.WORDPRESS_BLOG_URL + # self.setAPIKey(key, blog_url) + + + def _getURL(self): + """ + Fetch the url to make requests to. + + This comprises of api key plus the baseurl. + """ + return 'http://%s.%s' % (self.key, self.baseurl) + + + def _safeRequest(self, url, data, headers): + try: + resp = _fetch_url(url, data, headers) + except Exception, e: + raise AkismetError(str(e)) + return resp + + + def setAPIKey(self, key=None, blog_url=None): + """ + Set the wordpress API key for all transactions. + + If you don't specify an explicit API ``key`` and ``blog_url`` it will + attempt to load them from a file called ``apikey.txt`` in the current + directory. + + This method is *usually* called automatically when you create a new + ``Akismet`` instance. + """ + if key is None and isfile('apikey.txt'): + the_file = [l.strip() for l in open('apikey.txt').readlines() + if l.strip() and not l.strip().startswith('#')] + try: + self.key = the_file[0] + self.blog_url = the_file[1] + except IndexError: + raise APIKeyError("Your 'apikey.txt' is invalid.") + else: + self.key = settings.WORDPRESS_API_KEY + self.blog_url = blog_url + + + def verify_key(self): + """ + This equates to the ``verify-key`` call against the akismet API. + + It returns ``True`` if the key is valid. + + The docs state that you *ought* to call this at the start of the + transaction. + + It raises ``APIKeyError`` if you have not yet set an API key. + + If the connection to akismet fails, it allows the normal ``HTTPError`` + or ``URLError`` to be raised. + (*akismet.py* uses `urllib2 `_) + """ + if self.key is None: + raise APIKeyError("Your have not set an API key.") + data = { 'key': self.key, 'blog': self.blog_url } + # this function *doesn't* use the key as part of the URL + url = 'http://%sverify-key' % self.baseurl + # we *don't* trap the error here + # so if akismet is down it will raise an HTTPError or URLError + headers = {'User-Agent' : self.user_agent} + resp = self._safeRequest(url, urlencode(data), headers) + if resp.lower() == 'valid': + return True + else: + return False + + def _build_data(self, comment, data): + """ + This function builds the data structure required by ``comment_check``, + ``submit_spam``, and ``submit_ham``. + + It modifies the ``data`` dictionary you give it in place. (and so + doesn't return anything) + + It raises an ``AkismetError`` if the user IP or user-agent can't be + worked out. + """ + data['comment_content'] = comment + if not 'user_ip' in data: + try: + val = os.environ['REMOTE_ADDR'] + except KeyError: + raise AkismetError("No 'user_ip' supplied") + data['user_ip'] = val + if not 'user_agent' in data: + try: + val = os.environ['HTTP_USER_AGENT'] + except KeyError: + raise AkismetError("No 'user_agent' supplied") + data['user_agent'] = val + # + data.setdefault('referrer', os.environ.get('HTTP_REFERER', 'unknown')) + data.setdefault('permalink', '') + data.setdefault('comment_type', 'comment') + data.setdefault('comment_author', '') + data.setdefault('comment_author_email', '') + data.setdefault('comment_author_url', '') + data.setdefault('SERVER_ADDR', os.environ.get('SERVER_ADDR', '')) + data.setdefault('SERVER_ADMIN', os.environ.get('SERVER_ADMIN', '')) + data.setdefault('SERVER_NAME', os.environ.get('SERVER_NAME', '')) + data.setdefault('SERVER_PORT', os.environ.get('SERVER_PORT', '')) + data.setdefault('SERVER_SIGNATURE', os.environ.get('SERVER_SIGNATURE', + '')) + data.setdefault('SERVER_SOFTWARE', os.environ.get('SERVER_SOFTWARE', + '')) + data.setdefault('HTTP_ACCEPT', os.environ.get('HTTP_ACCEPT', '')) + data.setdefault('blog', self.blog_url) + + + def comment_check(self, comment, data=None, build_data=True, DEBUG=False): + """ + This is the function that checks comments. + + It returns ``True`` for spam and ``False`` for ham. + + If you set ``DEBUG=True`` then it will return the text of the response, + instead of the ``True`` or ``False`` object. + + It raises ``APIKeyError`` if you have not yet set an API key. + + If the connection to Akismet fails then the ``HTTPError`` or + ``URLError`` will be propogated. + + As a minimum it requires the body of the comment. This is the + ``comment`` argument. + + Akismet requires some other arguments, and allows some optional ones. + The more information you give it, the more likely it is to be able to + make an accurate diagnosise. + + You supply these values using a mapping object (dictionary) as the + ``data`` argument. + + If ``build_data`` is ``True`` (the default), then *akismet.py* will + attempt to fill in as much information as possible, using default + values where necessary. This is particularly useful for programs + running in a {acro;CGI} environment. A lot of useful information + can be supplied from evironment variables (``os.environ``). See below. + + You *only* need supply values for which you don't want defaults filled + in for. All values must be strings. + + There are a few required values. If they are not supplied, and + defaults can't be worked out, then an ``AkismetError`` is raised. + + If you set ``build_data=False`` and a required value is missing an + ``AkismetError`` will also be raised. + + The normal values (and defaults) are as follows : :: + + 'user_ip': os.environ['REMOTE_ADDR'] (*) + 'user_agent': os.environ['HTTP_USER_AGENT'] (*) + 'referrer': os.environ.get('HTTP_REFERER', 'unknown') [#]_ + 'permalink': '' + 'comment_type': 'comment' [#]_ + 'comment_author': '' + 'comment_author_email': '' + 'comment_author_url': '' + 'SERVER_ADDR': os.environ.get('SERVER_ADDR', '') + 'SERVER_ADMIN': os.environ.get('SERVER_ADMIN', '') + 'SERVER_NAME': os.environ.get('SERVER_NAME', '') + 'SERVER_PORT': os.environ.get('SERVER_PORT', '') + 'SERVER_SIGNATURE': os.environ.get('SERVER_SIGNATURE', '') + 'SERVER_SOFTWARE': os.environ.get('SERVER_SOFTWARE', '') + 'HTTP_ACCEPT': os.environ.get('HTTP_ACCEPT', '') + + (*) Required values + + You may supply as many additional 'HTTP_*' type values as you wish. + These should correspond to the http headers sent with the request. + + .. [#] Note the spelling "referrer". This is a required value by the + akismet api - however, referrer information is not always + supplied by the browser or server. In fact the HTTP protocol + forbids relying on referrer information for functionality in + programs. + .. [#] The `API docs `_ state that this value + can be " *blank, comment, trackback, pingback, or a made up value* + *like 'registration'* ". + """ + if self.key is None: + raise APIKeyError("Your have not set an API key.") + if data is None: + data = {} + if build_data: + self._build_data(comment, data) + if 'blog' not in data: + data['blog'] = self.blog_url + url = '%scomment-check' % self._getURL() + # we *don't* trap the error here + # so if akismet is down it will raise an HTTPError or URLError + headers = {'User-Agent' : self.user_agent} + resp = self._safeRequest(url, urlencode(data), headers) + if DEBUG: + return resp + resp = resp.lower() + if resp == 'true': + return True + elif resp == 'false': + return False + else: + # NOTE: Happens when you get a 'howdy wilbur' response ! + raise AkismetError('missing required argument.') + + + def submit_spam(self, comment, data=None, build_data=True): + """ + This function is used to tell akismet that a comment it marked as ham, + is really spam. + + It takes all the same arguments as ``comment_check``, except for + *DEBUG*. + """ + if self.key is None: + raise APIKeyError("Your have not set an API key.") + if data is None: + data = {} + if build_data: + self._build_data(comment, data) + url = '%ssubmit-spam' % self._getURL() + # we *don't* trap the error here + # so if akismet is down it will raise an HTTPError or URLError + headers = {'User-Agent' : self.user_agent} + self._safeRequest(url, urlencode(data), headers) + + + def submit_ham(self, comment, data=None, build_data=True): + """ + This function is used to tell akismet that a comment it marked as spam, + is really ham. + + It takes all the same arguments as ``comment_check``, except for + *DEBUG*. + """ + if self.key is None: + raise APIKeyError("Your have not set an API key.") + if data is None: + data = {} + if build_data: + self._build_data(comment, data) + url = '%ssubmit-ham' % self._getURL() + # we *don't* trap the error here + # so if akismet is down it will raise an HTTPError or URLError + headers = {'User-Agent' : self.user_agent} + self._safeRequest(url, urlencode(data), headers) diff --git a/forum/models/node.py b/forum/models/node.py index 00cbbd0..3a20a3b 100644 --- a/forum/models/node.py +++ b/forum/models/node.py @@ -1,3 +1,4 @@ +from akismet import * from base import * from tag import Tag @@ -201,6 +202,20 @@ class Node(BaseModel, NodeContent, DeletableContent): super(Node, self).save(*args, **kwargs) if tags is not None: self.tags = tags + @staticmethod + def isSpam(comment, data): + api = Akismet() + if api.key is None: + print "problem" # raise APIKeyError + elif not api.verify_key(): + print "problem" # raise APIKeyError() + else: + if api.comment_check(comment, data): + return True + else: + return False + return data + class Meta: app_label = 'forum' diff --git a/forum/settings/extkeys.py b/forum/settings/extkeys.py index 50fa5ff..a2e8222 100644 --- a/forum/settings/extkeys.py +++ b/forum/settings/extkeys.py @@ -8,7 +8,21 @@ label = _("Google sitemap code"), help_text = _("This is the code you get when you register your site at Google webmaster central."), required=False)) -GOOGLE_ANALYTICS_KEY = Setting('GOOGLE_ANALYTICS_KEY', '', EXT_KEYS_SET, dict( +GOOGLE_ANALYTICS_KEY = Setting('GOOGLE_ANALYTICS_KEY', '', EXT_KEYS_SET, dict( label = _("Google analytics key"), help_text = _("Your Google analytics key. You can get one at the Google analytics official website"), -required=False)) \ No newline at end of file +required=False)) + +WORDPRESS_API_KEY = Setting('WORDPRESS_API_KEY', '', EXT_KEYS_SET, dict( +label = _("Wordpress API key"), +help_text = _("Your Wordpress API key. You can get one at http://wordpress.com/"), +required=False)) + +WORDPRESS_BLOG_URL = Setting('WORDPRESS_BLOG_URL', '', EXT_KEYS_SET, dict( +label = _("Wordpress blog url"), +help_text = _("Your Wordpress blog url. You can get one at http://wordpress.com/"), +required=False)) + + + + diff --git a/forum/views/commands.py b/forum/views/commands.py index a55ac56..d0599b3 100644 --- a/forum/views/commands.py +++ b/forum/views/commands.py @@ -41,6 +41,12 @@ class AnonymousNotAllowedException(Exception): """ % {'action': action, 'signin_url': reverse('auth_signin')}) ) +class SpamNotAllowedException(Exception): + def __init__(self, action = "comment"): + super(SpamNotAllowedException, self).__init__( + _("""Your %s has been marked as spam.""" % action) + ) + class NotEnoughLeftException(Exception): def __init__(self, action, limit): super(NotEnoughLeftException, self).__init__( @@ -244,6 +250,17 @@ def comment(request, id): comment.create_revision(user, body=comment_text) + data = { + "user_ip":request.META["REMOTE_ADDR"], + "user_agent":request.environ['HTTP_USER_AGENT'], + "comment_author":request.user.real_name, + "comment_author_email":request.user.email, + "comment_author_url":request.user.website, + "comment":comment_text + } + if Node.isSpam(comment_text, data): + raise SpamNotAllowedException() + if comment.active_revision.revision == 1: return { 'commands': { diff --git a/forum/views/writers.py b/forum/views/writers.py index e2143a8..fad53c1 100644 --- a/forum/views/writers.py +++ b/forum/views/writers.py @@ -18,6 +18,7 @@ from forum.models import * from forum.const import * from forum.utils.forms import get_next_url from forum.views.readers import _get_tags_cache_json +from forum.views.commands import SpamNotAllowedException # used in index page INDEX_PAGE_SIZE = 20 @@ -104,6 +105,17 @@ def ask(request): form = AskForm(request.POST) if form.is_valid(): if request.user.is_authenticated(): + data = { + "user_ip":request.META["REMOTE_ADDR"], + "user_agent":request.environ['HTTP_USER_AGENT'], + "comment_author":request.user.real_name, + "comment_author_email":request.user.email, + "comment_author_url":request.user.website, + "comment":request.POST['text'] + } + if Node.isSpam(request.POST['text'], data): + raise SpamNotAllowedException("question") + return _create_post(request, Question, form) else: return HttpResponseRedirect(reverse('auth_action_signin', kwargs={'action': 'newquestion'})) @@ -245,6 +257,17 @@ def answer(request, id): form = AnswerForm(question, request.POST) if form.is_valid(): if request.user.is_authenticated(): + data = { + "user_ip":request.META["REMOTE_ADDR"], + "user_agent":request.environ['HTTP_USER_AGENT'], + "comment_author":request.user.real_name, + "comment_author_email":request.user.email, + "comment_author_url":request.user.website, + "comment":request.POST['text'] + } + if Node.isSpam(request.POST['text'], data): + raise SpamNotAllowedException("answer") + return _create_post(request, Answer, form, question) else: return HttpResponseRedirect(reverse('auth_action_signin', kwargs={'action': 'newquestion'})) -- 2.39.5