X-Git-Url: https://git.openstreetmap.org./osqa.git/blobdiff_plain/5960a749b88d64b2d0587e090d3f47be554d4f9d..11014041527e050c889dfb8899e0fb87945b5513:/forum/utils/html.py

diff --git a/forum/utils/html.py b/forum/utils/html.py
index e7ca42c..441f1f2 100644
--- a/forum/utils/html.py
+++ b/forum/utils/html.py
@@ -1,8 +1,9 @@
 """Utilities for working with HTML."""
-import html5lib
-from html5lib import sanitizer, serializer, tokenizer, treebuilders, treewalkers
+#import html5lib
+from html5lib import sanitizer, serializer, tokenizer, treebuilders, treewalkers, HTMLParser
+from django.utils.html import strip_tags
 from forum.utils.html2text import HTML2Text
-from django.template import mark_safe
+from django.utils.safestring import mark_safe
 from forum import settings
 
 class HTMLSanitizerMixin(sanitizer.HTMLSanitizerMixin):
@@ -38,7 +39,7 @@ class HTMLSanitizer(tokenizer.HTMLTokenizer, HTMLSanitizerMixin):
 
 def sanitize_html(html):
     """Sanitizes an HTML fragment."""
-    p = html5lib.HTMLParser(tokenizer=HTMLSanitizer,
+    p = HTMLParser(tokenizer=HTMLSanitizer,
                             tree=treebuilders.getTreeBuilder("dom"))
     dom_tree = p.parseFragment(html)
     walker = treewalkers.getTreeWalker("dom")
@@ -48,6 +49,9 @@ def sanitize_html(html):
     output_generator = s.serialize(stream)
     return u''.join(output_generator)
 
+def cleanup_urls(url):
+    return strip_tags(url)
+
 def html2text(s, ignore_tags=(), indent_width=4, page_width=80):
     ignore_tags = [t.lower() for t in ignore_tags]
 
@@ -58,7 +62,7 @@ def html2text(s, ignore_tags=(), indent_width=4, page_width=80):
     return mark_safe(parser.result)
 
 def buildtag(name, content, **attrs):
-    return mark_safe('<%s %s>%s' % (name, " ".join('%s="%s"' % i for i in attrs.items()), content))
+    return mark_safe('<%s %s>%s</%s>' % (name, " ".join('%s="%s"' % i for i in attrs.items()), unicode(content), name))
 
 def hyperlink(url, title, **attrs):
     return mark_safe('<a href="%s" %s>%s</a>' % (url, " ".join('%s="%s"' % i for i in attrs.items()), title))