X-Git-Url: https://git.openstreetmap.org./osqa.git/blobdiff_plain/1a949f7c97dc2f34c135f5cdf088df2927d3d652..31f67094578292b834139406a9aaea3a15123218:/forum/utils/html2text.py diff --git a/forum/utils/html2text.py b/forum/utils/html2text.py index 3b51771..c666610 100644 --- a/forum/utils/html2text.py +++ b/forum/utils/html2text.py @@ -1,140 +1,140 @@ -# Copyright (c) 2001 Chris Withers -# -# This Software is released under the MIT License: -# http://www.opensource.org/licenses/mit-license.html -# See license.txt for more details. -# -# $Id: html2text.py,v 1.7 2002/12/17 16:56:17 fresh Exp $ - -import sgmllib -from string import lower, replace, split, join - -class HTML2Text(sgmllib.SGMLParser): - - from htmlentitydefs import entitydefs # replace entitydefs from sgmllib - - def __init__(self, ignore_tags=(), indent_width=4, page_width=80): - sgmllib.SGMLParser.__init__(self) - self.result = "" - self.indent = 0 - self.ol_number = 0 - self.page_width=page_width - self.inde_width=indent_width - self.lines=[] - self.line=[] - self.ignore_tags = ignore_tags - - def add_text(self,text): - # convert text into words - words = split(replace(text,'\n',' ')) - self.line.extend(words) - - def add_break(self): - self.lines.append((self.indent,self.line)) - self.line=[] - - def generate(self): - # join lines with indents - indent_width = self.inde_width - page_width = self.page_width - out_paras=[] - for indent,line in self.lines+[(self.indent,self.line)]: - - i=indent*indent_width - indent_string = i*' ' - line_width = page_width-i - - out_para='' - out_line=[] - len_out_line=0 - for word in line: - len_word = len(word) - if len_out_line+len_word