]> git.openstreetmap.org Git - osqa.git/blob - forum/skins/default/media/js/wmd/showdown.js
Fixed WMD to correctly show < and > in preview for <code> text
[osqa.git] / forum / skins / default / media / js / wmd / showdown.js
1 //
2 // showdown.js -- A javascript port of Markdown.
3 //
4 // Copyright (c) 2007 John Fraser.
5 //
6 // Original Markdown Copyright (c) 2004-2005 John Gruber
7 //   <http://daringfireball.net/projects/markdown/>
8 //
9 // The full source distribution is at:
10 //
11 //                              A A L
12 //                              T C A
13 //                              T K B
14 //
15 //   <http://www.attacklab.net/>
16 //
17
18 //
19 // Wherever possible, Showdown is a straight, line-by-line port
20 // of the Perl version of Markdown.
21 //
22 // This is not a normal parser design; it's basically just a
23 // series of string substitutions.  It's hard to read and
24 // maintain this way,  but keeping Showdown close to the original
25 // design makes it easier to port new features.
26 //
27 // More importantly, Showdown behaves like markdown.pl in most
28 // edge cases.  So web applications can do client-side preview
29 // in Javascript, and then build identical HTML on the server.
30 //
31 // This port needs the new RegExp functionality of ECMA 262,
32 // 3rd Edition (i.e. Javascript 1.5).  Most modern web browsers
33 // should do fine.  Even with the new regular expression features,
34 // We do a lot of work to emulate Perl's regex functionality.
35 // The tricky changes in this file mostly have the "attacklab:"
36 // label.  Major or self-explanatory changes don't.
37 //
38 // Smart diff tools like Araxis Merge will be able to match up
39 // this file with markdown.pl in a useful way.  A little tweaking
40 // helps: in a copy of markdown.pl, replace "#" with "//" and
41 // replace "$text" with "text".  Be sure to ignore whitespace
42 // and line endings.
43 //
44
45
46 //
47 // Showdown usage:
48 //
49 //   var text = "Markdown *rocks*.";
50 //
51 //   var converter = new Attacklab.showdown.converter();
52 //   var html = converter.makeHtml(text);
53 //
54 //   alert(html);
55 //
56 // Note: move the sample code to the bottom of this
57 // file before uncommenting it.
58 //
59
60
61 //
62 // Attacklab namespace
63 //
// Reuse an existing Attacklab object if one is already defined;
// otherwise start with an empty namespace.
var Attacklab = Attacklab || {};

//
// Showdown namespace
//
Attacklab.showdown = Attacklab.showdown || {};
70
71 //
72 // converter
73 //
74 // Wraps all "globals" so that the only thing
75 // exposed is makeHtml().
76 //
77 Attacklab.showdown.converter = function() {
78
79
80 // g_urls and g_titles allow arbitrary user-entered strings as keys. This
81 // caused an exception (and hence stopped the rendering) when the user entered
82 // e.g. [push] or [__proto__]. Adding a prefix to the actual key prevents this
83 // (since no builtin property starts with "s_"). See
84 // http://meta.stackoverflow.com/questions/64655/strange-wmd-bug
85 // (granted, switching from Array() to Object() alone would have left only __proto__
86 // to be a problem)
var SaveHash = function () {
    // Every user-supplied key is stored under a prefixed property name so
    // that it can never collide with a built-in property of the object.
    var PREFIX = "s_";
    var slotFor = function (key) {
        return PREFIX + key;
    };

    // Store `value` under `key`.
    this.set = function (key, value) {
        this[slotFor(key)] = value;
    };

    // Return the value stored under `key`, or undefined if none was set.
    this.get = function (key) {
        return this[slotFor(key)];
    };
};
95
96 //
97 // Globals:
98 //
99
100 // Global hashes, used by various utility routines
101 var g_urls;
102 var g_titles;
103 var g_html_blocks;
104
105 // Used to track when we're inside an ordered or unordered list
106 // (see _ProcessListItems() for details):
107 var g_list_level = 0;
108
109
this.makeHtml = function(text) {
//
// Main function. Takes Markdown source in `text` and returns the
// generated HTML string.
//
// The order in which other subs are called here is essential. Link and
// image substitutions need to happen before
// _EscapeSpecialCharsWithinTagAttributes(), so that any *'s or _'s in the <a>
// and <img> tags get encoded.
//

    // html_sanitize() is defined outside this function. The two callbacks
    // hand URLs and ids back unchanged.
    text = html_sanitize(text, function(url) {return url;}, function(id) {return id;});

    // Clear the global hashes. If we don't clear these, you get conflicts
    // from other articles when generating a page which contains more than
    // one article (e.g. an index page that shows the N most recent
    // articles):
    g_urls = new SaveHash();
    g_titles = new SaveHash();
    g_html_blocks = [];

    // attacklab: Replace ~ with ~T
    // This lets us use tilde as an escape char to avoid md5 hashes
    // The choice of character is arbitrary; anything that isn't
    // magic in Markdown will work.
    text = text.replace(/~/g,"~T");

    // attacklab: Replace $ with ~D
    // RegExp interprets $ as a special character
    // when it's in a replacement string
    text = text.replace(/\$/g,"~D");

    // Standardize line endings
    text = text.replace(/\r\n/g,"\n"); // DOS to Unix
    text = text.replace(/\r/g,"\n"); // Mac to Unix

    // Make sure text begins and ends with a couple of newlines:
    text = "\n\n" + text + "\n\n";

    // Convert all tabs to spaces.
    text = _Detab(text);

    // Strip any lines consisting only of spaces and tabs.
    // This makes subsequent regexen easier to write, because we can
    // match consecutive blank lines with /\n+/ instead of something
    // contorted like /[ \t]*\n+/ .
    text = text.replace(/^[ \t]+$/mg,"");

    // Turn block-level HTML blocks into hash entries
    text = _HashHTMLBlocks(text);

    // Strip link definitions, store in hashes.
    text = _StripLinkDefinitions(text);

    text = _RunBlockGamut(text);

    text = _UnescapeSpecialChars(text);

    // attacklab: Restore dollar signs
    // ("$$" in a replacement string inserts a single literal "$")
    text = text.replace(/~D/g,"$$");

    // attacklab: Restore tildes
    text = text.replace(/~T/g,"~");

    // Collapse double-encoded angle brackets (presumably produced when
    // already-escaped text is encoded a second time inside code -- TODO
    // confirm against the code-encoding helper). They are restored as the
    // entities &lt; / &gt;, which still display as < and >. Emitting the raw
    // characters here would let escaped text turn into live markup in the
    // returned HTML.
    text = text.replace(/&amp;lt;/g,"&lt;");
    text = text.replace(/&amp;gt;/g,"&gt;");

    return text;
};
175
var _StripLinkDefinitions = function(text) {
//
// Removes link definitions from text and records each URL (and title,
// when present) in g_urls / g_titles under the lower-cased link id.
// Returns the text without the definitions.
//

    // Link defs are in the form: ^[id]: url "optional title"
    //
    // Pattern, piece by piece:
    //   ^[ ]{0,3}\[(.+)\]:        id = $1  attacklab: g_tab_width - 1
    //   [ \t]*\n?[ \t]*           maybe *one* newline
    //   <?(\S+?)>?                url = $2
    //   (?=\s|$)                  lookahead for whitespace
    //   [ \t]*\n?[ \t]*           maybe one newline
    //   (                         (potential) title = $3
    //     (\n*)                   any lines skipped = $4
    //     ["(](.+?)[")]           title = $5
    //     [ \t]*
    //   )?                        title is optional
    //   (?:\n+)
    var definitionPattern = /^[ ]{0,3}\[(.+)\]:[ \t]*\n?[ \t]*<?(\S+?)>?(?=\s|$)[ \t]*\n?[ \t]*((\n*)["(](.+?)[")][ \t]*)?(?:\n+)/gm;

    var recordDefinition = function (wholeMatch, id, url, titleChunk, skippedLines, title) {
        var key = id.toLowerCase();  // Link IDs are case-insensitive
        g_urls.set(key, _EncodeAmpsAndAngles(url));

        if (skippedLines) {
            // Blank lines separated the URL from the parenthetical, so it is
            // not a title after all. Hand back the text we swallowed.
            return titleChunk;
        }

        if (title) {
            g_titles.set(key, title.replace(/"/g,"&quot;"));
        }

        // Completely remove the definition from the text
        return "";
    };

    return text.replace(definitionPattern, recordDefinition);
};
226
var _HashHTMLBlocks = function(text) {
//
// Replaces raw HTML blocks found in `text` with whatever hashElement()
// returns for them, and returns the resulting text.
//

    // Hashify HTML blocks:
    // We only want to do this for block-level HTML tags, such as headers,
    // lists, and tables. That's because we still want to wrap <p>s around
    // "paragraphs" that are wrapped in non-block-level tags, such as anchors,
    // phrase emphasis, and spans. The list of tags we're looking for is
    // hard-coded in the two regex literals below; the first (nested) pattern
    // additionally accepts ins and del.

    // First, look for nested blocks, e.g.:
    //   <div>
    //     <div>
    //     tags for inner block must be indented.
    //     </div>
    //   </div>
    //
    // The outermost tags must start at the left margin for this to match, and
    // the inner nested divs must be indented.
    // We need to do this before the next, more liberal match, because the next
    // match will start at the first `<div>` and stop at the first `</div>`.
    //
    // attacklab: This regex can be expensive when it fails.
    //
    //   ^                  start of line (with /m); whole block saved in $1
    //   <(tag list)        start tag = $2
    //   \b                 word break
    //   [^\r]*?\n          any number of lines, minimally matching
    //                      (attacklab: hack around khtml/pcre bug)
    //   </\2>              the matching end tag at the left margin
    //   [ \t]*             trailing spaces/tabs
    //   (?=\n+)            followed by a newline
    text = text.replace(/^(<(p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math|ins|del)\b[^\r]*?\n<\/\2>[ \t]*(?=\n+))/gm,hashElement);

    //
    // Now match more liberally, simply from `\n<tag>` to `</tag>\n`
    //
    //   ^                  start of line (with /m); whole block saved in $1
    //   <(tag list)        start tag = $2
    //   \b                 word break
    //   [^\r]*?            any number of lines, minimally matching
    //   .*</\2>            the matching end tag
    //   [ \t]*             trailing spaces/tabs
    //   (?=\n+)\n          followed by a newline, which is captured too
    text = text.replace(/^(<(p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math)\b[^\r]*?.*<\/\2>[ \t]*(?=\n+)\n)/gm,hashElement);

    // Special case just for <hr />. It was easier to make a special case than
    // to make the other regex more complicated.
    //
    //   \n                 starting after a newline
    //   [ ]{0,3}
    //   (<(hr)\b([^<>])*?\/?>)   the tag itself
    //   [ \t]*
    //   (?=\n{2,})         followed by a blank line
    text = text.replace(/\n[ ]{0,3}((<(hr)\b([^<>])*?\/?>)[ \t]*(?=\n{2,}))/g,hashElement);

    // Special case for standalone HTML comments:
    //
    //   \n\n               starting after a blank line
    //   [ ]{0,3}           attacklab: g_tab_width - 1
    //   <!(-- ... --)>     see http://www.w3.org/TR/html-markup/syntax.html#comments
    //   [ \t]*
    //   (?=\n{2,})         followed by a blank line
    text = text.replace(/\n\n[ ]{0,3}(<!(--(?:|(?:[^>-]|-[^>])(?:[^-]|-[^-])*)--)>[ \t]*(?=\n{2,}))/g, hashElement);

    // PHP and ASP-style processor instructions (<?...?> and <%...%>)
    //
    //   (?:\n\n)           starting after a blank line
    //   [ ]{0,3}           attacklab: g_tab_width - 1
    //   <([?%])[^\r]*?\2>  opening marker = $2, closed by the same marker
    //   [ \t]*
    //   (?=\n{2,})         followed by a blank line
    text = text.replace(/(?:\n\n)([ ]{0,3}(?:<([?%])[^\r]*?\2>)[ \t]*(?=\n{2,}))/g,hashElement);

    return text;
};
346
var hashElement = function(wholeMatch,m1) {
//
// Replace callback: stores the captured block (m1) in g_html_blocks and
// returns a "~KxK" marker, where x is the index the block was stored at.
//

    // Undo double lines at the front, then strip trailing blank lines
    var stripped = m1.replace(/^\n+/,"").replace(/\n+$/g,"");

    // push() returns the new length, so the stored block's key is length - 1
    var key = g_html_blocks.push(stripped) - 1;

    return "\n\n~K" + key + "K\n\n";
};
361
var _RunBlockGamut = function(text, doNotUnhash) {
//
// Applies, in order, the transformations that produce block-level
// markup: headers, horizontal rules, lists, code blocks, block quotes
// and finally paragraphs. `doNotUnhash` is passed straight through to
// _FormParagraphs().
//
    text = _DoHeaders(text);

    // Do Horizontal Rules: a line made of three or more *, - or _
    // (optionally separated by single spaces) is replaced with the value
    // hashBlock() returns for "<hr />".
    var ruleReplacement = hashBlock("<hr />");
    var rulePatterns = [
        /^[ ]{0,2}([ ]?\*[ ]?){3,}[ \t]*$/gm,
        /^[ ]{0,2}([ ]?-[ ]?){3,}[ \t]*$/gm,
        /^[ ]{0,2}([ ]?_[ ]?){3,}[ \t]*$/gm
    ];
    for (var i = 0; i < rulePatterns.length; i++) {
        text = text.replace(rulePatterns[i], ruleReplacement);
    }

    text = _DoBlockQuotes(_DoCodeBlocks(_DoLists(text)));

    // _HashHTMLBlocks() already ran once from makeHtml(), but that
    // was to escape raw HTML in the original Markdown source. This time,
    // we're escaping the markup we've just created, so that we don't wrap
    // <p> tags around block-level tags.
    text = _HashHTMLBlocks(text);

    return _FormParagraphs(text, doNotUnhash);
};
388
389
var _RunSpanGamut = function(text) {
//
// Applies, in order, the transformations that occur *within* block-level
// tags like paragraphs, headers, and list items, and returns the result.
//

    // Order matters:
    //  - Images must be processed before anchors, because ![foo][f]
    //    looks like an anchor.
    //  - Auto-links (things like `<http://example.com/>`) must come after
    //    _DoAnchors(), because you can use < and > delimiters in inline
    //    links like [this](<url>).
    var stages = [
        _DoCodeSpans,
        _EscapeSpecialCharsWithinTagAttributes,
        _EncodeBackslashEscapes,
        _DoImages,
        _DoAnchors,
        _DoAutoLinks,
        _EncodeAmpsAndAngles,
        _DoItalicsAndBold
    ];

    for (var i = 0; i < stages.length; i++) {
        // Call through a local so the stage is invoked as a plain function.
        var stage = stages[i];
        text = stage(text);
    }

    // Do hard breaks: two or more spaces before a newline
    return text.replace(/  +\n/g," <br />\n");
};
417
var _EscapeSpecialCharsWithinTagAttributes = function(text) {
//
// Within tags -- meaning between < and > -- encode [\ ` * _] so they
// don't conflict with their use in Markdown for code, italics and strong.
//

    // Matches HTML tags and comments.  See Friedl's
    // "Mastering Regular Expressions", 2nd Ed., pp. 200-201.
    // SE: changed the comment part of the regex
    var tagOrComment = /(<[a-z\/!$]("[^"]*"|'[^']*'|[^'">])*>|<!(--(?:|(?:[^>-]|-[^>])(?:[^-]|-[^-])*)--)>)/gi;

    return text.replace(tagOrComment, function(matchedTag) {
        // A <code> or </code> that sits strictly inside the matched tag
        // (one character before it, one after) is turned into a backtick
        // before the special characters are escaped.
        var withBackticks = matchedTag.replace(/(.)<\/?code>(?=.)/g,"$1`");
        return escapeCharacters(withBackticks,"\\`*_");
    });
};
439
var _DoAnchors = function(text) {
//
// Turn Markdown link shortcuts into XHTML <a> tags.
// Three passes are run in a fixed order; every match is handed to
// writeAnchorTag(), which receives the capture groups as $1..$7.
//

    //
    // First, handle reference-style links: [link text] [id]
    //
    //   (                        wrap whole match in $1
    //     \[
    //     ( (?: \[[^\]]*\]       link text = $2; allow brackets nested one level
    //         | [^\[\]] )* )     or anything else that is not a bracket
    //     \]
    //     [ ]?                   one optional space
    //     (?:\n[ ]*)?            one optional newline followed by spaces
    //     \[ (.*?) \]            id = $3
    //   )()()()()                pad remaining backreferences
    var referenceLink = /(\[((?:\[[^\]]*\]|[^\[\]])*)\][ ]?(?:\n[ ]*)?\[(.*?)\])()()()()/g;

    //
    // Next, inline-style links: [link text](url "optional title")
    //
    //   (                        wrap whole match in $1
    //     \[ ( ... ) \]          link text = $2 (same shape as above)
    //     \(                     literal paren
    //     [ \t]*
    //     ()                     no id, so leave $3 empty
    //     <?( (?: \([^)]*\)      href = $4; allow one level of (correctly
    //         | [^()] )*? )>?    nested) parens (think MSDN)
    //     [ \t]*
    //     (                      $5
    //       (['"])               quote char = $6
    //       (.*?)                Title = $7
    //       \6                   matching quote
    //       [ \t]*               ignore any spaces/tabs between closing quote and )
    //     )?                     title is optional
    //     \)
    //   )
    var inlineLink = /(\[((?:\[[^\]]*\]|[^\[\]])*)\]\([ \t]*()<?((?:\([^)]*\)|[^()])*?)>?[ \t]*((['"])(.*?)\6[ \t]*)?\))/g;

    //
    // Last, handle reference-style shortcuts: [link text]
    // These must come last in case you've also got [link test][1]
    // or [link test](/foo)
    //
    //   (                        wrap whole match in $1
    //     \[ ([^\[\]]+) \]       link text = $2; can't contain '[' or ']'
    //   )()()()()()              pad rest of backreferences
    var shortcutLink = /(\[([^\[\]]+)\])()()()()()/g;

    return text
        .replace(referenceLink, writeAnchorTag)
        .replace(inlineLink, writeAnchorTag)
        .replace(shortcutLink, writeAnchorTag);
};
531
var writeAnchorTag = function(wholeMatch,m1,m2,m3,m4,m5,m6,m7) {
//
// Replace callback used by _DoAnchors(). Builds the <a> tag for one
// matched link, or returns the matched text unchanged when a
// reference-style link has no known definition.
//
//   m1 = the whole matched link    m2 = link text
//   m3 = link id (may be empty)    m4 = url (may be empty)
//   m7 = title (may be missing)
//
    var label = m2;
    var refId = m3.toLowerCase();
    var href = m4;
    var tooltip = (m7 == null) ? "" : m7;

    if (href == "") {
        if (refId == "") {
            // lower-case and turn embedded newlines into spaces
            refId = label.toLowerCase().replace(/ ?\n/g," ");
        }

        var knownUrl = g_urls.get(refId);
        if (knownUrl != undefined) {
            href = knownUrl;
            var knownTitle = g_titles.get(refId);
            if (knownTitle != undefined) {
                tooltip = knownTitle;
            }
        } else if (m1.search(/\(\s*\)$/m) > -1) {
            // Special case for explicit empty url
            href = "";
        } else {
            // No definition for this id: leave the source text alone
            return m1;
        }
    }

    var attributes = " href=\"" + escapeCharacters(href,"*_") + "\"";

    if (tooltip != "") {
        var quotedTitle = tooltip.replace(/"/g,"&quot;");
        attributes += " title=\"" + escapeCharacters(quotedTitle,"*_") + "\"";
    }

    return "<a" + attributes + ">" + label + "</a>";
};
576
577
var _DoImages = function(text) {
//
// Turn Markdown image shortcuts into <img> tags.
// Every match is handed to writeImageTag(), which receives the capture
// groups as $1..$7.
//

    //
    // First, handle reference-style labeled images: ![alt text][id]
    //
    //   (                        wrap whole match in $1
    //     !\[ (.*?) \]           alt text = $2
    //     [ ]?                   one optional space
    //     (?:\n[ ]*)?            one optional newline followed by spaces
    //     \[ (.*?) \]            id = $3
    //   )()()()()                pad rest of backreferences
    var referenceImage = /(!\[(.*?)\][ ]?(?:\n[ ]*)?\[(.*?)\])()()()()/g;

    //
    // Next, handle inline images:  ![alt text](url "optional title")
    // Don't forget: encode * and _
    //
    //   (                        wrap whole match in $1
    //     !\[ (.*?) \]           alt text = $2
    //     \s?                    One optional whitespace character
    //     \(                     literal paren
    //     [ \t]*
    //     ()                     no id, so leave $3 empty
    //     <?(\S+?)>?             src url = $4
    //     [ \t]*
    //     (                      $5
    //       (['"])               quote char = $6
    //       (.*?)                title = $7
    //       \6                   matching quote
    //       [ \t]*
    //     )?                     title is optional
    //     \)
    //   )
    var inlineImage = /(!\[(.*?)\]\s?\([ \t]*()<?(\S+?)>?[ \t]*((['"])(.*?)\6[ \t]*)?\))/g;

    return text
        .replace(referenceImage, writeImageTag)
        .replace(inlineImage, writeImageTag);
};
635
var writeImageTag = function(wholeMatch,m1,m2,m3,m4,m5,m6,m7) {
//
// Replace callback used by _DoImages(). Builds the <img> tag for one
// matched image, or returns the matched text unchanged when a
// reference-style image has no known definition.
//
//   m1 = the whole matched image   m2 = alt text
//   m3 = link id (may be empty)    m4 = url (may be empty)
//   m7 = title (may be missing)
//
    var refId = m3.toLowerCase();
    var src = m4;
    var caption = m7 ? m7 : "";

    if (src == "") {
        if (refId == "") {
            // lower-case and turn embedded newlines into spaces
            refId = m2.toLowerCase().replace(/ ?\n/g," ");
        }

        var knownUrl = g_urls.get(refId);
        if (knownUrl == undefined) {
            // No definition for this id: leave the source text alone
            return m1;
        }
        src = knownUrl;

        var knownTitle = g_titles.get(refId);
        if (knownTitle != undefined) {
            caption = knownTitle;
        }
    }

    var altAttr = m2.replace(/"/g,"&quot;");
    var srcAttr = escapeCharacters(src,"*_");

    // attacklab: Markdown.pl adds empty title attributes to images.
    // Replicate this bug: the title attribute is written even when empty.
    var titleAttr = escapeCharacters(caption.replace(/"/g,"&quot;"),"*_");

    return "<img src=\"" + srcAttr + "\" alt=\"" + altAttr + "\"" +
        " title=\"" + titleAttr + "\"" +
        " />";
};
680
681
var _DoHeaders = function(text) {
//
// Converts Setext-style and atx-style headers into <h1>..<h6> tags.
// The header text itself is run through _RunSpanGamut().
//

    // Builds one header element followed by a blank line.
    var makeHeader = function(level, content) {
        return "<h" + level + ">" + _RunSpanGamut(content) + "</h" + level + ">\n\n";
    };

    // Setext-style headers:
    //      Header 1
    //      ========
    //
    //      Header 2
    //      --------
    //
    text = text.replace(/^(.+)[ \t]*\n=+[ \t]*\n+/gm, function(wholeMatch, title) {
        return makeHeader(1, title);
    });

    text = text.replace(/^(.+)[ \t]*\n-+[ \t]*\n+/gm, function(wholeMatch, title) {
        return makeHeader(2, title);
    });

    // atx-style headers:
    //  # Header 1
    //  ## Header 2
    //  ## Header 2 with closing hashes ##
    //  ...
    //  ###### Header 6
    //
    //   ^(\#{1,6})     $1 = string of #'s; its length is the header level
    //   [ \t]*
    //   (.+?)          $2 = Header text
    //   [ \t]*
    //   \#*            optional closing #'s (not counted)
    //   \n+
    return text.replace(/^(\#{1,6})[ \t]*(.+?)[ \t]*\#*\n+/gm, function(wholeMatch, hashes, title) {
        return makeHeader(hashes.length, title);
    });
};
724
// This declaration keeps Dojo compressor from outputting garbage:
var _ProcessListItems;

var _DoLists = function(text) {
//
// Turn Markdown list syntax into HTML <ul> / <ol> markup.
//
// The g_list_level counter decides how a list may start:
//   * non-zero (already inside a list): any line beginning with a list
//     marker opens a sub-list;
//   * zero: a list must start at the very beginning of the text or right
//     after a blank line.
// The body of each list found is handed to _ProcessListItems together with
// the list type ("ul" or "ol").
//

	// Bullet markers (* + -) give "ul"; anything else (i.e. "1.") gives "ol".
	var tag_for = function(first_marker) {
		return (first_marker.search(/[*+-]/g) > -1) ? "ul" : "ol";
	};

	// Both patterns match one entire ul or ol list:
	//   up to three spaces of indent (attacklab: g_tab_width - 1),
	//   the first item marker followed by whitespace,
	//   then as little as possible up to either the ~0 sentinel or a
	//   blank-line gap followed by something that is not a list marker.
	// The top-level variant additionally captures the "run-up" (blank line or
	// start of text) so that it can be put back in front of the list.
	var nested_list = /^(([ ]{0,3}([*+-]|\d+[.])[ \t]+)[^\r]+?(~0|\n{2,}(?=\S)(?![ \t]*(?:[*+-]|\d+[.])[ \t]+)))/gm;
	var top_level_list = /(\n\n|^\n?)(([ ]{0,3}([*+-]|\d+[.])[ \t]+)[^\r]+?(~0|\n{2,}(?=\S)(?![ \t]*(?:[*+-]|\d+[.])[ \t]+)))/g;

	// attacklab: add sentinel to hack around khtml/safari bug:
	// http://bugs.webkit.org/show_bug.cgi?id=11231
	text += "~0";

	if (!g_list_level) {
		text = text.replace(top_level_list, function(wholeMatch, runup, list, first_marker) {
			var tag = tag_for(first_marker);
			return runup + "<" + tag + ">\n" + _ProcessListItems(list, tag) + "</" + tag + ">\n";
		});
	} else {
		text = text.replace(nested_list, function(wholeMatch, list, first_marker) {
			var tag = tag_for(first_marker);

			// Trim any trailing whitespace, to put the closing `</$list_type>`
			// up on the preceding line, to get it past the current stupid
			// HTML block parser. This is a hack to work around the terrible
			// hack that is the HTML block parser.
			var items = _ProcessListItems(list, tag).replace(/\s+$/, "");
			return "<" + tag + ">" + items + "</" + tag + ">\n";
		});
	}

	// attacklab: strip sentinel
	return text.replace(/~0/, "");
};
795
// Regex source for the marker of each list type.
var _listItemMarkers = { ol: "\\d+[.]", ul: "[*+-]" };

_ProcessListItems = function(list_str, list_type) {
//
//  Split the text of one ordered or unordered list into its items and wrap
//  each of them in <li>...</li>.
//
//  list_type is either "ul" or "ol".
//
//  g_list_level is incremented on entry and decremented on exit, so it is
//  non-zero exactly while we are inside a list. _DoLists needs that, because
//  outside of a list something like
//
//    I recommend upgrading to version
//    8. Oops, now this line is treated
//    as a sub-list.
//
//  has to stay a single paragraph even though its second line starts with a
//  digit-period-space sequence, whereas inside a list (or sub-list) the same
//  line starts a sub-list. It is a kludge: this part of Markdown's syntax
//  cannot be parsed perfectly without mind-reading.
//
//  In the original attacklab WMD this function did not receive list_type, and
//  anything matching /[*+-]|\d+[.]/ started the next <li>, which gave:
//
//    Markdown          rendered by WMD        rendered by MarkdownSharp
//    ------------------------------------------------------------------
//    1. first          1. first               1. first
//    2. second         2. second              2. second
//    - third           3. third                   * third
//
//  Only markers of the list's own type are accepted now, to behave like
//  MarkdownSharp. With {MARKER} standing for \d+[.] or [*+-], the item
//  pattern built below is:
//
//    (^[ \t]*)                            leading whitespace = $1
//    ({MARKER}) [ \t]+                    list marker        = $2
//    ([^\r]+? (\n+))                      list item text     = $3
//    (?= (~0 | \1 ({MARKER}) [ \t]+))     sentinel, or next item at same indent
//

	g_list_level++;

	// trim trailing blank lines, then (attacklab) add sentinel to emulate \z
	list_str = list_str.replace(/\n{2,}$/,"\n") + "~0";

	var marker = _listItemMarkers[list_type];
	var item_re = new RegExp("(^[ \\t]*)(" + marker + ")[ \\t]+([^\\r]+?(\\n+))(?=(~0|\\1(" + marker + ")[ \\t]+))", "gm");

	// An item followed by a blank line also makes the item after it "loose".
	var previous_ended_loose = false;

	var render_item = function(wholeMatch, leading_space, item_marker, item) {
		var ends_loose = /\n\n$/.test(item);
		var is_loose = ends_loose || item.search(/\n{2,}/) > -1 || previous_ended_loose;
		var html;

		if (is_loose) {
			// Second argument is doNotUnhash.
			html = _RunBlockGamut(_Outdent(item), true);
		} else {
			// Recursion for sub-lists:
			html = _DoLists(_Outdent(item));
			html = html.replace(/\n$/,""); // chomp
			html = _RunSpanGamut(html);
		}

		previous_ended_loose = ends_loose;
		return "<li>" + html + "</li>\n";
	};

	// Convert the items, then (attacklab) strip the sentinel.
	list_str = list_str.replace(item_re, render_item).replace(/~0/g,"");

	g_list_level--;
	return list_str;
};
885
886
var _DoCodeBlocks = function(text) {
//
//  Convert indented Markdown code blocks into `<pre><code>` blocks.
//
//  The pattern, spelled out:
//
//    (?:\n\n|^)
//    (                              $1 = the code block -- one or more lines,
//      (?:                               each starting with a tab or a
//        (?:[ ]{4}|\t)                   tab-width of spaces
//        .*\n+                           (attacklab: g_tab_width)
//      )+
//    )
//    (\n*[ ]{0,3}[^ \t\n]|(?=~0))   $2 = start of whatever follows the block
//

	var code_block = /(?:\n\n|^)((?:(?:[ ]{4}|\t).*\n+)+)(\n*[ ]{0,3}[^ \t\n]|(?=~0))/g;

	var render = function(wholeMatch, indented, following) {
		var code = _Detab(_EncodeCode(_Outdent(indented)));

		// trim leading and trailing newlines
		code = code.replace(/^\n+/g,"").replace(/\n+$/g,"");

		// $2 was consumed by the match, so it has to be put back.
		return "\n\n<pre><code>" + code + "\n</code></pre>\n\n" + following;
	};

	// attacklab: the ~0 sentinel works around the lack of \A and \Z and a
	// safari\khtml bug; it is stripped again once the blocks are converted.
	return (text + "~0").replace(code_block, render).replace(/~0/,"");
};
929
var hashBlock = function(text) {
	// Store the block (without leading/trailing newlines) in g_html_blocks
	// and return a ~K<index>K placeholder, padded with blank lines.
	var trimmed = text.replace(/(^\n+|\n+$)/g,"");
	var index = g_html_blocks.push(trimmed) - 1;
	return "\n\n~K" + index + "K\n\n";
};
934
935
var _DoCodeSpans = function(text) {
//
//   *  Backtick quotes delimit <code></code> spans.
//
//   *  A run of several backticks can be used as the delimiter when the code
//      itself contains backticks. So, this input:
//
//          Just type ``foo `bar` baz`` at the prompt.
//
//      becomes:
//
//          <p>Just type <code>foo `bar` baz</code> at the prompt.</p>
//
//      There is no limit on the length of the delimiter: to show three
//      consecutive backticks, use four as delimiters, and so on.
//
//   *  Spaces next to the delimiters allow literal backticks at the edges:
//
//          ... type `` `bar` `` ...
//
//      becomes:
//
//          ... type <code>`bar`</code> ...
//
//  The pattern, spelled out:
//
//    (^|[^\\])        $1 = character before the opening ` (not a backslash)
//    (`+)             $2 = opening run of `
//    (                $3 = the code
//      [^\r]*?
//      [^`]           attacklab: work around lack of lookbehind
//    )
//    \2               matching closer
//    (?!`)
//

	return text.replace(/(^|[^\\])(`+)([^\r]*?[^`])\2(?!`)/gm,
		function(wholeMatch, before, ticks, code) {
			// Drop leading, then trailing, spaces and tabs before encoding.
			var trimmed = code.replace(/^([ \t]*)/g,"").replace(/[ \t]*$/g,"");
			return before + "<code>" + _EncodeCode(trimmed) + "</code>";
		});
};
986
987
var _EncodeCode = function(text) {
//
// Make the characters of a Markdown code run literal: HTML-encode
// ampersands and angle brackets, then hide the characters that would
// otherwise be treated as Markdown syntax.
//

	var encoded = text
		// Every ampersand is encoded; HTML entities are not entities inside
		// a Markdown code span.
		.replace(/&/g,"&amp;")
		// Do the angle bracket song and dance:
		.replace(/</g,"&lt;")
		.replace(/>/g,"&gt;");

	// Now, escape characters that are magic in Markdown.
	//
	// NOTE: an earlier maintainer comment ("jj") reports that this escaping
	// breaks the following input (not verified here):
	//
	//   * Item
	//
	//      1. Subitem
	//
	//               special char: *
	//
	return escapeCharacters(encoded,"\*_{}[]\\",false);
};
1017
1018
var _DoItalicsAndBold = function(text) {
//
// Convert **text** / __text__ to <strong> and *text* / _text_ to <em>.
//
	var strong = /(\*\*|__)(?=\S)([^\r]*?\S[\*_]*)\1/g;
	var em = /(\*|_)(?=\S)([^\r]*?\S)\1/g;

	// <strong> must go first; the single-character <em> delimiters would
	// otherwise match inside the doubled ones.
	return text
		.replace(strong, "<strong>$2</strong>")
		.replace(em, "<em>$2</em>");
};
1030
1031
var _DoBlockQuotes = function(text) {
//
// Convert email-style quoted text ("> ...") into <blockquote> elements.
// The quoted content is run through _RunBlockGamut, and the finished
// element is passed to hashBlock.
//
// The pattern, spelled out:
//
//    (                          wrap whole match in $1
//      (
//        ^[ \t]*>[ \t]?         '>' at the start of a line
//        .+\n                   rest of the first line
//        (.+\n)*                subsequent consecutive lines
//        \n*                    blanks
//      )+
//    )
//

	// attacklab: both helpers replace with "~0" and then strip it again to
	// hack around a Konqueror 3.5.4 bug:
	// "----------bug".replace(/^-/g,"") == "bug"

	// Remove the two-space indent again from the lines of a <pre> element.
	var unindent_pre = function(wholeMatch, pre) {
		return pre.replace(/^  /mg,"~0").replace(/~0/g,"");
	};

	var render = function(wholeMatch, quoted) {
		// trim one level of quoting
		var inner = quoted.replace(/^[ \t]*>[ \t]?/gm,"~0").replace(/~0/g,"");

		inner = inner.replace(/^[ \t]+$/gm,"");   // trim whitespace-only lines
		inner = _RunBlockGamut(inner);            // recurse

		// Indent the content by two spaces ...
		inner = inner.replace(/(^|\n)/g,"$1  ");
		// ... but these leading spaces screw with <pre> content, so fix that:
		inner = inner.replace(/(\s*<pre>[^\r]+?<\/pre>)/gm, unindent_pre);

		return hashBlock("<blockquote>\n" + inner + "\n</blockquote>");
	};

	return text.replace(/((^[ \t]*>[ \t]?.+\n(.+\n)*\n*)+)/gm, render);
};
1078
1079
var _FormParagraphs = function(text, doNotUnhash) {
//
//  Wrap each blank-line-separated paragraph of the text in <p> tags.
//
//  Params:
//    text        - string to process
//    doNotUnhash - when truthy, ~K<n>K placeholders are left in place;
//                  otherwise each one is replaced by g_html_blocks[n]
//
//  Returns: the resulting paragraphs/blocks, joined by blank lines.
//

	// Strip leading and trailing lines:
	text = text.replace(/^\n+/g,"");
	text = text.replace(/\n+$/g,"");

	var grafs = text.split(/\n{2,}/g);
	var grafsOut = [];
	var i, str, marker;

	//
	// Wrap <p> tags.
	//
	for (i = 0; i < grafs.length; i++) {
		str = grafs[i];

		if (str.search(/~K(\d+)K/) >= 0) {
			// this paragraph contains a block placeholder: copy it untouched
			grafsOut.push(str);
		}
		else if (str.search(/\S/) >= 0) {
			str = _RunSpanGamut(str);
			str = str.replace(/^([ \t]*)/g,"<p>");
			str += "</p>";
			grafsOut.push(str);
		}
		// whitespace-only paragraphs are dropped
	}

	//
	// Unhashify HTML blocks
	//
	if (!doNotUnhash) {
		for (i = 0; i < grafsOut.length; i++) {
			// Loop rather than replace once: a restored block may itself
			// contain further placeholders.
			// The capture is read from the local match result, not from the
			// global RegExp.$1, and the block is spliced in by position so
			// that "$" sequences in it are never treated as replacement
			// patterns.
			while ((marker = /~K(\d+)K/.exec(grafsOut[i])) !== null) {
				str = grafsOut[i];
				grafsOut[i] = str.substring(0, marker.index) +
					g_html_blocks[marker[1]] +
					str.substring(marker.index + marker[0].length);
			}
		}
	}

	return grafsOut.join("\n\n");
};
1128
1129
var _EncodeAmpsAndAngles = function(text) {
//
// Encode only those ampersands and left angle brackets that are not
// already part of an entity or a tag.
//

	// An ampersand is left alone when what follows looks like an entity.
	// Based entirely on Nat Irons's Amputator MT plugin:
	//   http://bumppo.net/projects/amputator/
	var naked_amp = /&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/g;

	// A "<" is left alone when followed by a letter or one of / ? $ !
	var naked_lt = /<(?![a-z\/?\$!])/gi;

	return text.replace(naked_amp,"&amp;").replace(naked_lt,"&lt;");
};
1142
1143
var _EncodeBackslashEscapes = function(text) {
//
//   Parameter:  String.
//   Returns:    The string, with every backslash escape sequence
//               (a backslash followed by one of  \ ` * _ { } [ ] ( ) > # + - . ! )
//               replaced by the placeholder that escapeCharacters_callback
//               produces for the escaped character.
//

	// attacklab: literal regexes are used here instead of escapeCharacters()
	// to sidestep its use of the (slow) RegExp constructor, as an
	// optimization for Firefox.  This function gets called a LOT.

	// Escaped backslashes go first, so that the backslash they produce can
	// not take part in a second escape sequence.
	text = text.replace(/\\(\\)/g,escapeCharacters_callback);

	// The hyphen must be escaped inside the character class: a bare "+-."
	// is the range U+002B..U+002E, which also contains the comma, and would
	// make "\," lose its backslash although comma is not escapable.
	text = text.replace(/\\([`*_{}\[\]()>#+\-.!])/g,escapeCharacters_callback);
	return text;
};
1164
1165
var _DoAutoLinks = function(text) {
//
// Turn <http://...> style URLs and <address@domain.foo> style email
// addresses into links.
//

	// URLs (http, https, ftp, dict) link to themselves.
	var auto_url = /<((https?|ftp|dict):[^'">\s]+)>/gi;

	// Email addresses, spelled out:
	//
	//    <
	//    (?:mailto:)?
	//    (
	//      [-.\w]+
	//      \@
	//      [-a-z0-9]+(\.[-a-z0-9]+)*\.[a-z]+
	//    )
	//    >
	//
	var auto_email = /<(?:mailto:)?([-.\w]+\@[-a-z0-9]+(\.[-a-z0-9]+)*\.[a-z]+)>/gi;

	var link_email = function(wholeMatch, address) {
		return _EncodeEmailAddress( _UnescapeSpecialChars(address) );
	};

	return text
		.replace(auto_url, "<a href=\"$1\">$1</a>")
		.replace(auto_email, link_email);
};
1192
1193
var _EncodeEmailAddress = function(addr) {
//
//  Input: an email address, e.g. "foo@example.com"
//
//  Output: the email address as a mailto link, with each character
//      of the address encoded as either a decimal or hex entity, in
//      the hopes of foiling most address harvesting spam bots. E.g.:
//
//      <a href="&#x6D;&#97;&#105;&#108;&#x74;&#111;:&#102;&#111;&#111;&#64;&#101;
//         x&#x61;&#109;&#x70;&#108;&#x65;&#x2E;&#99;&#111;&#109;">&#102;&#111;&#111;
//         &#64;&#101;x&#x61;&#109;&#x70;&#108;&#x65;&#x2E;&#99;&#111;&#109;</a>
//
//      The encoding is chosen at random per character, so the exact output
//      differs from call to call.
//
//  Based on a filter by Matthew Wickline, posted to the BBEdit-Talk
//  mailing list: <http://tinyurl.com/yu7ue>
//

	var toDecimalEntity = function(ch) {
		return "&#" + ch.charCodeAt(0) + ";";
	};

	// Number.prototype.toString(16) handles any code unit; the result is
	// padded to at least two upper-case digits.
	var toHexEntity = function(ch) {
		var hex = ch.charCodeAt(0).toString(16).toUpperCase();
		return "&#x" + (hex.length < 2 ? "0" + hex : hex) + ";";
	};

	var entityEncoders = [toDecimalEntity, toHexEntity];

	var encoded = ("mailto:" + addr).replace(/./g, function(ch) {
		if (ch == "@") {
			// this *must* be encoded. I insist.
			return entityEncoders[Math.floor(Math.random()*2)](ch);
		}
		if (ch == ":") {
			// leave ':' alone (to spot mailto: later)
			return ch;
		}
		// roughly 10% raw, 45% hex, 45% dec
		var r = Math.random();
		return r > .9 ? ch : (r > .45 ? toHexEntity(ch) : toDecimalEntity(ch));
	});

	// The visible text is the same encoded string without the "mailto:"
	// prefix, i.e. everything after the first (unencoded) colon.
	var visible = encoded.substring(encoded.indexOf(":") + 1);

	return "<a href=\"" + encoded + "\">" + visible + "</a>";
};
1247
1248
var _UnescapeSpecialChars = function(text) {
//
// Swap back in all the special characters we've hidden: every ~E<n>E
// placeholder is replaced by the character with char code <n>.
//
	return text.replace(/~E(\d+)E/g,
		function(wholeMatch,m1) {
			// Explicit radix: the digits are always a decimal char code.
			return String.fromCharCode(parseInt(m1, 10));
		}
	);
};
1261
1262
var _Outdent = function(text) {
//
// Strip one level of indentation (one tab, or up to four spaces --
// attacklab: g_tab_width) from the start of every line.
//

	// attacklab: the indentation is first replaced by "~0", which is then
	// removed in a second pass, to hack around a Konqueror 3.5.4 bug:
	// "----------bug".replace(/^-/g,"") == "bug"
	var marked = text.replace(/^(\t|[ ]{1,4})/gm,"~0");

	return marked.replace(/~0/g,"");
};
1278
var _Detab = function (text) {
	// Expand every tab to the number of spaces (1 to 4) that reaches the
	// next multiple-of-four column.
	if (!/\t/.test(text)) {
		return text;
	}

	// Offset just after the most recent newline or tab. A tab always ends on
	// a tab stop, so columns measured from there are correct modulo 4 even
	// though the offsets refer to the unexpanded string.
	var anchor = 0;

	return text.replace(/[\n\t]/g, function (ch, offset) {
		var column = offset - anchor;
		anchor = offset + 1;
		if (ch === "\n") {
			return ch;
		}
		return "    ".substring(column % 4);
	});
};
1297
1298 //
1299 //  attacklab: Utility functions
1300 //
1301
1302
var escapeCharacters = function(text, charsToEscape, afterBackslash) {
//
//  Replace every occurrence of one of the given characters by the
//  placeholder that escapeCharacters_callback produces for it.
//
//  Params:
//    text           - string to process
//    charsToEscape  - string listing the characters to replace; each
//                     character is taken literally
//    afterBackslash - when truthy, only occurrences preceded by a backslash
//                     are replaced, and the backslash is consumed with them
//
//  Returns: the processed string.
//

	// First we have to escape the characters that are special inside a
	// character class, so that we can build one out of the list. Besides
	// [ ] and \ this covers "-" (which would otherwise form a range with its
	// neighbours) and "^" (which would negate the class when listed first).
	var classBody = charsToEscape.replace(/([\[\]\\\-\^])/g,"\\$1");
	var regexString = "([" + classBody + "])";

	if (afterBackslash) {
		regexString = "\\\\" + regexString;
	}

	return text.replace(new RegExp(regexString,"g"), escapeCharacters_callback);
};
1317
1318
var escapeCharacters_callback = function(wholeMatch,m1) {
	// Replacement callback: hide the captured character behind a
	// ~E<char code>E placeholder.
	return "~E" + m1.charCodeAt(0) + "E";
};
1323
1324 } // end of Attacklab.showdown.converter
1325
1326
// Backwards compatibility: version 0.9 exposed this library under the
// Showdown namespace instead of Attacklab.showdown. The old name is
// deprecated, but we'll support it for now.
var Showdown = Attacklab.showdown;

// If a fileLoaded hook is present, tell it that this file has been loaded.
if (Attacklab.fileLoaded) {
	Attacklab.fileLoaded("showdown.js");
}