Source for file lib_markdown.php

Documentation is available at lib_markdown.php

  1. <?php
  2.  
  3. #
  4. # Markdown - A text-to-HTML conversion tool for web writers
  5. #
  6. # Copyright (c) 2004 John Gruber
  7. # <http://daringfireball.net/projects/markdown/>
  8. #
  9. # Copyright (c) 2004 Michel Fortin - PHP Port
  10. # <http://www.michelf.com/projects/php-markdown/>
  11. #
  12.  
  13.  
  14.  
  15.  
  16.  
  #
  # Shared state. The variables are declared global at file level so that
  # they are bound to the global scope even when this file is included
  # from inside a function.
  #
  global $MarkdownPHPVersion,
         $MarkdownSyntaxVersion,
         $md_empty_element_suffix,
         $md_tab_width,
         $md_nested_brackets_depth,
         $md_nested_brackets,
         $md_escape_table,
         $md_backslash_escape_table,
         $md_list_level;

  # Release identifiers.
  $MarkdownPHPVersion    = '1.0.1'; # Fri 17 Dec 2004
  $MarkdownSyntaxVersion = '1.0.1'; # Sun 12 Dec 2004

  #
  # Global default settings:
  #
  $md_tab_width            = 4;
  $md_empty_element_suffix = " />"; # Change to ">" for HTML output
  31.  
  32.  
  33. # -- WordPress Plugin Interface -----------------------------------------------
  34. /*
  35. Plugin Name: Markdown
  36. Plugin URI: http://www.michelf.com/projects/php-markdown/
  37. Description: <a href="http://daringfireball.net/projects/markdown/syntax">Markdown syntax</a> allows you to write using an easy-to-read, easy-to-write plain text format. Based on the original Perl version by <a href="http://daringfireball.net/">John Gruber</a>. <a href="http://www.michelf.com/projects/php-markdown/">More...</a>
  38. Version: 1.0.1
  39. Author: Michel Fortin
  40. Author URI: http://www.michelf.com/
  41. */
  if (isset($wp_version)) {
      # Hooks whose output should be rendered by Markdown().
      $md_wp_hooks = array('the_content', 'the_excerpt', 'comment_text');

      # Remove default WordPress auto-paragraph filter from every hook first.
      foreach ($md_wp_hooks as $md_wp_hook) {
          remove_filter($md_wp_hook, 'wpautop');
      }

      # Then add the Markdown filter with priority 6 (same as Textile).
      foreach ($md_wp_hooks as $md_wp_hook) {
          add_filter($md_wp_hook, 'Markdown', 6);
      }

      # Do not leave the loop helpers behind in the including scope.
      unset($md_wp_hooks, $md_wp_hook);
  }
  52.  
  53.  
  54. # -- bBlog Plugin Info --------------------------------------------------------
  function identify_modifier_markdown() {
      #
      # Returns the descriptor array for the bBlog plugin interface.
      # The 'version' entry is read from the $MarkdownPHPVersion global;
      # every other entry is a fixed string.
      #
      global $MarkdownPHPVersion;

      $info = array();
      $info['name']        = 'markdown';
      $info['type']        = 'modifier';
      $info['nicename']    = 'Markdown';
      $info['description'] = 'A text-to-HTML conversion tool for web writers';
      $info['authors']     = 'Michel Fortin and John Gruber';
      $info['licence']     = 'GPL';
      $info['version']     = $MarkdownPHPVersion;
      $info['help']        = '<a href="http://daringfireball.net/projects/markdown/syntax">Markdown syntax</a> allows you to write using an easy-to-read, easy-to-write plain text format. Based on the original Perl version by <a href="http://daringfireball.net/">John Gruber</a>. <a href="http://www.michelf.com/projects/php-markdown/">More...</a>';

      return $info;
  }
  68.  
  69. # -- Smarty Modifier Interface ------------------------------------------------
  function smarty_modifier_markdown($text) {
      #
      # Smarty modifier entry point: hands $text to Markdown() and returns
      # whatever Markdown() produces.
      #
      $html = Markdown($text);
      return $html;
  }
  73.  
  74. # -- Textile Compatibility Mode -----------------------------------------------
  75. # Rename this file to "classTextile.php" and it can replace Textile anywhere.
  76.  
  if (strcasecmp(substr(__FILE__, -16), "classTextile.php") === 0) {
      # This file has been installed under Textile's file name.

      # Try to include PHP SmartyPants. Should be in the same directory.
      # A missing file is deliberately tolerated (best effort).
      @include_once 'smartypants.php';

      # Stand-in for the Textile class. It calls Markdown instead.
      class Textile {
          #
          # Mirrors Textile's TextileThis() signature. Markdown() is applied
          # only when both $lite and $encode are empty; SmartyPants() is
          # applied afterwards whenever that function exists. $noimage and
          # $strict are accepted but not used.
          #
          function TextileThis($text, $lite='', $encode='', $noimage='', $strict='') {
              $full_conversion = ($lite == '' && $encode == '');
              if ($full_conversion) {
                  $text = Markdown($text);
              }
              if (function_exists('SmartyPants')) {
                  $text = SmartyPants($text);
              }
              return $text;
          }
      }
  }
  89.  
  90.  
  91.  
  92. #
  93. # Globals:
  94. #
  95.  
  # Regex fragment matching balanced [brackets].
  # PHP needs an explicit maximum bracket depth here, so the alternation
  # is unrolled $md_nested_brackets_depth times: that many opening halves
  # followed by the same number of closing halves.

  $md_nested_brackets_depth = 6;
  $md_nested_brackets =
      str_repeat('(?>[^\[\]]+|\[', $md_nested_brackets_depth)
      . str_repeat('\])*', $md_nested_brackets_depth);
  103.  
  104. # Table of hash values for escaped characters:
  # Table of hash values for escaped characters: each special character
  # maps to its MD5 checksum.
  $md_escape_table = array(
      "\\" => md5("\\"),
      "`" => md5("`"),
      "*" => md5("*"),
      "_" => md5("_"),
      "{" => md5("{"),
      "}" => md5("}"),
      "[" => md5("["),
      "]" => md5("]"),
      "(" => md5("("),
      ")" => md5(")"),
      ">" => md5(">"),
      "#" => md5("#"),
      "+" => md5("+"),
      "-" => md5("-"),
      "." => md5("."),
      "!" => md5("!")
  );

  # Build a second table with the same hash values, keyed by the
  # backslash-escaped form of each character (e.g. "\*" instead of "*").
  # The table is explicitly reset to an empty array first, so that no
  # stale entries survive and the variable is never left undefined.
  $md_backslash_escape_table = array();
  foreach ($md_escape_table as $key => $char)
      $md_backslash_escape_table["\\$key"] = $char;
  127.  
  128.  
  function Markdown($text) {
      #
      # Main entry point: runs the whole conversion pipeline over $text and
      # returns the result with one trailing newline appended. The order of
      # the steps below is essential.
      #
      global $md_urls, $md_titles, $md_html_blocks;

      # Start from empty hashes on every call. Otherwise data from a
      # previous article would leak into this one when a single page
      # renders several articles (e.g. an index of the N most recent).
      $md_urls = $md_titles = $md_html_blocks = array();

      # Standardize line endings: DOS first, then old Mac, both to Unix.
      $text = str_replace("\r\n", "\n", $text);
      $text = str_replace("\r", "\n", $text);

      # Guarantee a couple of trailing newlines, then turn tabs into spaces.
      $text = _Detab($text . "\n\n");

      # Empty out lines made only of spaces and tabs, so that later
      # patterns can match consecutive blank lines with a plain /\n+/.
      $text = preg_replace('/^[ \t]+$/m', '', $text);

      # Turn block-level HTML blocks into hash entries.
      $text = _HashHTMLBlocks($text);

      # Strip link definitions, store them in the hashes.
      $text = _StripLinkDefinitions($text);

      # Block-level conversion, then restore the escaped characters.
      $text = _UnescapeSpecialChars(_RunBlockGamut($text));

      return $text . "\n";
  }
  173.  
  174.  
  function _StripLinkDefinitions($text) {
      #
      # Removes every link definition from $text and returns what is left.
      # Each match is handed to _StripLinkDefinitions_callback(), which
      # records the URL and the optional title.
      #
      global $md_tab_width;

      # A definition may be indented by fewer spaces than one tab stop.
      $max_indent = $md_tab_width - 1;

      # Link defs are in the form: ^[id]: url "optional title"
      $definition = '{
          ^[ ]{0,'.$max_indent.'}\[(.+)\]: # id = $1
          [ \t]*
          \n? # maybe *one* newline
          [ \t]*
          <?(\S+?)>? # url = $2
          [ \t]*
          \n? # maybe one newline
          [ \t]*
          (?:
          (?<=\s) # lookbehind for whitespace
          ["(]
          (.+?) # title = $3
          [")]
          [ \t]*
          )? # title is optional
          (?:\n+|\Z)
          }xm';

      return preg_replace_callback(
          $definition,
          '_StripLinkDefinitions_callback',
          $text);
  }
  function _StripLinkDefinitions_callback($matches) {
      #
      # Callback for _StripLinkDefinitions(). Stores the URL ($matches[2])
      # and, when present, the title ($matches[3]) under the lower-cased
      # link id ($matches[1]). Returns the empty string that replaces the
      # matched definition.
      #
      global $md_urls, $md_titles;

      $id = strtolower($matches[1]);
      $md_urls[$id] = _EncodeAmpsAndAngles($matches[2]);

      if (!isset($matches[3])) {
          return '';
      }

      # Double quotes in the title become &quot; entities.
      $md_titles[$id] = str_replace('"', '&quot;', $matches[3]);
      return '';
  }
  214.  
  215.  
  function _HashHTMLBlocks($text) {
      #
      # Runs four successive passes over $text; every chunk of block-level
      # HTML that a pass matches is handed to _HashHTMLBlocks_callback(),
      # whose return value replaces it. Returns the resulting text.
      #
      global $md_tab_width;
      $max_indent = $md_tab_width - 1;
      $hasher = '_HashHTMLBlocks_callback';

      # Only block-level tags (headers, lists, tables, ...) are handled
      # here, because "paragraphs" wrapped in non-block-level tags such as
      # anchors, phrase emphasis and spans must still get <p>s around them.
      # The tag lists are hard-coded; the first pass additionally accepts
      # ins and del.
      $block_tags_b = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|'.
                      'script|noscript|form|fieldset|iframe|math';
      $block_tags_a = $block_tags_b.'|ins|del';

      # Pass 1: nested blocks, e.g.:
      #   <div>
      #       <div>
      #       tags for inner block must be indented.
      #       </div>
      #   </div>
      #
      # The outermost tags must start at the left margin for this to match,
      # and the inner nested divs must be indented. This pass has to come
      # before the more liberal one below, which would start at the first
      # `<div>` and stop at the first `</div>`.
      $nested_blocks = "{
          ( # save in $1
          ^ # start of line (with /m)
          <($block_tags_a) # start tag = $2
          \\b # word break
          (.*\\n)*? # any number of lines, minimally matching
          </\\2> # the matching end tag
          [ \\t]* # trailing spaces/tabs
          (?=\\n+|\\Z) # followed by a newline or end of document
          )
          }xm";
      $text = preg_replace_callback($nested_blocks, $hasher, $text);

      #
      # Pass 2: match more liberally, simply from `\n<tag>` to `</tag>\n`
      #
      $liberal_blocks = "{
          ( # save in $1
          ^ # start of line (with /m)
          <($block_tags_b) # start tag = $2
          \\b # word break
          (.*\\n)*? # any number of lines, minimally matching
          .*</\\2> # the matching end tag
          [ \\t]* # trailing spaces/tabs
          (?=\\n+|\\Z) # followed by a newline or end of document
          )
          }xm";
      $text = preg_replace_callback($liberal_blocks, $hasher, $text);

      # Pass 3: special case just for <hr />. A dedicated pattern was
      # easier than making the patterns above more complicated.
      $standalone_hr = '{
          (?:
          (?<=\n\n) # Starting after a blank line
          | # or
          \A\n? # the beginning of the doc
          )
          ( # save in $1
          [ ]{0,'.$max_indent.'}
          <(hr) # start tag = $2
          \b # word break
          ([^<>])*? #
          /?> # the matching end tag
          [ \t]*
          (?=\n{2,}|\Z) # followed by a blank line or end of document
          )
          }x';
      $text = preg_replace_callback($standalone_hr, $hasher, $text);

      # Pass 4: special case for standalone HTML comments.
      $standalone_comment = '{
          (?:
          (?<=\n\n) # Starting after a blank line
          | # or
          \A\n? # the beginning of the doc
          )
          ( # save in $1
          [ ]{0,'.$max_indent.'}
          (?s:
          <!
          (--.*?--\s*)+
          >
          )
          [ \t]*
          (?=\n{2,}|\Z) # followed by a blank line or end of document
          )
          }x';
      $text = preg_replace_callback($standalone_comment, $hasher, $text);

      return $text;
  }
  function _HashHTMLBlocks_callback($matches) {
      #
      # Callback for _HashHTMLBlocks(). Files the matched block
      # ($matches[1]) in the $md_html_blocks global under its MD5 checksum
      # and returns that checksum with a blank line on either side; the
      # returned string replaces the block in the text.
      #
      global $md_html_blocks;

      $block = $matches[1];
      $hash = md5($block);
      $md_html_blocks[$hash] = $block;

      return "\n\n" . $hash . "\n\n";
  }
  324.  
  325.  
  function _RunBlockGamut($text) {
      #
      # Applies, in order, every transformation that produces block-level
      # markup (headers, horizontal rules, lists, code blocks, block
      # quotes, paragraphs) and returns the converted text.
      #
      global $md_empty_element_suffix;

      $text = _DoHeaders($text);

      # Horizontal rules: a line of three or more *, - or _ markers.
      $rule_patterns = array(
          '{^[ ]{0,2}([ ]?\*[ ]?){3,}[ \t]*$}mx',
          '{^[ ]{0,2}([ ]? -[ ]?){3,}[ \t]*$}mx',
          '{^[ ]{0,2}([ ]? _[ ]?){3,}[ \t]*$}mx'
      );
      $rule_markup = "\n<hr" . $md_empty_element_suffix . "\n";
      $text = preg_replace($rule_patterns, $rule_markup, $text);

      $text = _DoBlockQuotes(_DoCodeBlocks(_DoLists($text)));

      # Markdown() already ran _HashHTMLBlocks(), but that was to escape
      # raw HTML in the original Markdown source. This second run escapes
      # the markup generated above, so that no <p> tags get wrapped around
      # block-level tags.
      $text = _HashHTMLBlocks($text);

      return _FormParagraphs($text);
  }
  359.  
  360.  
  function _RunSpanGamut($text) {
      #
      # Applies, in order, every transformation that happens *inside*
      # block-level elements such as paragraphs, headers and list items,
      # and returns the converted text.
      #
      global $md_empty_element_suffix;

      $text = _EscapeSpecialChars(_DoCodeSpans($text));

      # Images go before anchors, because ![foo][f] looks like an anchor.
      $text = _DoAnchors(_DoImages($text));

      # Make links out of things like `<http://example.com/>`.
      # This has to follow _DoAnchors(), because < and > are valid
      # delimiters in inline links like [this](<url>).
      $text = _DoAutoLinks($text);

      # Fix unencoded ampersands and <'s, then handle emphasis.
      $text = _DoItalicsAndBold(_EncodeAmpsAndAngles($text));

      # Hard breaks: two or more spaces right before a newline.
      $break = "<br" . $md_empty_element_suffix . "\n";
      return preg_replace('/ {2,}\n/', $break, $text);
  }
  392.  
  393.  
  function _EscapeSpecialChars($text) {
      #
      # Splits $text with _TokenizeHTML() and rebuilds it token by token.
      # Each token is used as a pair: element 0 is compared against 'tag',
      # element 1 is the token's text.
      #
      global $md_escape_table;

      $rebuilt = '';
      foreach (_TokenizeHTML($text) as $token) {
          if ($token[0] == 'tag') {
              # Within tags, encode * and _ so they don't conflict with
              # their use in Markdown for italics and strong. Each such
              # character is swapped for its MD5 checksum from the escape
              # table; likely overkill, but it should keep us from
              # colliding with the escape values by accident.
              $rebuilt .= str_replace(
                  array('*', '_'),
                  array($md_escape_table['*'], $md_escape_table['_']),
                  $token[1]);
              continue;
          }

          # Any other token only gets its backslash escapes encoded.
          $rebuilt .= _EncodeBackslashEscapes($token[1]);
      }
      return $rebuilt;
  }
  422.  
  423.  
  function _DoAnchors($text) {
      #
      # Turn Markdown link shortcuts into XHTML <a> tags. Reference-style
      # links are processed first, inline-style links second; each match
      # is rendered by the corresponding callback.
      #
      global $md_nested_brackets;

      #
      # Reference-style links: [link text] [id]
      #
      $reference_link = "{
          ( # wrap whole match in $1
          \\[
          ($md_nested_brackets) # link text = $2
          \\]

          [ ]? # one optional space
          (?:\\n[ ]*)? # one optional newline followed by spaces

          \\[
          (.*?) # id = $3
          \\]
          )
          }xs";

      #
      # Inline-style links: [link text](url "optional title")
      #
      $inline_link = "{
          ( # wrap whole match in $1
          \\[
          ($md_nested_brackets) # link text = $2
          \\]
          \\( # literal paren
          [ \\t]*
          <?(.*?)>? # href = $3
          [ \\t]*
          ( # $4
          (['\"]) # quote char = $5
          (.*?) # Title = $6
          \\5 # matching quote
          )? # title is optional
          \\)
          )
          }xs";

      $text = preg_replace_callback(
          $reference_link, '_DoAnchors_reference_callback', $text);

      return preg_replace_callback(
          $inline_link, '_DoAnchors_inline_callback', $text);
  }
  function _DoAnchors_reference_callback($matches) {
      #
      # Renders one reference-style link. $matches[1] is the whole match,
      # $matches[2] the link text and $matches[3] the link id. Returns the
      # <a> tag, or the untouched match when the id has no stored URL.
      #
      global $md_urls, $md_titles, $md_escape_table;

      $label = $matches[2];
      $id = strtolower($matches[3]);
      if ($id == "") {
          # Shortcut links like [this][] use the link text as the id.
          $id = strtolower($label);
      }

      # Unknown id: hand the text back exactly as it was matched.
      if (!isset($md_urls[$id])) {
          return $matches[1];
      }

      # * and _ are swapped for their hashes so they cannot be taken for
      # italics/bold markers later on.
      $markers = array('*', '_');
      $hashes  = array($md_escape_table['*'], $md_escape_table['_']);

      $href = str_replace($markers, $hashes, $md_urls[$id]);
      $tag = "<a href=\"$href\"";

      if (isset($md_titles[$id])) {
          $tip = str_replace($markers, $hashes, $md_titles[$id]);
          $tag .= " title=\"$tip\"";
      }

      return $tag . ">$label</a>";
  }
  function _DoAnchors_inline_callback($matches) {
      #
      # Renders one inline-style link. $matches[2] is the link text,
      # $matches[3] the URL and $matches[6], when the match has one, the
      # title. Returns the resulting <a> tag.
      #
      global $md_escape_table;

      # * and _ are swapped for their hashes so they cannot be taken for
      # italics/bold markers later on.
      $markers = array('*', '_');
      $hashes  = array($md_escape_table['*'], $md_escape_table['_']);

      $href = str_replace($markers, $hashes, $matches[3]);
      $tag = "<a href=\"$href\"";

      if (isset($matches[6])) {
          # Quotes become entities first, then the markers are hashed.
          $tip = str_replace('"', '&quot;', $matches[6]);
          $tip = str_replace($markers, $hashes, $tip);
          $tag .= " title=\"$tip\"";
      }

      return $tag . ">" . $matches[2] . "</a>";
  }
  526.  
  527.  
  function _DoImages($text) {
      #
      # Turn Markdown image shortcuts into <img> tags. Reference-style
      # images are processed first, inline images second; each match is
      # rendered by the corresponding callback.
      #

      #
      # Reference-style labeled images: ![alt text][id]
      #
      $reference_image = '{
          ( # wrap whole match in $1
          !\[
          (.*?) # alt text = $2
          \]

          [ ]? # one optional space
          (?:\n[ ]*)? # one optional newline followed by spaces

          \[
          (.*?) # id = $3
          \]

          )
          }xs';

      #
      # Inline images: ![alt text](url "optional title")
      # (the callback takes care of encoding * and _)
      #
      $inline_image = "{
          ( # wrap whole match in $1
          !\\[
          (.*?) # alt text = $2
          \\]
          \\( # literal paren
          [ \\t]*
          <?(\S+?)>? # src url = $3
          [ \\t]*
          ( # $4
          (['\"]) # quote char = $5
          (.*?) # title = $6
          \\5 # matching quote
          [ \\t]*
          )? # title is optional
          \\)
          )
          }xs";

      $text = preg_replace_callback(
          $reference_image, '_DoImages_reference_callback', $text);

      return preg_replace_callback(
          $inline_image, '_DoImages_inline_callback', $text);
  }
  function _DoImages_reference_callback($matches) {
      #
      # Renders one reference-style image. $matches[1] is the whole match,
      # $matches[2] the alt text and $matches[3] the link id. Returns the
      # <img> tag, or the untouched match when the id has no stored URL.
      #
      global $md_urls, $md_titles, $md_empty_element_suffix, $md_escape_table;

      $alt = $matches[2];
      $id = strtolower($matches[3]);
      if ($id == "") {
          # Shortcut form ![this][]: the raw alt text doubles as the id.
          $id = strtolower($alt);
      }

      # If there's no such link ID, leave intact:
      if (!isset($md_urls[$id])) {
          return $matches[1];
      }

      $alt = str_replace('"', '&quot;', $alt);

      # * and _ are swapped for their hashes so they cannot be taken for
      # italics/bold markers later on.
      $markers = array('*', '_');
      $hashes  = array($md_escape_table['*'], $md_escape_table['_']);

      $src = str_replace($markers, $hashes, $md_urls[$id]);
      $tag = "<img src=\"$src\" alt=\"$alt\"";

      if (isset($md_titles[$id])) {
          $tip = str_replace($markers, $hashes, $md_titles[$id]);
          $tag .= " title=\"$tip\"";
      }

      return $tag . $md_empty_element_suffix;
  }
  function _DoImages_inline_callback($matches) {
      #
      # Renders one inline image: ![alt text](url "optional title").
      #
      # $matches[2] is the alt text, $matches[3] the source URL and
      # $matches[6] the title; the title entry is only present when the
      # image was written with one. Returns the resulting <img> tag,
      # closed with $md_empty_element_suffix.
      #
      # The title attribute is emitted only when a title was actually
      # matched, in line with _DoImages_reference_callback() and
      # _DoAnchors_inline_callback(). Previously the title was pre-set to
      # an empty string, so the isset() guard could never fail and every
      # image without a title still carried title="".
      #
      global $md_empty_element_suffix, $md_escape_table;

      # * and _ are swapped for their hashes so they cannot be taken for
      # italics/bold markers later on.
      $markers = array('*', '_');
      $hashes  = array($md_escape_table['*'], $md_escape_table['_']);

      $alt_text = str_replace('"', '&quot;', $matches[2]);
      $url = str_replace($markers, $hashes, $matches[3]);

      $result = "<img src=\"$url\" alt=\"$alt_text\"";
      if (isset($matches[6])) {
          # Quotes become entities first, then the markers are hashed.
          $title = str_replace('"', '&quot;', $matches[6]);
          $title = str_replace($markers, $hashes, $title);
          $result .= " title=\"$title\"";
      }
      $result .= $md_empty_element_suffix;

      return $result;
  }
  639.  
  640.  
  function _DoHeaders($text) {
      #
      # Converts Setext-style and atx-style headers in $text into
      # <h1>..<h6> elements and returns the converted text. The header
      # text itself is run through _RunSpanGamut().
      #
      # The replacements go through preg_replace_callback() with named
      # callbacks rather than the /e (eval) pattern modifier: /e is
      # deprecated as of PHP 5.5 and removed in PHP 7, and it evaluated
      # code assembled from the document text. Because nothing is eval'ed
      # any more, the matched text needs no quote unslashing.
      #

      # Setext-style headers:
      #   Header 1
      #   ========
      #
      #   Header 2
      #   --------
      #
      # Level 1 is converted over the whole text before level 2.
      $text = preg_replace_callback(
          '{ ^(.+)[ \t]*\n=+[ \t]*\n+ }mx',
          '_DoHeaders_callback_setext_h1', $text);
      $text = preg_replace_callback(
          '{ ^(.+)[ \t]*\n-+[ \t]*\n+ }mx',
          '_DoHeaders_callback_setext_h2', $text);

      # atx-style headers:
      #   # Header 1
      #   ## Header 2
      #   ## Header 2 with closing hashes ##
      #   ...
      #   ###### Header 6
      #
      $text = preg_replace_callback("{
          ^(\\#{1,6}) # $1 = string of #'s
          [ \\t]*
          (.+?) # $2 = Header text
          [ \\t]*
          \\#* # optional closing #'s (not counted)
          \\n+
          }xm",
          '_DoHeaders_callback_atx', $text);

      return $text;
  }
  function _DoHeaders_callback_setext_h1($matches) {
      # Renders a Setext header underlined with ='s; $matches[1] is its text.
      return "<h1>" . _RunSpanGamut($matches[1]) . "</h1>\n\n";
  }
  function _DoHeaders_callback_setext_h2($matches) {
      # Renders a Setext header underlined with -'s; $matches[1] is its text.
      return "<h2>" . _RunSpanGamut($matches[1]) . "</h2>\n\n";
  }
  function _DoHeaders_callback_atx($matches) {
      # Renders an atx header: the number of leading #'s in $matches[1]
      # gives the level, $matches[2] is the header text.
      $level = strlen($matches[1]);
      return "<h$level>" . _RunSpanGamut($matches[2]) . "</h$level>\n\n";
  }
  676.  
  677.  
  function _DoLists($text) {
      #
      # Form HTML ordered (numbered) and unordered (bulleted) lists.
      # Every whole list found in $text is rendered by _DoLists_callback().
      #
      global $md_tab_width, $md_list_level;
      $max_indent = $md_tab_width - 1;

      # Re-usable patterns to match list item bullets and number markers:
      $marker_ul = '[*+-]';
      $marker_ol = '\d+[.]';
      $marker_any = "(?:$marker_ul|$marker_ol)";

      # Re-usable pattern to match any entire ul or ol list:
      $whole_list = '
          ( # $1 = whole list
          ( # $2
          [ ]{0,'.$max_indent.'}
          ('.$marker_any.') # $3 = first list item marker
          [ \t]+
          )
          (?s:.+?)
          ( # $4
          \z
          |
          \n{2,}
          (?=\S)
          (?! # Negative lookahead for another list item marker
          [ \t]*
          '.$marker_any.'[ \t]+
          )
          )
          )
          '; // mx

      # We use a different prefix before nested lists than top-level lists.
      # See extended comment in _ProcessListItems().
      # Nested (non-zero list level): a list may start on any line.
      # Top level: only after a blank line or at the start of the text.
      $list_start = $md_list_level ? '^' : '(?:(?<=\n\n)|\A\n?)';

      return preg_replace_callback('{
          '.$list_start.'
          '.$whole_list.'
          }mx',
          '_DoLists_callback', $text);
  }
  function _DoLists_callback($matches) {
      #
      # Renders one whole list. $matches[1] is the list text, $matches[3]
      # the marker of its first item. Returns the items, as produced by
      # _ProcessListItems(), wrapped in <ul> or <ol>.
      #

      # Re-usable patterns to match list item bullets and number markers:
      $marker_ul = '[*+-]';
      $marker_ol = '\d+[.]';
      $marker_any = "(?:$marker_ul|$marker_ol)";

      # A bullet as first marker makes the list unordered, anything else
      # makes it ordered.
      if (preg_match("/$marker_ul/", $matches[3])) {
          $tag = "ul";
      } else {
          $tag = "ol";
      }

      # Turn double returns into triple returns, so that we can make a
      # paragraph for the last item in a list, if necessary:
      $body = preg_replace("/\n{2,}/", "\n\n\n", $matches[1]);
      $items = _ProcessListItems($body, $marker_any);

      return "<$tag>\n" . $items . "</$tag>\n";
  }
  745.  
  746.  
  function _ProcessListItems($list_str, $marker_any) {
      #
      # Process the contents of a single ordered or unordered list,
      # splitting it into individual list items; each item is rendered by
      # _ProcessListItems_callback(). $marker_any is the regex fragment
      # that matches a list item marker.
      #
      global $md_list_level;

      # The $md_list_level global keeps track of when we're inside a list.
      # It is incremented when a list is entered and decremented when it
      # is left; zero means we're not in a list anymore.
      #
      # This matters because, outside of any list, something like this:
      #
      #   I recommend upgrading to version
      #   8. Oops, now this line is treated
      #   as a sub-list.
      #
      # should stay a single paragraph, even though its second line starts
      # with a digit-period-space sequence. Inside a list (or sub-list),
      # the very same line is treated as the start of a sub-list. What a
      # kludge, huh? This is an aspect of Markdown's syntax that's hard to
      # parse perfectly without resorting to mind-reading. Perhaps the
      # solution is to change the syntax rules such that sub-lists must
      # start with a starting cardinal number; e.g. "1." or "a.".
      ++$md_list_level;

      # trim trailing blank lines:
      $trimmed = preg_replace("/\n{2,}\\z/", "\n", $list_str);

      $item_pattern = '{
          (\n)? # leading line = $1
          (^[ \t]*) # leading whitespace = $2
          ('.$marker_any.') [ \t]+ # list marker = $3
          ((?s:.+?) # list item text = $4
          (\n{1,2}))
          (?= \n* (\z | \2 ('.$marker_any.') [ \t]+))
          }xm';
      $items = preg_replace_callback(
          $item_pattern, '_ProcessListItems_callback', $trimmed);

      --$md_list_level;
      return $items;
  }
  function _ProcessListItems_callback($matches) {
      #
      # Renders one list item as an <li> element. $matches[4] is the item
      # text; $matches[1] is the newline captured right before the item,
      # when there was one.
      #
      $body = $matches[4];

      $has_leading_line = isset($matches[1]) && $matches[1];
      $is_block_item = $has_leading_line || preg_match('/\n{2,}/', $body);

      if ($is_block_item) {
          # Preceded by a newline or containing a blank line: the item
          # gets the full block-level treatment.
          $body = _RunBlockGamut(_Outdent($body));
      } else {
          # Recursion for sub-lists, then span-level markup only:
          $body = _DoLists(_Outdent($body));
          $body = rtrim($body, "\n");
          $body = _RunSpanGamut($body);
      }

      return "<li>" . $body . "</li>\n";
  }
  808.  
  809.  
  810. function _DoCodeBlocks($text) {
  811. #
  812. # Process Markdown `<pre><code>` blocks.
  813. #
  814. global $md_tab_width;
  815. $text = preg_replace_callback("{
  816. (?:\\n\\n|\\A)
  817. ( # $1 = the code block -- one or more lines, starting with a space/tab
  818. (?:
  819. (?:[ ]\{$md_tab_width} | \\t) # Lines must start with a tab or a tab-width of spaces
  820. .*\\n+
  821. )+
  822. )
  823. ((?=^[ ]{0,$md_tab_width}\\S)|\\Z) # Lookahead for non-