John Koleszar | 0ea50ce | 2010-05-18 11:58:33 -0400 | [diff] [blame] | 1 | #!/usr/bin/env php |
| 2 | /* |
John Koleszar | c2140b8 | 2010-09-09 08:16:39 -0400 | [diff] [blame] | 3 | * Copyright (c) 2010 The WebM project authors. All Rights Reserved. |
John Koleszar | 0ea50ce | 2010-05-18 11:58:33 -0400 | [diff] [blame] | 4 | * |
John Koleszar | 94c52e4 | 2010-06-18 12:39:21 -0400 | [diff] [blame] | 5 | * Use of this source code is governed by a BSD-style license |
John Koleszar | 09202d8 | 2010-06-04 16:19:40 -0400 | [diff] [blame] | 6 | * that can be found in the LICENSE file in the root of the source |
| 7 | * tree. An additional intellectual property rights grant can be found |
John Koleszar | 94c52e4 | 2010-06-18 12:39:21 -0400 | [diff] [blame] | 8 | * in the file PATENTS. All contributing project authors may |
John Koleszar | 09202d8 | 2010-06-04 16:19:40 -0400 | [diff] [blame] | 9 | * be found in the AUTHORS file in the root of the source tree. |
John Koleszar | 0ea50ce | 2010-05-18 11:58:33 -0400 | [diff] [blame] | 10 | */ |
| 11 | |
| 12 | |
| 13 | <?php |
| 14 | |
| 15 | /* This script converts markdown to doxygen htmlonly syntax, nesting the |
| 16 | * content inside a \page. It expects input on stdin and outputs on stdout. |
| 17 | * |
| 18 | * Usage: gen_example_doxy.php <page_identifier> "<page description>" |
| 19 | */ |
| 20 | |
| 21 | |
// GeSHi language definition files live under the directory this script was
// invoked from (taken from $argv[0]).
$geshi_path = dirname($argv[0]) . "/includes/geshi/geshi/";

// Placeholder used to mark the position of a code block while it is rewritten.
$tmp_token = '<!-- I wanna rock you, Chaka Khan -->';

// Required transformer libraries, each paired with the message to abort with
// when it cannot be loaded. They are loaded in the order listed.
$prereq_table = array(
    'includes/PHP-Markdown-Extra-1.2.3/markdown.php' => "Cannot load Markdown transformer.\n",
    'includes/PHP-SmartyPants-1.5.1e/smartypants.php' => "Cannot load SmartyPants transformer.\n",
    'includes/geshi/geshi.php' => "Cannot load GeSHi transformer.\n",
);
foreach ($prereq_table as $prereq_path => $prereq_error) {
    if (!include_once($prereq_path)) {
        die($prereq_error);
    }
}
unset($prereq_table, $prereq_path, $prereq_error); // Clean up

// Candidates for further processing stages, not wired in yet:
// ASCIIMathPHP?
// HTML::Toc?
// Tidy?
// Prince?
| 36 | |
/**
 * Generate XHTML body
 *
 * The whole Markdown document is read from standard input and then rewritten
 * in place, one transformation at a time, in $page_body.
 */

$page_body = file_get_contents('php://stdin');

// Expressions delimited by double square brackets are handed to
// ASCIIMathPHPCallback, and the markup it returns is then flattened by
// fix_asciiMath().
$regexp = '/\[\[(.*?)\]\]/';
$page_body = fix_asciiMath(
    preg_replace_callback($regexp, 'ASCIIMathPHPCallback', $page_body)
);

// A <math> element standing alone on its own line is a block equation.
// Markdown will not wrap it in a paragraph, so do that here.
$page_body = preg_replace(
    '/\n(<math.*<\/math>)\n/',
    '<p class="eq_para">$1</p>',
    $page_body
);

// Convert the Markdown text to HTML
$page_body = Markdown($page_body);
| 56 | |
// Preprocess code blocks
//   Markdown has already entity-encoded the contents of every
//   <pre><code> block, which GeSHi does not anticipate. Each block is
//   decoded and re-wrapped in <div class="codeblock"> for the GeSHi pass.
//
//   The block is spliced in by byte offset with substr_replace() rather than
//   passed to preg_replace() as a replacement string: source code routinely
//   contains sequences such as \0, \1 or $1, which preg_replace() would
//   expand as backreferences and thereby corrupt the code.
$regexp = '|<pre><code>(.*?)<\/code><\/pre>|si';
$offset = 0;
while (preg_match($regexp, $page_body, $matches, PREG_OFFSET_CAPTURE, $offset) > 0)
{
    // With PREG_OFFSET_CAPTURE each entry is array(text, byte offset)
    $block_start  = $matches[0][1];
    $block_length = strlen($matches[0][0]);
    // Un-encode ampersand entities and wrap the result
    $block_new = '<div class="codeblock">'.decode_markdown($matches[1][0]).'</div>';
    $page_body = substr_replace($page_body, $block_new, $block_start, $block_length);
    // Resume after the inserted text so decoded code is never re-matched
    $offset = $block_start + strlen($block_new);
}
| 71 | |
// Run GeSHi over code blocks
//   Every <div class="codeblock"> is replaced by GeSHi's highlighted markup.
//   The markup is spliced in by byte offset with substr_replace(); using it
//   as a preg_replace() replacement string would expand any \0, \1 or $1
//   found in the highlighted source as a backreference.
$regexp = '|<div class="codeblock">(.*?)<\/div>|si';
$language = 'c';
$offset = 0;

while (preg_match($regexp, $page_body, $matches, PREG_OFFSET_CAPTURE, $offset))
{
    // With PREG_OFFSET_CAPTURE each entry is array(text, byte offset)
    $geshi = new GeSHi($matches[1][0], $language);
    $geshi->set_language_path($geshi_path);
    $block_new = $geshi->parse_code();
    // Strip annoying final newline.
    // NOTE(review): GeSHi presumably ends its <pre> output with a newline
    // followed by "&nbsp;" -- confirm against the bundled GeSHi version. The
    // entity form, a raw UTF-8 no-break space and the plain space matched
    // previously are all accepted.
    $block_new = preg_replace('#\n(?:&nbsp;|\xC2\xA0| )</pre>#', '</pre>', $block_new);
    // Remove style attribute (TODO: Research this in GeSHi)
    $block_new = str_replace(' style="font-family:monospace;"', '', $block_new);
    $page_body = substr_replace($page_body, $block_new, $matches[0][1], strlen($matches[0][0]));
    // Resume after the inserted markup
    $offset = $matches[0][1] + strlen($block_new);
    unset($geshi); // Clean up
}
unset($block_new); // Clean up

// Apply typographic flourishes
$page_body = SmartyPants($page_body);
| 92 | |
| 93 | |
/**
 * Generate Doxygen Body
 *
 * Writes the finished page to standard output as a single Doxygen comment
 * holding a \page declaration followed by the XHTML body between the
 * \htmlonly and \endhtmlonly markers.
 */

// Page identifier: first command-line argument, empty when absent
$page_id = "";
if (isset($argv[1])) {
    $page_id = $argv[1];
}

// Page description: second command-line argument, empty when absent
$page_desc = "";
if (isset($argv[2])) {
    $page_desc = $argv[2];
}

echo "/*!\\page {$page_id} {$page_desc}\n\\htmlonly\n",
     $page_body,
     "\\endhtmlonly\n*/\n";
| 103 | |
| 104 | // --------------------------------------------------------- |
| 105 | |
/**
 * decode_markdown()
 *
 * Markdown encodes '&', '<' and '>' in detected code
 * blocks, as a convenience. This will restore the
 * encoded entities to ordinary characters, since a
 * downstream transformer (like GeSHi) may not
 * anticipate this.
 *
 * @PARAM input str - Entity-encoded contents of a code
 * block.
 *
 * @RETURN str - The input with every '&amp;', '&lt;' and
 * '&gt;' entity replaced by the character it stands for.
 *
 **********************************************************/

function decode_markdown($input)
{
    // strtr() walks the input once and never re-examines text it has
    // already substituted, so an escaped entity such as "&amp;lt;" decodes
    // to the literal text "&lt;" instead of collapsing all the way to "<".
    return strtr($input, array(
        '&amp;' => '&',
        '&lt;'  => '<',
        '&gt;'  => '>',
    ));
}
| 134 | |
| 135 | |
/**
 * ASCIIMathML parser
 * http://tinyurl.com/ASCIIMathPHP
 *
 * Callback for preg_replace_callback(): converts one matched
 * ASCIIMath expression and returns the MathML produced for it.
 *
 * @PARAM mtch_arr array - Match data for one expression, as
 * handed over by preg_replace_callback([pattern]). Element 0
 * is the full matched string (with delimiters); element 1 is
 * the undelimited contents (typically a capture group).
 *
 **********************************************************/

function ASCIIMathPHPCallback($mtch_arr)
{
    // A single parser instance is kept across calls
    static $asciimath;

    $expression = trim($mtch_arr[1]);

    // The configuration file is included into this function's scope on
    // every call; $symbol_arr, used below, is presumably defined by it.
    include('includes/ASCIIMathPHP-2.0/ASCIIMathPHP-2.0.cfg.php');
    require_once('includes/ASCIIMathPHP-2.0/ASCIIMathPHP-2.0.class.php');

    if (!isset($asciimath)) {
        $asciimath = new ASCIIMathPHP($symbol_arr);
    }

    $math_attributes = array('displaystyle' => 'true');

    $asciimath->setExpr($expression);
    $asciimath->genMathML($math_attributes);

    return $asciimath->getMathML();
}
| 166 | |
/**
 * fix_asciiMath()
 *
 * ASCIIMath pretty-prints its output, with linefeeds
 * and tabs. Causes unexpected behavior in some renderers.
 * This flattens <math> blocks by deleting the whitespace
 * that follows each recognised MathML tag.
 *
 * @PARAM page_body str - The <body> element of an
 * XHTML page to transform.
 *
 * @RETURN str - The page body with the whitespace after the
 * listed tags removed.
 *
 **********************************************************/

function fix_asciiMath($page_body)
{
    // Only the opening <math> tag is handled; whitespace after the closing
    // </math> tag is left in place.
    $patterns = array('/(<math.*?>)\n*\s*/');

    // For every other element, both the opening tag (attributes allowed)
    // and the closing tag are handled, opening tag first.
    $elements = array('mstyle', 'mrow', 'mo', 'mi', 'mn', 'mtext', 'msqrt', 'mfrac');
    foreach ($elements as $element) {
        $patterns[] = '/(<' . $element . '.*?>)\n*\s*/';
        $patterns[] = '/(<\/' . $element . '>)\n*\s*/';
    }

    // Keep the tag itself (capture group 1) and drop the whitespace after it
    return preg_replace($patterns, '$1', $page_body);
}