Projects : mp-wp : mp-wp_genesis
1 | <?php |
2 | /** |
3 | * WordPress Diff bastard child of old MediaWiki Diff Formatter. |
4 | * |
5 | * Basically all that remains is the table structure and some method names. |
6 | * |
7 | * @package WordPress |
8 | * @subpackage Diff |
9 | */ |
10 | |
// Load the bundled Text_Diff library only when no Text_Diff class is defined yet.
if ( ! class_exists( 'Text_Diff' ) ) {
	// Core diff engine (Text_Diff class).
	require dirname( __FILE__ ) . '/Text/Diff.php';

	// Base renderer (Text_Diff_Renderer class).
	require dirname( __FILE__ ) . '/Text/Diff/Renderer.php';

	// Inline renderer (Text_Diff_Renderer_inline class).
	require dirname( __FILE__ ) . '/Text/Diff/Renderer/inline.php';
}
19 | |
/**
 * Table renderer to display the diff lines.
 *
 * Produces HTML table rows (<tr>) in which the left-hand cells show the
 * original text and the right-hand cells show the final text. Each side is
 * either a marker cell plus a content cell, or a single two-column blank cell.
 *
 * @since 2.6.0
 * @uses Text_Diff_Renderer Extends
 */
class WP_Text_Diff_Renderer_Table extends Text_Diff_Renderer {

	/**
	 * Number of leading context lines requested from the parent renderer.
	 *
	 * @see Text_Diff_Renderer::_leading_context_lines
	 * @var int
	 * @access protected
	 * @since 2.6.0
	 */
	var $_leading_context_lines = 10000;

	/**
	 * Number of trailing context lines requested from the parent renderer.
	 *
	 * @see Text_Diff_Renderer::_trailing_context_lines
	 * @var int
	 * @access protected
	 * @since 2.6.0
	 */
	var $_trailing_context_lines = 10000;

	/**
	 * Upper bound on the "changed text / total text" ratio of a matched line pair.
	 *
	 * When the ratio computed in _changed() exceeds this value the two lines are
	 * considered too different and no word-level <ins>/<del> highlighting is kept.
	 *
	 * @var float
	 * @access protected
	 * @since 2.6.0
	 */
	var $_diff_threshold = 0.6;

	/**
	 * Inline display helper object name.
	 *
	 * Class name instantiated by _changed() to render word-level diffs.
	 *
	 * @var string
	 * @access protected
	 * @since 2.6.0
	 */
	var $inline_diff_renderer = 'WP_Text_Diff_Renderer_inline';

	/**
	 * PHP4 Constructor - Call parent constructor with params array.
	 *
	 * This will set class properties based on the key value pairs in the array.
	 *
	 * NOTE(review): the method name does not match the class name
	 * (WP_Text_Diff_Renderer_Table), so PHP does not treat it as a PHP4-style
	 * constructor; it is only reachable as an ordinary method. Presumably object
	 * construction falls through to the inherited parent constructor - confirm
	 * against Text_Diff_Renderer before renaming.
	 *
	 * @since unknown
	 *
	 * @param array $params
	 */
	function Text_Diff_Renderer_Table( $params = array() ) {
		$parent = get_parent_class($this);
		$this->$parent( $params );
	}

	/**
	 * Block header hook: this renderer emits no per-block header.
	 *
	 * @ignore
	 *
	 * @param string $header
	 * @return string Always an empty string.
	 */
	function _startBlock( $header ) {
		return '';
	}

	/**
	 * Generic line output hook: intentionally does nothing in this renderer.
	 *
	 * @ignore
	 *
	 * @param array $lines
	 * @param string $prefix
	 */
	function _lines( $lines, $prefix=' ' ) {
	}

	/**
	 * Builds the marker cell and content cell for an added line.
	 *
	 * @ignore
	 *
	 * @param string $line Already HTML-escaped text; it is inserted verbatim.
	 * @return string Two <td> elements.
	 */
	function addedLine( $line ) {
		return "<td>+</td><td class='diff-addedline'>{$line}</td>";
	}

	/**
	 * Builds the marker cell and content cell for a deleted line.
	 *
	 * @ignore
	 *
	 * @param string $line Already HTML-escaped text; it is inserted verbatim.
	 * @return string Two <td> elements.
	 */
	function deletedLine( $line ) {
		return "<td>-</td><td class='diff-deletedline'>{$line}</td>";
	}

	/**
	 * Builds the marker cell and content cell for an unchanged (context) line.
	 *
	 * @ignore
	 *
	 * @param string $line Already HTML-escaped text; it is inserted verbatim.
	 * @return string Two <td> elements.
	 */
	function contextLine( $line ) {
		return "<td> </td><td class='diff-context'>{$line}</td>";
	}

	/**
	 * Builds the blank placeholder that fills one side of a row.
	 *
	 * NOTE(review): the cell content here is a literal space; if this file was
	 * recovered from a rendered page an HTML entity (e.g. a non-breaking space)
	 * may have been decoded - confirm against the canonical source.
	 *
	 * @ignore
	 *
	 * @return string A single <td> spanning two columns.
	 */
	function emptyLine() {
		return '<td colspan="2"> </td>';
	}

	/**
	 * Renders added lines: blank left side, added line on the right side.
	 *
	 * @ignore
	 * @access private
	 *
	 * @param array $lines  Lines to render.
	 * @param bool  $encode Whether to pass each line through htmlspecialchars() first.
	 * @return string One <tr> per line.
	 */
	function _added( $lines, $encode = true ) {
		$r = '';
		foreach ($lines as $line) {
			if ( $encode )
				$line = htmlspecialchars( $line );
			$r .= '<tr>' . $this->emptyLine() . $this->addedLine( $line ) . "</tr>\n";
		}
		return $r;
	}

	/**
	 * Renders deleted lines: deleted line on the left side, blank right side.
	 *
	 * @ignore
	 * @access private
	 *
	 * @param array $lines  Lines to render.
	 * @param bool  $encode Whether to pass each line through htmlspecialchars() first.
	 * @return string One <tr> per line.
	 */
	function _deleted( $lines, $encode = true ) {
		$r = '';
		foreach ($lines as $line) {
			if ( $encode )
				$line = htmlspecialchars( $line );
			$r .= '<tr>' . $this->deletedLine( $line ) . $this->emptyLine() . "</tr>\n";
		}
		return $r;
	}

	/**
	 * Renders unchanged lines: the same line appears on both sides of the row.
	 *
	 * @ignore
	 * @access private
	 *
	 * @param array $lines  Lines to render.
	 * @param bool  $encode Whether to pass each line through htmlspecialchars() first.
	 * @return string One <tr> per line.
	 */
	function _context( $lines, $encode = true ) {
		$r = '';
		foreach ($lines as $line) {
			if ( $encode )
				$line = htmlspecialchars( $line );
			$r .= '<tr>' .
				$this->contextLine( $line ) . $this->contextLine( $line ) . "</tr>\n";
		}
		return $r;
	}

	/**
	 * Process changed lines to do word-by-word diffs for extra highlighting.
	 *
	 * (TRAC style) sometimes these lines can actually be deleted or added rows.
	 * We do additional processing to figure that out.
	 *
	 * @access private
	 * @since 2.6.0
	 *
	 * @param array $orig  Lines of the original side of the changed block.
	 * @param array $final Lines of the final side of the changed block.
	 * @return string One <tr> per rendered row.
	 */
	function _changed( $orig, $final ) {
		$r = '';

		// Does the aforementioned additional processing.
		// *_matches tell what rows are "the same" in orig and final. Those pairs will be diffed to get word changes.
		//	match is numeric: an index in the other column
		//	match is 'x': no match. It is a new (or deleted) row
		// *_rows are column vectors for the orig column and the final column.
		//	row >= 0: an index of the $orig or $final array
		//	row  < 0: a blank row for that column
		list($orig_matches, $final_matches, $orig_rows, $final_rows) = $this->interleave_changed_lines( $orig, $final );

		// These will hold the word changes as determined by an inline diff.
		$orig_diffs  = array();
		$final_diffs = array();

		// Compute word diffs for each matched pair using the inline diff.
		foreach ( $orig_matches as $o => $f ) {
			// Unmatched rows carry the 'x' marker and get no word diff.
			if ( !is_numeric($o) || !is_numeric($f) )
				continue;

			$text_diff = new Text_Diff( 'auto', array( array($orig[$o]), array($final[$f]) ) );
			$renderer  = new $this->inline_diff_renderer;
			$diff      = $renderer->render( $text_diff );

			// If they're too different, don't include any <ins> or <del>s.
			if ( preg_match_all( '!(<ins>.*?</ins>|<del>.*?</del>)!', $diff, $diff_matches ) ) {
				// Length of all text between <ins> or <del>.
				$stripped_matches = strlen(strip_tags( join(' ', $diff_matches[0]) ));
				// Since we count length of text between <ins> or <del> (instead of picking just one),
				// we double the length of chars not in those tags.
				$stripped_diff = strlen(strip_tags( $diff )) * 2 - $stripped_matches;

				// Guard the division: a zero denominator keeps the word diff instead of raising an error.
				if ( 0 !== $stripped_diff && $stripped_matches / $stripped_diff > $this->_diff_threshold )
					continue; // Too different. Don't save diffs.
			}

			// Un-inline the diffs by removing del or ins.
			$orig_diffs[$o]  = preg_replace( '|<ins>.*?</ins>|', '', $diff );
			$final_diffs[$f] = preg_replace( '|<del>.*?</del>|', '', $diff );
		}

		foreach ( array_keys($orig_rows) as $row ) {
			$orig_index  = $orig_rows[$row];
			$final_index = $final_rows[$row];

			// Both columns have blanks. Ignore them.
			if ( $orig_index < 0 && $final_index < 0 )
				continue;

			// If we have a word based diff, use it. Otherwise, use the normal line.
			// A negative index is a blank placeholder with no backing line, so it is
			// never used to index $orig / $final (avoids undefined-index notices).
			if ( isset($orig_diffs[$orig_index]) )
				$orig_line = $orig_diffs[$orig_index];
			elseif ( isset($orig[$orig_index]) )
				$orig_line = htmlspecialchars($orig[$orig_index]);
			else
				$orig_line = '';

			if ( isset($final_diffs[$final_index]) )
				$final_line = $final_diffs[$final_index];
			elseif ( isset($final[$final_index]) )
				$final_line = htmlspecialchars($final[$final_index]);
			else
				$final_line = '';

			if ( $orig_index < 0 ) { // Orig is blank. This is really an added row.
				$r .= $this->_added( array($final_line), false );
			} elseif ( $final_index < 0 ) { // Final is blank. This is really a deleted row.
				$r .= $this->_deleted( array($orig_line), false );
			} else { // A true changed row.
				$r .= '<tr>' . $this->deletedLine( $orig_line ) . $this->addedLine( $final_line ) . "</tr>\n";
			}
		}

		return $r;
	}

	/**
	 * Takes changed blocks and matches which rows in orig turned into which rows in final.
	 *
	 * Returns
	 *	*_matches ( which rows match with which )
	 *	*_rows    ( order of rows in each column interleaved with blank rows as
	 *	            necessary )
	 *
	 * @since 2.6.0
	 *
	 * @param array $orig  Lines of the original side.
	 * @param array $final Lines of the final side.
	 * @return array array( $orig_matches, $final_matches, $orig_rows, $final_rows ).
	 *               In the *_matches arrays a value is either the index of the
	 *               partner row or 'x' for "no partner". In the *_rows arrays a
	 *               negative value marks a blank row.
	 */
	function interleave_changed_lines( $orig, $final ) {

		// Contains all pairwise string comparisons. Keys are such that this need only be a one dimensional array.
		$matches = array();
		foreach ( array_keys($orig) as $o ) {
			foreach ( array_keys($final) as $f ) {
				$matches["$o,$f"] = $this->compute_string_distance( $orig[$o], $final[$f] );
			}
		}
		asort($matches); // Order by string distance.

		$orig_matches  = array();
		$final_matches = array();

		foreach ( $matches as $keys => $difference ) {
			list($o, $f) = explode(',', $keys);
			$o = (int) $o;
			$f = (int) $f;

			$orig_taken  = isset($orig_matches[$o]);
			$final_taken = isset($final_matches[$f]);

			// Already have better matches for these guys.
			if ( $orig_taken && $final_taken )
				continue;

			if ( !$orig_taken && !$final_taken ) {
				// First match for these guys. Must be best match.
				$orig_matches[$o]  = $f;
				$final_matches[$f] = $o;
			} elseif ( $orig_taken ) {
				// Best match of this final is already taken? Must mean this final is a new row.
				$final_matches[$f] = 'x';
			} else {
				// Best match of this orig is already taken? Must mean this orig is a deleted row.
				$orig_matches[$o] = 'x';
			}
		}

		// We read the text in this order.
		ksort($orig_matches);
		ksort($final_matches);

		// Stores rows and blanks for each column.
		$orig_rows = $orig_rows_copy = array_keys($orig_matches);
		$final_rows = array_keys($final_matches);

		// Interleaves rows with blanks to keep matches aligned.
		// We may end up with some extraneous blank rows, but we'll just ignore them later.
		foreach ( $orig_rows_copy as $orig_row ) {
			$final_pos = array_search($orig_matches[$orig_row], $final_rows, true);
			$orig_pos = (int) array_search($orig_row, $orig_rows, true);

			if ( false === $final_pos ) { // This orig is paired with a blank final.
				array_splice( $final_rows, $orig_pos, 0, -1 );
			} elseif ( $final_pos < $orig_pos ) { // This orig's match is up a ways. Pad final with blank rows.
				$diff_pos = $final_pos - $orig_pos;
				while ( $diff_pos < 0 )
					array_splice( $final_rows, $orig_pos, 0, $diff_pos++ );
			} elseif ( $final_pos > $orig_pos ) { // This orig's match is down a ways. Pad orig with blank rows.
				$diff_pos = $orig_pos - $final_pos;
				while ( $diff_pos < 0 )
					array_splice( $orig_rows, $orig_pos, 0, $diff_pos++ );
			}
		}

		// Pad the ends with blank rows if the columns aren't the same length.
		$diff_count = count($orig_rows) - count($final_rows);
		if ( $diff_count < 0 ) {
			while ( $diff_count < 0 )
				array_push($orig_rows, $diff_count++);
		} elseif ( $diff_count > 0 ) {
			$diff_count = -1 * $diff_count;
			while ( $diff_count < 0 )
				array_push($final_rows, $diff_count++);
		}

		return array($orig_matches, $final_matches, $orig_rows, $final_rows);
	}

	/**
	 * Computes a number that is intended to reflect the "distance" between two strings.
	 *
	 * The distance is the sum of absolute differences between the per-byte
	 * frequency tables of both strings, divided by the byte length of $string1.
	 *
	 * @since 2.6.0
	 *
	 * @param string $string1
	 * @param string $string2
	 * @return int|float Undivided difference when $string1 is empty, otherwise
	 *                   the difference per byte of $string1.
	 */
	function compute_string_distance( $string1, $string2 ) {
		// Vectors containing character frequency for all chars in each string.
		$chars1 = count_chars($string1);
		$chars2 = count_chars($string2);

		// L1-norm of difference vector.
		$difference = array_sum( array_map( array(&$this, 'difference'), $chars1, $chars2 ) );

		// $string1 has zero length? Odd. Give huge penalty by not dividing.
		$length = strlen($string1);
		if ( 0 === $length )
			return $difference;

		// Return distance per character (of string1).
		return $difference / $length;
	}

	/**
	 * Absolute difference of two numbers; array_map() callback for compute_string_distance().
	 *
	 * @ignore
	 * @since 2.6.0
	 *
	 * @param int $a
	 * @param int $b
	 * @return int
	 */
	function difference( $a, $b ) {
		return abs( $a - $b );
	}

}
445 | |
/**
 * Better word splitting than the PEAR package provides.
 *
 * @since 2.6.0
 * @uses Text_Diff_Renderer_inline Extends
 */
class WP_Text_Diff_Renderer_inline extends Text_Diff_Renderer_inline {

	/**
	 * Splits a string into word and non-word tokens.
	 *
	 * Every single non-word character becomes its own token (the delimiters are
	 * captured), so concatenating the returned tokens reproduces the input
	 * with NUL bytes removed and newlines replaced by $newlineEscape.
	 *
	 * @ignore
	 * @since 2.6.0
	 *
	 * @param string $string        Text to split.
	 * @param string $newlineEscape Replacement for "\n" inside the tokens.
	 * @return array List of tokens.
	 */
	function _splitOnWords($string, $newlineEscape = "\n") {
		// NUL bytes are dropped before splitting.
		$string = str_replace("\0", '', $string);

		$words = preg_split( '/([^\w])/u', $string, -1, PREG_SPLIT_DELIM_CAPTURE );

		// With the /u modifier preg_split() returns false for input that is not
		// valid UTF-8. Retry byte-wise so callers always receive an array.
		if ( false === $words )
			$words = preg_split( '/([^\w])/', $string, -1, PREG_SPLIT_DELIM_CAPTURE );

		// Last resort: treat the whole string as a single token.
		if ( false === $words )
			$words = array( $string );

		return str_replace( "\n", $newlineEscape, $words );
	}

}
470 | |
471 | ?> |