Projects : mp-wp : mp-wp_genesis
1 | <?php |
2 | /** |
3 | * HTML/XHTML filter that only allows some elements and attributes |
4 | * |
5 | * Added wp_ prefix to avoid conflicts with existing kses users |
6 | * |
7 | * @version 0.2.2 |
8 | * @copyright (C) 2002, 2003, 2005 |
9 | * @author Ulf Harnhammar <metaur@users.sourceforge.net> |
10 | * |
11 | * @package External |
12 | * @subpackage KSES |
13 | * |
14 | * @internal |
15 | * *** CONTACT INFORMATION *** |
16 | * E-mail: metaur at users dot sourceforge dot net |
17 | * Web page: http://sourceforge.net/projects/kses |
18 | * Paper mail: Ulf Harnhammar |
19 | * Ymergatan 17 C |
20 | * 753 25 Uppsala |
21 | * SWEDEN |
22 | * |
23 | * [kses strips evil scripts!] |
24 | */ |
25 | |
/**
 * Makes sure the CUSTOM_TAGS constant exists, defaulting it to false.
 *
 * Define CUSTOM_TAGS as a true value before this file is loaded (from a plugin
 * file, or from the deprecated my-hacks.php file) to skip the default tag lists
 * below and supply your own $allowedposttags and $allowedtags instead.
 *
 * @since 1.2.0
 */
if ( ! defined( 'CUSTOM_TAGS' ) ) {
	define( 'CUSTOM_TAGS', false );
}

if ( ! CUSTOM_TAGS ) {
	/**
	 * Kses global for default allowable HTML tags.
	 *
	 * Maps every allowed element name to its allowed attribute names. Each
	 * attribute maps to an empty array, meaning no value checks are attached.
	 * Not set when the CUSTOM_TAGS constant is true.
	 *
	 * @global array $allowedposttags
	 * @since 2.0.0
	 */
	$allowedposttags = array(
		'address'    => array(),
		'a'          => array_fill_keys( array( 'class', 'href', 'id', 'title', 'rel', 'rev', 'name', 'target' ), array() ),
		'abbr'       => array_fill_keys( array( 'class', 'title' ), array() ),
		'acronym'    => array_fill_keys( array( 'title' ), array() ),
		'b'          => array(),
		'big'        => array(),
		'blockquote' => array_fill_keys( array( 'id', 'cite', 'class', 'lang', 'xml:lang' ), array() ),
		'br'         => array_fill_keys( array( 'class' ), array() ),
		'button'     => array_fill_keys( array( 'disabled', 'name', 'type', 'value' ), array() ),
		'caption'    => array_fill_keys( array( 'align', 'class' ), array() ),
		'cite'       => array_fill_keys( array( 'class', 'dir', 'lang', 'title' ), array() ),
		'code'       => array_fill_keys( array( 'style' ), array() ),
		'col'        => array_fill_keys(
			array( 'align', 'char', 'charoff', 'span', 'dir', 'style', 'valign', 'width' ),
			array()
		),
		'del'        => array_fill_keys( array( 'datetime' ), array() ),
		'dd'         => array(),
		'div'        => array_fill_keys( array( 'align', 'class', 'dir', 'lang', 'style', 'xml:lang' ), array() ),
		'dl'         => array(),
		'dt'         => array(),
		'em'         => array(),
		'fieldset'   => array(),
		'font'       => array_fill_keys( array( 'color', 'face', 'size' ), array() ),
		'form'       => array_fill_keys(
			array( 'action', 'accept', 'accept-charset', 'enctype', 'method', 'name', 'target' ),
			array()
		),
		'h1'         => array_fill_keys( array( 'align', 'class' ), array() ),
		'h2'         => array_fill_keys( array( 'align', 'class' ), array() ),
		'h3'         => array_fill_keys( array( 'align', 'class' ), array() ),
		'h4'         => array_fill_keys( array( 'align', 'class' ), array() ),
		'h5'         => array_fill_keys( array( 'align', 'class' ), array() ),
		'h6'         => array_fill_keys( array( 'align', 'class' ), array() ),
		'hr'         => array_fill_keys( array( 'align', 'class', 'noshade', 'size', 'width' ), array() ),
		'i'          => array(),
		'img'        => array_fill_keys(
			array( 'alt', 'align', 'border', 'class', 'height', 'hspace', 'longdesc', 'vspace', 'src', 'style', 'width' ),
			array()
		),
		'ins'        => array_fill_keys( array( 'datetime', 'cite' ), array() ),
		'kbd'        => array(),
		'label'      => array_fill_keys( array( 'for' ), array() ),
		'legend'     => array_fill_keys( array( 'align' ), array() ),
		'li'         => array_fill_keys( array( 'align', 'class' ), array() ),
		'p'          => array_fill_keys( array( 'class', 'align', 'dir', 'lang', 'style', 'xml:lang' ), array() ),
		'pre'        => array_fill_keys( array( 'style', 'width' ), array() ),
		'q'          => array_fill_keys( array( 'cite' ), array() ),
		's'          => array(),
		'span'       => array_fill_keys(
			array( 'class', 'dir', 'align', 'lang', 'style', 'title', 'xml:lang' ),
			array()
		),
		'strike'     => array(),
		'strong'     => array(),
		'sub'        => array(),
		'sup'        => array(),
		'table'      => array_fill_keys(
			array(
				'align', 'bgcolor', 'border', 'cellpadding', 'cellspacing', 'class',
				'dir', 'id', 'rules', 'style', 'summary', 'width',
			),
			array()
		),
		'tbody'      => array_fill_keys( array( 'align', 'char', 'charoff', 'valign' ), array() ),
		'td'         => array_fill_keys(
			array(
				'abbr', 'align', 'axis', 'bgcolor', 'char', 'charoff', 'class', 'colspan', 'dir',
				'headers', 'height', 'nowrap', 'rowspan', 'scope', 'style', 'valign', 'width',
			),
			array()
		),
		'textarea'   => array_fill_keys( array( 'cols', 'rows', 'disabled', 'name', 'readonly' ), array() ),
		'tfoot'      => array_fill_keys( array( 'align', 'char', 'class', 'charoff', 'valign' ), array() ),
		'th'         => array_fill_keys(
			array(
				'abbr', 'align', 'axis', 'bgcolor', 'char', 'charoff', 'class', 'colspan',
				'headers', 'height', 'nowrap', 'rowspan', 'scope', 'valign', 'width',
			),
			array()
		),
		'thead'      => array_fill_keys( array( 'align', 'char', 'charoff', 'class', 'valign' ), array() ),
		'title'      => array(),
		'tr'         => array_fill_keys(
			array( 'align', 'bgcolor', 'char', 'charoff', 'class', 'style', 'valign' ),
			array()
		),
		'tt'         => array(),
		'u'          => array(),
		'ul'         => array_fill_keys( array( 'class', 'style', 'type' ), array() ),
		'ol'         => array_fill_keys( array( 'class', 'start', 'style', 'type' ), array() ),
		'var'        => array(),
	);

	/**
	 * Kses allowed HTML elements.
	 *
	 * A much shorter list than $allowedposttags, in the same element =>
	 * attributes shape. These entries were present but commented out in the
	 * list and therefore are NOT allowed: br, dd, dl, dt, ins, li, ol, p, sub,
	 * sup, u, ul.
	 *
	 * @global array $allowedtags
	 * @since 1.0.0
	 */
	$allowedtags = array(
		'a'          => array_fill_keys( array( 'href', 'title' ), array() ),
		'abbr'       => array_fill_keys( array( 'title' ), array() ),
		'acronym'    => array_fill_keys( array( 'title' ), array() ),
		'b'          => array(),
		'blockquote' => array_fill_keys( array( 'cite' ), array() ),
		'cite'       => array(),
		'code'       => array(),
		'del'        => array_fill_keys( array( 'datetime' ), array() ),
		'em'         => array(),
		'i'          => array(),
		'q'          => array_fill_keys( array( 'cite' ), array() ),
		'strike'     => array(),
		'strong'     => array(),
	);
}
325 | |
/**
 * Filters content and keeps only allowable HTML elements.
 *
 * Entry point of the filter: the text is cleaned of NULL characters and
 * Netscape-style JavaScript entities, its HTML entities are normalized, the
 * 'pre_kses' hook is run, and finally every tag is checked against
 * $allowed_html. You have to remove any slashes from PHP's magic quotes before
 * you call this function.
 *
 * The default allowed protocols are 'http', 'https', 'ftp', 'ftps', 'mailto',
 * 'news', 'irc', 'gopher', 'nntp', 'feed' and 'telnet'. 'javascript' is not in
 * the list and should not be allowed for untrusted users.
 *
 * @since 1.0.0
 *
 * @param string $string Content to filter through kses
 * @param array $allowed_html List of allowed HTML elements
 * @param array $allowed_protocols Optional. Allowed protocol in links.
 * @return string Filtered content with only allowed HTML elements
 */
function wp_kses($string, $allowed_html, $allowed_protocols = array ('http', 'https', 'ftp', 'ftps', 'mailto', 'news', 'irc', 'gopher', 'nntp', 'feed', 'telnet')) {
	// Text clean-up, innermost call first: NULLs, then JS entities, then entity normalization.
	$cleaned = wp_kses_normalize_entities( wp_kses_js_entities( wp_kses_no_null( $string ) ) );

	// Element and attribute names are matched in lower case from here on.
	$lowercased_html = wp_kses_array_lc( $allowed_html );

	// WP changed the order of these funcs and added args to wp_kses_hook.
	$hooked = wp_kses_hook( $cleaned, $lowercased_html, $allowed_protocols );

	return wp_kses_split( $hooked, $lowercased_html, $allowed_protocols );
}
354 | |
/**
 * You add any kses hooks here.
 *
 * There is currently only one kses hook, 'pre_kses', and it is applied here.
 * All three parameters are passed on to the hooked callbacks, which are
 * expected to return a string.
 *
 * @since 1.0.0
 *
 * @param string $string Content to filter through kses
 * @param array $allowed_html List of allowed HTML elements
 * @param array $allowed_protocols Allowed protocol in links
 * @return string Filtered content through 'pre_kses' hook
 */
function wp_kses_hook($string, $allowed_html, $allowed_protocols) {
	return apply_filters( 'pre_kses', $string, $allowed_html, $allowed_protocols );
}
372 | |
/**
 * Reports the version number of the bundled kses library.
 *
 * @since 1.0.0
 *
 * @return string KSES Version Number
 */
function wp_kses_version() {
	$kses_version = '0.2.2';

	return $kses_version;
}
383 | |
/**
 * Searches for HTML tags, no matter how malformed.
 *
 * Every HTML comment, every "<...>" fragment (terminated or not) and every
 * stray ">" character found in $string is handed to wp_kses_split2(), and is
 * replaced by whatever that function returns.
 *
 * @since 1.0.0
 *
 * @param string $string Content to filter
 * @param array $allowed_html Allowed HTML elements
 * @param array $allowed_protocols Allowed protocols to keep
 * @return string Content with fixed HTML tags
 */
function wp_kses_split($string, $allowed_html, $allowed_protocols) {
	return preg_replace_callback(
		'%((<!--.*?(-->|$))|(<[^>]*(>|$)|>))%',
		// A closure does not see the enclosing function's variables unless they
		// are imported with use(); without it wp_kses_split2() would receive
		// undefined values and reject every element.
		function ($match) use ($allowed_html, $allowed_protocols) {
			return wp_kses_split2($match[1], $allowed_html, $allowed_protocols);
		},
		$string
	);
}
402 | |
/**
 * Callback for wp_kses_split for fixing malformed HTML tags.
 *
 * Receives one fragment matched by wp_kses_split() and decides what replaces
 * it:
 *  - a stray ">" is returned as ">";
 *  - an HTML comment has its body filtered through wp_kses() and is rebuilt;
 *  - very malformed things like <:::> are dropped (empty string);
 *  - elements missing from $allowed_html are dropped (look ma, no
 *    strip_tags()!);
 *  - allowed closing tags are returned without any attributes;
 *  - allowed opening tags are split into an element and an attribute list and
 *    passed to wp_kses_attr(), which removes the illegal attributes.
 *
 * @access private
 * @since 1.0.0
 * @uses wp_kses_attr()
 *
 * @param string $string Content to filter
 * @param array $allowed_html Allowed HTML elements
 * @param array $allowed_protocols Allowed protocols to keep
 * @return string Fixed HTML element
 */
function wp_kses_split2($string, $allowed_html, $allowed_protocols) {
	$string = wp_kses_stripslashes($string);

	// It matched a ">" character.
	if ('<' != substr($string, 0, 1)) {
		return '>';
	}

	// Allow HTML comments, but filter what is inside them.
	if (preg_match('%^<!--(.*?)(-->)?$%', $string, $comment)) {
		$body = str_replace(array('<!--', '-->'), '', $comment[1]);

		// Keep filtering until another pass no longer changes the text.
		$filtered = wp_kses($body, $allowed_html, $allowed_protocols);
		while ($body != $filtered) {
			$body = $filtered;
			$filtered = wp_kses($body, $allowed_html, $allowed_protocols);
		}

		if ($body == '') {
			return '';
		}

		// First collapse runs of dashes into one, then drop a trailing dash
		// so the body cannot end in a dash right before the closing "-->".
		$body = preg_replace(array('/--+/', '/-$/'), array('-', ''), $body);

		return "<!--{$body}-->";
	}

	// It's seriously malformed.
	if (!preg_match('%^<\s*(/\s*)?([a-zA-Z0-9]+)([^>]*)>?$%', $string, $parts)) {
		return '';
	}

	$closing = trim($parts[1]);
	$tag = $parts[2];
	$attributes = $parts[3];

	// They are using a not allowed HTML element.
	if (!isset($allowed_html[strtolower($tag)])) {
		return '';
	}

	// No attributes are allowed for closing elements.
	if ($closing != '') {
		return "<$closing$tag>";
	}

	return wp_kses_attr($tag, $attributes, $allowed_html, $allowed_protocols);
}
463 | |
/**
 * Removes all attributes, if none are allowed for this element.
 *
 * If some are allowed it calls wp_kses_hair() to split them further, and then
 * it builds up new HTML code from the data that wp_kses_hair() returns,
 * keeping only attributes listed for the element in $allowed_html whose value
 * checks (if any) pass. It also removes "<" and ">" characters, if there are
 * any left. A closing XHTML slash at the end of the attribute list is carried
 * over to the returned code.
 *
 * @since 1.0.0
 *
 * @param string $element HTML element/tag
 * @param string $attr HTML attributes from HTML element to closing HTML element tag
 * @param array $allowed_html Allowed HTML elements
 * @param array $allowed_protocols Allowed protocols to keep
 * @return string Sanitized HTML element
 */
function wp_kses_attr($element, $attr, $allowed_html, $allowed_protocols) {
	// Is there a closing XHTML slash at the end of the attributes?
	$xhtml_slash = preg_match('%\s/\s*$%', $attr) ? ' /' : '';

	$tag_key = strtolower($element);

	// Are any attributes allowed at all for this element?
	if (@ count($allowed_html[$tag_key]) == 0) {
		return "<$element$xhtml_slash>";
	}

	// Split the attribute list and collect the allowed ones.
	$kept = '';

	foreach (wp_kses_hair($attr, $allowed_protocols) as $parsed) {
		$attr_key = strtolower($parsed['name']);

		if (!@ isset ($allowed_html[$tag_key][$attr_key])) {
			continue; // the attribute is not allowed
		}

		$rule = $allowed_html[$tag_key][$attr_key];
		if ($rule == '') {
			continue; // the attribute is not allowed
		}

		// A non-array rule means "no checks"; an array lists checks that must all pass.
		$passes = true;
		if (is_array($rule)) {
			foreach ($rule as $check_name => $check_value) {
				if (!wp_kses_check_attr_val($parsed['value'], $parsed['vless'], $check_name, $check_value)) {
					$passes = false;
					break;
				}
			}
		}

		if ($passes) {
			$kept .= ' ' . $parsed['whole'];
		}
	}

	// Remove any "<" or ">" characters.
	$kept = preg_replace('/[<>]/', '', $kept);

	return "<$element$kept$xhtml_slash>";
}
534 | |
/**
 * Builds an attribute list from string containing attributes.
 *
 * This function does a lot of work. It parses an attribute list into an array
 * with attribute data, and tries to do the right thing even if it gets weird
 * input. It will add quotes around attribute values that don't have any quotes
 * or apostrophes around them, to make it easier to produce HTML code that will
 * conform to W3C's HTML specification. It will also remove bad URL protocols
 * from the values of URI attributes (href, src, ...); the attribute name is
 * compared case-insensitively, so HREF= is filtered exactly like href=. It also
 * reduces duplicate attributes by using the attribute defined first
 * (foo='bar' foo='baz' will result in foo='bar').
 *
 * Each entry of the returned array is keyed by the attribute name as written
 * and holds 'name', 'value', 'whole' (the re-assembled name=value text) and
 * 'vless' ('y' for a valueless attribute such as "selected", otherwise 'n').
 *
 * @since 1.0.0
 *
 * @param string $attr Attribute list from HTML element to closing HTML element tag
 * @param array $allowed_protocols Allowed protocols to keep
 * @return array List of attributes after parsing
 */
function wp_kses_hair($attr, $allowed_protocols) {
	$attrarr = array ();
	$mode = 0; # 0 = expect a name, 1 = expect "=" or whitespace, 2 = expect a value
	$attrname = '';
	$uris = array('xmlns', 'profile', 'href', 'src', 'cite', 'classid', 'codebase', 'data', 'usemap', 'longdesc', 'action');

	# The three accepted value forms, tried in this order. Each entry holds the
	# pattern that captures the bare value and the quote character used when the
	# attribute is written back out.
	$value_forms = array(
		array('/^"([^"]*)"(\s+|$)/', '"'),    # "value"
		array("/^'([^']*)'(\s+|$)/", "'"),    # 'value'
		array("%^([^\s\"']+)(\s+|$)%", '"'),  # value - quotes are added to conform to W3C's HTML spec
	);

	# Loop through the whole attribute list

	while (strlen($attr) != 0) {
		$working = 0; # Was the last operation successful?

		switch ($mode) {
			case 0 : # attribute name, href for instance

				if (preg_match('/^([-a-zA-Z]+)/', $attr, $match)) {
					$attrname = $match[1];
					$working = $mode = 1;
					$attr = preg_replace('/^[-a-zA-Z]+/', '', $attr);
				}

				break;

			case 1 : # equals sign or valueless ("selected")

				if (preg_match('/^\s*=\s*/', $attr)) { # equals sign
					$working = 1;
					$mode = 2;
					$attr = preg_replace('/^\s*=\s*/', '', $attr);
					break;
				}

				if (preg_match('/^\s+/', $attr)) { # valueless
					$working = 1;
					$mode = 0;
					if (!array_key_exists($attrname, $attrarr)) {
						$attrarr[$attrname] = array ('name' => $attrname, 'value' => '', 'whole' => $attrname, 'vless' => 'y');
					}
					$attr = preg_replace('/^\s+/', '', $attr);
				}

				break;

			case 2 : # attribute value, a URL after href= for instance

				foreach ($value_forms as $form) {
					if (preg_match($form[0], $attr, $match)) {
						$thisval = $match[1];

						# Lower-case the name before the lookup: wp_kses_attr()
						# accepts attribute names case-insensitively, so a
						# mixed-case name must not bypass protocol filtering.
						if ( in_array(strtolower($attrname), $uris, true) )
							$thisval = wp_kses_bad_protocol($thisval, $allowed_protocols);

						# The first definition of an attribute wins.
						if (!array_key_exists($attrname, $attrarr)) {
							$attrarr[$attrname] = array ('name' => $attrname, 'value' => $thisval, 'whole' => $attrname . '=' . $form[1] . $thisval . $form[1], 'vless' => 'n');
						}

						$working = 1;
						$mode = 0;
						$attr = preg_replace($form[0], '', $attr);
						break; # stop trying further value forms
					}
				}

				break;
		} # switch

		if ($working == 0) { # not well formed, remove and try again
			$attr = wp_kses_html_error($attr);
			$mode = 0;
		}
	} # while

	if ($mode == 1 && !array_key_exists($attrname, $attrarr)) {
		# special case, for when the attribute list ends with a valueless
		# attribute like "selected"
		$attrarr[$attrname] = array ('name' => $attrname, 'value' => '', 'whole' => $attrname, 'vless' => 'y');
	}

	return $attrarr;
}
663 | |
/**
 * Performs different checks for attribute values.
 *
 * The implemented checks are "maxlen", "minlen", "maxval", "minval" and
 * "valueless". The check name is matched case-insensitively; a check name
 * that is not recognised always passes.
 *
 * @since 1.0.0
 *
 * @param string $value Attribute value
 * @param string $vless Whether the value is valueless or not. Use 'y' or 'n'
 * @param string $checkname What $checkvalue is checking for.
 * @param mixed $checkvalue What constraint the value should pass
 * @return bool Whether check passes (true) or not (false)
 */
function wp_kses_check_attr_val($value, $vless, $checkname, $checkvalue) {
	$check = strtolower($checkname);

	// Up to six digits with at most six whitespace characters on either side:
	// an integer from 0 and up without an excessive amount of zeroes or whitespace.
	$small_integer = '/^\s{0,6}[0-9]{1,6}\s{0,6}$/';

	// maxlen: the value's length must not be greater than the given value.
	if ($check === 'maxlen') {
		return !(strlen($value) > $checkvalue);
	}

	// minlen: the value's length must not be smaller than the given value.
	if ($check === 'minlen') {
		return !(strlen($value) < $checkvalue);
	}

	// maxval: a small non-negative integer that is not greater than the given value.
	if ($check === 'maxval') {
		return preg_match($small_integer, $value) && !($value > $checkvalue);
	}

	// minval: a small non-negative integer that is not smaller than the given value.
	if ($check === 'minval') {
		return preg_match($small_integer, $value) && !($value < $checkvalue);
	}

	// valueless: "y"/"Y" demands an attribute without a value (<option selected>),
	// "n"/"N" demands one with a value (<a href="blah">).
	if ($check === 'valueless') {
		return strtolower($checkvalue) == $vless;
	}

	return true;
}
735 | |
/**
 * Sanitize string from bad protocols.
 *
 * This function removes all non-allowed protocols from the beginning of
 * $string. NULL characters and 0xAD bytes are removed first, then
 * wp_kses_bad_protocol_once() is applied over and over until its output stops
 * changing, so it won't be fooled by a string like
 * "javascript:javascript:alert(57)".
 *
 * @since 1.0.0
 *
 * @param string $string Content to filter bad protocols from
 * @param array $allowed_protocols Allowed protocols to keep
 * @return string Filtered content
 */
function wp_kses_bad_protocol($string, $allowed_protocols) {
	// The 0xAD removal deals with an Opera "feature".
	$string = preg_replace('/\xad+/', '', wp_kses_no_null($string));

	do {
		$before = $string;
		$string = wp_kses_bad_protocol_once($before, $allowed_protocols);
	} while ($string != $before);

	return $string;
}
762 | |
/**
 * Removes any NULL characters in $string.
 *
 * Two forms are removed, in this order: actual NULL bytes, and the literal
 * two-character sequence of a backslash followed by the digit 0.
 *
 * @since 1.0.0
 *
 * @param string $string
 * @return string
 */
function wp_kses_no_null($string) {
	$null_forms = array(
		'/\0+/',       // runs of NULL bytes
		'/(\\\\0)+/',  // runs of a backslash followed by "0"
	);

	return preg_replace($null_forms, '', $string);
}
777 | |
/**
 * Strips slashes from in front of quotes.
 *
 * This function changes the character sequence \" to just ". It leaves all
 * other slashes alone. The original kses needed this because of the quoting
 * applied by preg_replace(//e).
 *
 * @since 1.0.0
 *
 * @param string $string String to strip slashes
 * @return string Fixed strings with quoted slashes
 */
function wp_kses_stripslashes($string) {
	$escaped_quote = '\\"'; // a backslash followed by a double quote

	return str_replace($escaped_quote, '"', $string);
}
793 | |
/**
 * Goes through an array and changes the keys to all lower case.
 *
 * Both levels are lower-cased: the outer keys (element names) and the keys of
 * each nested array (attribute names). The innermost values are copied over
 * untouched. Non-array input at either level is cast to an array first.
 *
 * @since 1.0.0
 *
 * @param array $inarray Unfiltered array
 * @return array Fixed array with all lowercase keys
 */
function wp_kses_array_lc($inarray) {
	$lowered = array();

	foreach ( (array) $inarray as $element => $attributes ) {
		$lowered_attributes = array();

		foreach ( (array) $attributes as $attribute => $rule ) {
			$lowered_attributes[strtolower($attribute)] = $rule;
		}

		$lowered[strtolower($element)] = $lowered_attributes;
	}

	return $lowered;
}
817 | |
/**
 * Removes the HTML JavaScript entities found in early versions of Netscape 4.
 *
 * Such an entity has the form &{...}; - everything from the ampersand through
 * the closing brace (and an optional semicolon) is removed. An entity that is
 * never closed is removed up to the end of the string.
 *
 * @since 1.0.0
 *
 * @param string $string
 * @return string
 */
function wp_kses_js_entities($string) {
	$js_entity = '%&\s*\{[^}]*(\}\s*;?|$)%';

	return preg_replace($js_entity, '', $string);
}
829 | |
/**
 * Handles parsing errors in wp_kses_hair().
 *
 * The general plan is to remove everything up to and including some
 * whitespace, but it deals with quotes and apostrophes as well: a quoted or
 * apostrophed run (closed or not) is removed as one unit, even when it
 * contains whitespace.
 *
 * @since 1.0.0
 *
 * @param string $string
 * @return string
 */
function wp_kses_html_error($string) {
	$broken_prefix = '/^("[^"]*("|$)|\'[^\']*(\'|$)|\S)*\s*/';

	return preg_replace($broken_prefix, '', $string);
}
844 | |
/**
 * Sanitizes content from bad protocols and other characters.
 *
 * This function searches for a URL protocol at the beginning of $string, while
 * handling whitespace and HTML entities. The protocol separator is recognised
 * as a literal ":" and in its entity-encoded forms "&#58;" and "&#x3a;" (hex
 * form in either letter case).
 *
 * The text before the first separator is handed to
 * wp_kses_bad_protocol_once2(), which keeps it only if it names an allowed
 * protocol. $allowed_protocols reaches that callback through the
 * $_kses_allowed_protocols global, which this function overwrites.
 *
 * @since 1.0.0
 *
 * @param string $string Content to check for bad protocols
 * @param string $allowed_protocols Allowed protocols
 * @return string Sanitized content
 */
function wp_kses_bad_protocol_once($string, $allowed_protocols) {
	global $_kses_allowed_protocols;
	$_kses_allowed_protocols = $allowed_protocols;

	# Split at the first colon, whether literal or written as a numeric entity.
	$string2 = preg_split('/:|&#58;|&#x3a;/i', $string, 2);

	if ( isset($string2[1]) && !preg_match('%/\?%', $string2[0]) ) {
		$string = wp_kses_bad_protocol_once2($string2[0]) . trim($string2[1]);
	} else {
		$string = preg_replace_callback('/^((&[^;]*;|[\sA-Za-z0-9])*)'.'(:|&#58;|&#[Xx]3[Aa];)\s*/', 'wp_kses_bad_protocol_once2', $string);
	}

	return $string;
}
869 | |
/**
 * Callback for wp_kses_bad_protocol_once() regular expression.
 *
 * This function processes URL protocols, checks to see if they're in the
 * white-list or not, and returns different data depending on the answer: the
 * lower-cased protocol followed by ":" when it is allowed, an empty string
 * when it is not. The white-list is read from the $_kses_allowed_protocols
 * global.
 *
 * @access private
 * @since 1.0.0
 *
 * @param mixed $matches string or preg_replace_callback() matches array to check for bad protocols
 * @return string Sanitized content
 */
function wp_kses_bad_protocol_once2($matches) {
	global $_kses_allowed_protocols;

	if ( is_array($matches) ) {
		// Called as a regex callback: the candidate protocol is capture group 1.
		if ( empty($matches[1]) ) {
			return '';
		}
		$raw = $matches[1];
	} else {
		$raw = $matches;
	}

	// Undo the usual obfuscations before comparing: entities, whitespace,
	// NULLs and 0xAD bytes (the latter deals with an Opera "feature").
	$candidate = wp_kses_decode_entities($raw);
	$candidate = preg_replace('/\s/', '', $candidate);
	$candidate = preg_replace('/\xad+/', '', wp_kses_no_null($candidate));
	$candidate = strtolower($candidate);

	foreach ( (array) $_kses_allowed_protocols as $protocol ) {
		if ( strtolower($protocol) == $candidate ) {
			return "$candidate:";
		}
	}

	return '';
}
913 | |
/**
 * Converts and fixes HTML entities.
 *
 * This function normalizes HTML entities. It will convert "AT&T" to the correct
 * "AT&amp;T", "&#00058;" to "&#58;", "&#XYZZY;" to "&amp;#XYZZY;" and so on.
 *
 * It works in two steps: every "&" is first escaped to "&amp;", which disarms
 * all entities; then the entities that look legitimate - named entities and
 * decimal or hexadecimal numeric entities - are turned back into real
 * entities.
 *
 * @since 1.0.0
 *
 * @param string $string Content to normalize entities
 * @return string Content with normalized entities
 */
function wp_kses_normalize_entities($string) {
	# Disarm all entities by converting & to &amp;

	$string = str_replace('&', '&amp;', $string);

	# Change back the allowed entities in our entity whitelist

	# Named entities: a letter followed by up to 19 letters or digits.
	$string = preg_replace('/&amp;([A-Za-z][A-Za-z0-9]{0,19});/', '&\\1;', $string);
	# Decimal entities: leading zeroes are dropped, at most 5 significant digits.
	$string = preg_replace_callback('/&amp;#0*([0-9]{1,5});/', 'wp_kses_normalize_entities2', $string);
	# Hexadecimal entities: leading zeroes are dropped, 2 or 4 hex digits.
	$string = preg_replace_callback('/&amp;#([Xx])0*(([0-9A-Fa-f]{2}){1,2});/', 'wp_kses_normalize_entities3', $string);

	return $string;
}
938 | |
/**
 * Callback for wp_kses_normalize_entities() regular expression.
 *
 * This function helps wp_kses_normalize_entities() to only accept 16 bit values
 * and nothing more for &#number; entities. A number that is a valid Unicode
 * value and not greater than 65535 is returned as a real entity ("&#65;");
 * anything else stays disarmed with its ampersand escaped ("&amp;#99999;").
 *
 * @access private
 * @since 1.0.0
 *
 * @param array $matches preg_replace_callback() matches array
 * @return string Correctly encoded entity
 */
function wp_kses_normalize_entities2($matches) {
	# Note: empty() is also true for "0", so a zero code point yields an empty string.
	if ( empty($matches[1]) )
		return '';

	$i = $matches[1];

	if ( valid_unicode($i) && !($i > 65535) )
		return "&#$i;";

	return "&amp;#$i;";
}
958 | |
959 | /** |
960 | * Callback for wp_kses_normalize_entities() for regular expression. |
961 | * |
962 | * This function helps wp_kses_normalize_entities() to only accept valid Unicode |
963 | * numeric entities in hex form. |
964 | * |
965 | * @access private |
966 | * |
967 | * @param array $matches preg_replace_callback() matches array |
968 | * @return string Correctly encoded entity |
969 | */ |
970 | function wp_kses_normalize_entities3($matches) { |
971 | if ( ! isset($matches[2]) || empty($matches[2]) ) |
972 | return ''; |
973 | |
974 | $hexchars = $matches[2]; |
975 | return ( ( ! valid_unicode(hexdec($hexchars)) ) ? "&#x$hexchars;" : "&#x$hexchars;" ); |
976 | } |
977 | |
978 | /** |
979 | * Helper function to determine if a Unicode value is valid. |
980 | * |
981 | * @param int $i Unicode value |
982 | * @return bool true if the value was a valid Unicode number |
983 | */ |
984 | function valid_unicode($i) { |
985 | return ( $i == 0x9 || $i == 0xa || $i == 0xd || |
986 | ($i >= 0x20 && $i <= 0xd7ff) || |
987 | ($i >= 0xe000 && $i <= 0xfffd) || |
988 | ($i >= 0x10000 && $i <= 0x10ffff) ); |
989 | } |
990 | |
/**
 * Convert all numeric entities to their character counterparts.
 *
 * This function decodes numeric HTML entities (&#65; and &#x41;). It doesn't do
 * anything with other entities like &auml;, but we don't need them in the URL
 * protocol whitelisting system anyway.
 *
 * Each entity is decoded to a single byte, so values above 255 wrap modulo
 * 256. An entity whose number does not fit in a native integer is left
 * untouched, because chr() cannot accept such a value.
 *
 * @since 1.0.0
 *
 * @param string $string Content to change entities
 * @return string Content after decoded entities
 */
function wp_kses_decode_entities($string) {
	# Decimal form, e.g. &#65;
	$string = preg_replace_callback('/&#([0-9]+);/', function($match) {
		# Numeric-string arithmetic yields an int, or a float on overflow.
		$code = $match[1] + 0;
		return is_int($code) ? chr($code & 0xFF) : $match[0];
	}, $string);

	# Hexadecimal form, e.g. &#x41;
	$string = preg_replace_callback('/&#[Xx]([0-9A-Fa-f]+);/', function($match) {
		# hexdec() yields an int, or a float when the digits overflow an int.
		$code = hexdec($match[1]);
		return is_int($code) ? chr($code & 0xFF) : $match[0];
	}, $string);

	return $string;
}
1013 | |
/**
 * Run slashed content through Kses with the default allowed tags.
 *
 * The content is unslashed, filtered by wp_kses() against the global
 * $allowedtags rule set, and slashed again before it is returned.
 *
 * @since 1.0.0
 * @uses $allowedtags
 *
 * @param string $data Content to filter
 * @return string Filtered content
 */
function wp_filter_kses($data) {
	global $allowedtags;

	$unslashed = stripslashes( $data );
	$filtered = wp_kses( $unslashed, $allowedtags );

	return addslashes( $filtered );
}
1027 | |
/**
 * Run slashed post content through Kses with the post tag rule set.
 *
 * Post content refers to the page contents of the 'post' type and not $_POST
 * data from forms. The content is unslashed, filtered by wp_kses() against the
 * global $allowedposttags rule set, and slashed again before it is returned.
 *
 * @since 2.0.0
 * @uses $allowedposttags
 *
 * @param string $data Post content to filter
 * @return string Filtered post content with allowed HTML tags and attributes intact.
 */
function wp_filter_post_kses($data) {
	global $allowedposttags;

	$unslashed = stripslashes( $data );
	$filtered = wp_kses( $unslashed, $allowedposttags );

	return addslashes( $filtered );
}
1044 | |
/**
 * Strips all of the HTML in the content.
 *
 * The content is unslashed, filtered by wp_kses() with an empty rule set (no
 * element is allowed), and slashed again before it is returned.
 *
 * @since 2.1.0
 *
 * @param string $data Content to strip all HTML from
 * @return string Filtered content without any HTML
 */
function wp_filter_nohtml_kses($data) {
	$no_tags_allowed = array();

	$unslashed = stripslashes( $data );
	$filtered = wp_kses( $unslashed, $no_tags_allowed );

	return addslashes( $filtered );
}
1056 | |
/**
 * Adds all Kses input form content filters.
 *
 * All hooks have default priority. The wp_filter_kses() function is added to
 * the 'pre_comment_content' and 'title_save_pre' hooks.
 *
 * The wp_filter_post_kses() function is added to the 'content_save_pre',
 * 'excerpt_save_pre', and 'content_filtered_save_pre' hooks.
 *
 * @since 2.0.0
 * @uses add_filter() See description for what functions are added to what hooks.
 */
function kses_init_filters() {
	# Hook name => Kses callback, listed in registration order.
	$kses_filters = array(
		// Normal filtering.
		'pre_comment_content' => 'wp_filter_kses',
		'title_save_pre' => 'wp_filter_kses',

		// Post filtering
		'content_save_pre' => 'wp_filter_post_kses',
		'excerpt_save_pre' => 'wp_filter_post_kses',
		'content_filtered_save_pre' => 'wp_filter_post_kses',
	);

	foreach ( $kses_filters as $hook => $callback )
		add_filter($hook, $callback);
}
1079 | |
/**
 * Removes all Kses input form content filters.
 *
 * A quick procedural way to detach every filter that kses_init_filters()
 * attaches: wp_filter_kses() from 'pre_comment_content' and 'title_save_pre',
 * and wp_filter_post_kses() from 'content_save_pre', 'excerpt_save_pre' and
 * 'content_filtered_save_pre'.
 *
 * Does not remove the kses_init() function from 'init' hook (priority is
 * default). Also does not remove kses_init() function from 'set_current_user'
 * hook (priority is also default).
 *
 * @since 2.0.6
 */
function kses_remove_filters() {
	# Hook name => Kses callback, listed in removal order.
	$kses_filters = array(
		// Normal filtering.
		'pre_comment_content' => 'wp_filter_kses',
		'title_save_pre' => 'wp_filter_kses',

		// Post filtering
		'content_save_pre' => 'wp_filter_post_kses',
		'excerpt_save_pre' => 'wp_filter_post_kses',
		'content_filtered_save_pre' => 'wp_filter_post_kses',
	);

	foreach ( $kses_filters as $hook => $callback )
		remove_filter($hook, $callback);
}
1102 | |
/**
 * Sets up most of the Kses filters for input form content.
 *
 * If you remove the kses_init() function from 'init' hook and
 * 'set_current_user' (priority is default), then none of the Kses filter hooks
 * will be added.
 *
 * All Kses filters are removed first. They are added back only when the
 * current user lacks the 'unfiltered_html' capability.
 *
 * @uses kses_remove_filters() Removes the Kses filters
 * @uses kses_init_filters() Adds the Kses filters back if the user
 * 		does not have unfiltered HTML capability.
 * @since 2.0.0
 */
function kses_init() {
	# Start from a clean slate so the filters are never registered twice.
	kses_remove_filters();

	# Users allowed to post unfiltered HTML keep the filters off.
	if ( current_user_can('unfiltered_html') )
		return;

	kses_init_filters();
}
1125 | |
# Run kses_init() on the 'init' hook and again on the 'set_current_user' hook,
# so the Kses filters are re-evaluated against the current user's
# 'unfiltered_html' capability.
add_action('init', 'kses_init');
add_action('set_current_user', 'kses_init');
?>