Projects : mp-wp : mp-wp_genesis

mp-wp/wp-includes/kses.php

Dir - Raw

1<?php
2/**
3 * HTML/XHTML filter that only allows some elements and attributes
4 *
5 * Added wp_ prefix to avoid conflicts with existing kses users
6 *
7 * @version 0.2.2
8 * @copyright (C) 2002, 2003, 2005
9 * @author Ulf Harnhammar <metaur@users.sourceforge.net>
10 *
11 * @package External
12 * @subpackage KSES
13 *
14 * @internal
15 * *** CONTACT INFORMATION ***
16 * E-mail: metaur at users dot sourceforge dot net
17 * Web page: http://sourceforge.net/projects/kses
18 * Paper mail: Ulf Harnhammar
19 * Ymergatan 17 C
20 * 753 25 Uppsala
21 * SWEDEN
22 *
23 * [kses strips evil scripts!]
24 */
25
/**
 * Switch for the default tag whitelists.
 *
 * Define CUSTOM_TAGS as true before this file is loaded (from a plugin, or
 * from the deprecated my-hacks.php file) to skip the default $allowedposttags
 * and $allowedtags definitions below.
 *
 * @since 1.2.0
 */
if (!defined('CUSTOM_TAGS')) {
    define('CUSTOM_TAGS', false);
}

if (!CUSTOM_TAGS) {
    /**
     * Kses global for default allowable HTML tags in post content.
     *
     * Maps element name => (attribute name => checks). Every attribute here
     * maps to an empty array, i.e. it is allowed with no value checks.
     *
     * Can be overridden by using the CUSTOM_TAGS constant.
     *
     * @global array $allowedposttags
     * @since 2.0.0
     */
    $allowedposttags = array(
        'address' => array(),
        'a' => array_fill_keys(array('class', 'href', 'id', 'title', 'rel', 'rev', 'name', 'target'), array()),
        'abbr' => array_fill_keys(array('class', 'title'), array()),
        'acronym' => array_fill_keys(array('title'), array()),
        'b' => array(),
        'big' => array(),
        'blockquote' => array_fill_keys(array('id', 'cite', 'class', 'lang', 'xml:lang'), array()),
        'br' => array_fill_keys(array('class'), array()),
        'button' => array_fill_keys(array('disabled', 'name', 'type', 'value'), array()),
        'caption' => array_fill_keys(array('align', 'class'), array()),
        'cite' => array_fill_keys(array('class', 'dir', 'lang', 'title'), array()),
        'code' => array_fill_keys(array('style'), array()),
        'col' => array_fill_keys(array('align', 'char', 'charoff', 'span', 'dir', 'style', 'valign', 'width'), array()),
        'del' => array_fill_keys(array('datetime'), array()),
        'dd' => array(),
        'div' => array_fill_keys(array('align', 'class', 'dir', 'lang', 'style', 'xml:lang'), array()),
        'dl' => array(),
        'dt' => array(),
        'em' => array(),
        'fieldset' => array(),
        'font' => array_fill_keys(array('color', 'face', 'size'), array()),
        'form' => array_fill_keys(array('action', 'accept', 'accept-charset', 'enctype', 'method', 'name', 'target'), array()),
        'h1' => array_fill_keys(array('align', 'class'), array()),
        'h2' => array_fill_keys(array('align', 'class'), array()),
        'h3' => array_fill_keys(array('align', 'class'), array()),
        'h4' => array_fill_keys(array('align', 'class'), array()),
        'h5' => array_fill_keys(array('align', 'class'), array()),
        'h6' => array_fill_keys(array('align', 'class'), array()),
        'hr' => array_fill_keys(array('align', 'class', 'noshade', 'size', 'width'), array()),
        'i' => array(),
        'img' => array_fill_keys(array(
            'alt', 'align', 'border', 'class', 'height', 'hspace',
            'longdesc', 'vspace', 'src', 'style', 'width',
        ), array()),
        'ins' => array_fill_keys(array('datetime', 'cite'), array()),
        'kbd' => array(),
        'label' => array_fill_keys(array('for'), array()),
        'legend' => array_fill_keys(array('align'), array()),
        'li' => array_fill_keys(array('align', 'class'), array()),
        'p' => array_fill_keys(array('class', 'align', 'dir', 'lang', 'style', 'xml:lang'), array()),
        'pre' => array_fill_keys(array('style', 'width'), array()),
        'q' => array_fill_keys(array('cite'), array()),
        's' => array(),
        'span' => array_fill_keys(array('class', 'dir', 'align', 'lang', 'style', 'title', 'xml:lang'), array()),
        'strike' => array(),
        'strong' => array(),
        'sub' => array(),
        'sup' => array(),
        'table' => array_fill_keys(array(
            'align', 'bgcolor', 'border', 'cellpadding', 'cellspacing', 'class',
            'dir', 'id', 'rules', 'style', 'summary', 'width',
        ), array()),
        'tbody' => array_fill_keys(array('align', 'char', 'charoff', 'valign'), array()),
        'td' => array_fill_keys(array(
            'abbr', 'align', 'axis', 'bgcolor', 'char', 'charoff', 'class', 'colspan', 'dir',
            'headers', 'height', 'nowrap', 'rowspan', 'scope', 'style', 'valign', 'width',
        ), array()),
        'textarea' => array_fill_keys(array('cols', 'rows', 'disabled', 'name', 'readonly'), array()),
        'tfoot' => array_fill_keys(array('align', 'char', 'class', 'charoff', 'valign'), array()),
        'th' => array_fill_keys(array(
            'abbr', 'align', 'axis', 'bgcolor', 'char', 'charoff', 'class', 'colspan',
            'headers', 'height', 'nowrap', 'rowspan', 'scope', 'valign', 'width',
        ), array()),
        'thead' => array_fill_keys(array('align', 'char', 'charoff', 'class', 'valign'), array()),
        'title' => array(),
        'tr' => array_fill_keys(array('align', 'bgcolor', 'char', 'charoff', 'class', 'style', 'valign'), array()),
        'tt' => array(),
        'u' => array(),
        'ul' => array_fill_keys(array('class', 'style', 'type'), array()),
        'ol' => array_fill_keys(array('class', 'start', 'style', 'type'), array()),
        'var' => array(),
    );

    /**
     * Kses allowed HTML elements (the smaller whitelist).
     *
     * Elements that upstream carries only as commented-out entries, and that
     * are therefore not allowed here: br, dd, dl, dt, ins, li, ol, p, sub,
     * sup, u, ul.
     *
     * @global array $allowedtags
     * @since 1.0.0
     */
    $allowedtags = array(
        'a' => array_fill_keys(array('href', 'title'), array()),
        'abbr' => array_fill_keys(array('title'), array()),
        'acronym' => array_fill_keys(array('title'), array()),
        'b' => array(),
        'blockquote' => array_fill_keys(array('cite'), array()),
        'cite' => array(),
        'code' => array(),
        'del' => array_fill_keys(array('datetime'), array()),
        'em' => array(),
        'i' => array(),
        'q' => array_fill_keys(array('cite'), array()),
        'strike' => array(),
        'strong' => array(),
    );
}
325
/**
 * Filters content and keeps only allowable HTML elements.
 *
 * Makes sure that only the allowed HTML element names, attribute names and
 * attribute values, plus only sane HTML entities, occur in $string. You have
 * to remove any slashes from PHP's magic quotes before calling this function.
 *
 * The default allowed protocols are 'http', 'https', 'ftp', 'ftps', 'mailto',
 * 'news', 'irc', 'gopher', 'nntp', 'feed' and 'telnet'. 'javascript' is
 * deliberately absent and should not be allowed for untrusted users.
 *
 * @since 1.0.0
 *
 * @param string $string Content to filter through kses
 * @param array $allowed_html List of allowed HTML elements
 * @param array $allowed_protocols Optional. Allowed protocols in links.
 * @return string Filtered content with only allowed HTML elements
 */
function wp_kses($string, $allowed_html, $allowed_protocols = array ('http', 'https', 'ftp', 'ftps', 'mailto', 'news', 'irc', 'gopher', 'nntp', 'feed', 'telnet')) {
    // Scrub the raw text first: NUL characters, then "&{...}" JavaScript
    // entities, then entity normalization -- in exactly this order.
    $clean = wp_kses_normalize_entities(wp_kses_js_entities(wp_kses_no_null($string)));

    // The whitelist is lower-cased once and shared by the hook and the splitter.
    $whitelist = wp_kses_array_lc($allowed_html);

    // The 'pre_kses' hook sees the scrubbed text before any tag is examined.
    $clean = wp_kses_hook($clean, $whitelist, $allowed_protocols);

    return wp_kses_split($clean, $whitelist, $allowed_protocols);
}
354
/**
 * Runs the kses hooks.
 *
 * Only one hook is applied here, 'pre_kses'. All three parameters are passed
 * to it and the filtered value of $string is returned.
 *
 * @since 1.0.0
 *
 * @param string $string Content to filter through kses
 * @param array $allowed_html List of allowed HTML elements
 * @param array $allowed_protocols Allowed protocols in links
 * @return string Content after the 'pre_kses' hook
 */
function wp_kses_hook($string, $allowed_html, $allowed_protocols) {
    return apply_filters('pre_kses', $string, $allowed_html, $allowed_protocols);
}
372
/**
 * Reports the version number of the bundled kses library.
 *
 * @since 1.0.0
 *
 * @return string KSES version number
 */
function wp_kses_version() {
    $version = '0.2.2';

    return $version;
}
383
/**
 * Searches for HTML tags, no matter how malformed.
 *
 * Every HTML comment, every "<...>" run (terminated or not) and every stray
 * ">" character is handed to wp_kses_split2(), and replaced by what it
 * returns.
 *
 * @since 1.0.0
 *
 * @param string $string Content to filter
 * @param array $allowed_html Allowed HTML elements
 * @param array $allowed_protocols Allowed protocols to keep
 * @return string Content with fixed HTML tags
 */
function wp_kses_split($string, $allowed_html, $allowed_protocols) {
    return preg_replace_callback(
        '%((<!--.*?(-->|$))|(<[^>]*(>|$)|>))%',
        // The whitelists must be imported explicitly: a PHP closure does not
        // see the enclosing function's variables. Without the use() clause
        // both are undefined in the callback and every tag is stripped.
        function ($match) use ($allowed_html, $allowed_protocols) {
            return wp_kses_split2($match[1], $allowed_html, $allowed_protocols);
        },
        $string
    );
}
402
/**
 * Callback for wp_kses_split() for fixing malformed HTML tags.
 *
 * Handles one match at a time:
 * - a stray ">" becomes "&gt;";
 * - an HTML comment has its body re-filtered through wp_kses() until it stops
 *   changing, and is dropped entirely if nothing is left;
 * - seriously malformed things like <:::> and elements that are not in
 *   $allowed_html are replaced by an empty string (no strip_tags() involved);
 * - closing tags are returned without attributes;
 * - opening tags are passed on to wp_kses_attr() for attribute filtering.
 *
 * @access private
 * @since 1.0.0
 * @uses wp_kses_attr()
 *
 * @param string $string Content to filter
 * @param array $allowed_html Allowed HTML elements
 * @param array $allowed_protocols Allowed protocols to keep
 * @return string Fixed HTML element
 */
function wp_kses_split2($string, $allowed_html, $allowed_protocols) {
    $string = wp_kses_stripslashes($string);

    // Anything that does not start with "<" is the stray ">" alternative.
    if ('<' != substr($string, 0, 1)) {
        return '&gt;';
    }

    // HTML comments are allowed, but their body is filtered like any content.
    if (preg_match('%^<!--(.*?)(-->)?$%', $string, $comment)) {
        $inner = str_replace(array('<!--', '-->'), '', $comment[1]);

        // Keep filtering until a pass leaves the text unchanged.
        for (;;) {
            $filtered = wp_kses($inner, $allowed_html, $allowed_protocols);
            if ($inner == $filtered) {
                break;
            }
            $inner = $filtered;
        }

        if ($inner == '') {
            return '';
        }

        // Prevent multiple dashes inside the comment ...
        $inner = preg_replace('/--+/', '-', $inner);
        // ... and a trailing dash that would turn the terminator into "--->".
        $inner = preg_replace('/-$/', '', $inner);

        return "<!--{$inner}-->";
    }

    // Split into optional closing slash, element name and attribute text.
    if (!preg_match('%^<\s*(/\s*)?([a-zA-Z0-9]+)([^>]*)>?$%', $string, $parts)) {
        return ''; // seriously malformed
    }

    $slash = trim($parts[1]);
    $elem = $parts[2];
    $attrlist = $parts[3];

    if (!isset($allowed_html[strtolower($elem)])) {
        return ''; // element is not allowed
    }

    // No attributes are allowed on closing elements.
    if ($slash != '') {
        return "<$slash$elem>";
    }

    return wp_kses_attr($slash . $elem, $attrlist, $allowed_html, $allowed_protocols);
}
463
/**
 * Rebuilds an opening tag, keeping only the attributes allowed for it.
 *
 * If no attributes are allowed for the element, all of them are removed.
 * Otherwise wp_kses_hair() splits the attribute text, each attribute is
 * checked against the whitelist entry for the element (names are compared in
 * lower case) and against any value checks listed there, and the survivors
 * are glued back together. Remaining "<" and ">" characters are removed from
 * the attribute text, and a closing XHTML slash is preserved when the
 * original attribute text ended with whitespace followed by "/".
 *
 * @since 1.0.0
 *
 * @param string $element HTML element/tag
 * @param string $attr HTML attributes from HTML element to closing HTML element tag
 * @param array $allowed_html Allowed HTML elements
 * @param array $allowed_protocols Allowed protocols to keep
 * @return string Sanitized HTML element
 */
function wp_kses_attr($element, $attr, $allowed_html, $allowed_protocols) {
    $tag = strtolower($element);

    // Is there a closing XHTML slash at the end of the attributes?
    $xhtml_slash = preg_match('%\s/\s*$%', $attr) ? ' /' : '';

    // Are any attributes allowed at all for this element?
    if (@ count($allowed_html[$tag]) == 0) {
        return "<$element$xhtml_slash>";
    }

    // Collect the allowed attributes, in source order.
    $kept = '';

    foreach (wp_kses_hair($attr, $allowed_protocols) as $parsed) {
        $name = strtolower($parsed['name']);

        if (!@ isset($allowed_html[$tag][$name])) {
            continue; // the attribute is not allowed
        }

        $rules = $allowed_html[$tag][$name];
        if ($rules == '') {
            continue; // the attribute is not allowed
        }

        // A non-array entry means "no checks"; an array lists checks that
        // must all pass.
        $passes = true;
        if (is_array($rules)) {
            foreach ($rules as $check => $limit) {
                if (!wp_kses_check_attr_val($parsed['value'], $parsed['vless'], $check, $limit)) {
                    $passes = false;
                    break;
                }
            }
        }

        if ($passes) {
            $kept .= ' ' . $parsed['whole'];
        }
    }

    // Remove any "<" or ">" characters
    $kept = preg_replace('/[<>]/', '', $kept);

    return "<$element$kept$xhtml_slash>";
}
534
/**
 * Builds an attribute list from a string containing attributes.
 *
 * Parses an attribute list into an array with attribute data, and tries to do
 * the right thing even if it gets weird input. Unquoted attribute values are
 * re-emitted in double quotes. Values of URI-carrying attributes (see $uris
 * below) are passed through wp_kses_bad_protocol(); the attribute name is
 * compared case-insensitively for this, because wp_kses_attr() also matches
 * names against the whitelist case-insensitively (otherwise HREF="..." would
 * be emitted without protocol filtering). Attribute names may contain ":" so
 * that names such as xml:lang can be parsed.
 *
 * Duplicate attributes are reduced to the one defined first
 * (foo='bar' foo='baz' results in foo='bar').
 *
 * Each entry of the result is keyed by the attribute name as written and has
 * the keys 'name', 'value', 'whole' (the re-emitted attribute text) and
 * 'vless' ('y' for a valueless attribute, 'n' otherwise).
 *
 * @since 1.0.0
 *
 * @param string $attr Attribute list from HTML element to closing HTML element tag
 * @param array $allowed_protocols Allowed protocols to keep
 * @return array List of attributes after parsing
 */
function wp_kses_hair($attr, $allowed_protocols) {
    $attrarr = array();
    $mode = 0; // 0 = expect name, 1 = expect "=" or whitespace, 2 = expect value
    $attrname = '';
    $uris = array('xmlns', 'profile', 'href', 'src', 'cite', 'classid', 'codebase', 'data', 'usemap', 'longdesc', 'action');

    // Value syntaxes, tried in this order, mapped to the quote character used
    // when the attribute is re-emitted: "value", 'value', bare value.
    $value_forms = array(
        '/^"([^"]*)"(\s+|$)/' => '"',
        '/^\'([^\']*)\'(\s+|$)/' => "'",
        '%^([^\s"\']+)(\s+|$)%' => '"',
    );

    // Loop through the whole attribute list
    while (strlen($attr) != 0) {
        $working = 0; // Was the last operation successful?

        switch ($mode) {
            case 0: // attribute name, href for instance
                if (preg_match('/^([-a-zA-Z:]+)/', $attr, $match)) {
                    $attrname = $match[1];
                    $working = $mode = 1;
                    $attr = (string) substr($attr, strlen($match[0]));
                }
                break;

            case 1: // equals sign or valueless ("selected")
                if (preg_match('/^\s*=\s*/', $attr, $match)) { // equals sign
                    $working = 1;
                    $mode = 2;
                    $attr = (string) substr($attr, strlen($match[0]));
                    break;
                }

                if (preg_match('/^\s+/', $attr, $match)) { // valueless
                    $working = 1;
                    $mode = 0;
                    if (!array_key_exists($attrname, $attrarr)) {
                        $attrarr[$attrname] = array('name' => $attrname, 'value' => '', 'whole' => $attrname, 'vless' => 'y');
                    }
                    $attr = (string) substr($attr, strlen($match[0]));
                }
                break;

            case 2: // attribute value, a URL after href= for instance
                foreach ($value_forms as $pattern => $quote) {
                    if (!preg_match($pattern, $attr, $match)) {
                        continue;
                    }

                    $thisval = $match[1];

                    // Case-insensitive on purpose: HREF must be filtered like href.
                    if (in_array(strtolower($attrname), $uris, true)) {
                        $thisval = wp_kses_bad_protocol($thisval, $allowed_protocols);
                    }

                    // First definition of an attribute wins.
                    if (!array_key_exists($attrname, $attrarr)) {
                        $attrarr[$attrname] = array(
                            'name' => $attrname,
                            'value' => $thisval,
                            'whole' => $attrname . '=' . $quote . $thisval . $quote,
                            'vless' => 'n',
                        );
                    }

                    $working = 1;
                    $mode = 0;
                    $attr = (string) substr($attr, strlen($match[0]));
                    break; // leaves the foreach only
                }
                break;
        } // switch

        if ($working == 0) { // not well formed, remove and try again
            $attr = wp_kses_html_error($attr);
            $mode = 0;
        }
    } // while

    // Special case: the attribute list ends with a valueless attribute like
    // "selected".
    if ($mode == 1 && !array_key_exists($attrname, $attrarr)) {
        $attrarr[$attrname] = array('name' => $attrname, 'value' => '', 'whole' => $attrname, 'vless' => 'y');
    }

    return $attrarr;
}
663
/**
 * Performs a single named check on an attribute value.
 *
 * Implemented checks (the check name is matched case-insensitively):
 * - "maxlen": the value's length must not be greater than $checkvalue;
 * - "minlen": the value's length must not be smaller than $checkvalue;
 * - "maxval": the value must be 1-6 digits, optionally surrounded by up to
 *   six whitespace characters on each side, and must not be greater than
 *   $checkvalue;
 * - "minval": same format rule, and the value must not be smaller than
 *   $checkvalue;
 * - "valueless": $checkvalue "y"/"Y" requires an attribute without a value,
 *   "n"/"N" requires one with a value.
 * Unknown check names always pass.
 *
 * @since 1.0.0
 *
 * @param string $value Attribute value
 * @param string $vless Whether the value is valueless or not. Use 'y' or 'n'
 * @param string $checkname What $checkvalue is checking for.
 * @param mixed $checkvalue What constraint the value should pass
 * @return bool Whether check passes (true) or not (false)
 */
function wp_kses_check_attr_val($value, $vless, $checkname, $checkvalue) {
    // Shape required of numeric values: short, non-negative, little padding.
    $number_format = '/^\s{0,6}[0-9]{1,6}\s{0,6}$/';

    switch (strtolower($checkname)) {
        case 'maxlen':
            return !(strlen($value) > $checkvalue);

        case 'minlen':
            return !(strlen($value) < $checkvalue);

        case 'maxval':
            return preg_match($number_format, $value) && !($value > $checkvalue);

        case 'minval':
            return preg_match($number_format, $value) && !($value < $checkvalue);

        case 'valueless':
            return !(strtolower($checkvalue) != $vless);
    }

    // No such check: nothing to fail.
    return true;
}
735
/**
 * Sanitizes a string from bad protocols.
 *
 * Removes all non-allowed protocols from the beginning of $string by applying
 * wp_kses_bad_protocol_once() repeatedly until the string stops changing, so
 * it is not fooled by input like "javascript:javascript:alert(57)".
 *
 * @since 1.0.0
 *
 * @param string $string Content to filter bad protocols from
 * @param array $allowed_protocols Allowed protocols to keep
 * @return string Filtered content
 */
function wp_kses_bad_protocol($string, $allowed_protocols) {
    // Deals with an Opera "feature": 0xAD (soft hyphen) bytes are dropped.
    // NOTE(review): this works on bytes, so it presumably also removes 0xAD
    // bytes that are part of multi-byte UTF-8 characters -- confirm.
    $string = preg_replace('/\xad+/', '', wp_kses_no_null($string));

    // Strip one protocol per pass until a pass changes nothing.
    do {
        $before = $string;
        $string = wp_kses_bad_protocol_once($before, $allowed_protocols);
    } while ($string != $before);

    return $string;
}
762
/**
 * Removes any NULL characters in $string.
 *
 * Both real NUL bytes and the literal two-character sequence backslash-zero
 * are removed, in that order.
 *
 * @since 1.0.0
 *
 * @param string $string
 * @return string
 */
function wp_kses_no_null($string) {
    $null_forms = array(
        '/\0+/',      // actual NUL bytes
        '/(\\\\0)+/', // the text "\0"
    );

    // With an array of patterns preg_replace() applies them one after another.
    return preg_replace($null_forms, '', $string);
}
777
/**
 * Strips slashes from in front of double quotes.
 *
 * Changes the character sequence \" to just " and leaves all other slashes
 * alone. According to the original author this was needed because of the
 * quoting done by preg_replace() with the /e modifier.
 *
 * @since 1.0.0
 *
 * @param string $string String to strip slashes
 * @return string String with \" replaced by "
 */
function wp_kses_stripslashes($string) {
    $escaped_quote = '%\\\\"%';
    $bare_quote = '"';

    return preg_replace($escaped_quote, $bare_quote, $string);
}
793
/**
 * Lower-cases the keys of a two-level array.
 *
 * Both the outer keys (element names) and the keys of each inner array
 * (attribute names) are lower-cased; the innermost values are copied as they
 * are. Non-array input at either level is cast to an array first.
 *
 * @since 1.0.0
 *
 * @param array $inarray Unfiltered array
 * @return array Fixed array with all lowercase keys
 */
function wp_kses_array_lc($inarray) {
    $outarray = array();

    foreach ((array) $inarray as $element => $attributes) {
        $lowered = array();

        foreach ((array) $attributes as $attribute => $checks) {
            $lowered[strtolower($attribute)] = $checks;
        }

        $outarray[strtolower($element)] = $lowered;
    }

    return $outarray;
}
817
/**
 * Removes the HTML JavaScript entities found in early versions of Netscape 4.
 *
 * These have the form "&{...};". An unterminated one is removed up to the end
 * of the string.
 *
 * @since 1.0.0
 *
 * @param string $string
 * @return string
 */
function wp_kses_js_entities($string) {
    $js_entity = '%&\s*\{[^}]*(\}\s*;?|$)%';

    return preg_replace($js_entity, '', $string);
}
829
/**
 * Handles parsing errors in wp_kses_hair().
 *
 * Removes everything from the start of the string up to and including the
 * next run of whitespace. Quoted and apostrophed sections are skipped as a
 * whole, so whitespace inside them does not end the removal.
 *
 * @since 1.0.0
 *
 * @param string $string
 * @return string
 */
function wp_kses_html_error($string) {
    $broken_prefix = '/^("[^"]*("|$)|\'[^\']*(\'|$)|\S)*\s*/';

    return preg_replace($broken_prefix, '', $string);
}
844
/**
 * Sanitizes content from bad protocols and other characters.
 *
 * Looks for one URL protocol at the beginning of $string. The string is split
 * at the first ":" (or its "&#58;" / "&#x3a;" entity form); unless the part
 * before it contains "/?", that part is checked by
 * wp_kses_bad_protocol_once2() and the trimmed remainder is appended to the
 * result.
 *
 * The allowed protocols are handed to the callback through the
 * $_kses_allowed_protocols global.
 *
 * @since 1.0.0
 *
 * @param string $string Content to check for bad protocols
 * @param string $allowed_protocols Allowed protocols
 * @return string Sanitized content
 */
function wp_kses_bad_protocol_once($string, $allowed_protocols) {
    global $_kses_allowed_protocols;
    $_kses_allowed_protocols = $allowed_protocols;

    $pieces = preg_split('/:|&#58;|&#x3a;/i', $string, 2);

    // No separator at all, or "/?" appears before it: fall back to the
    // anchored regular expression.
    if (!isset($pieces[1]) || preg_match('%/\?%', $pieces[0])) {
        return preg_replace_callback(
            '/^((&[^;]*;|[\sA-Za-z0-9])*)(:|&#58;|&#[Xx]3[Aa];)\s*/',
            'wp_kses_bad_protocol_once2',
            $string
        );
    }

    // Everything before the separator is the protocol candidate.
    return wp_kses_bad_protocol_once2($pieces[0]) . trim($pieces[1]);
}
869
/**
 * Callback for wp_kses_bad_protocol_once().
 *
 * Normalizes a protocol candidate (numeric entities decoded; whitespace, NUL
 * characters and 0xAD bytes removed; lower-cased) and looks it up in the
 * $_kses_allowed_protocols global. An allowed protocol is returned in its
 * normalized form followed by ":"; anything else yields an empty string.
 *
 * @access private
 * @since 1.0.0
 *
 * @param mixed $matches string or preg_replace_callback() matches array to check for bad protocols
 * @return string Sanitized content
 */
function wp_kses_bad_protocol_once2($matches) {
    global $_kses_allowed_protocols;

    // Called either directly with a string, or by preg_replace_callback()
    // with a matches array whose first group is the candidate.
    if (is_array($matches)) {
        if (!isset($matches[1]) || empty($matches[1])) {
            return '';
        }
        $candidate = $matches[1];
    } else {
        $candidate = $matches;
    }

    $candidate = wp_kses_decode_entities($candidate);
    $candidate = preg_replace('/\s/', '', $candidate);
    $candidate = wp_kses_no_null($candidate);
    $candidate = preg_replace('/\xad+/', '', $candidate); // deals with Opera "feature"
    $candidate = strtolower($candidate);

    foreach ((array) $_kses_allowed_protocols as $protocol) {
        if (strtolower($protocol) == $candidate) {
            return "$candidate:";
        }
    }

    return '';
}
913
/**
 * Converts and fixes HTML entities.
 *
 * Normalizes HTML entities: "AT&T" becomes "AT&amp;T", "&#00058;" becomes
 * "&#58;", "&#XYZZY;" becomes "&amp;#XYZZY;" and so on.
 *
 * @since 1.0.0
 *
 * @param string $string Content to normalize entities
 * @return string Content with normalized entities
 */
function wp_kses_normalize_entities($string) {
    // Disarm all entities by converting & to &amp;
    $disarmed = str_replace('&', '&amp;', $string);

    // Then change back what is acceptable: named entities first ...
    $named = preg_replace('/&amp;([A-Za-z][A-Za-z0-9]{0,19});/', '&\\1;', $disarmed);

    // ... then decimal entities, validated by the callback ...
    $decimal = preg_replace_callback('/&amp;#0*([0-9]{1,5});/', 'wp_kses_normalize_entities2', $named);

    // ... and finally hexadecimal entities, validated by their own callback.
    return preg_replace_callback('/&amp;#([Xx])0*(([0-9A-Fa-f]{2}){1,2});/', 'wp_kses_normalize_entities3', $decimal);
}
938
/**
 * Callback for the decimal-entity pass of wp_kses_normalize_entities().
 *
 * Restores "&#number;" only when the number passes valid_unicode() and is
 * not greater than 65535; otherwise the entity stays disarmed as
 * "&amp;#number;". A missing or empty number (including "0") yields an empty
 * string.
 *
 * @access private
 * @since 1.0.0
 *
 * @param array $matches preg_replace_callback() matches array
 * @return string Correctly encoded entity
 */
function wp_kses_normalize_entities2($matches) {
    if (!isset($matches[1]) || empty($matches[1])) {
        return '';
    }

    $i = $matches[1];

    if (!valid_unicode($i) || $i > 65535) {
        return "&amp;#$i;";
    }

    return "&#$i;";
}
958
/**
 * Callback for the hexadecimal-entity pass of wp_kses_normalize_entities().
 *
 * Restores "&#xHEX;" only when the hexadecimal number passes valid_unicode();
 * otherwise the entity stays disarmed as "&amp;#xHEX;". The "x" is always
 * emitted in lower case. Missing or empty digits yield an empty string.
 *
 * @access private
 *
 * @param array $matches preg_replace_callback() matches array
 * @return string Correctly encoded entity
 */
function wp_kses_normalize_entities3($matches) {
    if (!isset($matches[2]) || empty($matches[2])) {
        return '';
    }

    $hexchars = $matches[2];

    if (valid_unicode(hexdec($hexchars))) {
        return "&#x$hexchars;";
    }

    return "&amp;#x$hexchars;";
}
977
/**
 * Helper function to determine if a Unicode value is valid.
 *
 * Accepted are tab (0x9), line feed (0xa), carriage return (0xd) and the
 * ranges 0x20-0xd7ff, 0xe000-0xfffd and 0x10000-0x10ffff.
 *
 * @param int $i Unicode value
 * @return bool true if the value was a valid Unicode number
 */
function valid_unicode($i) {
    // The three permitted control characters.
    if ($i == 0x9 || $i == 0xa || $i == 0xd) {
        return true;
    }

    // 0x20 up to the code point before 0xd800.
    if ($i >= 0x20 && $i <= 0xd7ff) {
        return true;
    }

    // 0xe000 up to 0xfffd (0xfffe and 0xffff are excluded).
    if ($i >= 0xe000 && $i <= 0xfffd) {
        return true;
    }

    // Everything from 0x10000 through 0x10ffff.
    return ($i >= 0x10000 && $i <= 0x10ffff);
}
990
/**
 * Converts numeric entities to their character counterparts.
 *
 * Decodes numeric HTML entities (&#65; and &#x41;) with chr(), decimal forms
 * first and hexadecimal forms in a second pass. It doesn't do anything with
 * other entities like &auml;, but we don't need them in the URL protocol
 * whitelisting system anyway.
 *
 * @since 1.0.0
 *
 * @param string $string Content to change entities
 * @return string Content after decoded entities
 */
function wp_kses_decode_entities($string) {
    // Pattern => converter. Order matters: the hexadecimal pass runs on the
    // output of the decimal pass.
    $passes = array(
        '/&#([0-9]+);/' => function ($found) {
            return chr($found[1]);
        },
        '/&#[Xx]([0-9A-Fa-f]+);/' => function ($found) {
            return chr(hexdec($found[1]));
        },
    );

    foreach ($passes as $pattern => $to_char) {
        $string = preg_replace_callback($pattern, $to_char, $string);
    }

    return $string;
}
1013
/**
 * Sanitizes slashed content with the $allowedtags Kses rules.
 *
 * The content is unslashed, filtered through wp_kses() and slashed again.
 *
 * @since 1.0.0
 * @uses $allowedtags
 *
 * @param string $data Content to filter
 * @return string Filtered content
 */
function wp_filter_kses($data) {
    global $allowedtags;

    $unslashed = stripslashes($data);
    $filtered = wp_kses($unslashed, $allowedtags);

    return addslashes($filtered);
}
1027
/**
 * Sanitizes slashed content with the $allowedposttags Kses rules.
 *
 * Post content refers to the page contents of the 'post' type and not $_POST
 * data from forms. The content is unslashed, filtered through wp_kses() and
 * slashed again.
 *
 * @since 2.0.0
 * @uses $allowedposttags
 *
 * @param string $data Post content to filter
 * @return string Filtered post content with allowed HTML tags and attributes intact.
 */
function wp_filter_post_kses($data) {
    global $allowedposttags;

    $unslashed = stripslashes($data);
    $filtered = wp_kses($unslashed, $allowedposttags);

    return addslashes($filtered);
}
1044
/**
 * Strips all of the HTML in slashed content.
 *
 * Runs wp_kses() with an empty whitelist on the unslashed content and slashes
 * the result again.
 *
 * @since 2.1.0
 *
 * @param string $data Content to strip all HTML from
 * @return string Filtered content without any HTML
 */
function wp_filter_nohtml_kses($data) {
    $nothing_allowed = array();
    $filtered = wp_kses(stripslashes($data), $nothing_allowed);

    return addslashes($filtered);
}
1056
/**
 * Adds all Kses input form content filters.
 *
 * All filters are added without an explicit priority. wp_filter_kses() is
 * attached to the 'pre_comment_content' and 'title_save_pre' hooks;
 * wp_filter_post_kses() is attached to the 'content_save_pre',
 * 'excerpt_save_pre' and 'content_filtered_save_pre' hooks.
 *
 * @since 2.0.0
 * @uses add_filter() See description for what functions are added to what hooks.
 */
function kses_init_filters() {
    $filters = array(
        // Normal filtering.
        'pre_comment_content' => 'wp_filter_kses',
        'title_save_pre' => 'wp_filter_kses',
        // Post filtering
        'content_save_pre' => 'wp_filter_post_kses',
        'excerpt_save_pre' => 'wp_filter_post_kses',
        'content_filtered_save_pre' => 'wp_filter_post_kses',
    );

    foreach ($filters as $hook => $callback) {
        add_filter($hook, $callback);
    }
}
1079
/**
 * Removes all Kses input form content filters.
 *
 * Detaches wp_filter_kses() from 'pre_comment_content' and 'title_save_pre',
 * and wp_filter_post_kses() from 'content_save_pre', 'excerpt_save_pre' and
 * 'content_filtered_save_pre'.
 *
 * Does not remove the kses_init() function from the 'init' hook, nor from the
 * 'set_current_user' hook.
 *
 * @since 2.0.6
 */
function kses_remove_filters() {
    $filters = array(
        // Normal filtering.
        'pre_comment_content' => 'wp_filter_kses',
        'title_save_pre' => 'wp_filter_kses',
        // Post filtering
        'content_save_pre' => 'wp_filter_post_kses',
        'excerpt_save_pre' => 'wp_filter_post_kses',
        'content_filtered_save_pre' => 'wp_filter_post_kses',
    );

    foreach ($filters as $hook => $callback) {
        remove_filter($hook, $callback);
    }
}
1102
/**
 * Sets up the Kses filters for input form content.
 *
 * First removes all of the Kses filters, in case the current user does not
 * need to have Kses filter the content. The filters are then added back only
 * if the current user does not have the 'unfiltered_html' capability.
 *
 * If kses_init() is removed from the 'init' and 'set_current_user' hooks,
 * this function never runs and none of the Kses filter hooks are added.
 *
 * @uses kses_remove_filters() Removes the Kses filters
 * @uses kses_init_filters() Adds the Kses filters back if the user
 * does not have unfiltered HTML capability.
 * @since 2.0.0
 */
function kses_init() {
    kses_remove_filters();

    if (!current_user_can('unfiltered_html')) {
        kses_init_filters();
    }
}
1125
// Re-evaluate the Kses filters on 'init' and again whenever the current user
// is set; both hooks are registered without an explicit priority.
foreach (array('init', 'set_current_user') as $kses_hook) {
    add_action($kses_hook, 'kses_init');
}
unset($kses_hook); // do not leave the loop variable behind in global scope
1128?>