RemoveForeignElements.php 9.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207
  1. <?php
  2. /**
  3. * Removes all unrecognized tags from the list of tokens.
  4. *
  5. * This strategy iterates through all the tokens and removes unrecognized
  6. * tokens. If a token is not recognized but a TagTransform is defined for
  7. * that element, the element will be transformed accordingly.
  8. */
  9. class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
  10. {
  11. /**
  12. * @param HTMLPurifier_Token[] $tokens
  13. * @param HTMLPurifier_Config $config
  14. * @param HTMLPurifier_Context $context
  15. * @return array|HTMLPurifier_Token[]
  16. */
  17. public function execute($tokens, $config, $context)
  18. {
  19. $definition = $config->getHTMLDefinition();
  20. $generator = new HTMLPurifier_Generator($config, $context);
  21. $result = array();
  22. $escape_invalid_tags = $config->get('Core.EscapeInvalidTags');
  23. $remove_invalid_img = $config->get('Core.RemoveInvalidImg');
  24. // currently only used to determine if comments should be kept
  25. $trusted = $config->get('HTML.Trusted');
  26. $comment_lookup = $config->get('HTML.AllowedComments');
  27. $comment_regexp = $config->get('HTML.AllowedCommentsRegexp');
  28. $check_comments = $comment_lookup !== array() || $comment_regexp !== null;
  29. $remove_script_contents = $config->get('Core.RemoveScriptContents');
  30. $hidden_elements = $config->get('Core.HiddenElements');
  31. // remove script contents compatibility
  32. if ($remove_script_contents === true) {
  33. $hidden_elements['script'] = true;
  34. } elseif ($remove_script_contents === false && isset($hidden_elements['script'])) {
  35. unset($hidden_elements['script']);
  36. }
  37. $attr_validator = new HTMLPurifier_AttrValidator();
  38. // removes tokens until it reaches a closing tag with its value
  39. $remove_until = false;
  40. // converts comments into text tokens when this is equal to a tag name
  41. $textify_comments = false;
  42. $token = false;
  43. $context->register('CurrentToken', $token);
  44. $e = false;
  45. if ($config->get('Core.CollectErrors')) {
  46. $e =& $context->get('ErrorCollector');
  47. }
  48. foreach ($tokens as $token) {
  49. if ($remove_until) {
  50. if (empty($token->is_tag) || $token->name !== $remove_until) {
  51. continue;
  52. }
  53. }
  54. if (!empty($token->is_tag)) {
  55. // DEFINITION CALL
  56. // before any processing, try to transform the element
  57. if (isset($definition->info_tag_transform[$token->name])) {
  58. $original_name = $token->name;
  59. // there is a transformation for this tag
  60. // DEFINITION CALL
  61. $token = $definition->
  62. info_tag_transform[$token->name]->transform($token, $config, $context);
  63. if ($e) {
  64. $e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Tag transform', $original_name);
  65. }
  66. }
  67. if (isset($definition->info[$token->name])) {
  68. // mostly everything's good, but
  69. // we need to make sure required attributes are in order
  70. if (($token instanceof HTMLPurifier_Token_Start || $token instanceof HTMLPurifier_Token_Empty) &&
  71. $definition->info[$token->name]->required_attr &&
  72. ($token->name != 'img' || $remove_invalid_img) // ensure config option still works
  73. ) {
  74. $attr_validator->validateToken($token, $config, $context);
  75. $ok = true;
  76. foreach ($definition->info[$token->name]->required_attr as $name) {
  77. if (!isset($token->attr[$name])) {
  78. $ok = false;
  79. break;
  80. }
  81. }
  82. if (!$ok) {
  83. if ($e) {
  84. $e->send(
  85. E_ERROR,
  86. 'Strategy_RemoveForeignElements: Missing required attribute',
  87. $name
  88. );
  89. }
  90. continue;
  91. }
  92. $token->armor['ValidateAttributes'] = true;
  93. }
  94. if (isset($hidden_elements[$token->name]) && $token instanceof HTMLPurifier_Token_Start) {
  95. $textify_comments = $token->name;
  96. } elseif ($token->name === $textify_comments && $token instanceof HTMLPurifier_Token_End) {
  97. $textify_comments = false;
  98. }
  99. } elseif ($escape_invalid_tags) {
  100. // invalid tag, generate HTML representation and insert in
  101. if ($e) {
  102. $e->send(E_WARNING, 'Strategy_RemoveForeignElements: Foreign element to text');
  103. }
  104. $token = new HTMLPurifier_Token_Text(
  105. $generator->generateFromToken($token)
  106. );
  107. } else {
  108. // check if we need to destroy all of the tag's children
  109. // CAN BE GENERICIZED
  110. if (isset($hidden_elements[$token->name])) {
  111. if ($token instanceof HTMLPurifier_Token_Start) {
  112. $remove_until = $token->name;
  113. } elseif ($token instanceof HTMLPurifier_Token_Empty) {
  114. // do nothing: we're still looking
  115. } else {
  116. $remove_until = false;
  117. }
  118. if ($e) {
  119. $e->send(E_ERROR, 'Strategy_RemoveForeignElements: Foreign meta element removed');
  120. }
  121. } else {
  122. if ($e) {
  123. $e->send(E_ERROR, 'Strategy_RemoveForeignElements: Foreign element removed');
  124. }
  125. }
  126. continue;
  127. }
  128. } elseif ($token instanceof HTMLPurifier_Token_Comment) {
  129. // textify comments in script tags when they are allowed
  130. if ($textify_comments !== false) {
  131. $data = $token->data;
  132. $token = new HTMLPurifier_Token_Text($data);
  133. } elseif ($trusted || $check_comments) {
  134. // always cleanup comments
  135. $trailing_hyphen = false;
  136. if ($e) {
  137. // perform check whether or not there's a trailing hyphen
  138. if (substr($token->data, -1) == '-') {
  139. $trailing_hyphen = true;
  140. }
  141. }
  142. $token->data = rtrim($token->data, '-');
  143. $found_double_hyphen = false;
  144. while (strpos($token->data, '--') !== false) {
  145. $found_double_hyphen = true;
  146. $token->data = str_replace('--', '-', $token->data);
  147. }
  148. if ($trusted || !empty($comment_lookup[trim($token->data)]) ||
  149. ($comment_regexp !== null && preg_match($comment_regexp, trim($token->data)))) {
  150. // OK good
  151. if ($e) {
  152. if ($trailing_hyphen) {
  153. $e->send(
  154. E_NOTICE,
  155. 'Strategy_RemoveForeignElements: Trailing hyphen in comment removed'
  156. );
  157. }
  158. if ($found_double_hyphen) {
  159. $e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Hyphens in comment collapsed');
  160. }
  161. }
  162. } else {
  163. if ($e) {
  164. $e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Comment removed');
  165. }
  166. continue;
  167. }
  168. } else {
  169. // strip comments
  170. if ($e) {
  171. $e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Comment removed');
  172. }
  173. continue;
  174. }
  175. } elseif ($token instanceof HTMLPurifier_Token_Text) {
  176. } else {
  177. continue;
  178. }
  179. $result[] = $token;
  180. }
  181. if ($remove_until && $e) {
  182. // we removed tokens until the end, throw error
  183. $e->send(E_ERROR, 'Strategy_RemoveForeignElements: Token removed to end', $remove_until);
  184. }
  185. $context->destroy('CurrentToken');
  186. return $result;
  187. }
  188. }
  189. // vim: et sw=4 sts=4