FiterService.php 4.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118
  1. <?php
  2. // +----------------------------------------------------------------------
  3. // | CRMEB [ CRMEB赋能开发者,助力企业发展 ]
  4. // +----------------------------------------------------------------------
  5. // | Copyright (c) 2016~2020 https://www.crmeb.com All rights reserved.
  6. // +----------------------------------------------------------------------
  7. // | Licensed CRMEB并不是自由软件,未经许可不能去掉CRMEB相关版权
  8. // +----------------------------------------------------------------------
  9. // | Author: CRMEB Team <admin@crmeb.com>
  10. // +----------------------------------------------------------------------
  11. namespace service;
  /**
   * Sensitive-word highlighter backed by a character trie.
   *
   * The dictionary file holds one word per line. Every word is lower-cased,
   * split into UTF-8 characters and inserted into a nested-array trie; a node
   * that completes a word carries the key 'end'. filter() walks the trie over
   * the input text and wraps every character of a detected word in a red
   * <font> tag.
   */
  class Fiter
  {
      /**
       * Trie root: character => child node (array). A node that terminates a
       * dictionary word additionally has the key 'end' set to true. 'end' can
       * never collide with a real edge because edges are single characters.
       *
       * @var array
       */
      private $dict;

      /**
       * Path of the dictionary file (one word per line).
       *
       * @var string
       */
      private $dictPath = '';

      /**
       * Load the dictionary and build the trie.
       *
       * @param string $dictPath path of the dictionary file
       *
       * @throws \Exception when the dictionary file cannot be opened
       */
      public function __construct($dictPath = '')
      {
          $this->dict = array();
          $this->dictPath = $dictPath;
          $this->initDict();
      }

      /**
       * Read the dictionary file line by line and insert each word into the trie.
       *
       * @throws \Exception when the dictionary file cannot be opened
       */
      private function initDict()
      {
          // An empty path makes fopen() raise a ValueError on PHP 8; report it
          // through the same exception as any other unreadable dictionary.
          if (!is_string($this->dictPath) || $this->dictPath === '') {
              throw new \Exception('未找到敏感词过滤文件');
          }
          // The failure is reported by the exception below, so the redundant
          // fopen() warning is suppressed on purpose.
          $handle = @fopen($this->dictPath, 'r');
          if (!$handle) {
              throw new \Exception('未找到敏感词过滤文件');
          }
          try {
              // Read whole lines: a fixed read length would cut long words into
              // several bogus entries and could split a multi-byte character.
              while (($line = fgets($handle)) !== false) {
                  $word = trim($line);
                  // Compare against '' explicitly: empty() would also drop "0".
                  if ($word === '') {
                      continue;
                  }
                  $chars = $this->unicodeSplit($word);
                  // A line without any valid UTF-8 character must not mark the
                  // trie root itself as a complete word.
                  if (count($chars) === 0) {
                      continue;
                  }
                  $node = &$this->dict;
                  foreach ($chars as $char) {
                      if (!isset($node[$char])) {
                          $node[$char] = array();
                      }
                      $node = &$node[$char];
                  }
                  $node['end'] = true;
                  // Drop the reference so the next word starts from a clean binding.
                  unset($node);
              }
          } finally {
              fclose($handle);
          }
      }

      /**
       * Highlight dictionary words found in $str.
       *
       * Matching is case-insensitive (byte-wise strtolower) and tolerates
       * unrelated characters between two consecutive characters of a word:
       * after each matched character the next $maxDistance positions are
       * examined for the following one. Every character that belongs to a
       * detected word is wrapped in <font color='red'>…</font>; all other
       * characters are returned as they were passed in (original case kept).
       * Bytes that are not valid UTF-8 are dropped, as in unicodeSplit().
       *
       * The text is NOT HTML-escaped here; callers that render the result must
       * escape untrusted input themselves.
       *
       * @param string $str         text to scan
       * @param int    $maxDistance positions scanned after a matched character
       *                            (values below 1 are treated as 1)
       *
       * @return string the text with detected words highlighted
       */
      public function filter($str, $maxDistance = 5)
      {
          if ($maxDistance < 1) {
              $maxDistance = 1;
          }
          // $chars keeps the caller's text for output; $keys is the lower-cased
          // view used only for trie lookups, index-aligned with $chars.
          $chars = $this->splitUtf8((string) $str);
          $keys = array_map('strtolower', $chars);
          $count = count($chars);
          // Positions already highlighted; they neither start nor join another match.
          $flagged = array();

          for ($i = 0; $i < $count; $i++) {
              if (isset($flagged[$i]) || !isset($this->dict[$keys[$i]])) {
                  continue;
              }
              $node = $this->dict[$keys[$i]];
              $matched = array();
              // Number of entries of $matched that belong to the longest complete
              // word seen so far; null while no complete word has been seen.
              $endCount = isset($node['end']) ? 0 : null;
              for ($j = $i + 1, $d = 0; $d < $maxDistance && $j < $count; $j++, $d++) {
                  if (!isset($flagged[$j]) && isset($node[$keys[$j]])) {
                      $matched[] = $j;
                      $node = $node[$keys[$j]];
                      // Restart the distance counter (the loop's $d++ makes it 0).
                      $d = -1;
                      if (isset($node['end'])) {
                          $endCount = count($matched);
                      }
                  }
              }
              if ($endCount === null) {
                  continue;
              }
              $flagged[$i] = true;
              // Only the characters up to the last complete word are highlighted;
              // a longer, unfinished continuation is ignored.
              foreach (array_slice($matched, 0, $endCount) as $k) {
                  // Skip over directly adjacent matched characters so the outer
                  // loop does not rescan them.
                  if ($k - $i === 1) {
                      $i = $k;
                  }
                  $flagged[$k] = true;
              }
          }

          foreach ($flagged as $index => $unused) {
              $chars[$index] = "<font color='red'>{$chars[$index]}</font>";
          }
          return implode($chars);
      }

      /**
       * Lower-case $str and split it into UTF-8 characters.
       *
       * Lower-casing is done with strtolower(), i.e. byte-wise (ASCII-only on
       * PHP >= 8.2, locale-dependent before). Bytes that do not form a valid
       * 1- to 4-byte UTF-8 sequence are silently dropped.
       *
       * @param string $str text to split
       *
       * @return array list of single-character strings
       */
      public function unicodeSplit($str)
      {
          return $this->splitUtf8(strtolower((string) $str));
      }

      /**
       * Split $str into UTF-8 characters without changing their case.
       *
       * Bytes that do not form a valid 1- to 4-byte UTF-8 sequence are dropped.
       *
       * @param string $str text to split
       *
       * @return array list of single-character strings
       */
      private function splitUtf8($str)
      {
          $chars = array();
          $len = strlen($str);
          for ($i = 0; $i < $len; $i++) {
              $byte = ord($str[$i]);
              if ($byte < 0x80) {
                  // Plain ASCII byte.
                  $chars[] = $str[$i];
                  continue;
              }
              // Sequence width announced by the lead byte.
              if (($byte & 0xf8) === 0xf0) {
                  $width = 4;
              } elseif (($byte & 0xf0) === 0xe0) {
                  $width = 3;
              } elseif (($byte & 0xe0) === 0xc0) {
                  $width = 2;
              } else {
                  // Stray continuation byte or invalid lead byte: drop it.
                  continue;
              }
              if ($len - $i < $width) {
                  // Truncated sequence at the end of the string: drop the lead byte.
                  continue;
              }
              $valid = true;
              for ($k = 1; $k < $width; $k++) {
                  if ((ord($str[$i + $k]) & 0xc0) !== 0x80) {
                      $valid = false;
                      break;
                  }
              }
              if ($valid) {
                  $chars[] = substr($str, $i, $width);
                  // The loop's $i++ consumes the last byte of the sequence.
                  $i += $width - 1;
              }
          }
          return $chars;
      }
  }