VicDict.php 3.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136
  1. <?php
  2. /**
  3. * Add word to dict.
  4. */
  5. namespace Lizhichao\Word;
  /**
   * Editable word dictionary stored as a nested character trie.
   *
   * Each level of the trie is an array keyed by a single character. A node that
   * terminates a word additionally carries the reserved keys '\' (value 1) and
   * '\x' (the part of speech passed to add(), or null).
   *
   * The dictionary is loaded from a ".json" or ".igb" (igbinary) file in the
   * constructor and written back to the same file by save().
   */
  class VicDict
  {
      /** @var array Root of the character trie. */
      private $word = [];

      /** @var string Encoding used to split words into characters. */
      private $code = 'utf-8';

      /** @var array Markers written on the terminal node of the word being added. */
      private $end = ['\\' => 1];

      /** @var array Markers every terminal node receives regardless of part of speech. */
      private $default_end = ['\\' => 1];

      /** @var string Reserved key whose presence marks "a word ends at this node". */
      private $end_key = '\\';

      /** @var string Dictionary file extension ("json" or "igb"). */
      private $type = '';

      /** @var string Path of the dictionary file that is read and saved. */
      private $dictPath = '';

      /**
       * VicDict constructor.
       *
       * @param string $path Dictionary file path; when empty, "Data/dict.json"
       *                     next to this file's parent directory is used.
       *
       * @throws \Exception When the file is missing or unreadable, its extension
       *                    is not "json"/"igb", igbinary is required but not
       *                    loaded, or the stored data cannot be decoded.
       */
      public function __construct($path = '')
      {
          $this->dictPath = $path === '' ? dirname(__DIR__) . '/Data/dict.json' : $path;

          // PATHINFO_EXTENSION returns '' for extension-less paths instead of
          // triggering an undefined-index warning.
          $this->type = \pathinfo($this->dictPath, \PATHINFO_EXTENSION);

          if ( ! \file_exists($this->dictPath)) {
              throw new \Exception("Invalid dict file: {$this->dictPath}");
          }

          // check dict type
          switch ($this->type) {
              case 'igb':
                  if ( ! \function_exists('igbinary_unserialize')) {
                      throw new \Exception('Requires igbinary PHP extension.');
                  }
                  $raw  = $this->read();
                  $data = $raw === '' ? null : \igbinary_unserialize($raw);
                  break;
              case 'json':
                  $raw  = $this->read();
                  $data = null;
                  if (\trim($raw) !== '') {
                      $data = \json_decode($raw, true);
                      // null is ambiguous: it is both the literal "null" and the
                      // failure value, so consult the decoder's error state.
                      if ($data === null && \json_last_error() !== \JSON_ERROR_NONE) {
                          throw new \Exception("Invalid dict data: {$this->dictPath}");
                      }
                  }
                  break;
              default:
                  throw new \Exception('Invalid dict type.');
          }

          // An empty file or a stored null means "no words yet".
          if ($data === null) {
              $data = [];
          }
          // Anything else that is not an array cannot be a trie; refusing it
          // here prevents save() from overwriting a damaged file with an
          // (apparently) empty dictionary.
          if ( ! \is_array($data)) {
              throw new \Exception("Invalid dict data: {$this->dictPath}");
          }

          $this->word = $data;
      }

      /**
       * Add a word to the in-memory dictionary (call save() to persist it).
       *
       * Newlines, tabs and carriage returns are stripped from the word first.
       * If the word is already present its stored part of speech is left as is.
       *
       * @param string      $word Word to add.
       * @param null|string $x    Part of speech.
       *
       * @return bool True when the word is in the dictionary afterwards; false
       *              when the word is empty after filtering or contains the
       *              reserved '\' character.
       */
      public function add($word, $x = null)
      {
          $this->end = ['\\x' => $x] + $this->default_end;
          $word      = $this->filter($word);

          // Compare against '' explicitly: a loose truthiness test would reject
          // the perfectly valid word "0".
          if ( ! \is_string($word) || $word === '') {
              return false;
          }

          // '\' is the trie's end-marker key; a word containing it would be
          // stored on top of that marker and corrupt the node.
          if (\strpos($word, $this->end_key) !== false) {
              return false;
          }

          return $this->merge($word);
      }

      /**
       * Write the dictionary back to the file it was loaded from.
       *
       * @return false|int Number of bytes written, or false when serialization
       *                   or the write itself fails.
       */
      public function save()
      {
          if ('igb' === $this->type) {
              $str = \igbinary_serialize($this->word);
          } else {
              $str = \json_encode($this->word);
          }

          // Never write a failed serialization result: that would truncate the
          // existing dictionary file.
          if ( ! \is_string($str)) {
              return false;
          }

          return \file_put_contents($this->dictPath, $str);
      }

      /**
       * Read the raw dictionary file.
       *
       * @return string File contents.
       *
       * @throws \Exception When the file cannot be read.
       */
      private function read()
      {
          $raw = \file_get_contents($this->dictPath);
          if ($raw === false) {
              throw new \Exception("Invalid dict file: {$this->dictPath}");
          }

          return $raw;
      }

      /**
       * Insert an already filtered word into the trie.
       *
       * @param string $word Non-empty word.
       *
       * @return bool Always true.
       */
      private function merge($word)
      {
          $chars = $this->toArr($word);
          $node  = &$this->word;

          foreach ($chars as $i => $char) {
              if ( ! isset($node[$char])) {
                  // The path diverges here: build the remaining characters as a
                  // fresh chain ending in the end markers and attach it.
                  $node[$char] = $this->dict(\array_slice($chars, $i + 1), $this->end);

                  return true;
              }
              $node = &$node[$char];
          }

          // Every character already existed (the word is a prefix of another
          // word); mark the final node as a word end unless it already is one.
          if ( ! isset($node[$this->end_key])) {
              foreach ($this->end as $k => $v) {
                  $node[$k] = $v;
              }
          }

          return true;
      }

      /**
       * Strip newline, tab and carriage-return characters from a word.
       *
       * @param string $word
       *
       * @return string
       */
      private function filter($word)
      {
          return \str_replace(["\n", "\t", "\r"], '', $word);
      }

      /**
       * Build a single-branch chain of nested arrays.
       *
       * dict(['a', 'b'], $v) yields ['a' => ['b' => $v]]; an empty list yields $v.
       *
       * @param array $arr List of characters.
       * @param mixed $v   Value placed at the innermost level.
       * @param int   $i   Index of the first character to use.
       *
       * @return mixed
       */
      private function dict($arr, $v, $i = 0)
      {
          $node = $v;
          // Wrap from the innermost character outwards.
          for ($j = \count($arr) - 1; $j >= $i; --$j) {
              $node = [$arr[$j] => $node];
          }

          return $node;
      }

      /**
       * Split a string into a list of characters using the configured encoding.
       *
       * @param string $str
       *
       * @return array
       */
      private function toArr($str)
      {
          $l = \mb_strlen($str, $this->code);
          $r = [];
          for ($i = 0; $i < $l; ++$i) {
              $r[] = \mb_substr($str, $i, 1, $this->code);
          }

          return $r;
      }
  }