123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534 |
- <?php
// Bootstrap the PHPExcel autoloader when this reader is loaded before PHPEXCEL_ROOT exists.
if (defined('PHPEXCEL_ROOT') === false) {
    // The root is resolved two directories above this file; the trailing slash is part of the value.
    define('PHPEXCEL_ROOT', __DIR__ . '/../../');
    require PHPEXCEL_ROOT . 'PHPExcel/Autoloader.php';
}
- class PHPExcel_Reader_HTML extends PHPExcel_Reader_Abstract implements PHPExcel_Reader_IReader
- {
-
/**
 * Name of the input encoding; in this class it is only stored and returned by the accessors.
 *
 * @var string
 */
protected $_inputEncoding = 'ANSI';

/**
 * Index of the worksheet that loadIntoExisting() activates and fills.
 *
 * @var int
 */
protected $_sheetIndex = 0;

/**
 * Style arrays keyed by HTML tag name; each is handed to applyFromArray() on the affected cell.
 *
 * @var array
 */
protected $_formats = array(
    'h1' => array(
        'font' => array(
            'bold' => true,
            'size' => 24,
        ),
    ),
    'h2' => array(
        'font' => array(
            'bold' => true,
            'size' => 18,
        ),
    ),
    'h3' => array(
        'font' => array(
            'bold' => true,
            'size' => 13.5,
        ),
    ),
    'h4' => array(
        'font' => array(
            'bold' => true,
            'size' => 12,
        ),
    ),
    'h5' => array(
        'font' => array(
            'bold' => true,
            'size' => 10,
        ),
    ),
    'h6' => array(
        'font' => array(
            'bold' => true,
            'size' => 7.5,
        ),
    ),
    // Hyperlinks: underlined, blue text.
    'a' => array(
        'font' => array(
            'underline' => true,
            'color' => array(
                'argb' => PHPExcel_Style_Color::COLOR_BLUE,
            ),
        ),
    ),
    // Horizontal rule: thin bottom border on the cell.
    'hr' => array(
        'borders' => array(
            'bottom' => array(
                'style' => PHPExcel_Style_Border::BORDER_THIN,
                // Keyed by 'argb' like the 'a' entry above; the previous unkeyed value
                // was presumably ignored by applyFromArray() - TODO confirm.
                'color' => array(
                    'argb' => PHPExcel_Style_Color::COLOR_BLACK,
                ),
            ),
        ),
    ),
);

/**
 * Cell coordinates (e.g. 'B3') covered by a rowspan merge, stored as coordinate => true.
 * The th/td handling skips columns whose coordinate is present here.
 *
 * @var array
 */
protected $rowspan = array();
-
/**
 * Create a new HTML reader whose read filter is a PHPExcel_Reader_DefaultReadFilter.
 */
public function __construct()
{
    $defaultFilter = new PHPExcel_Reader_DefaultReadFilter();
    $this->_readFilter = $defaultFilter;
}
-
/**
 * Decide whether the currently opened file looks like HTML.
 *
 * Reads up to 2048 bytes from $this->_fileHandle and accepts the file when that sample
 * contains a '<' and strip_tags() shortens it, i.e. at least one tag was removed.
 *
 * @return boolean
 */
protected function _isValidFormat()
{
    $sample = fread($this->_fileHandle, 2048);

    // No angle bracket at all: cannot contain markup.
    if (strpos($sample, '<') === false) {
        return false;
    }

    return strlen(strip_tags($sample)) !== strlen($sample);
}
-
/**
 * Load an HTML file into a freshly created PHPExcel workbook.
 *
 * @param string $pFilename Path of the file to read
 * @return PHPExcel The workbook returned by loadIntoExisting()
 * @throws PHPExcel_Reader_Exception Propagated from loadIntoExisting()
 */
public function load($pFilename)
{
    $workbook = new PHPExcel();
    $loaded = $this->loadIntoExisting($pFilename, $workbook);

    return $loaded;
}
-
/**
 * Store the input encoding name.
 *
 * @param string $pValue Encoding name (defaults to 'ANSI')
 * @return PHPExcel_Reader_HTML This reader, for call chaining
 */
public function setInputEncoding($pValue = 'ANSI')
{
    $this->_inputEncoding = $pValue;

    return $this;
}
-
/**
 * Return the stored input encoding name.
 *
 * @return string
 */
public function getInputEncoding()
{
    $encoding = $this->_inputEncoding;

    return $encoding;
}
-
/**
 * Record of flushed cell content, indexed as [row][column]; written by _flushCell().
 *
 * @var array
 */
protected $_dataArray = array();

/**
 * Current table nesting depth; 0 means the cursor is outside any table.
 *
 * @var int
 */
protected $_tableLevel = 0;

/**
 * Start column of each open table, indexed by nesting level (index 0 is the 'A' seed).
 *
 * @var array
 */
protected $_nestedColumn = array(0 => 'A');
/**
 * Enter a table: increase the nesting level and remember its start column.
 *
 * An outermost table (entered from level 0) always starts in column 'A';
 * a nested table starts in the column passed in.
 *
 * @param string $column Column the cursor is in when the table opens
 * @return string The start column recorded for the new nesting level
 */
protected function _setTableStartColumn($column)
{
    $startColumn = ($this->_tableLevel == 0) ? 'A' : $column;

    $this->_tableLevel++;
    $this->_nestedColumn[$this->_tableLevel] = $startColumn;

    return $startColumn;
}
/**
 * Return the start column recorded for the current table nesting level.
 *
 * @return string
 */
protected function _getTableStartColumn()
{
    $level = $this->_tableLevel;

    return $this->_nestedColumn[$level];
}
/**
 * Leave a table: decrease the nesting level and drop the last recorded start column.
 *
 * @return string The start column that was removed from the stack
 */
protected function _releaseTableStartColumn()
{
    $this->_tableLevel--;
    $released = array_pop($this->_nestedColumn);

    return $released;
}
/**
 * Write the pending content to a cell and reset the pending buffer.
 *
 * String content is written to the sheet (and recorded in $_dataArray) only when it is
 * non-empty after trimming. Non-string content is not written to the sheet; it is only
 * recorded in $_dataArray with a 'RICH TEXT: ' prefix. In every case the buffer ends up
 * as an empty string.
 *
 * @param object $sheet       Worksheet receiving the value (must provide setCellValue())
 * @param string $column      Column letter of the target cell
 * @param int    $row         Row number of the target cell
 * @param mixed  $cellContent Pending content, cleared by reference
 */
protected function _flushCell($sheet, $column, $row, &$cellContent)
{
    if (!is_string($cellContent)) {
        // Non-string content is only noted in the data array.
        $this->_dataArray[$row][$column] = 'RICH TEXT: ' . $cellContent;
    } elseif (trim($cellContent) > '') {
        $coordinate = $column . $row;
        $sheet->setCellValue($coordinate, $cellContent, true);
        $this->_dataArray[$row][$column] = $cellContent;
    }

    $cellContent = '';
}
/**
 * Recursively walk the children of a DOM node and transfer their content to the worksheet.
 *
 * Text nodes are appended to the pending cell content. Element nodes are dispatched on
 * their tag name, which decides when the pending content is flushed to a cell, which style
 * from $_formats is applied, and how the row/column cursor advances.
 *
 * @param DOMNode $element     Node whose child nodes are processed
 * @param object  $sheet       Worksheet receiving values, styles, hyperlinks and merges
 * @param int     $row         Current row, updated by reference
 * @param string  $column      Current column letter, updated by reference
 * @param mixed   $cellContent Pending cell content, updated by reference
 * @param mixed   $format      Not used by this method; kept for signature compatibility
 */
protected function _processDomElement(DOMNode $element, $sheet, &$row, &$column, &$cellContent, $format = null)
{
    foreach ($element->childNodes as $child) {
        if ($child instanceof DOMText) {
            // Trim the text, then collapse every whitespace run to a single space.
            $domText = preg_replace('/\s+/u', ' ', trim($child->nodeValue));
            // Only string buffers are appended to; non-string content is left untouched.
            if (is_string($cellContent)) {
                $cellContent .= $domText;
            }
        } elseif ($child instanceof DOMElement) {
            $attributeArray = array();
            foreach ($child->attributes as $attribute) {
                $attributeArray[$attribute->name] = $attribute->value;
            }

            switch ($child->nodeName) {
                case 'meta':
                    // Meta attributes (e.g. content) are not interpreted; just descend.
                    $this->_processDomElement($child, $sheet, $row, $column, $cellContent);
                    break;

                case 'title':
                    // The document title becomes the worksheet title and is not cell content.
                    $this->_processDomElement($child, $sheet, $row, $column, $cellContent);
                    $sheet->setTitle($cellContent);
                    $cellContent = '';
                    break;

                case 'span':
                case 'div':
                case 'font':
                case 'i':
                case 'em':
                case 'strong':
                case 'b':
                    // Inline containers: separate their text from its neighbours with a space.
                    if ($cellContent > '') {
                        $cellContent .= ' ';
                    }
                    $this->_processDomElement($child, $sheet, $row, $column, $cellContent);
                    if ($cellContent > '') {
                        $cellContent .= ' ';
                    }
                    break;

                case 'hr':
                    $this->_flushCell($sheet, $column, $row, $cellContent);
                    ++$row;
                    if (isset($this->_formats[$child->nodeName])) {
                        $sheet->getStyle($column . $row)->applyFromArray($this->_formats[$child->nodeName]);
                    } else {
                        $cellContent = '----------';
                        $this->_flushCell($sheet, $column, $row, $cellContent);
                    }
                    ++$row;
                    // Deliberate fall-through: a rule is followed by the same handling as a line break.
                    // no break
                case 'br':
                    if ($this->_tableLevel > 0) {
                        // Inside a table a break stays within the cell.
                        $cellContent .= "\n";
                    } else {
                        // Outside a table a break starts a new row.
                        $this->_flushCell($sheet, $column, $row, $cellContent);
                        ++$row;
                    }
                    break;

                case 'a':
                    if (isset($attributeArray['href'])) {
                        $sheet->getCell($column . $row)->getHyperlink()->setUrl($attributeArray['href']);
                        if (isset($this->_formats[$child->nodeName])) {
                            $sheet->getStyle($column . $row)->applyFromArray($this->_formats[$child->nodeName]);
                        }
                    }
                    $cellContent .= ' ';
                    $this->_processDomElement($child, $sheet, $row, $column, $cellContent);
                    break;

                case 'h1':
                case 'h2':
                case 'h3':
                case 'h4':
                case 'h5':
                case 'h6':
                case 'ol':
                case 'ul':
                case 'p':
                    if ($this->_tableLevel > 0) {
                        // Inside a table the block only adds a line break within the cell.
                        $cellContent .= "\n";
                        $this->_processDomElement($child, $sheet, $row, $column, $cellContent);
                    } else {
                        // Pending content goes on its own row before the block starts.
                        if ($cellContent > '') {
                            $this->_flushCell($sheet, $column, $row, $cellContent);
                            $row++;
                        }
                        $this->_processDomElement($child, $sheet, $row, $column, $cellContent);
                        $this->_flushCell($sheet, $column, $row, $cellContent);
                        if (isset($this->_formats[$child->nodeName])) {
                            $sheet->getStyle($column . $row)->applyFromArray($this->_formats[$child->nodeName]);
                        }
                        $row++;
                        $column = 'A';
                    }
                    break;

                case 'li':
                    if ($this->_tableLevel > 0) {
                        // Inside a table the item only adds a line break within the cell.
                        $cellContent .= "\n";
                        $this->_processDomElement($child, $sheet, $row, $column, $cellContent);
                    } else {
                        if ($cellContent > '') {
                            $this->_flushCell($sheet, $column, $row, $cellContent);
                        }
                        ++$row;
                        $this->_processDomElement($child, $sheet, $row, $column, $cellContent);
                        $this->_flushCell($sheet, $column, $row, $cellContent);
                        $column = 'A';
                    }
                    break;

                case 'table':
                    $this->_flushCell($sheet, $column, $row, $cellContent);
                    $column = $this->_setTableStartColumn($column);
                    // A nested table starts on the row of the cell that contains it.
                    if ($this->_tableLevel > 1) {
                        --$row;
                    }
                    $this->_processDomElement($child, $sheet, $row, $column, $cellContent);
                    $column = $this->_releaseTableStartColumn();
                    if ($this->_tableLevel > 1) {
                        ++$column;
                    } else {
                        ++$row;
                    }
                    break;

                case 'thead':
                case 'tbody':
                    $this->_processDomElement($child, $sheet, $row, $column, $cellContent);
                    break;

                case 'tr':
                    // Each table row restarts at the table's start column with an empty buffer.
                    $column = $this->_getTableStartColumn();
                    $cellContent = '';
                    $this->_processDomElement($child, $sheet, $row, $column, $cellContent);
                    ++$row;
                    break;

                case 'th':
                case 'td':
                    $this->_processDomElement($child, $sheet, $row, $column, $cellContent);
                    // Skip columns already occupied by a rowspan merge from an earlier row.
                    while (isset($this->rowspan[$column . $row])) {
                        ++$column;
                    }
                    $this->_flushCell($sheet, $column, $row, $cellContent);

                    $hasRowspan = isset($attributeArray['rowspan']);
                    $hasColspan = isset($attributeArray['colspan']);
                    if ($hasRowspan || $hasColspan) {
                        // Last column of the merge: advance (colspan - 1) columns.
                        $columnTo = $column;
                        if ($hasColspan) {
                            for ($i = 0; $i < $attributeArray['colspan'] - 1; $i++) {
                                ++$columnTo;
                            }
                        }
                        // Last row of the merge.
                        $rowTo = $hasRowspan ? ($row + $attributeArray['rowspan'] - 1) : $row;
                        $range = $column . $row . ':' . $columnTo . $rowTo;

                        // Only merges that span rows are remembered for later rows.
                        if ($hasRowspan) {
                            foreach (PHPExcel_Cell::extractAllCellReferencesInRange($range) as $value) {
                                $this->rowspan[$value] = true;
                            }
                        }
                        $sheet->mergeCells($range);
                        $column = $columnTo;
                    }
                    ++$column;
                    break;

                case 'body':
                    // Reset the cursor and discard text accumulated before <body>
                    // (for example from <head> children handled by the default branch).
                    $row = 1;
                    $column = 'A';
                    $cellContent = '';
                    $this->_tableLevel = 0;
                    $this->_processDomElement($child, $sheet, $row, $column, $cellContent);
                    break;

                default:
                    $this->_processDomElement($child, $sheet, $row, $column, $cellContent);
            }
        }
    }
}
-
/**
 * Load an HTML file into the worksheet at $_sheetIndex of an existing workbook.
 *
 * @param string   $pFilename   Path of the file to read
 * @param PHPExcel $objPHPExcel Workbook that receives the data
 * @return PHPExcel The same workbook instance
 * @throws PHPExcel_Reader_Exception When the file does not look like HTML or cannot be parsed
 */
public function loadIntoExisting($pFilename, PHPExcel $objPHPExcel)
{
    // The file is opened only to sniff its format; the handle is closed on both outcomes.
    // NOTE(review): _openFile() is defined outside this view; presumably it sets $this->_fileHandle.
    $this->_openFile($pFilename);
    $isHtml = $this->_isValidFormat();
    fclose($this->_fileHandle);
    if (!$isHtml) {
        throw new PHPExcel_Reader_Exception($pFilename . " is an Invalid HTML file.");
    }

    // Make sure the target worksheet exists, then make it the active one.
    while ($objPHPExcel->getSheetCount() <= $this->_sheetIndex) {
        $objPHPExcel->createSheet();
    }
    $objPHPExcel->setActiveSheetIndex($this->_sheetIndex);

    // Parse the result of securityScanFile() (defined outside this view) as an HTML document.
    $dom = new DOMDocument();
    $loaded = $dom->loadHTML($this->securityScanFile($pFilename));
    if ($loaded === false) {
        // The message parts are concatenated: the exception constructor's second
        // argument is the error code, not a further piece of the message.
        throw new PHPExcel_Reader_Exception('Failed to load ' . $pFilename . ' as a DOM Document');
    }

    $dom->preserveWhiteSpace = false;

    // Walk the document; the cursor variables are advanced by reference during the walk.
    $row = 0;
    $column = 'A';
    $content = '';
    $this->_processDomElement($dom, $objPHPExcel->getActiveSheet(), $row, $column, $content);

    return $objPHPExcel;
}
-
/**
 * Return the index of the worksheet this reader loads into.
 *
 * @return int
 */
public function getSheetIndex()
{
    $index = $this->_sheetIndex;

    return $index;
}
-
/**
 * Set the index of the worksheet this reader loads into.
 *
 * @param int $pValue Worksheet index (defaults to 0)
 * @return PHPExcel_Reader_HTML This reader, for call chaining
 */
public function setSheetIndex($pValue = 0)
{
    $this->_sheetIndex = $pValue;

    return $this;
}
-
/**
 * Reject input that contains an entity declaration.
 *
 * The pattern matches the literal '<!ENTITY' with an optional NUL byte before, between
 * and after its characters.
 *
 * @param string $xml Content to scan
 * @return string The unchanged content when no entity declaration is found
 * @throws PHPExcel_Reader_Exception When '<!ENTITY' is detected
 */
public function securityScan($xml)
{
    $characters = str_split('<!ENTITY');
    $pattern = '/\\0?' . implode('\\0?', $characters) . '\\0?/';

    $found = preg_match($pattern, $xml);
    if ($found) {
        throw new PHPExcel_Reader_Exception('Detected use of ENTITY in XML, spreadsheet file load() aborted to prevent XXE/XEE attacks');
    }

    return $xml;
}
- }
|