CsvTest.php 8.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275
  1. <?php
  2. namespace PhpOffice\PhpSpreadsheetTests\Reader\Csv;
  3. use PhpOffice\PhpSpreadsheet\Reader\Csv;
  4. use PhpOffice\PhpSpreadsheet\Reader\Exception as ReaderException;
  5. use PHPUnit\Framework\TestCase;
  6. class CsvTest extends TestCase
  7. {
  8. /**
  9. * @dataProvider providerDelimiterDetection
  10. *
  11. * @param string $filename
  12. * @param string $expectedDelimiter
  13. * @param string $cell
  14. * @param float|int|string $expectedValue
  15. */
  16. public function testDelimiterDetection($filename, $expectedDelimiter, $cell, $expectedValue): void
  17. {
  18. $reader = new Csv();
  19. $delim1 = $reader->getDelimiter();
  20. self::assertNull($delim1);
  21. $spreadsheet = $reader->load($filename);
  22. self::assertSame($expectedDelimiter, $reader->getDelimiter(), 'should be able to infer the delimiter');
  23. $actual = $spreadsheet->getActiveSheet()->getCell($cell)->getValue();
  24. self::assertSame($expectedValue, $actual, 'should be able to retrieve correct value');
  25. }
  26. public function providerDelimiterDetection(): array
  27. {
  28. return [
  29. [
  30. 'tests/data/Reader/CSV/enclosure.csv',
  31. ',',
  32. 'C4',
  33. 'username2',
  34. ],
  35. [
  36. 'tests/data/Reader/CSV/semicolon_separated.csv',
  37. ';',
  38. 'C2',
  39. '25,5',
  40. ],
  41. [
  42. 'tests/data/Reader/CSV/line_break_in_enclosure.csv',
  43. ',',
  44. 'A3',
  45. 'Test',
  46. ],
  47. [
  48. 'tests/data/Reader/CSV/line_break_in_enclosure_with_escaped_quotes.csv',
  49. ',',
  50. 'A3',
  51. 'Test',
  52. ],
  53. [
  54. 'tests/data/Reader/HTML/csv_with_angle_bracket.csv',
  55. ',',
  56. 'B1',
  57. 'Number of items with weight <= 50kg',
  58. ],
  59. [
  60. 'samples/Reader/sampleData/example1.csv',
  61. ',',
  62. 'I4',
  63. '100%',
  64. ],
  65. [
  66. 'samples/Reader/sampleData/example2.csv',
  67. ',',
  68. 'D8',
  69. -58.373161,
  70. ],
  71. [
  72. 'tests/data/Reader/CSV/empty.csv',
  73. ',',
  74. 'A1',
  75. null,
  76. ],
  77. [
  78. 'tests/data/Reader/CSV/no_delimiter.csv',
  79. ',',
  80. 'A1',
  81. 'SingleLine',
  82. ],
  83. ];
  84. }
  85. /**
  86. * @dataProvider providerCanLoad
  87. *
  88. * @param bool $expected
  89. * @param string $filename
  90. */
  91. public function testCanLoad($expected, $filename): void
  92. {
  93. $reader = new Csv();
  94. self::assertSame($expected, $reader->canRead($filename));
  95. }
  96. public function providerCanLoad(): array
  97. {
  98. return [
  99. [false, 'tests/data/Reader/Ods/data.ods'],
  100. [false, 'samples/templates/excel2003.xml'],
  101. [true, 'tests/data/Reader/CSV/enclosure.csv'],
  102. [true, 'tests/data/Reader/CSV/semicolon_separated.csv'],
  103. [true, 'tests/data/Reader/CSV/contains_html.csv'],
  104. [true, 'tests/data/Reader/CSV/csv_without_extension'],
  105. [true, 'tests/data/Reader/HTML/csv_with_angle_bracket.csv'],
  106. [true, 'tests/data/Reader/CSV/empty.csv'],
  107. [true, 'samples/Reader/sampleData/example1.csv'],
  108. [true, 'samples/Reader/sampleData/example2.csv'],
  109. ];
  110. }
  111. public function testEscapeCharacters(): void
  112. {
  113. $reader = (new Csv())->setEscapeCharacter('"');
  114. $worksheet = $reader->load('tests/data/Reader/CSV/backslash.csv')
  115. ->getActiveSheet();
  116. $expected = [
  117. ['field 1', 'field 2\\'],
  118. ['field 3\\', 'field 4'],
  119. ];
  120. self::assertSame('"', $reader->getEscapeCharacter());
  121. self::assertSame($expected, $worksheet->toArray());
  122. }
  123. public function testInvalidWorkSheetInfo(): void
  124. {
  125. $this->expectException(ReaderException::class);
  126. $reader = new Csv();
  127. $reader->listWorksheetInfo('');
  128. }
  129. public function testUtf16LineBreak(): void
  130. {
  131. $reader = new Csv();
  132. $reader->setInputEncoding('UTF-16BE');
  133. $spreadsheet = $reader->load('tests/data/Reader/CSV/utf16be.line_break_in_enclosure.csv');
  134. $sheet = $spreadsheet->getActiveSheet();
  135. $expected = <<<EOF
  136. This is a test
  137. with line breaks
  138. that breaks the
  139. delimiters
  140. EOF;
  141. self::assertEquals($expected, $sheet->getCell('B3')->getValue());
  142. }
  143. public function testLineBreakEscape(): void
  144. {
  145. $reader = new Csv();
  146. $spreadsheet = $reader->load('tests/data/Reader/CSV/line_break_in_enclosure_with_escaped_quotes.csv');
  147. $sheet = $spreadsheet->getActiveSheet();
  148. $expected = <<<EOF
  149. This is a "test csv file"
  150. with both "line breaks"
  151. and "escaped
  152. quotes" that breaks
  153. the delimiters
  154. EOF;
  155. self::assertEquals($expected, $sheet->getCell('B3')->getValue());
  156. }
  157. public function testUtf32LineBreakEscape(): void
  158. {
  159. $reader = new Csv();
  160. $reader->setInputEncoding('UTF-32LE');
  161. $spreadsheet = $reader->load('tests/data/Reader/CSV/line_break_escaped_32le.csv');
  162. $sheet = $spreadsheet->getActiveSheet();
  163. $expected = <<<EOF
  164. This is a "test csv file"
  165. with both "line breaks"
  166. and "escaped
  167. quotes" that breaks
  168. the delimiters
  169. EOF;
  170. self::assertEquals($expected, $sheet->getCell('B3')->getValue());
  171. }
  172. public function testSeparatorLine(): void
  173. {
  174. $reader = new Csv();
  175. $reader->setSheetIndex(3);
  176. $spreadsheet = $reader->load('tests/data/Reader/CSV/sep.csv');
  177. self::assertEquals(';', $reader->getDelimiter());
  178. $sheet = $spreadsheet->getActiveSheet();
  179. self::assertEquals(3, $reader->getSheetIndex());
  180. self::assertEquals(3, $spreadsheet->getActiveSheetIndex());
  181. self::assertEquals('A', $sheet->getCell('A1')->getValue());
  182. self::assertEquals(1, $sheet->getCell('B1')->getValue());
  183. self::assertEquals(2, $sheet->getCell('A2')->getValue());
  184. self::assertEquals(3, $sheet->getCell('B2')->getValue());
  185. }
  186. public function testDefaultSettings(): void
  187. {
  188. $reader = new Csv();
  189. self::assertEquals('UTF-8', $reader->getInputEncoding());
  190. self::assertEquals('"', $reader->getEnclosure());
  191. $reader->setEnclosure('\'');
  192. self::assertEquals('\'', $reader->getEnclosure());
  193. $reader->setEnclosure('');
  194. self::assertEquals('"', $reader->getEnclosure());
  195. // following tests from BaseReader
  196. self::assertTrue($reader->getReadEmptyCells());
  197. self::assertFalse($reader->getIncludeCharts());
  198. self::assertNull($reader->getLoadSheetsOnly());
  199. }
  200. public function testReadEmptyFileName(): void
  201. {
  202. $this->expectException(ReaderException::class);
  203. $reader = new Csv();
  204. $filename = '';
  205. $reader->load($filename);
  206. }
  207. public function testReadNonexistentFileName(): void
  208. {
  209. $this->expectException(ReaderException::class);
  210. $reader = new Csv();
  211. $reader->load('tests/data/Reader/CSV/encoding.utf8.csvxxx');
  212. }
  213. /**
  214. * @dataProvider providerEscapes
  215. */
  216. public function testInferSeparator(string $escape, string $delimiter): void
  217. {
  218. $reader = new Csv();
  219. $reader->setEscapeCharacter($escape);
  220. $filename = 'tests/data/Reader/CSV/escape.csv';
  221. $reader->listWorksheetInfo($filename);
  222. self::assertEquals($delimiter, $reader->getDelimiter());
  223. }
  224. public function providerEscapes(): array
  225. {
  226. return [
  227. ['\\', ';'],
  228. ["\x0", ','],
  229. [(version_compare(PHP_VERSION, '7.4') < 0) ? "\x0" : '', ','],
  230. ];
  231. }
  232. /**
  233. * This test could be simpler, but Scrutinizer has a minor (and silly) problem.
  234. *
  235. * @dataProvider providerNull
  236. */
  237. public function testSetDelimiterNull(?string $setNull): void
  238. {
  239. $reader = new Csv();
  240. $reader->setDelimiter(',');
  241. self::assertSame(',', $reader->getDelimiter());
  242. $reader->setDelimiter($setNull);
  243. self::assertSame($setNull, $reader->getDelimiter());
  244. }
  245. public function providerNull(): array
  246. {
  247. return [
  248. [null],
  249. ];
  250. }
  251. }