CsvEncodingTest.php 5.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141
  1. <?php
  2. namespace PhpOffice\PhpSpreadsheetTests\Reader\Csv;
  3. use PhpOffice\PhpSpreadsheet\Reader\Csv;
  4. use PHPUnit\Framework\TestCase;
  /**
   * Exercises the CSV reader against fixture files stored in several character
   * encodings, both with an explicitly configured input encoding and with
   * encoding detection (Csv::guessEncoding() / Csv::GUESS_ENCODING).
   */
  class CsvEncodingTest extends TestCase
  {
      /**
       * Loads each fixture with its input encoding set explicitly and checks
       * that cell A1 of the active sheet holds 'Å'.
       *
       * @dataProvider providerEncodings
       */
      public function testEncodings(string $filename, string $encoding): void
      {
          $reader = new Csv();
          $reader->setInputEncoding($encoding);
          $spreadsheet = $reader->load($filename);
          $sheet = $spreadsheet->getActiveSheet();
          self::assertEquals('Å', $sheet->getCell('A1')->getValue());
      }

      /**
       * Checks the worksheet summary returned by listWorksheetInfo() for each
       * fixture: sheet name, last column letter/index, and row/column totals.
       *
       * @dataProvider providerEncodings
       */
      public function testWorkSheetInfo(string $filename, string $encoding): void
      {
          $reader = new Csv();
          $reader->setInputEncoding($encoding);
          $info = $reader->listWorksheetInfo($filename);
          self::assertEquals('Worksheet', $info[0]['worksheetName']);
          self::assertEquals('B', $info[0]['lastColumnLetter']);
          self::assertEquals(1, $info[0]['lastColumnIndex']);
          self::assertEquals(2, $info[0]['totalRows']);
          self::assertEquals(2, $info[0]['totalColumns']);
      }

      /**
       * Data sets for the explicit-encoding tests: fixture path paired with the
       * encoding name handed to setInputEncoding().
       *
       * Declared static because PHPUnit 10+ requires data providers to be
       * static; static providers are also accepted by earlier PHPUnit versions.
       *
       * @return array<int, array{0: string, 1: string}>
       */
      public static function providerEncodings(): array
      {
          return [
              ['tests/data/Reader/CSV/encoding.iso88591.csv', 'ISO-8859-1'],
              ['tests/data/Reader/CSV/encoding.utf8.csv', 'UTF-8'],
              ['tests/data/Reader/CSV/encoding.utf8bom.csv', 'UTF-8'],
              ['tests/data/Reader/CSV/encoding.utf16be.csv', 'UTF-16BE'],
              ['tests/data/Reader/CSV/encoding.utf16le.csv', 'UTF-16LE'],
              ['tests/data/Reader/CSV/encoding.utf32be.csv', 'UTF-32BE'],
              ['tests/data/Reader/CSV/encoding.utf32le.csv', 'UTF-32LE'],
          ];
      }

      /**
       * Uses the static Csv::guessEncoding() result as the input encoding and
       * checks two cells of the loaded sheet.
       *
       * @dataProvider providerGuessEncoding
       */
      public function testGuessEncoding(string $filename): void
      {
          $reader = new Csv();
          $reader->setInputEncoding(Csv::guessEncoding($filename));
          $spreadsheet = $reader->load($filename);
          $sheet = $spreadsheet->getActiveSheet();
          self::assertEquals('première', $sheet->getCell('A1')->getValue());
          self::assertEquals('sixième', $sheet->getCell('C2')->getValue());
      }

      /**
       * Checks how row 3 of the UTF-16LE fixture is read: a surrogate pair in A3
       * and the illegal code points in B3/C3.
       */
      public function testSurrogate(): void
      {
          // Surrogates should occur only in UTF-16, and should
          // be properly converted to UTF8 when read.
          // FFFE/FFFF are illegal, and should be converted to
          // substitution character when read.
          // Excel does not handle any of the cells in row 3 well.
          // LibreOffice handles A3 fine, and discards B3/C3,
          // which is a reasonable action.
          $filename = 'tests/data/Reader/CSV/premiere.utf16le.csv';
          $reader = new Csv();
          $reader->setInputEncoding(Csv::guessEncoding($filename));
          $spreadsheet = $reader->load($filename);
          $sheet = $spreadsheet->getActiveSheet();
          self::assertEquals('𐐀', $sheet->getCell('A3')->getValue());
          self::assertEquals('�', $sheet->getCell('B3')->getValue());
          self::assertEquals('�', $sheet->getCell('C3')->getValue());
      }

      /**
       * Same expectations as testGuessEncoding(), but the reader is configured
       * with the Csv::GUESS_ENCODING marker instead of a pre-computed guess.
       *
       * @dataProvider providerGuessEncoding
       */
      public function testFallbackEncoding(string $filename): void
      {
          $reader = new Csv();
          $reader->setInputEncoding(Csv::GUESS_ENCODING);
          $spreadsheet = $reader->load($filename);
          $sheet = $spreadsheet->getActiveSheet();
          self::assertEquals('première', $sheet->getCell('A1')->getValue());
          self::assertEquals('sixième', $sheet->getCell('C2')->getValue());
      }

      /**
       * Fixture paths for the encoding-detection tests.
       *
       * Declared static because PHPUnit 10+ requires data providers to be
       * static; static providers are also accepted by earlier PHPUnit versions.
       *
       * @return array<int, array{0: string}>
       */
      public static function providerGuessEncoding(): array
      {
          return [
              ['tests/data/Reader/CSV/premiere.utf8.csv'],
              ['tests/data/Reader/CSV/premiere.utf8bom.csv'],
              ['tests/data/Reader/CSV/premiere.utf16be.csv'],
              ['tests/data/Reader/CSV/premiere.utf16bebom.csv'],
              ['tests/data/Reader/CSV/premiere.utf16le.csv'],
              ['tests/data/Reader/CSV/premiere.utf16lebom.csv'],
              ['tests/data/Reader/CSV/premiere.utf32be.csv'],
              ['tests/data/Reader/CSV/premiere.utf32bebom.csv'],
              ['tests/data/Reader/CSV/premiere.utf32le.csv'],
              ['tests/data/Reader/CSV/premiere.utf32lebom.csv'],
              ['tests/data/Reader/CSV/premiere.win1252.csv'],
          ];
      }

      /**
       * Passes 'ISO-8859-2' as the second argument to Csv::guessEncoding() for
       * the win1252 fixture and expects 'č' where the other tests expect 'è'.
       *
       * NOTE(review): presumably the second argument is the encoding returned
       * when nothing more specific is detected - confirm against the reader.
       */
      public function testGuessEncodingDefltIso2(): void
      {
          $filename = 'tests/data/Reader/CSV/premiere.win1252.csv';
          $reader = new Csv();
          $reader->setInputEncoding(Csv::guessEncoding($filename, 'ISO-8859-2'));
          $spreadsheet = $reader->load($filename);
          $sheet = $spreadsheet->getActiveSheet();
          self::assertEquals('premičre', $sheet->getCell('A1')->getValue());
          self::assertEquals('sixičme', $sheet->getCell('C2')->getValue());
      }

      /**
       * Asserts that a fresh reader reports 'CP1252' as its fallback encoding,
       * then switches the fallback to 'ISO-8859-2' with Csv::GUESS_ENCODING as
       * input encoding and expects 'č' where the other tests expect 'è'.
       */
      public function testFallbackEncodingDefltIso2(): void
      {
          $filename = 'tests/data/Reader/CSV/premiere.win1252.csv';
          $reader = new Csv();
          self::assertSame('CP1252', $reader->getFallbackEncoding());
          $reader->setInputEncoding(Csv::GUESS_ENCODING);
          $reader->setFallbackEncoding('ISO-8859-2');
          $spreadsheet = $reader->load($filename);
          $sheet = $spreadsheet->getActiveSheet();
          self::assertEquals('premičre', $sheet->getCell('A1')->getValue());
          self::assertEquals('sixičme', $sheet->getCell('C2')->getValue());
      }
  }