worksheet-reader.js 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374
  1. const {EventEmitter} = require('events');
  2. const parseSax = require('../../utils/parse-sax');
  3. const _ = require('../../utils/under-dash');
  4. const utils = require('../../utils/utils');
  5. const colCache = require('../../utils/col-cache');
  6. const Dimensions = require('../../doc/range');
  7. const Row = require('../../doc/row');
  8. const Column = require('../../doc/column');
  9. class WorksheetReader extends EventEmitter {
  10. constructor({workbook, id, iterator, options}) {
  11. super();
  12. this.workbook = workbook;
  13. this.id = id;
  14. this.iterator = iterator;
  15. this.options = options || {};
  16. // and a name
  17. this.name = `Sheet${this.id}`;
  18. // column definitions
  19. this._columns = null;
  20. this._keys = {};
  21. // keep a record of dimensions
  22. this._dimensions = new Dimensions();
  23. }
  24. // destroy - not a valid operation for a streaming writer
  25. // even though some streamers might be able to, it's a bad idea.
  26. destroy() {
  27. throw new Error('Invalid Operation: destroy');
  28. }
  29. // return the current dimensions of the writer
  30. get dimensions() {
  31. return this._dimensions;
  32. }
  33. // =========================================================================
  34. // Columns
  35. // get the current columns array.
  36. get columns() {
  37. return this._columns;
  38. }
  39. // get a single column by col number. If it doesn't exist, it and any gaps before it
  40. // are created.
  41. getColumn(c) {
  42. if (typeof c === 'string') {
  43. // if it matches a key'd column, return that
  44. const col = this._keys[c];
  45. if (col) {
  46. return col;
  47. }
  48. // otherise, assume letter
  49. c = colCache.l2n(c);
  50. }
  51. if (!this._columns) {
  52. this._columns = [];
  53. }
  54. if (c > this._columns.length) {
  55. let n = this._columns.length + 1;
  56. while (n <= c) {
  57. this._columns.push(new Column(this, n++));
  58. }
  59. }
  60. return this._columns[c - 1];
  61. }
  62. getColumnKey(key) {
  63. return this._keys[key];
  64. }
  65. setColumnKey(key, value) {
  66. this._keys[key] = value;
  67. }
  68. deleteColumnKey(key) {
  69. delete this._keys[key];
  70. }
  71. eachColumnKey(f) {
  72. _.each(this._keys, f);
  73. }
  74. async read() {
  75. try {
  76. for await (const events of this.parse()) {
  77. for (const {eventType, value} of events) {
  78. this.emit(eventType, value);
  79. }
  80. }
  81. this.emit('finished');
  82. } catch (error) {
  83. this.emit('error', error);
  84. }
  85. }
  86. async *[Symbol.asyncIterator]() {
  87. for await (const events of this.parse()) {
  88. for (const {eventType, value} of events) {
  89. if (eventType === 'row') {
  90. yield value;
  91. }
  92. }
  93. }
  94. }
  95. async *parse() {
  96. const {iterator, options} = this;
  97. let emitSheet = false;
  98. let emitHyperlinks = false;
  99. let hyperlinks = null;
  100. switch (options.worksheets) {
  101. case 'emit':
  102. emitSheet = true;
  103. break;
  104. case 'prep':
  105. break;
  106. default:
  107. break;
  108. }
  109. switch (options.hyperlinks) {
  110. case 'emit':
  111. emitHyperlinks = true;
  112. break;
  113. case 'cache':
  114. this.hyperlinks = hyperlinks = {};
  115. break;
  116. default:
  117. break;
  118. }
  119. if (!emitSheet && !emitHyperlinks && !hyperlinks) {
  120. return;
  121. }
  122. // references
  123. const {sharedStrings, styles, properties} = this.workbook;
  124. // xml position
  125. let inCols = false;
  126. let inRows = false;
  127. let inHyperlinks = false;
  128. // parse state
  129. let cols = null;
  130. let row = null;
  131. let c = null;
  132. let current = null;
  133. for await (const events of parseSax(iterator)) {
  134. const worksheetEvents = [];
  135. for (const {eventType, value} of events) {
  136. if (eventType === 'opentag') {
  137. const node = value;
  138. if (emitSheet) {
  139. switch (node.name) {
  140. case 'cols':
  141. inCols = true;
  142. cols = [];
  143. break;
  144. case 'sheetData':
  145. inRows = true;
  146. break;
  147. case 'col':
  148. if (inCols) {
  149. cols.push({
  150. min: parseInt(node.attributes.min, 10),
  151. max: parseInt(node.attributes.max, 10),
  152. width: parseFloat(node.attributes.width),
  153. styleId: parseInt(node.attributes.style || '0', 10),
  154. });
  155. }
  156. break;
  157. case 'row':
  158. if (inRows) {
  159. const r = parseInt(node.attributes.r, 10);
  160. row = new Row(this, r);
  161. if (node.attributes.ht) {
  162. row.height = parseFloat(node.attributes.ht);
  163. }
  164. if (node.attributes.s) {
  165. const styleId = parseInt(node.attributes.s, 10);
  166. const style = styles.getStyleModel(styleId);
  167. if (style) {
  168. row.style = style;
  169. }
  170. }
  171. }
  172. break;
  173. case 'c':
  174. if (row) {
  175. c = {
  176. ref: node.attributes.r,
  177. s: parseInt(node.attributes.s, 10),
  178. t: node.attributes.t,
  179. };
  180. }
  181. break;
  182. case 'f':
  183. if (c) {
  184. current = c.f = {text: ''};
  185. }
  186. break;
  187. case 'v':
  188. if (c) {
  189. current = c.v = {text: ''};
  190. }
  191. break;
  192. case 'is':
  193. case 't':
  194. if (c) {
  195. current = c.v = {text: ''};
  196. }
  197. break;
  198. case 'mergeCell':
  199. break;
  200. default:
  201. break;
  202. }
  203. }
  204. // =================================================================
  205. //
  206. if (emitHyperlinks || hyperlinks) {
  207. switch (node.name) {
  208. case 'hyperlinks':
  209. inHyperlinks = true;
  210. break;
  211. case 'hyperlink':
  212. if (inHyperlinks) {
  213. const hyperlink = {
  214. ref: node.attributes.ref,
  215. rId: node.attributes['r:id'],
  216. };
  217. if (emitHyperlinks) {
  218. worksheetEvents.push({eventType: 'hyperlink', value: hyperlink});
  219. } else {
  220. hyperlinks[hyperlink.ref] = hyperlink;
  221. }
  222. }
  223. break;
  224. default:
  225. break;
  226. }
  227. }
  228. } else if (eventType === 'text') {
  229. // only text data is for sheet values
  230. if (emitSheet) {
  231. if (current) {
  232. current.text += value;
  233. }
  234. }
  235. } else if (eventType === 'closetag') {
  236. const node = value;
  237. if (emitSheet) {
  238. switch (node.name) {
  239. case 'cols':
  240. inCols = false;
  241. this._columns = Column.fromModel(cols);
  242. break;
  243. case 'sheetData':
  244. inRows = false;
  245. break;
  246. case 'row':
  247. this._dimensions.expandRow(row);
  248. worksheetEvents.push({eventType: 'row', value: row});
  249. row = null;
  250. break;
  251. case 'c':
  252. if (row && c) {
  253. const address = colCache.decodeAddress(c.ref);
  254. const cell = row.getCell(address.col);
  255. if (c.s) {
  256. const style = styles.getStyleModel(c.s);
  257. if (style) {
  258. cell.style = style;
  259. }
  260. }
  261. if (c.f) {
  262. const cellValue = {
  263. formula: c.f.text,
  264. };
  265. if (c.v) {
  266. if (c.t === 'str') {
  267. cellValue.result = utils.xmlDecode(c.v.text);
  268. } else {
  269. cellValue.result = parseFloat(c.v.text);
  270. }
  271. }
  272. cell.value = cellValue;
  273. } else if (c.v) {
  274. switch (c.t) {
  275. case 's': {
  276. const index = parseInt(c.v.text, 10);
  277. if (sharedStrings) {
  278. cell.value = sharedStrings[index];
  279. } else {
  280. cell.value = {
  281. sharedString: index,
  282. };
  283. }
  284. break;
  285. }
  286. case 'inlineStr':
  287. case 'str':
  288. cell.value = utils.xmlDecode(c.v.text);
  289. break;
  290. case 'e':
  291. cell.value = {error: c.v.text};
  292. break;
  293. case 'b':
  294. cell.value = parseInt(c.v.text, 10) !== 0;
  295. break;
  296. default:
  297. if (utils.isDateFmt(cell.numFmt)) {
  298. cell.value = utils.excelToDate(
  299. parseFloat(c.v.text),
  300. properties.model && properties.model.date1904
  301. );
  302. } else {
  303. cell.value = parseFloat(c.v.text);
  304. }
  305. break;
  306. }
  307. }
  308. if (hyperlinks) {
  309. const hyperlink = hyperlinks[c.ref];
  310. if (hyperlink) {
  311. cell.text = cell.value;
  312. cell.value = undefined;
  313. cell.hyperlink = hyperlink;
  314. }
  315. }
  316. c = null;
  317. }
  318. break;
  319. default:
  320. break;
  321. }
  322. }
  323. if (emitHyperlinks || hyperlinks) {
  324. switch (node.name) {
  325. case 'hyperlinks':
  326. inHyperlinks = false;
  327. break;
  328. default:
  329. break;
  330. }
  331. }
  332. }
  333. }
  334. if (worksheetEvents.length > 0) {
  335. yield worksheetEvents;
  336. }
  337. }
  338. }
  339. }
  340. module.exports = WorksheetReader;