workbook-reader.js 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362
  1. "use strict";
  2. const fs = require('fs');
  3. const {
  4. EventEmitter
  5. } = require('events');
  6. const {
  7. PassThrough,
  8. Readable
  9. } = require('readable-stream');
  10. const nodeStream = require('stream');
  11. const unzip = require('unzipper');
  12. const tmp = require('tmp');
  13. const iterateStream = require('../../utils/iterate-stream');
  14. const parseSax = require('../../utils/parse-sax');
  15. const StyleManager = require('../../xlsx/xform/style/styles-xform');
  16. const WorkbookXform = require('../../xlsx/xform/book/workbook-xform');
  17. const RelationshipsXform = require('../../xlsx/xform/core/relationships-xform');
  18. const WorksheetReader = require('./worksheet-reader');
  19. const HyperlinkReader = require('./hyperlink-reader');
  20. tmp.setGracefulCleanup();
  /**
   * Streaming reader for an .xlsx workbook.
   *
   * The workbook is unzipped entry by entry. Relationships, workbook metadata,
   * shared strings and styles are parsed as they appear; each worksheet is
   * surfaced as a lazily-read `WorksheetReader`. The reader can be consumed in
   * three ways:
   *   - `read()`                 : event style ('shared-strings', 'worksheet',
   *                                'hyperlinks', then 'end' / 'finished', or 'error')
   *   - `parse()`                : async generator of `{eventType, value}` items
   *   - `for await (... of rdr)` : worksheet readers only
   */
  class WorkbookReader extends EventEmitter {
    /**
     * @param {string|Readable} input - file path or readable stream of the .xlsx data
     * @param {object} [options] - overrides for the defaults below; the accepted
     *   values are listed in `WorkbookReader.Options`
     */
    constructor(input, options = {}) {
      super();
      this.input = input;
      this.options = {
        worksheets: 'emit',
        sharedStrings: 'cache',
        hyperlinks: 'ignore',
        styles: 'ignore',
        entries: 'ignore',
        ...options,
      };
      this.styles = new StyleManager();
      this.styles.init();
    }

    /**
     * Normalises the input into a readable stream.
     *
     * @param {string|Readable} input - an existing readable stream (returned
     *   as-is) or a file path (opened with `fs.createReadStream`)
     * @returns {Readable}
     * @throws {Error} when the input is neither a readable stream nor a string
     */
    _getStream(input) {
      if (input instanceof nodeStream.Readable || input instanceof Readable) {
        return input;
      }
      if (typeof input === 'string') {
        return fs.createReadStream(input);
      }
      throw new Error(`Could not recognise input: ${input}`);
    }

    /**
     * Event-style front end over `parse()`.
     *
     * Re-emits every parsed item under its event type. A worksheet is read to
     * completion before the next item is requested. Emits 'end' and then
     * 'finished' on success; any failure is reported through an 'error' event
     * rather than by rejecting the returned promise.
     *
     * @param {string|Readable} [input] - forwarded to `parse()`
     * @param {object} [options] - forwarded to `parse()`
     * @returns {Promise<void>}
     */
    async read(input, options) {
      try {
        for await (const {eventType, value} of this.parse(input, options)) {
          switch (eventType) {
            case 'shared-strings':
              this.emit(eventType, value);
              break;
            case 'worksheet':
              this.emit(eventType, value);
              await value.read();
              break;
            case 'hyperlinks':
              this.emit(eventType, value);
              break;
          }
        }
        this.emit('end');
        this.emit('finished');
      } catch (error) {
        this.emit('error', error);
      }
    }

    /**
     * Iterating the reader itself yields only the worksheet readers, using the
     * input and options the instance already holds.
     */
    async *[Symbol.asyncIterator]() {
      for await (const {eventType, value} of this.parse()) {
        if (eventType === 'worksheet') {
          yield value;
        }
      }
    }

    /**
     * Walks the zip entries of the workbook and yields parsed items.
     *
     * A worksheet can only be parsed straight from the zip stream once both
     * the shared strings and the workbook relationships are known. Worksheets
     * that arrive earlier are spooled to temporary files and replayed after
     * every zip entry has been seen.
     *
     * @param {string|Readable} [input] - falls back to the constructor input
     * @param {object} [options] - when given, REPLACES `this.options` wholesale
     *   (it is not merged with the constructor defaults)
     * @yields {{eventType: string, value: *}}
     */
    async *parse(input, options) {
      if (options) this.options = options;
      const stream = (this.stream = this._getStream(input || this.input));
      const zip = unzip.Parse({forceStream: true});
      stream.pipe(zip);

      // worksheets, deferred for parsing after shared strings reading
      const waitingWorkSheets = [];

      for await (const entry of iterateStream(zip)) {
        switch (entry.path) {
          case '_rels/.rels':
            break;
          case 'xl/_rels/workbook.xml.rels':
            await this._parseRels(entry);
            break;
          case 'xl/workbook.xml':
            await this._parseWorkbook(entry);
            break;
          case 'xl/sharedStrings.xml':
            yield* this._parseSharedStrings(entry);
            break;
          case 'xl/styles.xml':
            await this._parseStyles(entry);
            break;
          default: {
            const worksheetMatch = entry.path.match(/xl\/worksheets\/sheet(\d+)[.]xml/);
            if (worksheetMatch) {
              const sheetNo = worksheetMatch[1];
              if (this.sharedStrings && this.workbookRels) {
                yield* this._parseWorksheet(iterateStream(entry), sheetNo);
              } else {
                // create temp file for each worksheet
                await new Promise((resolve, reject) => {
                  tmp.file((err, path, fd, tempFileCleanupCallback) => {
                    if (err) {
                      reject(err);
                      return;
                    }
                    waitingWorkSheets.push({sheetNo, path, tempFileCleanupCallback});
                    const tempStream = fs.createWriteStream(path);
                    tempStream.on('error', reject);
                    tempStream.on('finish', () => resolve());
                    entry.pipe(tempStream);
                  });
                });
              }
            } else {
              const relsMatch = entry.path.match(/xl\/worksheets\/_rels\/sheet(\d+)[.]xml.rels/);
              if (relsMatch) {
                yield* this._parseHyperlinks(iterateStream(entry), relsMatch[1]);
              }
            }
            break;
          }
        }
        // Called for every entry, whether or not a handler above consumed it.
        entry.autodrain();
      }

      for (const {sheetNo, path, tempFileCleanupCallback} of waitingWorkSheets) {
        let fileStream = fs.createReadStream(path);
        // TODO: Remove once node v8 is deprecated
        // Detect and upgrade old fileStreams
        if (!fileStream[Symbol.asyncIterator]) {
          fileStream = fileStream.pipe(new PassThrough());
        }
        yield* this._parseWorksheet(fileStream, sheetNo);
        // Runs only after the consumer has asked for the next item.
        tempFileCleanupCallback();
      }
    }

    /**
     * Emits an 'entry' event, but only when `options.entries` is 'emit'.
     *
     * @param {object} payload - description of the entry being processed
     */
    _emitEntry(payload) {
      if (this.options.entries === 'emit') {
        this.emit('entry', payload);
      }
    }

    /**
     * Parses the workbook relationships entry into `this.workbookRels`.
     *
     * @param {Readable} entry - zip entry stream
     */
    async _parseRels(entry) {
      const xform = new RelationshipsXform();
      this.workbookRels = await xform.parseStream(iterateStream(entry));
    }

    /**
     * Parses the workbook entry into `this.model` and `this.properties`.
     *
     * @param {Readable} entry - zip entry stream
     */
    async _parseWorkbook(entry) {
      this._emitEntry({type: 'workbook'});
      const workbook = new WorkbookXform();
      await workbook.parseStream(iterateStream(entry));
      this.properties = workbook.map.workbookPr;
      this.model = workbook.model;
    }

    /**
     * Parses the shared strings entry.
     *
     * Depending on `options.sharedStrings`:
     *   - 'cache': every string is appended to `this.sharedStrings`
     *   - 'emit' : every string is yielded as
     *              `{eventType: 'shared-strings', value: {index, text}}`
     *   - other  : the entry is not parsed at all
     *
     * A string is either plain text or `{richText: [{font, text}, ...]}` when
     * the item is made of formatted runs.
     *
     * @param {Readable} entry - zip entry stream
     * @yields {{eventType: string, value: {index: number, text: *}}}
     */
    async *_parseSharedStrings(entry) {
      this._emitEntry({type: 'shared-strings'});
      switch (this.options.sharedStrings) {
        case 'cache':
          this.sharedStrings = [];
          break;
        case 'emit':
          break;
        default:
          return;
      }

      let text = null;
      let richText = [];
      let index = 0;
      let font = null;

      for await (const events of parseSax(iterateStream(entry))) {
        for (const {eventType, value} of events) {
          if (eventType === 'opentag') {
            const node = value;
            switch (node.name) {
              case 'b':
                font = font || {};
                font.bold = true;
                break;
              case 'charset':
                font = font || {};
                // Read from `val`, the attribute the sibling handlers
                // (family, sz, vertAlign) also read.
                font.charset = parseInt(node.attributes.val, 10);
                break;
              case 'color':
                font = font || {};
                font.color = {};
                if (node.attributes.rgb) {
                  // The attribute is `rgb`; it is stored under the model's `argb` key.
                  font.color.argb = node.attributes.rgb;
                }
                if (node.attributes.val) {
                  font.color.argb = node.attributes.val;
                }
                if (node.attributes.theme) {
                  font.color.theme = node.attributes.theme;
                }
                break;
              case 'family':
                font = font || {};
                font.family = parseInt(node.attributes.val, 10);
                break;
              case 'i':
                font = font || {};
                font.italic = true;
                break;
              case 'outline':
                font = font || {};
                font.outline = true;
                break;
              case 'rFont':
                font = font || {};
                // The font name is carried by the `val` attribute.
                font.name = node.attributes.val;
                break;
              case 'si':
                font = null;
                richText = [];
                text = null;
                break;
              case 'sz':
                font = font || {};
                font.size = parseInt(node.attributes.val, 10);
                break;
              case 'strike':
                // Recorded like the other boolean run properties (b, i, u, outline).
                font = font || {};
                font.strike = true;
                break;
              case 't':
                text = null;
                break;
              case 'u':
                font = font || {};
                font.underline = true;
                break;
              case 'vertAlign':
                font = font || {};
                font.vertAlign = node.attributes.val;
                break;
            }
          } else if (eventType === 'text') {
            text = text ? text + value : value;
          } else if (eventType === 'closetag') {
            const node = value;
            switch (node.name) {
              case 'r':
                richText.push({font, text});
                font = null;
                text = null;
                break;
              case 'si': {
                const content = richText.length ? {richText} : text;
                if (this.options.sharedStrings === 'cache') {
                  this.sharedStrings.push(content);
                } else if (this.options.sharedStrings === 'emit') {
                  const sharedString = {index: index++, text: content};
                  // Use the same {eventType, value} envelope as every other
                  // parse() item so that read() can dispatch it. `index` and
                  // `text` stay at the top level for consumers of the older
                  // bare shape.
                  yield {
                    eventType: 'shared-strings',
                    value: sharedString,
                    index: sharedString.index,
                    text: sharedString.text,
                  };
                }
                richText = [];
                font = null;
                text = null;
                break;
              }
            }
          }
        }
      }
    }

    /**
     * Parses the styles entry into `this.styles`, but only when
     * `options.styles` is 'cache'.
     *
     * @param {Readable} entry - zip entry stream
     */
    async _parseStyles(entry) {
      this._emitEntry({type: 'styles'});
      if (this.options.styles === 'cache') {
        this.styles = new StyleManager();
        await this.styles.parseStream(iterateStream(entry));
      }
    }

    /**
     * Wraps a worksheet data source in a `WorksheetReader`.
     *
     * When the workbook relationships and sheet list identify the sheet, the
     * reader's id, name and state are taken from the workbook model; otherwise
     * the reader keeps the number taken from the entry file name as its id.
     *
     * @param {AsyncIterable} iterator - source of the worksheet XML
     * @param {string} sheetNo - sheet number taken from the entry path
     * @yields {{eventType: 'worksheet', value: WorksheetReader}} only when
     *   `options.worksheets` is 'emit'
     */
    *_parseWorksheet(iterator, sheetNo) {
      this._emitEntry({type: 'worksheet', id: sheetNo});
      const worksheetReader = new WorksheetReader({
        workbook: this,
        id: sheetNo,
        iterator,
        options: this.options,
      });

      const matchingRel = (this.workbookRels || []).find(
        rel => rel.Target === `worksheets/sheet${sheetNo}.xml`
      );
      // this.model is only set once xl/workbook.xml has been parsed; tolerate
      // its absence instead of throwing.
      const sheets = (this.model && this.model.sheets) || [];
      const matchingSheet = matchingRel && sheets.find(sheet => sheet.rId === matchingRel.Id);
      if (matchingSheet) {
        worksheetReader.id = matchingSheet.id;
        worksheetReader.name = matchingSheet.name;
        worksheetReader.state = matchingSheet.state;
      }

      if (this.options.worksheets === 'emit') {
        yield {eventType: 'worksheet', value: worksheetReader};
      }
    }

    /**
     * Wraps a worksheet relationships data source in a `HyperlinkReader`.
     *
     * @param {AsyncIterable} iterator - source of the relationships XML
     * @param {string} sheetNo - sheet number taken from the entry path
     * @yields {{eventType: 'hyperlinks', value: HyperlinkReader}} only when
     *   `options.hyperlinks` is 'emit'
     */
    *_parseHyperlinks(iterator, sheetNo) {
      this._emitEntry({type: 'hyperlinks', id: sheetNo});
      const hyperlinksReader = new HyperlinkReader({
        workbook: this,
        id: sheetNo,
        iterator,
        options: this.options,
      });
      if (this.options.hyperlinks === 'emit') {
        yield {eventType: 'hyperlinks', value: hyperlinksReader};
      }
    }
  }
  /**
   * Reference table of the values each reader option accepts.
   * It is attached to the class as a static property.
   */
  const readerOptionValues = {
    worksheets: [
      'emit',
      'ignore',
    ],
    sharedStrings: [
      'cache',
      'emit',
      'ignore',
    ],
    hyperlinks: [
      'cache',
      'emit',
      'ignore',
    ],
    styles: [
      'cache',
      'ignore',
    ],
    entries: [
      'emit',
      'ignore',
    ],
  };
  WorkbookReader.Options = readerOptionValues;
  358. module.exports = WorkbookReader;
  359. //# sourceMappingURL=workbook-reader.js.map