| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337 |
- const fs = require('fs');
- const {EventEmitter} = require('events');
- const {PassThrough, Readable} = require('readable-stream');
- const nodeStream = require('stream');
- const unzip = require('unzipper');
- const tmp = require('tmp');
- const iterateStream = require('../../utils/iterate-stream');
- const parseSax = require('../../utils/parse-sax');
- const StyleManager = require('../../xlsx/xform/style/styles-xform');
- const WorkbookXform = require('../../xlsx/xform/book/workbook-xform');
- const RelationshipsXform = require('../../xlsx/xform/core/relationships-xform');
- const WorksheetReader = require('./worksheet-reader');
- const HyperlinkReader = require('./hyperlink-reader');
- tmp.setGracefulCleanup();
/**
 * Streaming reader for an XLSX workbook.
 *
 * The workbook archive is unzipped entry by entry. Depending on
 * `options`, shared strings, styles, worksheets and hyperlink relationships
 * are cached on the reader, yielded from `parse()`, re-emitted as events by
 * `read()`, or skipped.
 *
 * Events emitted by `read()`: 'shared-strings', 'worksheet', 'hyperlinks',
 * 'end', 'finished' and 'error'. When `options.entries === 'emit'`, an 'entry'
 * event is additionally emitted for every recognised archive part.
 */
class WorkbookReader extends EventEmitter {
  /**
   * @param {string|Readable} input - File path or readable stream of the workbook.
   * @param {object} [options] - Overrides for the defaults below; see
   *   `WorkbookReader.Options` for the accepted values.
   */
  constructor(input, options = {}) {
    super();
    this.input = input;
    this.options = {
      worksheets: 'emit',
      sharedStrings: 'cache',
      hyperlinks: 'ignore',
      styles: 'ignore',
      entries: 'ignore',
      ...options,
    };
    this.styles = new StyleManager();
    this.styles.init();
  }

  /**
   * Normalises the supported input kinds to a readable stream.
   *
   * @param {string|Readable} input - Readable stream (returned as is) or file path.
   * @returns {Readable} Stream producing the raw workbook bytes.
   * @throws {Error} When `input` is neither a readable stream nor a string.
   */
  _getStream(input) {
    if (input instanceof nodeStream.Readable || input instanceof Readable) {
      return input;
    }
    if (typeof input === 'string') {
      return fs.createReadStream(input);
    }
    throw new Error(`Could not recognise input: ${input}`);
  }

  /**
   * Event-style front end to `parse()`: re-emits every parsed item as an event
   * and reads each worksheet to completion before moving on to the next item.
   *
   * Failures are reported through the 'error' event rather than by rejecting
   * (unless no 'error' listener is attached, in which case `emit` throws).
   *
   * @param {string|Readable} [input] - Overrides the constructor input.
   * @param {object} [options] - Forwarded to `parse()`.
   * @returns {Promise<void>}
   */
  async read(input, options) {
    try {
      for await (const {eventType, value} of this.parse(input, options)) {
        switch (eventType) {
          case 'shared-strings':
          case 'hyperlinks':
            this.emit(eventType, value);
            break;
          case 'worksheet':
            this.emit(eventType, value);
            await value.read();
            break;
          default:
            break;
        }
      }
      this.emit('end');
      this.emit('finished');
    } catch (error) {
      this.emit('error', error);
    }
  }

  /**
   * Iterating the reader directly yields only the worksheet readers.
   */
  async *[Symbol.asyncIterator]() {
    for await (const {eventType, value} of this.parse()) {
      if (eventType === 'worksheet') {
        yield value;
      }
    }
  }

  /**
   * Walks the zip archive and yields `{eventType, value}` items for shared
   * strings (emit mode), worksheets and hyperlink relationships.
   *
   * Worksheets met before both the shared strings cache and the workbook
   * relationships are available are spooled to temporary files and parsed
   * after the whole archive has been consumed.
   *
   * @param {string|Readable} [input] - Overrides the constructor input.
   * @param {object} [options] - When given, REPLACES `this.options` entirely;
   *   the constructor defaults are not merged back in.
   */
  async *parse(input, options) {
    if (options) this.options = options;
    const stream = (this.stream = this._getStream(input || this.input));
    const zip = unzip.Parse({forceStream: true});
    stream.pipe(zip);

    // worksheets, deferred for parsing after shared strings reading
    const waitingWorkSheets = [];

    for await (const entry of iterateStream(zip)) {
      switch (entry.path) {
        case '_rels/.rels':
          break;
        case 'xl/_rels/workbook.xml.rels':
          await this._parseRels(entry);
          break;
        case 'xl/workbook.xml':
          await this._parseWorkbook(entry);
          break;
        case 'xl/sharedStrings.xml':
          yield* this._parseSharedStrings(entry);
          break;
        case 'xl/styles.xml':
          await this._parseStyles(entry);
          break;
        default: {
          const worksheetMatch = entry.path.match(/xl\/worksheets\/sheet(\d+)[.]xml/);
          if (worksheetMatch) {
            const sheetNo = worksheetMatch[1];
            if (this.sharedStrings && this.workbookRels) {
              yield* this._parseWorksheet(iterateStream(entry), sheetNo);
            } else {
              // create temp file for each worksheet
              await new Promise((resolve, reject) => {
                tmp.file((err, path, fd, tempFileCleanupCallback) => {
                  if (err) {
                    reject(err);
                    return;
                  }
                  waitingWorkSheets.push({sheetNo, path, tempFileCleanupCallback});
                  const tempStream = fs.createWriteStream(path);
                  tempStream.on('error', reject);
                  tempStream.on('finish', () => resolve());
                  entry.pipe(tempStream);
                });
              });
            }
          } else {
            const relsMatch = entry.path.match(/xl\/worksheets\/_rels\/sheet(\d+)[.]xml.rels/);
            if (relsMatch) {
              yield* this._parseHyperlinks(iterateStream(entry), relsMatch[1]);
            }
          }
          break;
        }
      }
      // Discard whatever is left of the entry so the unzip stream can advance.
      entry.autodrain();
    }

    for (const {sheetNo, path, tempFileCleanupCallback} of waitingWorkSheets) {
      let fileStream = fs.createReadStream(path);
      // TODO: Remove once node v8 is deprecated
      // Detect and upgrade old fileStreams
      if (!fileStream[Symbol.asyncIterator]) {
        fileStream = fileStream.pipe(new PassThrough());
      }
      yield* this._parseWorksheet(fileStream, sheetNo);
      tempFileCleanupCallback();
    }
  }

  /**
   * Emits an 'entry' event when the `entries` option is set to 'emit'.
   *
   * @param {object} payload - Description of the archive part being handled.
   */
  _emitEntry(payload) {
    if (this.options.entries === 'emit') {
      this.emit('entry', payload);
    }
  }

  /**
   * Parses the workbook relationships part into `this.workbookRels`.
   */
  async _parseRels(entry) {
    const xform = new RelationshipsXform();
    this.workbookRels = await xform.parseStream(iterateStream(entry));
  }

  /**
   * Parses `xl/workbook.xml` into `this.model` and `this.properties`.
   */
  async _parseWorkbook(entry) {
    this._emitEntry({type: 'workbook'});

    const workbook = new WorkbookXform();
    await workbook.parseStream(iterateStream(entry));

    this.properties = workbook.map.workbookPr;
    this.model = workbook.model;
  }

  /**
   * Parses the shared strings part.
   *
   * - `sharedStrings: 'cache'` collects every string in `this.sharedStrings`.
   * - `sharedStrings: 'emit'` yields one 'shared-strings' item per string.
   * - any other value skips the part.
   *
   * A string made of formatted runs is represented as `{richText: [{font, text}]}`;
   * otherwise it is the plain text.
   */
  async *_parseSharedStrings(entry) {
    this._emitEntry({type: 'shared-strings'});
    switch (this.options.sharedStrings) {
      case 'cache':
        this.sharedStrings = [];
        break;
      case 'emit':
        break;
      default:
        return;
    }

    let text = null;
    let richText = [];
    let index = 0;
    let font = null;
    for await (const events of parseSax(iterateStream(entry))) {
      for (const {eventType, value} of events) {
        if (eventType === 'opentag') {
          const node = value;
          switch (node.name) {
            case 'b':
              font = font || {};
              font.bold = true;
              break;
            case 'charset':
              font = font || {};
              // The number lives in the `val` attribute, like `family` and `sz`.
              font.charset = parseInt(node.attributes.val, 10);
              break;
            case 'color':
              font = font || {};
              font.color = {};
              if (node.attributes.rgb) {
                font.color.argb = node.attributes.rgb;
              }
              if (node.attributes.val) {
                font.color.argb = node.attributes.val;
              }
              if (node.attributes.theme) {
                font.color.theme = node.attributes.theme;
              }
              break;
            case 'family':
              font = font || {};
              font.family = parseInt(node.attributes.val, 10);
              break;
            case 'i':
              font = font || {};
              font.italic = true;
              break;
            case 'outline':
              font = font || {};
              font.outline = true;
              break;
            case 'rFont':
              font = font || {};
              // An opentag node exposes its data through `attributes`, not `value`.
              font.name = node.attributes.val;
              break;
            case 'si':
              font = null;
              richText = [];
              text = null;
              break;
            case 'sz':
              font = font || {};
              font.size = parseInt(node.attributes.val, 10);
              break;
            case 'strike':
              font = font || {};
              font.strike = true;
              break;
            case 't':
              text = null;
              break;
            case 'u':
              font = font || {};
              font.underline = true;
              break;
            case 'vertAlign':
              font = font || {};
              font.vertAlign = node.attributes.val;
              break;
            default:
              break;
          }
        } else if (eventType === 'text') {
          text = text ? text + value : value;
        } else if (eventType === 'closetag') {
          const node = value;
          switch (node.name) {
            case 'r':
              richText.push({
                font,
                text,
              });

              font = null;
              text = null;
              break;
            case 'si':
              if (this.options.sharedStrings === 'cache') {
                this.sharedStrings.push(richText.length ? {richText} : text);
              } else if (this.options.sharedStrings === 'emit') {
                const item = {index: index++, text: richText.length ? {richText} : text};
                // Use the same {eventType, value} envelope as worksheets and
                // hyperlinks so read() can dispatch it; `index` and `text` stay
                // on the top level for consumers of the previous bare shape.
                yield {eventType: 'shared-strings', value: item, ...item};
              }

              richText = [];
              font = null;
              text = null;
              break;
            default:
              break;
          }
        }
      }
    }
  }

  /**
   * Parses `xl/styles.xml` into a fresh style manager when `styles` is 'cache'.
   */
  async _parseStyles(entry) {
    this._emitEntry({type: 'styles'});
    if (this.options.styles === 'cache') {
      this.styles = new StyleManager();
      await this.styles.parseStream(iterateStream(entry));
    }
  }

  /**
   * Creates a WorksheetReader for one sheet part and, when `worksheets` is
   * 'emit', yields it as a 'worksheet' item.
   *
   * If the workbook relationships and workbook model identify the sheet, the
   * reader's id, name and state are taken from the workbook model; otherwise
   * the reader keeps the number from the part's file name as its id.
   *
   * @param {AsyncIterable} iterator - Source of the sheet's XML data.
   * @param {string} sheetNo - Number captured from `sheet<N>.xml`.
   */
  *_parseWorksheet(iterator, sheetNo) {
    this._emitEntry({type: 'worksheet', id: sheetNo});
    const worksheetReader = new WorksheetReader({
      workbook: this,
      id: sheetNo,
      iterator,
      options: this.options,
    });

    const matchingRel = (this.workbookRels || []).find(rel => rel.Target === `worksheets/sheet${sheetNo}.xml`);
    // The workbook part may not have been parsed (yet); treat that as "no sheets".
    const sheets = (this.model && this.model.sheets) || [];
    const matchingSheet = matchingRel && sheets.find(sheet => sheet.rId === matchingRel.Id);
    if (matchingSheet) {
      worksheetReader.id = matchingSheet.id;
      worksheetReader.name = matchingSheet.name;
      worksheetReader.state = matchingSheet.state;
    }
    if (this.options.worksheets === 'emit') {
      yield {eventType: 'worksheet', value: worksheetReader};
    }
  }

  /**
   * Creates a HyperlinkReader for one sheet relationships part and, when
   * `hyperlinks` is 'emit', yields it as a 'hyperlinks' item.
   *
   * @param {AsyncIterable} iterator - Source of the relationships XML data.
   * @param {string} sheetNo - Number captured from `sheet<N>.xml.rels`.
   */
  *_parseHyperlinks(iterator, sheetNo) {
    this._emitEntry({type: 'hyperlinks', id: sheetNo});
    const hyperlinksReader = new HyperlinkReader({
      workbook: this,
      id: sheetNo,
      iterator,
      options: this.options,
    });
    if (this.options.hyperlinks === 'emit') {
      yield {eventType: 'hyperlinks', value: hyperlinksReader};
    }
  }
}
// Reference table (informational): the values each reader option may take.
Object.assign(WorkbookReader, {
  Options: {
    worksheets: ['emit', 'ignore'],
    sharedStrings: ['cache', 'emit', 'ignore'],
    hyperlinks: ['cache', 'emit', 'ignore'],
    styles: ['cache', 'ignore'],
    entries: ['emit', 'ignore'],
  },
});
- module.exports = WorkbookReader;
|