diff --git a/CMakeLists.txt b/CMakeLists.txt
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -120,6 +120,23 @@
 option(GHNS "support Get Hot New Stuff" OFF)
 option(PACKAGERS_BUILD "Build support of multiple CPU architectures in one binary. Should be used by packagers only." ON)
 
+# MSOOXML filter options
+# Each value is forwarded as a compile definition only when it is set;
+# otherwise the fallback in filters/libmsooxml/MsooXmlGlobal.cpp is used.
+if (MSOOXML_MAX_SPREADSHEET_COLS)
+    add_definitions( -DMSOOXML_MAX_SPREADSHEET_COLS=${MSOOXML_MAX_SPREADSHEET_COLS} )
+endif()
+if (MSOOXML_MAX_SPREADSHEET_ROWS)
+    add_definitions( -DMSOOXML_MAX_SPREADSHEET_ROWS=${MSOOXML_MAX_SPREADSHEET_ROWS} )
+endif()
+# A border of 0 is meaningful (reduce to content, no margin), so test DEFINED.
+if (DEFINED MSOOXML_SPREADSHEET_CONTENT_BORDER)
+    add_definitions( -DMSOOXML_SPREADSHEET_CONTENT_BORDER=${MSOOXML_SPREADSHEET_CONTENT_BORDER} )
+endif()
+if (MSOOXML_IMPORT_BY_VALUES)
+    add_definitions( -DMSOOXML_IMPORT_BY_VALUES=${MSOOXML_IMPORT_BY_VALUES} )
+endif()
+
 #######################
 ########################
 ## Productset setting ##
diff --git a/filters/libmsooxml/MsooXmlGlobal.h b/filters/libmsooxml/MsooXmlGlobal.h
--- a/filters/libmsooxml/MsooXmlGlobal.h
+++ b/filters/libmsooxml/MsooXmlGlobal.h
@@ -32,12 +32,21 @@
 namespace MSOOXML
 {
 
-//! @return maximum number of spreadsheet columns per worksheet. Currently set to 32767, which is Calligra Sheets maximum.
+//! @return maximum number of spreadsheet columns per worksheet. By default this is set to 32767, which is Calligra Sheets maximum.
 //! See http://en.wikipedia.org/wiki/OpenOffice.org_Calc#Specifications
-KOMSOOXML_EXPORT unsigned int maximumSpreadsheetColumns();
+KOMSOOXML_EXPORT int maximumSpreadsheetColumns();
 
-//! @return maximum number of spreadsheet rows per worksheet. Currently set to 32767, which is Calligra Sheets maximum.
-KOMSOOXML_EXPORT unsigned int maximumSpreadsheetRows();
+//! @return maximum number of spreadsheet rows per worksheet. By default this is set to 1048575 (0xFFFFF).
+KOMSOOXML_EXPORT int maximumSpreadsheetRows();
+
+//! @return true if the loaded spreadsheet will have its dimensions reduced to the minimum required to contain the cells with contents, plus a border. This can be a useful optimisation, but the default is false.
+KOMSOOXML_EXPORT bool reduceSpreadsheetDimensionsToContent();
+
+//! @return the size of the border in cells to place around the content. This applies only if reduceSpreadsheetDimensionsToContent() is true.
+KOMSOOXML_EXPORT int spreadsheetContentBorder();
+
+//! @return true if cell values are preferred over formulae when loading the spreadsheet. Default value is false.
+KOMSOOXML_EXPORT bool byValuesSpreadsheet();
 
 } // MSOOXML namespace
 
diff --git a/filters/libmsooxml/MsooXmlGlobal.cpp b/filters/libmsooxml/MsooXmlGlobal.cpp
--- a/filters/libmsooxml/MsooXmlGlobal.cpp
+++ b/filters/libmsooxml/MsooXmlGlobal.cpp
@@ -25,12 +25,43 @@
 
 #include "MsooXmlGlobal.h"
 
-KOMSOOXML_EXPORT unsigned int MSOOXML::maximumSpreadsheetColumns()
+#ifndef MSOOXML_MAX_SPREADSHEET_COLS
+#define MSOOXML_MAX_SPREADSHEET_COLS (0x7FFF)
+#endif
+
+#ifndef MSOOXML_MAX_SPREADSHEET_ROWS
+#define MSOOXML_MAX_SPREADSHEET_ROWS (0xFFFFF)
+#endif
+
+#ifndef MSOOXML_SPREADSHEET_CONTENT_BORDER
+#define MSOOXML_SPREADSHEET_CONTENT_BORDER (-1)
+#endif
+
+#ifndef MSOOXML_IMPORT_BY_VALUES
+#define MSOOXML_IMPORT_BY_VALUES (false)
+#endif
+
+KOMSOOXML_EXPORT int MSOOXML::maximumSpreadsheetColumns()
+{
+    return MSOOXML_MAX_SPREADSHEET_COLS;
+}
+
+KOMSOOXML_EXPORT int MSOOXML::maximumSpreadsheetRows()
+{
+    return MSOOXML_MAX_SPREADSHEET_ROWS;
+}
+
+KOMSOOXML_EXPORT bool MSOOXML::reduceSpreadsheetDimensionsToContent()
+{
+    return (MSOOXML_SPREADSHEET_CONTENT_BORDER >= 0);
+}
+
+KOMSOOXML_EXPORT int MSOOXML::spreadsheetContentBorder()
 {
-    return 0x7FFF;
+    return MSOOXML_SPREADSHEET_CONTENT_BORDER;
 }
 
-KOMSOOXML_EXPORT unsigned int MSOOXML::maximumSpreadsheetRows()
+KOMSOOXML_EXPORT bool MSOOXML::byValuesSpreadsheet()
 {
-    return 0xFFFFF;
+    return MSOOXML_IMPORT_BY_VALUES;
 }
diff --git a/filters/sheets/xlsx/XlsxXmlWorksheetReader.cpp b/filters/sheets/xlsx/XlsxXmlWorksheetReader.cpp
--- a/filters/sheets/xlsx/XlsxXmlWorksheetReader.cpp
+++ b/filters/sheets/xlsx/XlsxXmlWorksheetReader.cpp
@@ -604,9 +604,23 @@
         body->endElement(); // table:shapes
     }
 
+    // If the sheet should be reduced to dimensions just containing the actual
+    // content, then calculate appropriate bounds. Otherwise set the bounds
+    // to the maximum column and row of the original spreadsheet.
+    const int exportMaxRow = MSOOXML::reduceSpreadsheetDimensionsToContent()
+            ? min(m_context->sheet->contentMaxRow()
+                  + MSOOXML::spreadsheetContentBorder(),
+                  m_context->sheet->maxRow())
+            : m_context->sheet->maxRow();
+    const int exportMaxColumn = MSOOXML::reduceSpreadsheetDimensionsToContent()
+            ? min(m_context->sheet->contentMaxColumn()
+                  + MSOOXML::spreadsheetContentBorder(),
+                  m_context->sheet->maxColumn())
+            : m_context->sheet->maxColumn();
+
     // now we have everything to start writing the actual cells
     int c = 0;
-    while (c <= m_context->sheet->maxColumn()) {
+    while (c <= exportMaxColumn) {
         body->startElement("table:table-column");
         int repeatedColumns = 1;
         bool currentColumnHidden = false;
@@ -626,7 +640,7 @@
             currentColumnHidden = true;
         }
         ++c;
-        while (c <= m_context->sheet->maxColumn()) {
+        while (c <= exportMaxColumn) {
             column = m_context->sheet->column(c, false);
             if (column && column->hidden ) {
                 if (currentColumnHidden) {
@@ -652,9 +666,8 @@
         body->endElement(); // table:table-column
     }
 
-    const int rowCount = m_context->sheet->maxRow();
-    for(int r = 0; r <= rowCount; ++r) {
-        const int columnCount = m_context->sheet->maxCellsInRow(r);
+    for(int r = 0; r <= exportMaxRow; ++r) {
+        const int columnCount = min(m_context->sheet->maxCellsInRow(r), exportMaxColumn);
         Row* row = m_context->sheet->row(r, false);
         body->startElement("table:table-row");
         if (row) {
@@ -1065,6 +1078,7 @@
             ELSE_WRONG_FORMAT
         }
     }
+
     READ_EPILOGUE_WITHOUT_RETURN
 
     // append remaining empty columns
@@ -1139,6 +1153,18 @@
     if (minCol > maxCol)
         qSwap(minCol, maxCol);
 
+    if (minCol >= MSOOXML::maximumSpreadsheetColumns()) {
+        showWarningAboutWorksheetSize();
+        readNext();
+        // This will return
+        READ_EPILOGUE
+    }
+
+    if (maxCol > MSOOXML::maximumSpreadsheetColumns()) {
+        showWarningAboutWorksheetSize();
+        maxCol = MSOOXML::maximumSpreadsheetColumns();
+    }
+
     if (m_columnCount < minCol) {
         appendTableColumns(minCol - m_columnCount);
         m_columnCount = minCol;
@@ -1173,10 +1199,6 @@
 
     m_columnCount += (maxCol - minCol);
 
-    if (m_columnCount > (int)MSOOXML::maximumSpreadsheetColumns()) {
-        showWarningAboutWorksheetSize();
-    }
-
     readNext();
     READ_EPILOGUE
 }
@@ -1198,13 +1220,20 @@
 {
     READ_PROLOGUE
     m_currentRow = 0;
+
     while (!atEnd()) {
         readNext();
         qCDebug(lcXlsxImport) << *this;
         BREAK_IF_END_OF(CURRENT_EL)
         if (isStartElement()) {
-            TRY_READ_IF(row)
-            ELSE_WRONG_FORMAT
+            if (m_currentRow < MSOOXML::maximumSpreadsheetRows()) {
+                TRY_READ_IF(row)
+                ELSE_WRONG_FORMAT
+            }
+            else {
+                showWarningAboutWorksheetSize();
+                skipCurrentElement();
+            }
         }
     }
     READ_EPILOGUE
@@ -1269,9 +1298,6 @@
         if (!ok || m_currentRow < 0)
             return KoFilter::WrongFormat;
     }
-    if (m_currentRow > (int)MSOOXML::maximumSpreadsheetRows()) {
-        showWarningAboutWorksheetSize();
-    }
 
     m_currentColumn = 0;
     Row* row = m_context->sheet->row(m_currentRow, true);
@@ -1354,6 +1380,15 @@
         return KoFilter::WrongFormat;
     }
 
+    if ((m_currentColumn >= MSOOXML::maximumSpreadsheetColumns())
+        || (m_currentRow >= MSOOXML::maximumSpreadsheetRows())) {
+        skipCurrentElement();
+        ++m_currentColumn; // This cell is done now. Select the next cell.
+
+        // This will return
+        READ_EPILOGUE
+    }
+
     TRY_READ_ATTR_WITHOUT_NS(s)
     TRY_READ_ATTR_WITHOUT_NS(t)
 
@@ -1387,6 +1422,7 @@
 //    const bool addTextPElement = true;//m_value.isEmpty() || t != QLatin1String("s");
 
     if (!m_value.isEmpty()) {
+        m_context->sheet->updateContentBounds(m_currentColumn, m_currentRow);
         /* depending on type: 18.18.11 ST_CellType (Cell Type), p. 2679:
             b (Boolean) Cell containing a boolean.
             d (Date) Cell contains a date in the ISO 8601 format.
@@ -1564,48 +1600,51 @@
 
     // Shared formula groups.
     int sharedGroupIndex = -1;
-    if (t == QLatin1String("shared")) {
+    if ((!t.isEmpty()) && (t == QLatin1String("shared"))) {
         TRY_READ_ATTR(si)
         STRING_TO_INT(si, sharedGroupIndex, "f@si")
     }
 
-    while (!atEnd() && !hasError()) {
-        readNext();
-        BREAK_IF_END_OF(CURRENT_EL)
-        if (isCharacters()) {
-            delete cell->formula;
-            cell->formula = new FormulaImpl(Calligra::Sheets::MSOOXML::convertFormula(text().toString()));
+    if ((sharedGroupIndex >= 0) || (!MSOOXML::byValuesSpreadsheet())) {
+        while (!atEnd() && !hasError()) {
+            readNext();
+            BREAK_IF_END_OF(CURRENT_EL)
+            if (isCharacters()) {
+                delete cell->formula;
+                cell->formula = new FormulaImpl(Calligra::Sheets::MSOOXML::convertFormula(text().toString()));
+            }
         }
-    }
+    } else {
+        skipCurrentElement();
+    }
+
+    if (sharedGroupIndex >= 0) {
+        /* Shared Group Index, p. 1815
+        Optional attribute to optimize load performance by sharing formulas.
+        When a formula is a shared formula (t value is shared) then this value indicates the
+        group to which this particular cell's formula belongs. The first formula in a group of
+        shared formulas is saved in the f element. This is considered the 'master' formula cell.
+        Subsequent cells sharing this formula need not have the formula written in their f
+        element. Instead, the attribute si value for a particular cell is used to figure what the
+        formula expression should be based on the cell's relative location to the master formula
+        cell.
+        */
 
-    if (!t.isEmpty()) {
-        if (t == QLatin1String("shared")) {
-            if (sharedGroupIndex >= 0) {
-                /* Shared Group Index, p. 1815
-                Optional attribute to optimize load performance by sharing formulas.
-                When a formula is a shared formula (t value is shared) then this value indicates the
-                group to which this particular cell's formula belongs. The first formula in a group of
-                shared formulas is saved in the f element. This is considered the 'master' formula cell.
-                Subsequent cells sharing this formula need not have the formula written in their f
-                element. Instead, the attribute si value for a particular cell is used to figure what the
-                formula expression should be based on the cell's relative location to the master formula
-                cell.
-                */
-                if (d->sharedFormulas.contains(sharedGroupIndex)) {
-                    if (!cell->formula /* || cell->formula->isEmpty() */) { // don't do anything if the cell already defines a formula
-                        QHash<int, Cell*>::iterator it = d->sharedFormulas.find(sharedGroupIndex);
-                        if (it != d->sharedFormulas.end()) {
-                            delete cell->formula;
-                            cell->formula = new SharedFormula(it.value());
-                        }
-                    }
-                } else if (cell->formula /* && !cell->formula->isEmpty()*/) { // is this cell the master cell?
-                    d->sharedFormulas[sharedGroupIndex] = cell;
+        if (d->sharedFormulas.contains(sharedGroupIndex)) {
+            if (!cell->formula /* || cell->formula->isEmpty() */) { // don't do anything if the cell already defines a formula
+                QHash<int, Cell*>::iterator it = d->sharedFormulas.find(sharedGroupIndex);
+                if (it != d->sharedFormulas.end()) {
+                    delete cell->formula;
+                    cell->formula = new SharedFormula(it.value());
                 }
             }
+        } else if (cell->formula /* && !cell->formula->isEmpty()*/) { // is this cell the master cell?
+            d->sharedFormulas[sharedGroupIndex] = cell;
         }
     }
 
+    m_context->sheet->updateContentBounds(m_currentColumn, m_currentRow);
+
 /*
     if (!ref.isEmpty()) {
         const int pos = ref.indexOf(':');
diff --git a/filters/sheets/xlsx/XlsxXmlWorksheetReader_p.h b/filters/sheets/xlsx/XlsxXmlWorksheetReader_p.h
--- a/filters/sheets/xlsx/XlsxXmlWorksheetReader_p.h
+++ b/filters/sheets/xlsx/XlsxXmlWorksheetReader_p.h
@@ -177,7 +177,7 @@
     QString m_name;
     double m_defaultRowHeight, m_defaultColWidth, m_baseColWidth;
 
-    explicit Sheet(const QString &name) : m_name(name), m_defaultRowHeight(-1.0), m_defaultColWidth(-1.0), m_baseColWidth(-1.0), m_maxRow(0), m_maxColumn(0), m_visible(true) {}
+    explicit Sheet(const QString &name) : m_name(name), m_defaultRowHeight(-1.0), m_defaultColWidth(-1.0), m_baseColWidth(-1.0), m_maxRow(0), m_maxColumn(0), m_contentMaxRow(0), m_contentMaxColumn(0), m_visible(true) {}
     ~Sheet() { qDeleteAll(m_rows); qDeleteAll(m_columns); /*qDeleteAll(m_cells);*/ }
 
     Row* row(int rowIndex, bool autoCreate)
@@ -219,9 +219,20 @@
         return c;
     }
 
+    void updateContentBounds(int column, int row) {
+        if (column > m_contentMaxColumn) {
+            m_contentMaxColumn = column;
+        }
+        if (row > m_contentMaxRow) {
+            m_contentMaxRow = row;
+        }
+    }
+
     int maxRow() const { return m_maxRow; }
     int maxColumn() const { return m_maxColumn; }
     int maxCellsInRow(int rowIndex) const { return m_maxCellsInRow[rowIndex]; }
+    int contentMaxRow() const { return m_contentMaxRow; }
+    int contentMaxColumn() const { return m_contentMaxColumn; }
     bool visible() const { return m_visible; }
     void setVisible(bool visible) { m_visible = visible; }
 
@@ -237,6 +248,8 @@
     QString m_pictureBackgroundPath;
     int m_maxRow;
     int m_maxColumn;
+    int m_contentMaxRow;
+    int m_contentMaxColumn;
     bool m_visible : 1;
 };
 