diff --git a/mens-core/pom.xml b/mens-core/pom.xml index c97e4d9..2055034 100644 --- a/mens-core/pom.xml +++ b/mens-core/pom.xml @@ -374,6 +374,28 @@ test + + + org.apache.poi + poi + ${apache.poi.version} + + + org.apache.poi + poi-ooxml + ${apache.poi.version} + + + org.apache.poi + poi-scratchpad + ${apache.poi.version} + + + xml-apis + xml-apis + 1.4.01 + + diff --git a/mens-core/src/main/java/kr/xit/core/support/xlsx/StreamingReader.java b/mens-core/src/main/java/kr/xit/core/support/xlsx/StreamingReader.java new file mode 100644 index 0000000..73e4e41 --- /dev/null +++ b/mens-core/src/main/java/kr/xit/core/support/xlsx/StreamingReader.java @@ -0,0 +1,402 @@ +package kr.xit.core.support.xlsx; + +import static kr.xit.core.support.xlsx.XmlUtils.document; +import static kr.xit.core.support.xlsx.XmlUtils.searchForNodeList; +import static kr.xit.core.support.xlsx.impl.TempFileUtil.writeInputStreamToFile; + +import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.nio.file.Files; +import java.security.GeneralSecurityException; +import java.util.Iterator; +import java.util.Objects; +import javax.xml.stream.XMLEventReader; +import javax.xml.stream.XMLStreamException; +import kr.xit.core.support.xlsx.exceptions.CloseException; +import kr.xit.core.support.xlsx.exceptions.MissingSheetException; +import kr.xit.core.support.xlsx.exceptions.OpenException; +import kr.xit.core.support.xlsx.exceptions.ReadException; +import kr.xit.core.support.xlsx.impl.StreamingSheetReader; +import kr.xit.core.support.xlsx.impl.StreamingWorkbook; +import kr.xit.core.support.xlsx.impl.StreamingWorkbookReader; +import kr.xit.core.support.xlsx.sst.BufferedStringsTable; +import org.apache.poi.openxml4j.exceptions.InvalidFormatException; +import org.apache.poi.openxml4j.exceptions.OpenXML4JException; +import org.apache.poi.openxml4j.opc.OPCPackage; +import org.apache.poi.poifs.crypt.Decryptor; +import org.apache.poi.poifs.crypt.EncryptionInfo; +import 
org.apache.poi.poifs.filesystem.POIFSFileSystem; +import org.apache.poi.ss.usermodel.Row; +import org.apache.poi.ss.usermodel.Workbook; +import org.apache.poi.util.StaxHelper; +import org.apache.poi.xssf.eventusermodel.XSSFReader; +import org.apache.poi.xssf.model.SharedStringsTable; +import org.apache.poi.xssf.model.StylesTable; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.w3c.dom.Node; +import org.w3c.dom.NodeList; + +/** + * Streaming Excel workbook implementation. Most advanced features of POI are not supported. Use + * this only if your application can handle iterating through an entire workbook, row by row. + */ +public class StreamingReader implements Iterable, AutoCloseable { + + private static final Logger log = LoggerFactory.getLogger(StreamingReader.class); + + private File tmp; + private final StreamingWorkbookReader workbook; + + public StreamingReader(StreamingWorkbookReader workbook) { + this.workbook = workbook; + } + + /** + * Returns a new streaming iterator to loop through rows. This iterator is not guaranteed to + * have all rows in memory, and any particular iteration may trigger a load from disk to read in + * new data. + * + * @return the streaming iterator + * @deprecated StreamingReader is equivalent to the POI Workbook object rather than the Sheet + * object. This method will be removed in a future release. + */ + @Override + public Iterator iterator() { + return workbook.first().iterator(); + } + + /** + * Closes the streaming resource, attempting to clean up any temporary files created. 
+ * + * @throws CloseException if there is an issue closing the stream + */ + @Override + public void close() throws IOException { + try { + workbook.close(); + } finally { + if (tmp != null) { + if (log.isDebugEnabled()) { + log.debug("Deleting tmp file [" + tmp.getAbsolutePath() + "]"); + } + tmp.delete(); + } + } + } + + public static Builder builder() { + return new Builder(); + } + + public static class Builder { + + private int rowCacheSize = 10; + private int bufferSize = 1024; + private int sheetIndex = 0; + private int sstCacheSizeBytes = -1; + private String sheetName; + private String password; + + public int getRowCacheSize() { + return rowCacheSize; + } + + public int getBufferSize() { + return bufferSize; + } + + /** + * @return The sheet index + * @deprecated This method will be removed in a future release. + */ + public int getSheetIndex() { + return sheetIndex; + } + + /** + * @return The sheet name + * @deprecated This method will be removed in a future release. + */ + public String getSheetName() { + return sheetName; + } + + /** + * @return The password to use to unlock this workbook + */ + public String getPassword() { + return password; + } + + /** + * @return The size of the shared string table cache. If less than 0, no cache will be used + * and the entire table will be loaded into memory. + */ + public int getSstCacheSizeBytes() { + return sstCacheSizeBytes; + } + + /** + * The number of rows to keep in memory at any given point. + *

+ * Defaults to 10 + *

+ * + * @param rowCacheSize number of rows + * @return reference to current {@code Builder} + */ + public Builder rowCacheSize(int rowCacheSize) { + this.rowCacheSize = rowCacheSize; + return this; + } + + /** + * The number of bytes to read into memory from the input resource. + *

+ * Defaults to 1024 + *

+ * + * @param bufferSize buffer size in bytes + * @return reference to current {@code Builder} + */ + public Builder bufferSize(int bufferSize) { + this.bufferSize = bufferSize; + return this; + } + + /** + * Which sheet to open. There can only be one sheet open for a single instance of + * {@code StreamingReader}. If more sheets need to be read, a new instance must be created. + *

+ * Defaults to 0 + *

+ * + * @param sheetIndex index of sheet + * @return reference to current {@code Builder} + * @deprecated This method will be removed in a future release. Use + * {@link StreamingWorkbook#getSheetAt(int)} instead. + */ + public Builder sheetIndex(int sheetIndex) { + this.sheetIndex = sheetIndex; + return this; + } + + /** + * Which sheet to open. There can only be one sheet open for a single instance of + * {@code StreamingReader}. If more sheets need to be read, a new instance must be created. + * + * @param sheetName name of sheet + * @return reference to current {@code Builder} + * @deprecated This method will be removed in a future release. Use + * {@link StreamingWorkbook#getSheet(String)} instead. + */ + public Builder sheetName(String sheetName) { + this.sheetName = sheetName; + return this; + } + + /** + * For password protected files specify password to open file. If the password is incorrect + * a {@code ReadException} is thrown on {@code read}. + *

NULL indicates that no password should be used; this is the + * default value.

+ * + * @param password to use when opening file + * @return reference to current {@code Builder} + */ + public Builder password(String password) { + this.password = password; + return this; + } + + /** + *

!!! This option is experimental !!!

+ *

+ * Set the size of the Shared Strings Table cache. This option exists to accommodate + * extremely large workbooks with millions of unique strings. Normally the SST is entirely + * loaded into memory, but with large workbooks with high cardinality (i.e., very few + * duplicate values) the SST may not fit entirely into memory. + *

+ * By default, the entire SST *will* be loaded into memory. Setting a value greater than 0 + * for this option will only cache up to this many entries in memory. + * However, enabling this option at all will have some noticeable + * performance degradation as you are trading memory for disk space. + * + * @param sstCacheSizeBytes size of SST cache + * @return reference to current {@code Builder} + */ + public Builder sstCacheSizeBytes(int sstCacheSizeBytes) { + this.sstCacheSizeBytes = sstCacheSizeBytes; + return this; + } + + /** + * Reads a given {@code InputStream} and returns a new instance of {@code Workbook}. Due to + * Apache POI limitations, a temporary file must be written in order to create a streaming + * iterator. This process will use the same buffer size as specified in + * {@link #bufferSize(int)}. + * + * @param is input stream to read in + * @return A {@link Workbook} that can be read from + * @throws ReadException if there is an issue reading the stream + */ + public Workbook open(InputStream is) { + StreamingWorkbookReader workbook = new StreamingWorkbookReader(this); + workbook.init(is); + return new StreamingWorkbook(workbook); + } + + /** + * Reads a given {@code File} and returns a new instance of {@code Workbook}. + * + * @param file file to read in + * @return A {@link Workbook} that can be read from + * @throws OpenException if there is an issue opening the file + * @throws ReadException if there is an issue reading the file + */ + public Workbook open(File file) { + StreamingWorkbookReader workbook = new StreamingWorkbookReader(this); + workbook.init(file); + return new StreamingWorkbook(workbook); + } + + /** + * Reads a given {@code InputStream} and returns a new instance of {@code StreamingReader}. + * Due to Apache POI limitations, a temporary file must be written in order to create a + * streaming iterator. This process will use the same buffer size as specified in + * {@link #bufferSize(int)}. 
+ * + * @param is input stream to read in + * @return built streaming reader instance + * @throws ReadException if there is an issue reading the stream + * @deprecated This method will be removed in a future release. Use + * {@link Builder#open(InputStream)} instead + */ + public StreamingReader read(InputStream is) { + File f = null; + try { + f = writeInputStreamToFile(is, bufferSize); + log.debug("Created temp file [" + f.getAbsolutePath() + "]"); + + StreamingReader r = read(f); + r.tmp = f; + return r; + } catch (IOException e) { + throw new ReadException("Unable to read input stream", e); + } catch (RuntimeException e) { + if (f != null) { + f.delete(); + } + throw e; + } + } + + /** + * 요청한 {@code File}을 {@code StreamingReader} 인스턴스로 return. + * + * @param f file to read in + * @return built streaming reader instance + * @throws OpenException if there is an issue opening the file + * @throws ReadException if there is an issue reading the file + * @deprecated This method will be removed in a future release. 
Use + * {@link Builder#open(File)} instead + */ + public StreamingReader read(File f) { + try { + OPCPackage pkg; + if (password != null) { + // Based on: https://poi.apache.org/encryption.html + POIFSFileSystem poifs = new POIFSFileSystem(f); + EncryptionInfo info = new EncryptionInfo(poifs); + Decryptor d = Decryptor.getInstance(info); + d.verifyPassword(password); + pkg = OPCPackage.open(d.getDataStream(poifs)); + } else { + pkg = OPCPackage.open(f); + } + + boolean use1904Dates = false; + XSSFReader reader = new XSSFReader(pkg); + + SharedStringsTable sst; + File sstCache = null; + if (sstCacheSizeBytes > 0) { + sstCache = Files.createTempFile("", "").toFile(); + log.debug("Created sst cache file [" + sstCache.getAbsolutePath() + "]"); + sst = BufferedStringsTable.getSharedStringsTable(sstCache, sstCacheSizeBytes, + pkg); + } else { + sst = reader.getSharedStringsTable(); + } + + StylesTable styles = reader.getStylesTable(); + NodeList workbookPr = searchForNodeList(document(reader.getWorkbookData()), + "/ss:workbook/ss:workbookPr"); + if (workbookPr.getLength() == 1) { + final Node date1904 = workbookPr.item(0).getAttributes() + .getNamedItem("date1904"); + if (date1904 != null) { + use1904Dates = ("1".equals(date1904.getTextContent())); + } + } + InputStream sheet = findSheet(reader); + if (sheet == null) { + throw new MissingSheetException( + "Unable to find sheet at index [" + sheetIndex + "]"); + } + + XMLEventReader parser = StaxHelper.newXMLInputFactory().createXMLEventReader(sheet); + + return new StreamingReader(new StreamingWorkbookReader(sst, sstCache, pkg, + new StreamingSheetReader(sst, styles, parser, use1904Dates, rowCacheSize), + this)); + } catch (IOException e) { + throw new OpenException("Failed to open file", e); + } catch (OpenXML4JException | XMLStreamException e) { + throw new ReadException("Unable to read workbook", e); + } catch (GeneralSecurityException e) { + throw new ReadException("Unable to read workbook - Decryption failed", e); + 
} + } + + /** + * @deprecated This will be removed when the transition to the 1.x API is complete + */ + private InputStream findSheet(XSSFReader reader) + throws IOException, InvalidFormatException { + int index = sheetIndex; + if (sheetName != null) { + index = -1; + //This file is separate from the worksheet data, and should be fairly small + NodeList nl = searchForNodeList(document(reader.getWorkbookData()), + "/ss:workbook/ss:sheets/ss:sheet"); + for (int i = 0; i < nl.getLength(); i++) { + if (Objects.equals( + nl.item(i).getAttributes().getNamedItem("name").getTextContent(), + sheetName)) { + index = i; + } + } + if (index < 0) { + return null; + } + } + Iterator iter = reader.getSheetsData(); + InputStream sheet = null; + + int i = 0; + while (iter.hasNext()) { + InputStream is = iter.next(); + if (i++ == index) { + sheet = is; + log.debug("Found sheet at index [" + sheetIndex + "]"); + break; + } + } + return sheet; + } + } + +} diff --git a/mens-core/src/main/java/kr/xit/core/support/xlsx/XmlUtils.java b/mens-core/src/main/java/kr/xit/core/support/xlsx/XmlUtils.java new file mode 100644 index 0000000..8511ef4 --- /dev/null +++ b/mens-core/src/main/java/kr/xit/core/support/xlsx/XmlUtils.java @@ -0,0 +1,94 @@ +package kr.xit.core.support.xlsx; + +import java.io.IOException; +import java.io.InputStream; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.Map; +import java.util.Set; +import javax.xml.XMLConstants; +import javax.xml.namespace.NamespaceContext; +import javax.xml.xpath.XPath; +import javax.xml.xpath.XPathConstants; +import javax.xml.xpath.XPathExpressionException; +import javax.xml.xpath.XPathFactory; +import kr.xit.core.support.xlsx.exceptions.ParseException; +import org.apache.poi.ooxml.util.DocumentHelper; +import org.w3c.dom.Document; +import org.w3c.dom.NodeList; +import org.xml.sax.SAXException; + +public class XmlUtils { + + public static Document 
document(InputStream is) { + try { + return DocumentHelper.readDocument(is); + } catch (SAXException | IOException e) { + throw new ParseException(e); + } + } + + public static NodeList searchForNodeList(Document document, String xpath) { + try { + XPath xp = XPathFactory.newInstance().newXPath(); + NamespaceContextImpl nc = new NamespaceContextImpl(); + nc.addNamespace("ss", "http://schemas.openxmlformats.org/spreadsheetml/2006/main"); + xp.setNamespaceContext(nc); + return (NodeList) xp.compile(xpath) + .evaluate(document, XPathConstants.NODESET); + } catch (XPathExpressionException e) { + throw new ParseException(e); + } + } + + private static class NamespaceContextImpl implements NamespaceContext { + + private Map urisByPrefix = new HashMap<>(); + + private Map prefixesByURI = new HashMap<>(); + + public NamespaceContextImpl() { + addNamespace(XMLConstants.XML_NS_PREFIX, XMLConstants.XML_NS_URI); + addNamespace(XMLConstants.XMLNS_ATTRIBUTE, XMLConstants.XMLNS_ATTRIBUTE_NS_URI); + } + + public void addNamespace(String prefix, String namespaceURI) { + urisByPrefix.put(prefix, namespaceURI); + if (prefixesByURI.containsKey(namespaceURI)) { + (prefixesByURI.get(namespaceURI)).add(prefix); + } else { + Set set = new HashSet<>(); + set.add(prefix); + prefixesByURI.put(namespaceURI, set); + } + } + + public String getNamespaceURI(String prefix) { + if (prefix == null) { + throw new IllegalArgumentException("prefix cannot be null"); + } + if (urisByPrefix.containsKey(prefix)) { + return (String) urisByPrefix.get(prefix); + } else { + return XMLConstants.NULL_NS_URI; + } + } + + public String getPrefix(String namespaceURI) { + return (String) getPrefixes(namespaceURI).next(); + } + + public Iterator getPrefixes(String namespaceURI) { + if (namespaceURI == null) { + throw new IllegalArgumentException("namespaceURI cannot be null"); + } + if (prefixesByURI.containsKey(namespaceURI)) { + return ((Set) prefixesByURI.get(namespaceURI)).iterator(); + } else { + return 
Collections.EMPTY_SET.iterator(); + } + } + } +} diff --git a/mens-core/src/main/java/kr/xit/core/support/xlsx/exceptions/CloseException.java b/mens-core/src/main/java/kr/xit/core/support/xlsx/exceptions/CloseException.java new file mode 100644 index 0000000..a5ad074 --- /dev/null +++ b/mens-core/src/main/java/kr/xit/core/support/xlsx/exceptions/CloseException.java @@ -0,0 +1,20 @@ +package kr.xit.core.support.xlsx.exceptions; + +public class CloseException extends RuntimeException { + + public CloseException() { + super(); + } + + public CloseException(String msg) { + super(msg); + } + + public CloseException(Exception e) { + super(e); + } + + public CloseException(String msg, Exception e) { + super(msg, e); + } +} diff --git a/mens-core/src/main/java/kr/xit/core/support/xlsx/exceptions/MissingSheetException.java b/mens-core/src/main/java/kr/xit/core/support/xlsx/exceptions/MissingSheetException.java new file mode 100644 index 0000000..4f29625 --- /dev/null +++ b/mens-core/src/main/java/kr/xit/core/support/xlsx/exceptions/MissingSheetException.java @@ -0,0 +1,20 @@ +package kr.xit.core.support.xlsx.exceptions; + +public class MissingSheetException extends RuntimeException { + + public MissingSheetException() { + super(); + } + + public MissingSheetException(String msg) { + super(msg); + } + + public MissingSheetException(Exception e) { + super(e); + } + + public MissingSheetException(String msg, Exception e) { + super(msg, e); + } +} diff --git a/mens-core/src/main/java/kr/xit/core/support/xlsx/exceptions/NotSupportedException.java b/mens-core/src/main/java/kr/xit/core/support/xlsx/exceptions/NotSupportedException.java new file mode 100644 index 0000000..da9cf01 --- /dev/null +++ b/mens-core/src/main/java/kr/xit/core/support/xlsx/exceptions/NotSupportedException.java @@ -0,0 +1,20 @@ +package kr.xit.core.support.xlsx.exceptions; + +public class NotSupportedException extends RuntimeException { + + public NotSupportedException() { + super(); + } + + public 
NotSupportedException(String msg) { + super(msg); + } + + public NotSupportedException(Exception e) { + super(e); + } + + public NotSupportedException(String msg, Exception e) { + super(msg, e); + } +} diff --git a/mens-core/src/main/java/kr/xit/core/support/xlsx/exceptions/OpenException.java b/mens-core/src/main/java/kr/xit/core/support/xlsx/exceptions/OpenException.java new file mode 100644 index 0000000..28f560d --- /dev/null +++ b/mens-core/src/main/java/kr/xit/core/support/xlsx/exceptions/OpenException.java @@ -0,0 +1,20 @@ +package kr.xit.core.support.xlsx.exceptions; + +public class OpenException extends RuntimeException { + + public OpenException() { + super(); + } + + public OpenException(String msg) { + super(msg); + } + + public OpenException(Exception e) { + super(e); + } + + public OpenException(String msg, Exception e) { + super(msg, e); + } +} diff --git a/mens-core/src/main/java/kr/xit/core/support/xlsx/exceptions/ParseException.java b/mens-core/src/main/java/kr/xit/core/support/xlsx/exceptions/ParseException.java new file mode 100644 index 0000000..3b59a5d --- /dev/null +++ b/mens-core/src/main/java/kr/xit/core/support/xlsx/exceptions/ParseException.java @@ -0,0 +1,20 @@ +package kr.xit.core.support.xlsx.exceptions; + +public class ParseException extends RuntimeException { + + public ParseException() { + super(); + } + + public ParseException(String msg) { + super(msg); + } + + public ParseException(Exception e) { + super(e); + } + + public ParseException(String msg, Exception e) { + super(msg, e); + } +} diff --git a/mens-core/src/main/java/kr/xit/core/support/xlsx/exceptions/ReadException.java b/mens-core/src/main/java/kr/xit/core/support/xlsx/exceptions/ReadException.java new file mode 100644 index 0000000..8d0d556 --- /dev/null +++ b/mens-core/src/main/java/kr/xit/core/support/xlsx/exceptions/ReadException.java @@ -0,0 +1,20 @@ +package kr.xit.core.support.xlsx.exceptions; + +public class ReadException extends RuntimeException { + + public 
ReadException() { + super(); + } + + public ReadException(String msg) { + super(msg); + } + + public ReadException(Exception e) { + super(e); + } + + public ReadException(String msg, Exception e) { + super(msg, e); + } +} diff --git a/mens-core/src/main/java/kr/xit/core/support/xlsx/impl/StreamingCell.java b/mens-core/src/main/java/kr/xit/core/support/xlsx/impl/StreamingCell.java new file mode 100644 index 0000000..2c37c31 --- /dev/null +++ b/mens-core/src/main/java/kr/xit/core/support/xlsx/impl/StreamingCell.java @@ -0,0 +1,536 @@ +package kr.xit.core.support.xlsx.impl; + +import java.time.Instant; +import java.time.LocalDateTime; +import java.time.ZoneOffset; +import java.util.Calendar; +import java.util.Date; +import kr.xit.core.support.xlsx.exceptions.NotSupportedException; +import org.apache.poi.ss.formula.FormulaParseException; +import org.apache.poi.ss.usermodel.Cell; +import org.apache.poi.ss.usermodel.CellStyle; +import org.apache.poi.ss.usermodel.CellType; +import org.apache.poi.ss.usermodel.Comment; +import org.apache.poi.ss.usermodel.DateUtil; +import org.apache.poi.ss.usermodel.Hyperlink; +import org.apache.poi.ss.usermodel.RichTextString; +import org.apache.poi.ss.usermodel.Row; +import org.apache.poi.ss.usermodel.Sheet; +import org.apache.poi.ss.util.CellAddress; +import org.apache.poi.ss.util.CellRangeAddress; +import org.apache.poi.xssf.usermodel.XSSFRichTextString; + +public class StreamingCell implements Cell { + + private static final Supplier NULL_SUPPLIER = () -> null; + private static final String FALSE_AS_STRING = "0"; + private static final String TRUE_AS_STRING = "1"; + + private final Sheet sheet; + private int columnIndex; + private int rowIndex; + private final boolean use1904Dates; + + private Supplier contentsSupplier = NULL_SUPPLIER; + private Object rawContents; + private String formula; + private String numericFormat; + private Short numericFormatIndex; + private String type; + private CellStyle cellStyle; + private Row row; + 
private boolean formulaType; + + public StreamingCell(Sheet sheet, int columnIndex, int rowIndex, boolean use1904Dates) { + this.sheet = sheet; + this.columnIndex = columnIndex; + this.rowIndex = rowIndex; + this.use1904Dates = use1904Dates; + } + + public void setContentSupplier(Supplier contentsSupplier) { + this.contentsSupplier = contentsSupplier; + } + + public void setRawContents(Object rawContents) { + this.rawContents = rawContents; + } + + public String getNumericFormat() { + return numericFormat; + } + + public void setNumericFormat(String numericFormat) { + this.numericFormat = numericFormat; + } + + public Short getNumericFormatIndex() { + return numericFormatIndex; + } + + public void setNumericFormatIndex(Short numericFormatIndex) { + this.numericFormatIndex = numericFormatIndex; + } + + public void setFormula(String formula) { + this.formula = formula; + } + + public String getType() { + return type; + } + + public void setType(String type) { + this.type = type; + } + + public boolean isFormulaType() { + return formulaType; + } + + public void setFormulaType(boolean formulaType) { + this.formulaType = formulaType; + } + + @Override + public void setCellStyle(CellStyle cellStyle) { + this.cellStyle = cellStyle; + } + + /* Supported */ + + /** + * Returns column index of this cell + * + * @return zero-based column index of a column in a sheet. + */ + @Override + public int getColumnIndex() { + return columnIndex; + } + + /** + * Returns row index of a row in the sheet that contains this cell + * + * @return zero-based row index of a row in the sheet that contains this cell + */ + @Override + public int getRowIndex() { + return rowIndex; + } + + /** + * Returns the Row this cell belongs to. Note that keeping references to cell rows around after + * the iterator window has passed will preserve them. + * + * @return the Row that owns this cell + */ + @Override + public Row getRow() { + return row; + } + + /** + * Sets the Row this cell belongs to. 
Note that keeping references to cell rows around after the + * iterator window has passed will preserve them. + *

+ * The row is not automatically set. + * + * @param row The row + */ + public void setRow(Row row) { + this.row = row; + } + + + /** + * Return the cell type. + * + * @return the cell type + */ + @Override + public CellType getCellType() { + if (formulaType) { + return CellType.FORMULA; + } else if (contentsSupplier.getContent() == null || type == null) { + return CellType.BLANK; + } else if ("n".equals(type)) { + return CellType.NUMERIC; + } else if ("s".equals(type) || "inlineStr".equals(type) || "str".equals(type)) { + return CellType.STRING; + } else if ("str".equals(type)) { + return CellType.FORMULA; + } else if ("b".equals(type)) { + return CellType.BOOLEAN; + } else if ("e".equals(type)) { + return CellType.ERROR; + } else { + throw new UnsupportedOperationException("Unsupported cell type '" + type + "'"); + } + } + + /** + * Get the value of the cell as a string. For blank cells we return an empty string. + * + * @return the value of the cell as a string + */ + @Override + public String getStringCellValue() { + Object c = contentsSupplier.getContent(); + + return c == null ? "" : c.toString(); + } + + /** + * Get the value of the cell as a number. For strings we throw an exception. For blank cells we + * return a 0. + * + * @return the value of the cell as a number + * @throws NumberFormatException if the cell value isn't a parsable double. + */ + @Override + public double getNumericCellValue() { + return rawContents == null ? 0.0 : Double.parseDouble((String) rawContents); + } + + /** + * Get the value of the cell as a date. For strings we throw an exception. For blank cells we + * return a null. + * + * @return the value of the cell as a date + * @throws IllegalStateException if the cell type returned by {@link #getCellType()} is + * CELL_TYPE_STRING + * @throws NumberFormatException if the cell value isn't a parsable double. 
+ */ + @Override + public Date getDateCellValue() { + if (getCellType() == CellType.STRING) { + throw new IllegalStateException("Cell type cannot be CELL_TYPE_STRING"); + } + return rawContents == null ? null + : DateUtil.getJavaDate(getNumericCellValue(), use1904Dates); + } + + @Override + public LocalDateTime getLocalDateTimeCellValue() { + return LocalDateTime.ofInstant(Instant.ofEpochMilli(getDateCellValue().getTime()), + ZoneOffset.systemDefault()); + } + + /** + * Get the value of the cell as a boolean. For strings we throw an exception. For blank cells we + * return false. + * + * @return the value of the cell as a boolean + */ + @Override + public boolean getBooleanCellValue() { + CellType cellType = getCellType(); + switch (cellType) { + case BLANK: + return false; + case BOOLEAN: + return rawContents != null && TRUE_AS_STRING.equals(rawContents); + case FORMULA: + throw new NotSupportedException(); + default: + throw typeMismatch(CellType.BOOLEAN, cellType, false); + } + } + + /** + * Get the value of the cell as a XSSFRichTextString + *

+ * For numeric cells we throw an exception. For blank cells we return an empty string. For + * formula cells we return the pre-calculated value if a string, otherwise an exception + *

+ * + * @return the value of the cell as a XSSFRichTextString + */ + @Override + public XSSFRichTextString getRichStringCellValue() { + CellType cellType = getCellType(); + XSSFRichTextString rt; + switch (cellType) { + case BLANK: + rt = new XSSFRichTextString(""); + break; + case STRING: + rt = new XSSFRichTextString(getStringCellValue()); + break; + default: + throw new NotSupportedException(); + } + return rt; + } + + @Override + public Sheet getSheet() { + return sheet; + } + + private static RuntimeException typeMismatch(CellType expectedType, CellType actualType, + boolean isFormulaCell) { + String msg = "Cannot get a " + + getCellTypeName(expectedType) + " value from a " + + getCellTypeName(actualType) + " " + (isFormulaCell ? "formula " : "") + "cell"; + return new IllegalStateException(msg); + } + + /** + * Used to help format error messages + */ + private static String getCellTypeName(CellType cellType) { + switch (cellType) { + case BLANK: + return "blank"; + case STRING: + return "text"; + case BOOLEAN: + return "boolean"; + case ERROR: + return "error"; + case NUMERIC: + return "numeric"; + case FORMULA: + return "formula"; + } + return "#unknown cell type (" + cellType + ")#"; + } + + /** + * @return the style of the cell + */ + @Override + public CellStyle getCellStyle() { + return this.cellStyle; + } + + /** + * Return a formula for the cell, for example, SUM(C4:E4) + * + * @return a formula for the cell + * @throws IllegalStateException if the cell type returned by {@link #getCellType()} is not + * CELL_TYPE_FORMULA + */ + @Override + public String getCellFormula() { + if (!formulaType) { + throw new IllegalStateException("This cell does not have a formula"); + } + return formula; + } + + /** + * Only valid for formula cells + * + * @return one of ({@link CellType#NUMERIC}, {@link CellType#STRING}, {@link CellType#BOOLEAN}, + * {@link CellType#ERROR}) depending on the cached value of the formula + */ + @Override + public CellType 
getCachedFormulaResultType() { + if (formulaType) { + if (contentsSupplier.getContent() == null || type == null) { + return CellType.BLANK; + } else if ("n".equals(type)) { + return CellType.NUMERIC; + } else if ("s".equals(type) || "inlineStr".equals(type) || "str".equals(type)) { + return CellType.STRING; + } else if ("b".equals(type)) { + return CellType.BOOLEAN; + } else if ("e".equals(type)) { + return CellType.ERROR; + } else { + throw new UnsupportedOperationException("Unsupported cell type '" + type + "'"); + } + } else { + throw new IllegalStateException("Only formula cells have cached results"); + } + } + + /* Not supported */ + + /** + * Not supported + */ + @Override + public void setCellType(CellType cellType) { + throw new NotSupportedException(); + } + + /** + * Not supported + */ + @Override + public void setCellValue(double value) { + throw new NotSupportedException(); + } + + /** + * Not supported + */ + @Override + public void setCellValue(Date value) { + throw new NotSupportedException(); + } + + /** + * Not supported + */ + @Override + public void setCellValue(LocalDateTime value) { + throw new NotSupportedException(); + } + + /** + * Not supported + */ + @Override + public void setCellValue(Calendar value) { + throw new NotSupportedException(); + } + + /** + * Not supported + */ + @Override + public void setCellValue(RichTextString value) { + throw new NotSupportedException(); + } + + /** + * Not supported + */ + @Override + public void setCellValue(String value) { + throw new NotSupportedException(); + } + + /** + * Not supported + */ + @Override + public void setCellFormula(String formula) throws FormulaParseException { + throw new NotSupportedException(); + } + + /** + * Not supported + */ + @Override + public void setCellValue(boolean value) { + throw new NotSupportedException(); + } + + /** + * Not supported + */ + @Override + public void setCellErrorValue(byte value) { + throw new NotSupportedException(); + } + + /** + * Not supported + 
*/ + @Override + public byte getErrorCellValue() { + throw new NotSupportedException(); + } + + /** + * Not supported + */ + @Override + public void setAsActiveCell() { + throw new NotSupportedException(); + } + + /** + * Not supported + */ + @Override + public CellAddress getAddress() { + throw new NotSupportedException(); + } + + /** + * Not supported + */ + @Override + public void setCellComment(Comment comment) { + throw new NotSupportedException(); + } + + /** + * Not supported + */ + @Override + public Comment getCellComment() { + throw new NotSupportedException(); + } + + /** + * Not supported + */ + @Override + public void removeCellComment() { + throw new NotSupportedException(); + } + + /** + * Not supported + */ + @Override + public Hyperlink getHyperlink() { + throw new NotSupportedException(); + } + + /** + * Not supported + */ + @Override + public void setHyperlink(Hyperlink link) { + throw new NotSupportedException(); + } + + /** + * Not supported + */ + @Override + public void removeHyperlink() { + throw new NotSupportedException(); + } + + /** + * Not supported + */ + @Override + public CellRangeAddress getArrayFormulaRange() { + throw new NotSupportedException(); + } + + /** + * Not supported + */ + @Override + public boolean isPartOfArrayFormulaGroup() { + throw new NotSupportedException(); + } + + /** + * Not supported + */ + @Override + public void setBlank() { + throw new NotSupportedException(); + } + + /** + * Not supported + */ + @Override + public void removeFormula() throws IllegalStateException { + throw new NotSupportedException(); + } +} diff --git a/mens-core/src/main/java/kr/xit/core/support/xlsx/impl/StreamingRow.java b/mens-core/src/main/java/kr/xit/core/support/xlsx/impl/StreamingRow.java new file mode 100644 index 0000000..980b128 --- /dev/null +++ b/mens-core/src/main/java/kr/xit/core/support/xlsx/impl/StreamingRow.java @@ -0,0 +1,263 @@ +package kr.xit.core.support.xlsx.impl; + +import java.util.Iterator; +import java.util.Map; 
+import java.util.TreeMap; +import kr.xit.core.support.xlsx.exceptions.NotSupportedException; +import org.apache.poi.ss.usermodel.Cell; +import org.apache.poi.ss.usermodel.CellStyle; +import org.apache.poi.ss.usermodel.CellType; +import org.apache.poi.ss.usermodel.Row; +import org.apache.poi.ss.usermodel.Sheet; + +public class StreamingRow implements Row { + + private final Sheet sheet; + private int rowIndex; + private boolean isHidden; + private TreeMap cellMap = new TreeMap<>(); + + public StreamingRow(Sheet sheet, int rowIndex, boolean isHidden) { + this.sheet = sheet; + this.rowIndex = rowIndex; + this.isHidden = isHidden; + } + + @Override + public Sheet getSheet() { + return sheet; + } + + public Map getCellMap() { + return cellMap; + } + + public void setCellMap(TreeMap cellMap) { + this.cellMap = cellMap; + } + + /* Supported */ + + /** + * Get row number this row represents + * + * @return the row number (0 based) + */ + @Override + public int getRowNum() { + return rowIndex; + } + + /** + * @return Cell iterator of the physically defined cells for this row. + */ + @Override + public Iterator cellIterator() { + return cellMap.values().iterator(); + } + + /** + * @return Cell iterator of the physically defined cells for this row. + */ + @Override + public Iterator iterator() { + return cellMap.values().iterator(); + } + + /** + * Get the cell representing a given column (logical cell) 0-based. If you ask for a cell that + * is not defined, you get a null. + * + * @param cellnum 0 based column number + * @return Cell representing that column or null if undefined. + */ + @Override + public Cell getCell(int cellnum) { + return cellMap.get(cellnum); + } + + /** + * Gets the index of the last cell contained in this row PLUS ONE. + * + * @return short representing the last logical cell in the row PLUS ONE, or -1 if the row + * does not contain any cells. + */ + @Override + public short getLastCellNum() { + return (short) (cellMap.size() == 0 ? 
-1 + : cellMap.lastEntry().getValue().getColumnIndex() + 1); + } + + /** + * Get whether or not to display this row with 0 height + * + * @return - zHeight height is zero or not. + */ + @Override + public boolean getZeroHeight() { + return isHidden; + } + + /** + * Gets the number of defined cells (NOT number of cells in the actual row!). That is to say if + * only columns 0,4,5 have values then there would be 3. + * + * @return int representing the number of defined cells in the row. + */ + @Override + public int getPhysicalNumberOfCells() { + return cellMap.size(); + } + + /** + * {@inheritDoc} + */ + @Override + public short getFirstCellNum() { + if (cellMap.size() == 0) { + return -1; + } + return cellMap.firstKey().shortValue(); + } + + /** + * {@inheritDoc} + */ + @Override + public Cell getCell(int cellnum, MissingCellPolicy policy) { + StreamingCell cell = (StreamingCell) cellMap.get(cellnum); + if (policy == MissingCellPolicy.CREATE_NULL_AS_BLANK) { + if (cell == null) { + return new StreamingCell(sheet, cellnum, rowIndex, false); + } + } else if (policy == MissingCellPolicy.RETURN_BLANK_AS_NULL) { + if (cell == null || cell.getCellType() == CellType.BLANK) { + return null; + } + } + return cell; + } + + /* Not supported */ + + /** + * Not supported + */ + @Override + public Cell createCell(int column) { + throw new NotSupportedException(); + } + + /** + * Not supported + */ + @Override + public Cell createCell(int i, CellType cellType) { + throw new NotSupportedException(); + } + + /** + * Not supported + */ + @Override + public void removeCell(Cell cell) { + throw new NotSupportedException(); + } + + /** + * Not supported + */ + @Override + public void setRowNum(int rowNum) { + throw new NotSupportedException(); + } + + /** + * Not supported + */ + @Override + public void setHeight(short height) { + throw new NotSupportedException(); + } + + /** + * Not supported + */ + @Override + public void setZeroHeight(boolean zHeight) { + throw new 
NotSupportedException(); + } + + /** + * Not supported + */ + @Override + public void setHeightInPoints(float height) { + throw new NotSupportedException(); + } + + /** + * Not supported + */ + @Override + public short getHeight() { + throw new NotSupportedException(); + } + + /** + * Not supported + */ + @Override + public float getHeightInPoints() { + throw new NotSupportedException(); + } + + /** + * Not supported + */ + @Override + public boolean isFormatted() { + throw new NotSupportedException(); + } + + /** + * Not supported + */ + @Override + public CellStyle getRowStyle() { + throw new NotSupportedException(); + } + + /** + * Not supported + */ + @Override + public void setRowStyle(CellStyle style) { + throw new NotSupportedException(); + } + + /** + * Not supported + */ + @Override + public int getOutlineLevel() { + throw new NotSupportedException(); + } + + /** + * Not supported + */ + @Override + public void shiftCellsRight(int firstShiftColumnIndex, int lastShiftColumnIndex, int step) { + throw new NotSupportedException(); + } + + /** + * Not supported + */ + @Override + public void shiftCellsLeft(int firstShiftColumnIndex, int lastShiftColumnIndex, int step) { + throw new NotSupportedException(); + } + +} diff --git a/mens-core/src/main/java/kr/xit/core/support/xlsx/impl/StreamingSheet.java b/mens-core/src/main/java/kr/xit/core/support/xlsx/impl/StreamingSheet.java new file mode 100644 index 0000000..c7c3415 --- /dev/null +++ b/mens-core/src/main/java/kr/xit/core/support/xlsx/impl/StreamingSheet.java @@ -0,0 +1,1004 @@ +package kr.xit.core.support.xlsx.impl; + +import java.util.Collection; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import org.apache.poi.ss.usermodel.AutoFilter; +import org.apache.poi.ss.usermodel.Cell; +import org.apache.poi.ss.usermodel.CellRange; +import org.apache.poi.ss.usermodel.CellStyle; +import org.apache.poi.ss.usermodel.Comment; +import org.apache.poi.ss.usermodel.DataValidation; +import 
org.apache.poi.ss.usermodel.DataValidationHelper; +import org.apache.poi.ss.usermodel.Drawing; +import org.apache.poi.ss.usermodel.Footer; +import org.apache.poi.ss.usermodel.Header; +import org.apache.poi.ss.usermodel.Hyperlink; +import org.apache.poi.ss.usermodel.PrintSetup; +import org.apache.poi.ss.usermodel.Row; +import org.apache.poi.ss.usermodel.Sheet; +import org.apache.poi.ss.usermodel.SheetConditionalFormatting; +import org.apache.poi.ss.usermodel.Workbook; +import org.apache.poi.ss.util.CellAddress; +import org.apache.poi.ss.util.CellRangeAddress; +import org.apache.poi.ss.util.PaneInformation; + +public class StreamingSheet implements Sheet { + + private final String name; + private final StreamingSheetReader reader; + + public StreamingSheet(String name, StreamingSheetReader reader) { + this.name = name; + this.reader = reader; + reader.setSheet(this); + } + + StreamingSheetReader getReader() { + return reader; + } + + /* Supported */ + + /** + * {@inheritDoc} + */ + @Override + public Iterator iterator() { + return reader.iterator(); + } + + /** + * {@inheritDoc} + */ + @Override + public Iterator rowIterator() { + return reader.iterator(); + } + + /** + * {@inheritDoc} + */ + @Override + public String getSheetName() { + return name; + } + + /** + * Get the hidden state for a given column + * + * @param columnIndex - the column to set (0-based) + * @return hidden - false if the column is visible + */ + @Override + public boolean isColumnHidden(int columnIndex) { + return reader.isColumnHidden(columnIndex); + } + + /* Unsupported */ + + /** + * Not supported + */ + @Override + public Row createRow(int rownum) { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public void removeRow(Row row) { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public Row getRow(int rownum) { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + 
public int getPhysicalNumberOfRows() { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public int getFirstRowNum() { + throw new UnsupportedOperationException(); + } + + /** + * Gets the last row on the sheet + * + * @return last row contained n this sheet (0-based) + */ + @Override + public int getLastRowNum() { + return reader.getLastRowNum(); + } + + /** + * Not supported + */ + @Override + public void setColumnHidden(int columnIndex, boolean hidden) { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public void setRightToLeft(boolean value) { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public boolean isRightToLeft() { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public void setColumnWidth(int columnIndex, int width) { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public int getColumnWidth(int columnIndex) { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public float getColumnWidthInPixels(int columnIndex) { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public void setDefaultColumnWidth(int width) { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public int getDefaultColumnWidth() { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public short getDefaultRowHeight() { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public float getDefaultRowHeightInPoints() { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public void setDefaultRowHeight(short height) { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public void 
setDefaultRowHeightInPoints(float height) { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public CellStyle getColumnStyle(int column) { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public int addMergedRegion(CellRangeAddress region) { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public int addMergedRegionUnsafe(CellRangeAddress cellRangeAddress) { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public void validateMergedRegions() { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public void setVerticallyCenter(boolean value) { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public void setHorizontallyCenter(boolean value) { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public boolean getHorizontallyCenter() { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public boolean getVerticallyCenter() { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public void removeMergedRegion(int index) { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public void removeMergedRegions(Collection collection) { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public int getNumMergedRegions() { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public CellRangeAddress getMergedRegion(int index) { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public List getMergedRegions() { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public void 
setForceFormulaRecalculation(boolean value) { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public boolean getForceFormulaRecalculation() { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public void setAutobreaks(boolean value) { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public void setDisplayGuts(boolean value) { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public void setDisplayZeros(boolean value) { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public boolean isDisplayZeros() { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public void setFitToPage(boolean value) { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public void setRowSumsBelow(boolean value) { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public void setRowSumsRight(boolean value) { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public boolean getAutobreaks() { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public boolean getDisplayGuts() { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public boolean getFitToPage() { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public boolean getRowSumsBelow() { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public boolean getRowSumsRight() { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public boolean isPrintGridlines() { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + 
@Override + public void setPrintGridlines(boolean show) { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public boolean isPrintRowAndColumnHeadings() { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public void setPrintRowAndColumnHeadings(boolean b) { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public PrintSetup getPrintSetup() { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public Header getHeader() { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public Footer getFooter() { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public void setSelected(boolean value) { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public double getMargin(short margin) { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public void setMargin(short margin, double size) { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public boolean getProtect() { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public void protectSheet(String password) { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public boolean getScenarioProtect() { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public void setZoom(int i) { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public short getTopRow() { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public short getLeftCol() { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public 
void showInPane(int toprow, int leftcol) { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public void shiftRows(int startRow, int endRow, int n) { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public void shiftRows(int startRow, int endRow, int n, boolean copyRowHeight, + boolean resetOriginalRowHeight) { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public void shiftColumns(int startColumn, int endColumn, final int n) { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public void createFreezePane(int colSplit, int rowSplit, int leftmostColumn, int topRow) { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public void createFreezePane(int colSplit, int rowSplit) { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public void createSplitPane(int xSplitPos, int ySplitPos, int leftmostColumn, int topRow, + int activePane) { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public PaneInformation getPaneInformation() { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public void setDisplayGridlines(boolean show) { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public boolean isDisplayGridlines() { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public void setDisplayFormulas(boolean show) { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public boolean isDisplayFormulas() { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public void setDisplayRowColHeadings(boolean show) { + throw new UnsupportedOperationException(); + } + + /** 
+ * Not supported + */ + @Override + public boolean isDisplayRowColHeadings() { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public void setRowBreak(int row) { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public boolean isRowBroken(int row) { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public void removeRowBreak(int row) { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public int[] getRowBreaks() { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public int[] getColumnBreaks() { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public void setColumnBreak(int column) { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public boolean isColumnBroken(int column) { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public void removeColumnBreak(int column) { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public void setColumnGroupCollapsed(int columnNumber, boolean collapsed) { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public void groupColumn(int fromColumn, int toColumn) { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public void ungroupColumn(int fromColumn, int toColumn) { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public void groupRow(int fromRow, int toRow) { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public void ungroupRow(int fromRow, int toRow) { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public void 
setRowGroupCollapsed(int row, boolean collapse) { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public void setDefaultColumnStyle(int column, CellStyle style) { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public void autoSizeColumn(int column) { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public void autoSizeColumn(int column, boolean useMergedCells) { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public Comment getCellComment(CellAddress cellAddress) { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public Map getCellComments() { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public Drawing getDrawingPatriarch() { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public Drawing createDrawingPatriarch() { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public Workbook getWorkbook() { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public boolean isSelected() { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public CellRange setArrayFormula(String formula, CellRangeAddress range) { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public CellRange removeArrayFormula(Cell cell) { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public DataValidationHelper getDataValidationHelper() { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public List getDataValidations() { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public void 
addValidationData(DataValidation dataValidation) { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public AutoFilter setAutoFilter(CellRangeAddress range) { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public SheetConditionalFormatting getSheetConditionalFormatting() { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public CellRangeAddress getRepeatingRows() { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public CellRangeAddress getRepeatingColumns() { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public void setRepeatingRows(CellRangeAddress rowRangeRef) { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public void setRepeatingColumns(CellRangeAddress columnRangeRef) { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public int getColumnOutlineLevel(int columnIndex) { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public Hyperlink getHyperlink(int i, int i1) { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public Hyperlink getHyperlink(CellAddress cellAddress) { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public List getHyperlinkList() { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public CellAddress getActiveCell() { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public void setActiveCell(CellAddress cellAddress) { + throw new UnsupportedOperationException(); + } +} diff --git a/mens-core/src/main/java/kr/xit/core/support/xlsx/impl/StreamingSheetReader.java 
b/mens-core/src/main/java/kr/xit/core/support/xlsx/impl/StreamingSheetReader.java new file mode 100644 index 0000000..42435cc --- /dev/null +++ b/mens-core/src/main/java/kr/xit/core/support/xlsx/impl/StreamingSheetReader.java @@ -0,0 +1,434 @@ +package kr.xit.core.support.xlsx.impl; + +import java.util.ArrayList; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.Set; +import javax.xml.namespace.QName; +import javax.xml.stream.XMLEventReader; +import javax.xml.stream.XMLStreamConstants; +import javax.xml.stream.XMLStreamException; +import javax.xml.stream.events.Attribute; +import javax.xml.stream.events.Characters; +import javax.xml.stream.events.EndElement; +import javax.xml.stream.events.StartElement; +import javax.xml.stream.events.XMLEvent; +import kr.xit.core.support.xlsx.exceptions.CloseException; +import kr.xit.core.support.xlsx.exceptions.ParseException; +import org.apache.poi.ss.usermodel.BuiltinFormats; +import org.apache.poi.ss.usermodel.DataFormatter; +import org.apache.poi.ss.usermodel.Row; +import org.apache.poi.ss.usermodel.Sheet; +import org.apache.poi.ss.util.CellReference; +import org.apache.poi.xssf.model.SharedStringsTable; +import org.apache.poi.xssf.model.StylesTable; +import org.apache.poi.xssf.usermodel.XSSFCellStyle; +import org.apache.poi.xssf.usermodel.XSSFRichTextString; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class StreamingSheetReader implements Iterable { + + private static final Logger log = LoggerFactory.getLogger(StreamingSheetReader.class); + + private final SharedStringsTable sst; + private final StylesTable stylesTable; + private final XMLEventReader parser; + private final DataFormatter dataFormatter = new DataFormatter(); + private final Set hiddenColumns = new HashSet<>(); + + private int lastRowNum; + private int currentRowNum; + private int firstColNum = 0; + private int currentColNum; + private int rowCacheSize; + private List rowCache = new 
ArrayList<>(); + private Iterator rowCacheIterator; + + private String lastContents; + private Sheet sheet; + private StreamingRow currentRow; + private StreamingCell currentCell; + private boolean use1904Dates; + + public StreamingSheetReader(SharedStringsTable sst, StylesTable stylesTable, + XMLEventReader parser, + final boolean use1904Dates, int rowCacheSize) { + this.sst = sst; + this.stylesTable = stylesTable; + this.parser = parser; + this.use1904Dates = use1904Dates; + this.rowCacheSize = rowCacheSize; + } + + void setSheet(StreamingSheet sheet) { + this.sheet = sheet; + } + + /** + * Read through a number of rows equal to the rowCacheSize field or until there is no more data + * to read + * + * @return true if data was read + */ + private boolean getRow() { + try { + rowCache.clear(); + while (rowCache.size() < rowCacheSize && parser.hasNext()) { + handleEvent(parser.nextEvent()); + } + rowCacheIterator = rowCache.iterator(); + return rowCacheIterator.hasNext(); + } catch (XMLStreamException e) { + throw new ParseException("Error reading XML stream", e); + } + } + + private String[] splitCellRef(String ref) { + int splitPos = -1; + + // start at pos 1, since the first char is expected to always be a letter + for (int i = 1; i < ref.length(); i++) { + char c = ref.charAt(i); + + if (c >= '0' && c <= '9') { + splitPos = i; + break; + } + } + + return new String[]{ + ref.substring(0, splitPos), + ref.substring(splitPos) + }; + } + + /** + * Handles a SAX event. 
+ * + * @param event + */ + private void handleEvent(XMLEvent event) { + if (event.getEventType() == XMLStreamConstants.CHARACTERS) { + Characters c = event.asCharacters(); + lastContents += c.getData(); + } else if (event.getEventType() == XMLStreamConstants.START_ELEMENT + && isSpreadsheetTag(event.asStartElement().getName())) { + StartElement startElement = event.asStartElement(); + String tagLocalName = startElement.getName().getLocalPart(); + + if ("row".equals(tagLocalName)) { + Attribute rowNumAttr = startElement.getAttributeByName(new QName("r")); + int rowIndex = currentRowNum; + if (rowNumAttr != null) { + rowIndex = Integer.parseInt(rowNumAttr.getValue()) - 1; + currentRowNum = rowIndex; + } + Attribute isHiddenAttr = startElement.getAttributeByName(new QName("hidden")); + boolean isHidden = + isHiddenAttr != null && ("1".equals(isHiddenAttr.getValue()) || "true".equals( + isHiddenAttr.getValue())); + currentRow = new StreamingRow(sheet, rowIndex, isHidden); + currentColNum = firstColNum; + } else if ("col".equals(tagLocalName)) { + Attribute isHiddenAttr = startElement.getAttributeByName(new QName("hidden")); + boolean isHidden = + isHiddenAttr != null && ("1".equals(isHiddenAttr.getValue()) || "true".equals( + isHiddenAttr.getValue())); + if (isHidden) { + Attribute minAttr = startElement.getAttributeByName(new QName("min")); + Attribute maxAttr = startElement.getAttributeByName(new QName("max")); + int min = Integer.parseInt(minAttr.getValue()) - 1; + int max = Integer.parseInt(maxAttr.getValue()) - 1; + for (int columnIndex = min; columnIndex <= max; columnIndex++) { + hiddenColumns.add(columnIndex); + } + } + } else if ("c".equals(tagLocalName)) { + Attribute ref = startElement.getAttributeByName(new QName("r")); + + if (ref != null) { + String[] coord = splitCellRef(ref.getValue()); + currentColNum = CellReference.convertColStringToIndex(coord[0]); + currentCell = new StreamingCell(sheet, currentColNum, + Integer.parseInt(coord[1]) - 1, 
use1904Dates); + } else { + currentCell = new StreamingCell(sheet, currentColNum, currentRowNum, + use1904Dates); + } + setFormatString(startElement, currentCell); + + Attribute type = startElement.getAttributeByName(new QName("t")); + if (type != null) { + currentCell.setType(type.getValue()); + } else { + currentCell.setType("n"); + } + + Attribute style = startElement.getAttributeByName(new QName("s")); + if (style != null) { + String indexStr = style.getValue(); + try { + int index = Integer.parseInt(indexStr); + currentCell.setCellStyle(stylesTable.getStyleAt(index)); + } catch (NumberFormatException nfe) { + log.warn("Ignoring invalid style index {}", indexStr); + } + } else { + currentCell.setCellStyle(stylesTable.getStyleAt(0)); + } + } else if ("dimension".equals(tagLocalName)) { + Attribute refAttr = startElement.getAttributeByName(new QName("ref")); + String ref = refAttr != null ? refAttr.getValue() : null; + if (ref != null) { + // ref is formatted as A1 or A1:F25. Take the last numbers of this string and use it as lastRowNum + for (int i = ref.length() - 1; i >= 0; i--) { + if (!Character.isDigit(ref.charAt(i))) { + try { + lastRowNum = Integer.parseInt(ref.substring(i + 1)) - 1; + } catch (NumberFormatException ignore) { + } + break; + } + } + for (int i = 0; i < ref.length(); i++) { + if (!Character.isAlphabetic(ref.charAt(i))) { + firstColNum = CellReference.convertColStringToIndex( + ref.substring(0, i)); + break; + } + } + } + } else if ("f".equals(tagLocalName)) { + if (currentCell != null) { + currentCell.setFormulaType(true); + } + } + + // Clear contents cache + lastContents = ""; + } else if (event.getEventType() == XMLStreamConstants.END_ELEMENT + && isSpreadsheetTag(event.asEndElement().getName())) { + EndElement endElement = event.asEndElement(); + String tagLocalName = endElement.getName().getLocalPart(); + + if ("v".equals(tagLocalName) || "t".equals(tagLocalName)) { + currentCell.setRawContents(unformattedContents()); + 
currentCell.setContentSupplier(formattedContents()); + } else if ("row".equals(tagLocalName) && currentRow != null) { + rowCache.add(currentRow); + currentRowNum++; + } else if ("c".equals(tagLocalName)) { + currentRow.getCellMap().put(currentCell.getColumnIndex(), currentCell); + currentCell = null; + currentColNum++; + } else if ("f".equals(tagLocalName)) { + if (currentCell != null) { + currentCell.setFormula(lastContents); + } + } + + } + } + + /** + * Returns true if a tag is part of the main namespace for SpreadsheetML: + *
    + *
  • http://schemas.openxmlformats.org/spreadsheetml/2006/main + *
  • http://purl.oclc.org/ooxml/spreadsheetml/main + *
+ * As opposed to http://schemas.openxmlformats.org/drawingml/2006/spreadsheetDrawing, etc. + * + * @param name + * @return + */ + private boolean isSpreadsheetTag(QName name) { + return (name.getNamespaceURI() != null + && name.getNamespaceURI().endsWith("/main")); + } + + /** + * Get the hidden state for a given column + * + * @param columnIndex - the column to set (0-based) + * @return hidden - false if the column is visible + */ + boolean isColumnHidden(int columnIndex) { + if (rowCacheIterator == null) { + getRow(); + } + return hiddenColumns.contains(columnIndex); + } + + /** + * Gets the last row on the sheet + * + * @return + */ + int getLastRowNum() { + if (rowCacheIterator == null) { + getRow(); + } + return lastRowNum; + } + + /** + * Read the numeric format string out of the styles table for this cell. Stores the result in + * the Cell. + * + * @param startElement + * @param cell + */ + void setFormatString(StartElement startElement, StreamingCell cell) { + Attribute cellStyle = startElement.getAttributeByName(new QName("s")); + String cellStyleString = (cellStyle != null) ? cellStyle.getValue() : null; + XSSFCellStyle style = null; + + if (cellStyleString != null) { + style = stylesTable.getStyleAt(Integer.parseInt(cellStyleString)); + } else if (stylesTable.getNumCellStyles() > 0) { + style = stylesTable.getStyleAt(0); + } + + if (style != null) { + cell.setNumericFormatIndex(style.getDataFormat()); + String formatString = style.getDataFormatString(); + + if (formatString != null) { + cell.setNumericFormat(formatString); + } else { + cell.setNumericFormat( + BuiltinFormats.getBuiltinFormat(cell.getNumericFormatIndex())); + } + } else { + cell.setNumericFormatIndex(null); + cell.setNumericFormat(null); + } + } + + /** + * Tries to format the contents of the last contents appropriately based on the type of cell and + * the discovered numeric format. 
+ * + * @return + */ + Supplier formattedContents() { + return getFormatterForType(currentCell.getType()); + } + + /** + * Tries to format the contents of the last contents appropriately based on the provided type + * and the discovered numeric format. + * + * @return + */ + private Supplier getFormatterForType(String type) { + switch (type) { + case "s": //string stored in shared table + if (!lastContents.isEmpty()) { + int idx = Integer.parseInt(lastContents); + return new StringSupplier(sst.getItemAt(idx).toString()); + } + return new StringSupplier(lastContents); + case "inlineStr": //inline string (not in sst) + case "str": + return new StringSupplier(new XSSFRichTextString(lastContents).toString()); + case "e": //error type + return new StringSupplier("ERROR: " + lastContents); + case "n": //numeric type + if (currentCell.getNumericFormat() != null && lastContents.length() > 0) { + // the formatRawCellContents operation incurs a significant overhead on large sheets, + // and we want to defer the execution of this method until the value is actually needed. + // it is not needed in all cases.. 
+ final String currentLastContents = lastContents; + final int currentNumericFormatIndex = currentCell.getNumericFormatIndex(); + final String currentNumericFormat = currentCell.getNumericFormat(); + + return new Supplier() { + String cachedContent; + + @Override + public Object getContent() { + if (cachedContent == null) { + cachedContent = dataFormatter.formatRawCellContents( + Double.parseDouble(currentLastContents), + currentNumericFormatIndex, + currentNumericFormat); + } + + return cachedContent; + } + }; + } else { + return new StringSupplier(lastContents); + } + default: + return new StringSupplier(lastContents); + } + } + + /** + * Returns the contents of the cell, with no formatting applied + * + * @return + */ + String unformattedContents() { + switch (currentCell.getType()) { + case "s": //string stored in shared table + if (!lastContents.isEmpty()) { + int idx = Integer.parseInt(lastContents); + return sst.getItemAt(idx).toString(); + } + return lastContents; + case "inlineStr": //inline string (not in sst) + return new XSSFRichTextString(lastContents).toString(); + default: + return lastContents; + } + } + + /** + * Returns a new streaming iterator to loop through rows. This iterator is not guaranteed to + * have all rows in memory, and any particular iteration may trigger a load from disk to read in + * new data. 
+ * + * @return the streaming iterator + */ + @Override + public Iterator iterator() { + return new StreamingRowIterator(); + } + + public void close() { + try { + parser.close(); + } catch (XMLStreamException e) { + throw new CloseException(e); + } + } + + class StreamingRowIterator implements Iterator { + + public StreamingRowIterator() { + if (rowCacheIterator == null) { + hasNext(); + } + } + + @Override + public boolean hasNext() { + return (rowCacheIterator != null && rowCacheIterator.hasNext()) || getRow(); + } + + @Override + public Row next() { + return rowCacheIterator.next(); + } + + @Override + public void remove() { + throw new RuntimeException("NotSupported"); + } + } +} diff --git a/mens-core/src/main/java/kr/xit/core/support/xlsx/impl/StreamingWorkbook.java b/mens-core/src/main/java/kr/xit/core/support/xlsx/impl/StreamingWorkbook.java new file mode 100644 index 0000000..e8d12ad --- /dev/null +++ b/mens-core/src/main/java/kr/xit/core/support/xlsx/impl/StreamingWorkbook.java @@ -0,0 +1,518 @@ +package kr.xit.core.support.xlsx.impl; + +import java.io.IOException; +import java.io.OutputStream; +import java.util.Iterator; +import java.util.List; +import kr.xit.core.support.xlsx.exceptions.MissingSheetException; +import org.apache.poi.ss.SpreadsheetVersion; +import org.apache.poi.ss.formula.EvaluationWorkbook; +import org.apache.poi.ss.formula.udf.UDFFinder; +import org.apache.poi.ss.usermodel.CellStyle; +import org.apache.poi.ss.usermodel.CreationHelper; +import org.apache.poi.ss.usermodel.DataFormat; +import org.apache.poi.ss.usermodel.Font; +import org.apache.poi.ss.usermodel.Name; +import org.apache.poi.ss.usermodel.PictureData; +import org.apache.poi.ss.usermodel.Row.MissingCellPolicy; +import org.apache.poi.ss.usermodel.Sheet; +import org.apache.poi.ss.usermodel.SheetVisibility; +import org.apache.poi.ss.usermodel.Workbook; + +public class StreamingWorkbook implements Workbook, AutoCloseable { + + private final StreamingWorkbookReader reader; + + 
public StreamingWorkbook(StreamingWorkbookReader reader) { + this.reader = reader; + } + + int findSheetByName(String name) { + for (int i = 0; i < reader.getSheetProperties().size(); i++) { + if (reader.getSheetProperties().get(i).get("name").equals(name)) { + return i; + } + } + return -1; + } + + /* Supported */ + + /** + * {@inheritDoc} + */ + @Override + public Iterator iterator() { + return reader.iterator(); + } + + /** + * {@inheritDoc} + */ + @Override + public Iterator sheetIterator() { + return iterator(); + } + + /** + * {@inheritDoc} + */ + @Override + public String getSheetName(int sheet) { + return reader.getSheetProperties().get(sheet).get("name"); + } + + /** + * {@inheritDoc} + */ + @Override + public int getSheetIndex(String name) { + return findSheetByName(name); + } + + /** + * {@inheritDoc} + */ + @Override + public int getSheetIndex(Sheet sheet) { + if (sheet instanceof StreamingSheet) { + return findSheetByName(sheet.getSheetName()); + } else { + throw new UnsupportedOperationException("Cannot use non-StreamingSheet sheets"); + } + } + + /** + * {@inheritDoc} + */ + @Override + public int getNumberOfSheets() { + return reader.getSheets().size(); + } + + /** + * {@inheritDoc} + */ + @Override + public Sheet getSheetAt(int index) { + return reader.getSheets().get(index); + } + + /** + * {@inheritDoc} + */ + @Override + public Sheet getSheet(String name) { + int index = getSheetIndex(name); + if (index == -1) { + throw new MissingSheetException("Sheet '" + name + "' does not exist"); + } + return reader.getSheets().get(index); + } + + /** + * {@inheritDoc} + */ + @Override + public boolean isSheetHidden(int sheetIx) { + return "hidden".equals(reader.getSheetProperties().get(sheetIx).get("state")); + } + + /** + * {@inheritDoc} + */ + @Override + public boolean isSheetVeryHidden(int sheetIx) { + return "veryHidden".equals(reader.getSheetProperties().get(sheetIx).get("state")); + } + + /** + * {@inheritDoc} + */ + @Override + public void close() 
throws IOException { + reader.close(); + } + + /* Not supported */ + + /** + * Not supported + */ + @Override + public int getActiveSheetIndex() { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public void setActiveSheet(int sheetIndex) { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public int getFirstVisibleTab() { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public void setFirstVisibleTab(int sheetIndex) { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public void setSheetOrder(String sheetname, int pos) { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public void setSelectedTab(int index) { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public void setSheetName(int sheet, String name) { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public Sheet createSheet() { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public Sheet createSheet(String sheetname) { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public Sheet cloneSheet(int sheetNum) { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public void removeSheetAt(int index) { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public Font createFont() { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public Font findFont(boolean b, short i, short i1, String s, boolean b1, boolean b2, short i2, + byte b3) { + throw new UnsupportedOperationException(); + } + + @Override + public int getNumberOfFonts() { + throw new UnsupportedOperationException(); + } + + /** + * Not 
supported + */ + @Override + public int getNumberOfFontsAsInt() { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public Font getFontAt(int i) { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public CellStyle createCellStyle() { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public int getNumCellStyles() { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public CellStyle getCellStyleAt(int i) { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public void write(OutputStream stream) throws IOException { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public int getNumberOfNames() { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public Name getName(String name) { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public List getNames(String s) { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public List getAllNames() { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public Name createName() { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public void removeName(Name name) { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public int linkExternalWorkbook(String name, Workbook workbook) { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public void setPrintArea(int sheetIndex, String reference) { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public void setPrintArea(int sheetIndex, int startColumn, int endColumn, int startRow, + int 
endRow) { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public String getPrintArea(int sheetIndex) { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public void removePrintArea(int sheetIndex) { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public MissingCellPolicy getMissingCellPolicy() { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public void setMissingCellPolicy(MissingCellPolicy missingCellPolicy) { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public DataFormat createDataFormat() { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public int addPicture(byte[] pictureData, int format) { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public List getAllPictures() { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public CreationHelper getCreationHelper() { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public boolean isHidden() { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public void setHidden(boolean hiddenFlag) { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public void setSheetHidden(int sheetIx, boolean hidden) { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public SheetVisibility getSheetVisibility(int i) { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public void setSheetVisibility(int i, SheetVisibility sheetVisibility) { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public void addToolPack(UDFFinder 
toopack) { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public void setForceFormulaRecalculation(boolean value) { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public boolean getForceFormulaRecalculation() { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public SpreadsheetVersion getSpreadsheetVersion() { + throw new UnsupportedOperationException(); + } + + /** + * Not supported + */ + @Override + public int addOlePackage(byte[] bytes, String s, String s1, String s2) throws IOException { + throw new UnsupportedOperationException(); + } + + @Override + public EvaluationWorkbook createEvaluationWorkbook() { + return null; + } +} diff --git a/mens-core/src/main/java/kr/xit/core/support/xlsx/impl/StreamingWorkbookReader.java b/mens-core/src/main/java/kr/xit/core/support/xlsx/impl/StreamingWorkbookReader.java new file mode 100644 index 0000000..d6712fd --- /dev/null +++ b/mens-core/src/main/java/kr/xit/core/support/xlsx/impl/StreamingWorkbookReader.java @@ -0,0 +1,245 @@ +package kr.xit.core.support.xlsx.impl; + +import static java.util.Arrays.asList; +import static kr.xit.core.support.xlsx.XmlUtils.document; +import static kr.xit.core.support.xlsx.XmlUtils.searchForNodeList; +import static kr.xit.core.support.xlsx.impl.TempFileUtil.writeInputStreamToFile; + +import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.net.URI; +import java.nio.file.Files; +import java.security.GeneralSecurityException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.Iterator; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import javax.xml.stream.XMLEventReader; +import javax.xml.stream.XMLStreamException; +import kr.xit.core.support.xlsx.StreamingReader.Builder; +import kr.xit.core.support.xlsx.exceptions.OpenException; +import 
kr.xit.core.support.xlsx.exceptions.ReadException; +import kr.xit.core.support.xlsx.sst.BufferedStringsTable; +import org.apache.poi.openxml4j.exceptions.InvalidFormatException; +import org.apache.poi.openxml4j.exceptions.OpenXML4JException; +import org.apache.poi.openxml4j.opc.OPCPackage; +import org.apache.poi.poifs.crypt.Decryptor; +import org.apache.poi.poifs.crypt.EncryptionInfo; +import org.apache.poi.poifs.filesystem.POIFSFileSystem; +import org.apache.poi.ss.usermodel.Sheet; +import org.apache.poi.util.StaxHelper; +import org.apache.poi.xssf.eventusermodel.XSSFReader; +import org.apache.poi.xssf.eventusermodel.XSSFReader.SheetIterator; +import org.apache.poi.xssf.model.SharedStringsTable; +import org.apache.poi.xssf.model.StylesTable; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.w3c.dom.Node; +import org.w3c.dom.NodeList; + +public class StreamingWorkbookReader implements Iterable, AutoCloseable { + + private static final Logger log = LoggerFactory.getLogger(StreamingWorkbookReader.class); + + private final List sheets; + private final List> sheetProperties = new ArrayList<>(); + private final Builder builder; + private File tmp; + private File sstCache; + private OPCPackage pkg; + private SharedStringsTable sst; + private boolean use1904Dates = false; + + /** + * This constructor exists only so the StreamingReader can instantiate a StreamingWorkbook using + * its own reader implementation. Do not use going forward. 
+ * + * @param sst The SST data for this workbook + * @param sstCache The backing cache file for the SST data + * @param pkg The POI package that should be closed when this workbook is closed + * @param reader A single streaming reader instance + * @param builder The builder containing all options + */ + @Deprecated + public StreamingWorkbookReader(SharedStringsTable sst, File sstCache, OPCPackage pkg, + StreamingSheetReader reader, Builder builder) { + this.sst = sst; + this.sstCache = sstCache; + this.pkg = pkg; + this.sheets = asList(new StreamingSheet(null, reader)); + this.builder = builder; + } + + public StreamingWorkbookReader(Builder builder) { + this.sheets = new ArrayList<>(); + this.builder = builder; + } + + public StreamingSheetReader first() { + return sheets.get(0).getReader(); + } + + public void init(InputStream is) { + File f = null; + try { + f = writeInputStreamToFile(is, builder.getBufferSize()); + log.debug("Created temp file [" + f.getAbsolutePath() + "]"); + + init(f); + tmp = f; + } catch (IOException e) { + throw new ReadException("Unable to read input stream", e); + } catch (RuntimeException e) { + if (f != null) { + f.delete(); + } + throw e; + } + } + + public void init(File f) { + try { + if (builder.getPassword() != null) { + // Based on: https://poi.apache.org/encryption.html + POIFSFileSystem poifs = new POIFSFileSystem(f); + EncryptionInfo info = new EncryptionInfo(poifs); + Decryptor d = Decryptor.getInstance(info); + d.verifyPassword(builder.getPassword()); + pkg = OPCPackage.open(d.getDataStream(poifs)); + } else { + pkg = OPCPackage.open(f); + } + + XSSFReader reader = new XSSFReader(pkg); + if (builder.getSstCacheSizeBytes() > 0) { + sstCache = Files.createTempFile("", "").toFile(); + log.debug("Created sst cache file [" + sstCache.getAbsolutePath() + "]"); + sst = BufferedStringsTable.getSharedStringsTable(sstCache, + builder.getSstCacheSizeBytes(), pkg); + } else { + sst = reader.getSharedStringsTable(); + } + + StylesTable 
styles = reader.getStylesTable(); + NodeList workbookPr = searchForNodeList(document(reader.getWorkbookData()), + "/ss:workbook/ss:workbookPr"); + if (workbookPr.getLength() == 1) { + final Node date1904 = workbookPr.item(0).getAttributes().getNamedItem("date1904"); + if (date1904 != null) { + use1904Dates = ("1".equals(date1904.getTextContent())); + } + } + + loadSheets(reader, sst, styles, builder.getRowCacheSize()); + } catch (IOException e) { + throw new OpenException("Failed to open file", e); + } catch (OpenXML4JException | XMLStreamException e) { + throw new ReadException("Unable to read workbook", e); + } catch (GeneralSecurityException e) { + throw new ReadException("Unable to read workbook - Decryption failed", e); + } + } + + void loadSheets(XSSFReader reader, SharedStringsTable sst, StylesTable stylesTable, + int rowCacheSize) + throws IOException, InvalidFormatException, XMLStreamException { + lookupSheetNames(reader); + + //Some workbooks have multiple references to the same sheet. Need to filter + //them out before creating the XMLEventReader by keeping track of their URIs. + //The sheets are listed in order, so we must keep track of insertion order. 
+ SheetIterator iter = (SheetIterator) reader.getSheetsData(); + Map sheetStreams = new LinkedHashMap<>(); + while (iter.hasNext()) { + InputStream is = iter.next(); + sheetStreams.put(iter.getSheetPart().getPartName().getURI(), is); + } + + //Iterate over the loaded streams + int i = 0; + for (URI uri : sheetStreams.keySet()) { + XMLEventReader parser = StaxHelper.newXMLInputFactory() + .createXMLEventReader(sheetStreams.get(uri)); + sheets.add(new StreamingSheet(sheetProperties.get(i++).get("name"), + new StreamingSheetReader(sst, stylesTable, parser, use1904Dates, rowCacheSize))); + } + } + + void lookupSheetNames(XSSFReader reader) throws IOException, InvalidFormatException { + sheetProperties.clear(); + NodeList nl = searchForNodeList(document(reader.getWorkbookData()), + "/ss:workbook/ss:sheets/ss:sheet"); + for (int i = 0; i < nl.getLength(); i++) { + Map props = new HashMap<>(); + props.put("name", nl.item(i).getAttributes().getNamedItem("name").getTextContent()); + + Node state = nl.item(i).getAttributes().getNamedItem("state"); + props.put("state", state == null ? 
"visible" : state.getTextContent()); + sheetProperties.add(props); + } + } + + List getSheets() { + return sheets; + } + + public List> getSheetProperties() { + return sheetProperties; + } + + @Override + public Iterator iterator() { + return new StreamingSheetIterator(sheets.iterator()); + } + + @Override + public void close() throws IOException { + try { + for (StreamingSheet sheet : sheets) { + sheet.getReader().close(); + } + pkg.revert(); + } finally { + if (tmp != null) { + if (log.isDebugEnabled()) { + log.debug("Deleting tmp file [" + tmp.getAbsolutePath() + "]"); + } + tmp.delete(); + } + if (sst instanceof BufferedStringsTable) { + if (log.isDebugEnabled()) { + log.debug("Deleting sst cache file [" + this.sstCache.getAbsolutePath() + "]"); + } + ((BufferedStringsTable) sst).close(); + sstCache.delete(); + } + } + } + + static class StreamingSheetIterator implements Iterator { + + private final Iterator iterator; + + public StreamingSheetIterator(Iterator iterator) { + this.iterator = iterator; + } + + @Override + public boolean hasNext() { + return iterator.hasNext(); + } + + @Override + public Sheet next() { + return iterator.next(); + } + + @Override + public void remove() { + throw new RuntimeException("NotSupported"); + } + } +} diff --git a/mens-core/src/main/java/kr/xit/core/support/xlsx/impl/StringSupplier.java b/mens-core/src/main/java/kr/xit/core/support/xlsx/impl/StringSupplier.java new file mode 100644 index 0000000..f33350a --- /dev/null +++ b/mens-core/src/main/java/kr/xit/core/support/xlsx/impl/StringSupplier.java @@ -0,0 +1,15 @@ +package kr.xit.core.support.xlsx.impl; + +class StringSupplier implements Supplier { + + private final String val; + + StringSupplier(String val) { + this.val = val; + } + + @Override + public Object getContent() { + return val; + } +} diff --git a/mens-core/src/main/java/kr/xit/core/support/xlsx/impl/Supplier.java b/mens-core/src/main/java/kr/xit/core/support/xlsx/impl/Supplier.java new file mode 100644 index 
0000000..88e4a8f --- /dev/null +++ b/mens-core/src/main/java/kr/xit/core/support/xlsx/impl/Supplier.java @@ -0,0 +1,6 @@ +package kr.xit.core.support.xlsx.impl; + +interface Supplier { + + Object getContent(); +} diff --git a/mens-core/src/main/java/kr/xit/core/support/xlsx/impl/TempFileUtil.java b/mens-core/src/main/java/kr/xit/core/support/xlsx/impl/TempFileUtil.java new file mode 100644 index 0000000..48751ff --- /dev/null +++ b/mens-core/src/main/java/kr/xit/core/support/xlsx/impl/TempFileUtil.java @@ -0,0 +1,24 @@ +package kr.xit.core.support.xlsx.impl; + +import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.nio.file.Files; + +public class TempFileUtil { + + public static File writeInputStreamToFile(InputStream is, int bufferSize) throws IOException { + File f = Files.createTempFile("tmp-", ".xlsx").toFile(); + try (FileOutputStream fos = new FileOutputStream(f)) { + int read; + byte[] bytes = new byte[bufferSize]; + while ((read = is.read(bytes)) != -1) { + fos.write(bytes, 0, read); + } + return f; + } finally { + is.close(); + } + } +} diff --git a/mens-core/src/main/java/kr/xit/core/support/xlsx/sst/BufferedStringsTable.java b/mens-core/src/main/java/kr/xit/core/support/xlsx/sst/BufferedStringsTable.java new file mode 100644 index 0000000..ef9d23b --- /dev/null +++ b/mens-core/src/main/java/kr/xit/core/support/xlsx/sst/BufferedStringsTable.java @@ -0,0 +1,126 @@ +package kr.xit.core.support.xlsx.sst; + +import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.util.List; +import javax.xml.stream.XMLEventReader; +import javax.xml.stream.XMLStreamException; +import javax.xml.stream.events.XMLEvent; +import org.apache.poi.openxml4j.opc.OPCPackage; +import org.apache.poi.openxml4j.opc.PackagePart; +import org.apache.poi.ss.usermodel.RichTextString; +import org.apache.poi.util.StaxHelper; +import org.apache.poi.xssf.model.SharedStringsTable; +import 
org.apache.poi.xssf.usermodel.XSSFRelation; +import org.apache.poi.xssf.usermodel.XSSFRichTextString; +
/**
 * Shared strings table that keeps only plain text (formatting is dropped) and stores the
 * entries in a {@link FileBackedList} instead of in memory.
 */
public class BufferedStringsTable extends SharedStringsTable implements AutoCloseable {

    private final FileBackedList list;

    /**
     * Builds a table from the first shared-strings part of the package.
     *
     * @param tmp            file used as backing storage for the strings
     * @param cacheSizeBytes size passed to the backing list's cache
     * @param pkg            the opened package
     * @return the table, or {@code null} when the package has no shared-strings part
     * @throws IOException if the part cannot be read or parsed
     */
    public static BufferedStringsTable getSharedStringsTable(File tmp, int cacheSizeBytes,
        OPCPackage pkg)
        throws IOException {
        List<PackagePart> parts = pkg.getPartsByContentType(
            XSSFRelation.SHARED_STRINGS.getContentType());
        return parts.isEmpty() ? null
            : new BufferedStringsTable(parts.get(0), tmp, cacheSizeBytes);
    }

    private BufferedStringsTable(PackagePart part, File file, int cacheSizeBytes)
        throws IOException {
        this.list = new FileBackedList(file, cacheSizeBytes);
        try (InputStream is = part.getInputStream()) {
            readFrom(is);
        } catch (IOException | RuntimeException e) {
            // The caller never receives this instance, so release the backing file here.
            try {
                list.close();
            } catch (RuntimeException closeFailure) {
                e.addSuppressed(closeFailure);
            }
            throw e;
        }
    }

    /**
     * Streams the shared-strings XML and appends the text of every {@code <si>} element to the
     * backing list, in document order.
     *
     * @throws IOException on read failure; XML parse errors are wrapped
     */
    @Override
    public void readFrom(InputStream is) throws IOException {
        XMLEventReader xmlEventReader = null;
        try {
            xmlEventReader = StaxHelper.newXMLInputFactory().createXMLEventReader(is);

            while (xmlEventReader.hasNext()) {
                XMLEvent xmlEvent = xmlEventReader.nextEvent();

                if (xmlEvent.isStartElement() && xmlEvent.asStartElement().getName().getLocalPart()
                    .equals("si")) {
                    list.add(parseCT_Rst(xmlEventReader));
                }
            }
        } catch (XMLStreamException e) {
            throw new IOException(e);
        } finally {
            if (xmlEventReader != null) {
                try {
                    xmlEventReader.close();
                } catch (XMLStreamException ignored) {
                    // Parsing is already finished (or has failed with its own exception);
                    // a failure while releasing the parser carries no extra information.
                }
            }
        }
    }

    /**
     * Parses a {@code <si>} String Item. Returns just the text and drops the formatting. See xmlschema
     * type {@code CT_Rst}.
     */
    private String parseCT_Rst(XMLEventReader xmlEventReader) throws XMLStreamException {
        // Precondition: pointing to <si>;  Post condition: pointing to </si>
        StringBuilder buf = new StringBuilder();
        XMLEvent xmlEvent;
        while ((xmlEvent = xmlEventReader.nextTag()).isStartElement()) {
            switch (xmlEvent.asStartElement().getName().getLocalPart()) {
                case "t": // Text
                    buf.append(xmlEventReader.getElementText());
                    break;
                case "r": // Rich Text Run
                    parseCT_RElt(xmlEventReader, buf);
                    break;
                case "rPh": // Phonetic Run
                case "phoneticPr": // Phonetic Properties
                    skipElement(xmlEventReader);
                    break;
                default:
                    throw new IllegalArgumentException(
                        xmlEvent.asStartElement().getName().getLocalPart());
            }
        }
        return buf.toString();
    }

    /**
     * Parses a {@code <r>} Rich Text Run. Returns just the text and drops the formatting. See xmlschema
     * type {@code CT_RElt}.
     */
    private void parseCT_RElt(XMLEventReader xmlEventReader, StringBuilder buf)
        throws XMLStreamException {
        // Precondition: pointing to <r>;  Post condition: pointing to </r>
        XMLEvent xmlEvent;
        while ((xmlEvent = xmlEventReader.nextTag()).isStartElement()) {
            switch (xmlEvent.asStartElement().getName().getLocalPart()) {
                case "t": // Text
                    buf.append(xmlEventReader.getElementText());
                    break;
                case "rPr": // Run Properties
                    skipElement(xmlEventReader);
                    break;
                default:
                    throw new IllegalArgumentException(
                        xmlEvent.asStartElement().getName().getLocalPart());
            }
        }
    }

    /**
     * Consumes events up to and including the end tag matching the start element that was just
     * read. Uses {@code nextEvent()} with a depth counter rather than {@code nextTag()}, because
     * {@code nextTag()} throws on non-whitespace text and skipped elements may contain text
     * (a phonetic run holds a {@code <t>} child with characters).
     */
    private void skipElement(XMLEventReader xmlEventReader) throws XMLStreamException {
        // Precondition: pointing to start element;  Post condition: pointing to end element
        int depth = 1;
        while (depth > 0) {
            XMLEvent xmlEvent = xmlEventReader.nextEvent();
            if (xmlEvent.isStartElement()) {
                depth++;
            } else if (xmlEvent.isEndElement()) {
                depth--;
            }
        }
    }

    @Override
    public RichTextString getItemAt(int idx) {
        return new XSSFRichTextString(list.getAt(idx));
    }

    @Override
    public void close() throws IOException {
        try {
            super.close();
        } finally {
            list.close(); // release the backing file even if the parent close fails
        }
    }
}
diff --git
a/mens-core/src/main/java/kr/xit/core/support/xlsx/sst/FileBackedList.java b/mens-core/src/main/java/kr/xit/core/support/xlsx/sst/FileBackedList.java new file mode 100644 index 0000000..07599a8 --- /dev/null +++ b/mens-core/src/main/java/kr/xit/core/support/xlsx/sst/FileBackedList.java @@ -0,0 +1,108 @@ +package kr.xit.core.support.xlsx.sst; + +import java.io.File; +import java.io.IOException; +import java.io.RandomAccessFile; +import java.nio.ByteBuffer; +import java.nio.channels.FileChannel; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.List; + +/** + * File-backed list-like class. Allows addition of arbitrary numbers of array entries (serialized to + * JSON) in a binary packed file. Reading of entries is done with an NIO channel that seeks to the + * entry in the file. + *

<p> + * File entry format: + * <ul> + * <li>4 bytes: length of entry</li> + * <li>length bytes: JSON string containing the entry data</li> + * </ul> + * <p>

/**
 * Append-only list of strings that is stored in a file instead of on the heap.
 *
 * <p>Every entry is written as a 4-byte length (big-endian, the {@link ByteBuffer} default)
 * followed by the UTF-8 bytes of the string. The file offset of each entry is kept in memory in
 * a {@code List<Long>}. Values read back from the file are cached in an {@link LRUCache} whose
 * capacity is the {@code cacheSizeBytes} given to the constructor; entries are evicted from the
 * cache least-recently-used first.
 *
 * <p>File reads and writes are serialized on the channel's monitor. The offset list and the
 * cache are touched outside that lock by {@link #getAt(int)}, so the class as a whole should not
 * be treated as thread-safe.
 */
public class FileBackedList implements AutoCloseable {

    /** File offset of the length header of each entry, in insertion order. */
    private final List<Long> pointers = new ArrayList<>();
    private final RandomAccessFile raf;
    private final FileChannel channel;
    private final LRUCache cache;

    /** Offset at which the next entry is appended. */
    private long filesize;

    /**
     * Opens (creating if necessary) the backing file in read/write mode.
     *
     * @param file backing file; new entries are appended after any content it already has
     * @param cacheSizeBytes capacity handed to the read cache
     * @throws IOException if the file cannot be opened or its length cannot be determined
     */
    public FileBackedList(File file, final int cacheSizeBytes) throws IOException {
        this.raf = new RandomAccessFile(file, "rw");
        this.channel = raf.getChannel();
        this.filesize = raf.length();
        this.cache = new LRUCache(cacheSizeBytes);
    }

    /**
     * Appends a string to the end of the list.
     *
     * @param str value to append
     * @throws RuntimeException wrapping the {@link IOException} if the write fails
     */
    public void add(String str) {
        try {
            writeToFile(str);
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Returns the entry at {@code index}, from the cache when possible and from the file
     * otherwise. A value read from the file is stored in the cache before it is returned, so a
     * value whose estimated size exceeds the cache capacity makes this method fail with the
     * cache's {@code RuntimeException}.
     *
     * @param index zero-based position in insertion order
     * @return the string stored at that position
     * @throws IndexOutOfBoundsException if no entry was added at {@code index}
     * @throws RuntimeException wrapping the {@link IOException} if the read fails
     */
    public String getAt(int index) {
        String s = cache.getIfPresent(index);
        if (s != null) {
            return s;
        }

        try {
            String val = readFromFile(pointers.get(index));
            cache.store(index, val);
            return val;
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /** Appends one length-prefixed record and remembers its offset once it is fully written. */
    private void writeToFile(String str) throws IOException {
        synchronized (channel) {
            byte[] payload = str.getBytes(StandardCharsets.UTF_8);
            ByteBuffer record = ByteBuffer.allocate(Integer.BYTES + payload.length);
            record.putInt(payload.length).put(payload);
            record.flip();

            // A single write() may transfer fewer bytes than requested, so loop until drained.
            long position = filesize;
            while (record.hasRemaining()) {
                position += channel.write(record, position);
            }

            // Only publish the offset after the record is completely on disk.
            pointers.add(filesize);
            filesize = position;
        }
    }

    /** Reads the length-prefixed record that starts at {@code pointer}. */
    private String readFromFile(long pointer) throws IOException {
        synchronized (channel) {
            ByteBuffer header = ByteBuffer.allocate(Integer.BYTES);
            readFully(header, pointer);
            header.flip();
            int length = header.getInt();

            ByteBuffer payload = ByteBuffer.allocate(length);
            readFully(payload, pointer + Integer.BYTES);

            return new String(payload.array(), StandardCharsets.UTF_8);
        }
    }

    /** Fills {@code target} from the file starting at {@code position}; fails on early EOF. */
    private void readFully(ByteBuffer target, long position) throws IOException {
        long cursor = position;
        while (target.hasRemaining()) {
            int read = channel.read(target, cursor);
            if (read < 0) {
                throw new IOException("Unexpected end of file at offset " + cursor
                    + " (" + target.remaining() + " bytes missing)");
            }
            cursor += read;
        }
    }

    /**
     * Closes the backing file (and with it the channel).
     *
     * @throws RuntimeException wrapping the {@link IOException} if closing fails
     */
    @Override
    public void close() {
        try {
            raf.close();
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }
}

/**
 * Cache of strings keyed by integer index, bounded by an estimated byte size rather than by
 * entry count. When a new value does not fit, the least recently used entries are dropped until
 * it does.
 */
class LRUCache {

    /** Sum of the estimated sizes of all values currently in {@link #map}. */
    private long sizeBytes;
    private final long capacityBytes;
    // Access-ordered: get() and put() move the entry to the end, so iteration starts with the
    // least recently used entry.
    private final LinkedHashMap<Integer, String> map = new LinkedHashMap<>(16, 0.75f, true);

    /**
     * @param capacityBytes maximum total estimated size of the cached values
     */
    LRUCache(long capacityBytes) {
        this.capacityBytes = capacityBytes;
    }

    /**
     * Looks up a value and marks it as most recently used.
     *
     * @return the cached value, or {@code null} if the key is not cached
     */
    String getIfPresent(int key) {
        return map.get(key);
    }

    /**
     * Caches {@code val} under {@code key}, evicting least recently used entries as needed.
     * Storing under a key that is already cached replaces the old value.
     *
     * @throws RuntimeException if the value alone is larger than the cache capacity
     */
    void store(int key, String val) {
        long valSize = strSize(val);
        if (valSize > capacityBytes) {
            throw new RuntimeException("Insufficient cache space.");
        }

        // Drop a previous value for this key first so its size is not counted twice.
        String previous = map.remove(key);
        if (previous != null) {
            sizeBytes -= strSize(previous);
        }

        Iterator<String> it = map.values().iterator();
        while (valSize + sizeBytes > capacityBytes && it.hasNext()) {
            sizeBytes -= strSize(it.next());
            it.remove();
        }
        map.put(key, val);
        sizeBytes += valSize;
    }

    // just an estimation
    private static long strSize(String str) {
        long size = Integer.BYTES; // hashCode
        size += (long) Character.BYTES * str.length(); // characters
        return size;
    }

}
org.apache.poi.ss.usermodel.Cell; +import org.apache.poi.ss.usermodel.CellType; +import org.apache.poi.ss.usermodel.DateUtil; +import org.apache.poi.ss.usermodel.Row; +import org.apache.poi.ss.usermodel.Sheet; +import org.apache.poi.ss.usermodel.Workbook; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; + +import java.io.File; +import java.io.FileInputStream; +import java.io.InputStream; +import java.text.SimpleDateFormat; +import java.time.LocalDateTime; +import java.util.ArrayList; +import java.util.Calendar; +import java.util.Date; +import java.util.GregorianCalendar; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Locale; +import java.util.Map; +import java.util.Spliterator; +import java.util.Spliterators; +import java.util.stream.Collectors; +import java.util.stream.StreamSupport; + +import static org.apache.poi.ss.usermodel.CellType.BOOLEAN; +import static org.apache.poi.ss.usermodel.CellType.NUMERIC; +import static org.apache.poi.ss.usermodel.CellType.STRING; +import static org.apache.poi.ss.usermodel.Row.MissingCellPolicy.CREATE_NULL_AS_BLANK; +import static org.apache.poi.ss.usermodel.Row.MissingCellPolicy.RETURN_BLANK_AS_NULL; +import static org.hamcrest.CoreMatchers.equalTo; +import static org.hamcrest.CoreMatchers.nullValue; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.core.Is.is; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.junit.jupiter.api.Assertions.fail; + +public class StreamingReaderTest { + @BeforeAll + public static void init() { + Locale.setDefault(Locale.ENGLISH); + } + + @Test + public void 
testEncryption() throws Exception { + try( + InputStream is = new FileInputStream(new File("src/test/resources/encrypted.xlsx")); + Workbook wb = StreamingReader.builder().password("test").open(is)) { + OUTER: + for(Row r : wb.getSheetAt(0)) { + int i = 0; + for(Cell c : r) { + System.out.println(++i + " : "+ c.getStringCellValue()); + //System.out.println(c.getRichStringCellValue().getString()); + //assertEquals("Demo", c.getStringCellValue()); + //assertEquals("Demo", c.getRichStringCellValue().getString()); + //break OUTER; + } + } + } + } + + @Test + public void testTypes() throws Exception { + SimpleDateFormat df = new SimpleDateFormat("MM/dd/yyyy"); + try( + InputStream is = new FileInputStream(new File("src/test/resources/data_types.xlsx")); + Workbook wb = StreamingReader.builder().open(is); + ) { + + List> obj = new ArrayList<>(); + + for(Row r : wb.getSheetAt(0)) { + List o = new ArrayList<>(); + for(Cell c : r) { + o.add(c); + } + obj.add(o); + } + + assertEquals(7, obj.size()); + List row; + + row = obj.get(0); + assertEquals(2, row.size()); + assertEquals(STRING, row.get(0).getCellType()); + assertEquals(STRING, row.get(1).getCellType()); + assertEquals("Type", row.get(0).getStringCellValue()); + assertEquals("Type", row.get(0).getRichStringCellValue().getString()); + assertEquals("Value", row.get(1).getStringCellValue()); + assertEquals("Value", row.get(1).getRichStringCellValue().getString()); + + row = obj.get(1); + assertEquals(2, row.size()); + assertEquals(STRING, row.get(0).getCellType()); + assertEquals(STRING, row.get(1).getCellType()); + assertEquals("string", row.get(0).getStringCellValue()); + assertEquals("string", row.get(0).getRichStringCellValue().getString()); + assertEquals("jib-jab", row.get(1).getStringCellValue()); + assertEquals("jib-jab", row.get(1).getRichStringCellValue().getString()); + + row = obj.get(2); + assertEquals(2, row.size()); + assertEquals(STRING, row.get(0).getCellType()); + assertEquals(NUMERIC, 
row.get(1).getCellType()); + assertEquals("int", row.get(0).getStringCellValue()); + assertEquals("int", row.get(0).getRichStringCellValue().getString()); + assertEquals(10, row.get(1).getNumericCellValue(), 0); + + row = obj.get(3); + assertEquals(2, row.size()); + assertEquals(STRING, row.get(0).getCellType()); + assertEquals(NUMERIC, row.get(1).getCellType()); + assertEquals("double", row.get(0).getStringCellValue()); + assertEquals("double", row.get(0).getRichStringCellValue().getString()); + assertEquals(3.14, row.get(1).getNumericCellValue(), 0); + + row = obj.get(4); + assertEquals(2, row.size()); + assertEquals(STRING, row.get(0).getCellType()); + assertEquals(NUMERIC, row.get(1).getCellType()); + assertEquals("date", row.get(0).getStringCellValue()); + assertEquals("date", row.get(0).getRichStringCellValue().getString()); + assertEquals(df.parse("1/1/2014"), row.get(1).getDateCellValue()); + assertTrue(DateUtil.isCellDateFormatted(row.get(1))); + + row = obj.get(5); + assertEquals(7, row.size()); + assertEquals(STRING, row.get(0).getCellType()); + assertEquals(STRING, row.get(1).getCellType()); + assertEquals(STRING, row.get(2).getCellType()); + assertEquals(STRING, row.get(3).getCellType()); + assertEquals(STRING, row.get(4).getCellType()); + assertEquals(STRING, row.get(5).getCellType()); + assertEquals(STRING, row.get(6).getCellType()); + assertEquals("long", row.get(0).getStringCellValue()); + assertEquals("long", row.get(0).getRichStringCellValue().getString()); + assertEquals("ass", row.get(1).getStringCellValue()); + assertEquals("ass", row.get(1).getRichStringCellValue().getString()); + assertEquals("row", row.get(2).getStringCellValue()); + assertEquals("row", row.get(2).getRichStringCellValue().getString()); + assertEquals("look", row.get(3).getStringCellValue()); + assertEquals("look", row.get(3).getRichStringCellValue().getString()); + assertEquals("at", row.get(4).getStringCellValue()); + assertEquals("at", 
row.get(4).getRichStringCellValue().getString()); + assertEquals("it", row.get(5).getStringCellValue()); + assertEquals("it", row.get(5).getRichStringCellValue().getString()); + assertEquals("go", row.get(6).getStringCellValue()); + assertEquals("go", row.get(6).getRichStringCellValue().getString()); + + row = obj.get(6); + assertEquals(3, row.size()); + assertEquals(STRING, row.get(0).getCellType()); + assertEquals(BOOLEAN, row.get(1).getCellType()); + assertEquals(BOOLEAN, row.get(2).getCellType()); + assertEquals("boolean", row.get(0).getStringCellValue()); + assertEquals("boolean", row.get(0).getRichStringCellValue().getString()); + assertEquals(true, row.get(1).getBooleanCellValue()); + assertEquals(false, row.get(2).getBooleanCellValue()); + } + } + + @Test + public void testGetDateCellValue() throws Exception { + try( + InputStream is = new FileInputStream("src/test/resources/data_types.xlsx"); + Workbook wb = StreamingReader.builder().open(is); + ) { + + List> obj = new ArrayList<>(); + + for(Row r : wb.getSheetAt(0)) { + List o = new ArrayList<>(); + for(Cell c : r) { + o.add(c); + } + obj.add(o); + } + + Date dt = obj.get(4).get(1).getDateCellValue(); + assertNotNull(dt); + final GregorianCalendar cal = new GregorianCalendar(); + cal.setTime(dt); + assertEquals(cal.get(Calendar.YEAR), 2014); + + // Verify LocalDateTime version is correct as well + LocalDateTime localDateTime = obj.get(4).get(1).getLocalDateTimeCellValue(); + assertEquals(2014, localDateTime.getYear()); + + try { + obj.get(0).get(0).getDateCellValue(); + fail("Should have thrown IllegalStateException"); + } catch(IllegalStateException e) { } + } + } + + @Test + public void testGetFirstCellNum() throws Exception { + try( + InputStream is = new FileInputStream(new File("src/test/resources/gaps.xlsx")); + Workbook wb = StreamingReader.builder().open(is); + ) { + + List> obj = new ArrayList<>(); + List rows = new ArrayList<>(); + for(Row r : wb.getSheetAt(0)) { + rows.add(r); + List o = new 
ArrayList<>(); + for(Cell c : r) { + o.add(c); + } + obj.add(o); + } + + assertEquals(3, rows.size()); + assertEquals(3, rows.get(2).getFirstCellNum()); + } + } + + @Test + public void testGaps() throws Exception { + try( + InputStream is = new FileInputStream(new File("src/test/resources/gaps.xlsx")); + Workbook wb = StreamingReader.builder().open(is); + ) { + List> obj = new ArrayList<>(); + + for(Row r : wb.getSheetAt(0)) { + List o = new ArrayList<>(); + for(Cell c : r) { + o.add(c); + } + obj.add(o); + } + + assertEquals(3, obj.size()); + List row; + + row = obj.get(0); + assertEquals(2, row.size()); + assertEquals(STRING, row.get(0).getCellType()); + assertEquals(STRING, row.get(1).getCellType()); + assertEquals("Dat", row.get(0).getStringCellValue()); + assertEquals("Dat", row.get(0).getRichStringCellValue().getString()); + assertEquals(0, row.get(0).getColumnIndex()); + assertEquals(0, row.get(0).getRowIndex()); + assertEquals("gap", row.get(1).getStringCellValue()); + assertEquals("gap", row.get(1).getRichStringCellValue().getString()); + assertEquals(2, row.get(1).getColumnIndex()); + assertEquals(0, row.get(1).getRowIndex()); + + row = obj.get(1); + assertEquals(2, row.size()); + assertEquals(STRING, row.get(0).getCellType()); + assertEquals(STRING, row.get(1).getCellType()); + assertEquals("guuurrrrrl", row.get(0).getStringCellValue()); + assertEquals("guuurrrrrl", row.get(0).getRichStringCellValue().getString()); + assertEquals(0, row.get(0).getColumnIndex()); + assertEquals(6, row.get(0).getRowIndex()); + assertEquals("!", row.get(1).getStringCellValue()); + assertEquals("!", row.get(1).getRichStringCellValue().getString()); + assertEquals(6, row.get(1).getColumnIndex()); + assertEquals(6, row.get(1).getRowIndex()); + } + } + + @Test + public void testLeadingZeroes() throws Exception { + File f = new File("src/test/resources/leadingZeroes.xlsx"); + + try(Workbook wb = StreamingReader.builder().open(f)) { + Iterator iter = wb.getSheetAt(0).iterator(); 
+ iter.hasNext(); + + Row r1 = iter.next(); + assertEquals(1, r1.getCell(0).getNumericCellValue(), 0); + assertEquals("1", r1.getCell(0).getStringCellValue()); + assertEquals(NUMERIC, r1.getCell(0).getCellType()); + + Row r2 = iter.next(); + assertEquals(2, r2.getCell(0).getNumericCellValue(), 0); + assertEquals("0002", r2.getCell(0).getStringCellValue()); + assertEquals("0002", r2.getCell(0).getRichStringCellValue().getString()); + assertEquals(STRING, r2.getCell(0).getCellType()); + } + } + @Test + public void testSpecialStyles() throws Exception { + File f = new File("src/test/resources/special_types.xlsx"); + + Map> contents = new HashMap<>(); + try(Workbook wb = StreamingReader.builder().open(f)) { + for(Row row : wb.getSheetAt(0)) { + contents.put(row.getRowNum(), new ArrayList()); + for(Cell c : row) { + if(c.getColumnIndex() > 0) { + contents.get(row.getRowNum()).add(c); + } + } + } + } + + SimpleDateFormat df = new SimpleDateFormat("dd/MM/yyyy"); + + assertThat(contents.size(), equalTo(2)); + assertThat(contents.get(0).size(), equalTo(4)); + assertThat(contents.get(0).get(0).getStringCellValue(), equalTo("Thu\", \"Dec 25\", \"14")); + assertThat(contents.get(0).get(0).getDateCellValue(), equalTo(df.parse("25/12/2014"))); + assertThat(contents.get(0).get(1).getStringCellValue(), equalTo("02/04/15")); + assertThat(contents.get(0).get(1).getDateCellValue(), equalTo(df.parse("04/02/2015"))); + assertThat(contents.get(0).get(2).getStringCellValue(), equalTo("14\". \"Mar\". 
\"2015")); + assertThat(contents.get(0).get(2).getDateCellValue(), equalTo(df.parse("14/03/2015"))); + assertThat(contents.get(0).get(3).getStringCellValue(), equalTo("2015-05-05")); + assertThat(contents.get(0).get(3).getDateCellValue(), equalTo(df.parse("05/05/2015"))); + + assertThat(contents.get(1).size(), equalTo(4)); + assertThat(contents.get(1).get(0).getStringCellValue(), equalTo("3.12")); + assertThat(contents.get(1).get(0).getNumericCellValue(), equalTo(3.12312312312)); + assertThat(contents.get(1).get(1).getStringCellValue(), equalTo("1,023,042")); + assertThat(contents.get(1).get(1).getNumericCellValue(), equalTo(1023042.0)); + assertThat(contents.get(1).get(2).getStringCellValue(), equalTo("-312,231.12")); + assertThat(contents.get(1).get(2).getNumericCellValue(), equalTo(-312231.12123145)); + assertThat(contents.get(1).get(3).getStringCellValue(), equalTo("(132)")); + assertThat(contents.get(1).get(3).getNumericCellValue(), equalTo(-132.0)); + } + + @Test + public void testFirstRowNumIs0() throws Exception { + File f = new File("src/test/resources/data_types.xlsx"); + try(Workbook wb = StreamingReader.builder().open(f)) { + Row row = wb.getSheetAt(0).iterator().next(); + assertThat(row.getRowNum(), equalTo(0)); + } + } + + @Test + public void testMissingRattrs() throws Exception { + try( + InputStream is = new FileInputStream(new File("src/test/resources/missing-r-attrs.xlsx")); + StreamingReader reader = StreamingReader.builder().read(is); + ) { + Row row = reader.iterator().next(); + assertEquals(0, row.getRowNum()); + assertEquals("1", row.getCell(0).getStringCellValue()); + assertEquals("5", row.getCell(4).getStringCellValue()); + row = reader.iterator().next(); + assertEquals(1, row.getRowNum()); + assertEquals("6", row.getCell(0).getStringCellValue()); + assertEquals("10", row.getCell(4).getStringCellValue()); + row = reader.iterator().next(); + assertEquals(6, row.getRowNum()); + assertEquals("11", row.getCell(0).getStringCellValue()); + 
assertEquals("15", row.getCell(4).getStringCellValue()); + + assertFalse(reader.iterator().hasNext()); + } + } + + @Test + public void testShouldReturnNullForMissingCellPolicy_RETURN_BLANK_AS_NULL() throws Exception { + try( + InputStream is = new FileInputStream(new File("src/test/resources/blank_cells.xlsx")); + Workbook wb = StreamingReader.builder().open(is); + ) { + Row row = wb.getSheetAt(0).iterator().next(); + assertNotNull(row.getCell(0, RETURN_BLANK_AS_NULL)); //Remain unchanged + assertNull(row.getCell(1, RETURN_BLANK_AS_NULL)); + } + } + + @Test + public void testShouldReturnBlankForMissingCellPolicy_CREATE_NULL_AS_BLANK() throws Exception { + try( + InputStream is = new FileInputStream(new File("src/test/resources/null_cell.xlsx")); + Workbook wb = StreamingReader.builder().open(is); + ) { + Row row = wb.getSheetAt(0).iterator().next(); + assertEquals("B1 is Null ->", row.getCell(0, CREATE_NULL_AS_BLANK).getStringCellValue()); //Remain unchanged + assertEquals("B1 is Null ->", row.getCell(0, CREATE_NULL_AS_BLANK).getRichStringCellValue().getString()); //Remain unchanged + assertThat(row.getCell(1), is(nullValue())); + assertNotNull(row.getCell(1, CREATE_NULL_AS_BLANK)); + } + } + + // 이 시트의 마지막 셀은 숫자여야 하지만 "f"가 남아 있습니다 + // 마지막 셀에 부착되어 있는 태그로 인해 FORULLA가 됩니다. 
+ @Test + public void testForumulaOutsideCellIgnored() throws Exception { + try( + InputStream is = new FileInputStream(new File("src/test/resources/formula_outside_cell.xlsx")); + Workbook wb = StreamingReader.builder().open(is); + ) { + Iterator rows = wb.getSheetAt(0).iterator(); + Cell cell = null; + while(rows.hasNext()) { + Iterator cells = rows.next().iterator(); + while(cells.hasNext()) { + cell = cells.next(); + } + } + assertNotNull(cell); + assertThat(cell.getCellType(), is(CellType.NUMERIC)); + } + } + + @Test + public void testFormulaWithDifferentTypes() throws Exception { + try( + InputStream is = new FileInputStream(new File("src/test/resources/formula_test.xlsx")); + Workbook wb = StreamingReader.builder().open(is) + ) { + Sheet sheet = wb.getSheetAt(0); + Iterator rowIterator = sheet.rowIterator(); + + Row next = rowIterator.next(); + Cell cell = next.getCell(0); + + assertThat(cell.getCellType(), is(CellType.STRING)); + + next = rowIterator.next(); + cell = next.getCell(0); + + assertThat(cell.getCellType(), is(CellType.FORMULA)); + assertThat(cell.getCachedFormulaResultType(), is(CellType.STRING)); + + next = rowIterator.next(); + cell = next.getCell(0); + + assertThat(cell.getCellType(), is(CellType.FORMULA)); + assertThat(cell.getCachedFormulaResultType(), is(CellType.BOOLEAN)); + + next = rowIterator.next(); + cell = next.getCell(0); + + assertThat(cell.getCellType(), is(CellType.FORMULA)); + assertThat(cell.getCachedFormulaResultType(), is(CellType.NUMERIC)); + } + } +} diff --git a/mens-core/src/test/resources/blank_cells.xlsx b/mens-core/src/test/resources/blank_cells.xlsx new file mode 100644 index 0000000..3825255 Binary files /dev/null and b/mens-core/src/test/resources/blank_cells.xlsx differ diff --git a/mens-core/src/test/resources/data_types.xlsx b/mens-core/src/test/resources/data_types.xlsx new file mode 100644 index 0000000..0370507 Binary files /dev/null and b/mens-core/src/test/resources/data_types.xlsx differ diff --git 
a/mens-core/src/test/resources/encrypted.xlsx b/mens-core/src/test/resources/encrypted.xlsx new file mode 100644 index 0000000..1d2a3c8 Binary files /dev/null and b/mens-core/src/test/resources/encrypted.xlsx differ diff --git a/mens-core/src/test/resources/formula_outside_cell.xlsx b/mens-core/src/test/resources/formula_outside_cell.xlsx new file mode 100644 index 0000000..100b838 Binary files /dev/null and b/mens-core/src/test/resources/formula_outside_cell.xlsx differ diff --git a/mens-core/src/test/resources/formula_test.xlsx b/mens-core/src/test/resources/formula_test.xlsx new file mode 100644 index 0000000..5b892b8 Binary files /dev/null and b/mens-core/src/test/resources/formula_test.xlsx differ diff --git a/mens-core/src/test/resources/gaps.xlsx b/mens-core/src/test/resources/gaps.xlsx new file mode 100644 index 0000000..175d96c Binary files /dev/null and b/mens-core/src/test/resources/gaps.xlsx differ diff --git a/mens-core/src/test/resources/leadingZeroes.xlsx b/mens-core/src/test/resources/leadingZeroes.xlsx new file mode 100644 index 0000000..7a1e953 Binary files /dev/null and b/mens-core/src/test/resources/leadingZeroes.xlsx differ diff --git a/mens-core/src/test/resources/missing-r-attrs.xlsx b/mens-core/src/test/resources/missing-r-attrs.xlsx new file mode 100644 index 0000000..6abaddf Binary files /dev/null and b/mens-core/src/test/resources/missing-r-attrs.xlsx differ diff --git a/mens-core/src/test/resources/null_cell.xlsx b/mens-core/src/test/resources/null_cell.xlsx new file mode 100644 index 0000000..6340628 Binary files /dev/null and b/mens-core/src/test/resources/null_cell.xlsx differ diff --git a/mens-core/src/test/resources/special_types.xlsx b/mens-core/src/test/resources/special_types.xlsx new file mode 100644 index 0000000..8355a71 Binary files /dev/null and b/mens-core/src/test/resources/special_types.xlsx differ diff --git a/pom.xml b/pom.xml index fd56ac3..5ae061d 100644 --- a/pom.xml +++ b/pom.xml @@ -29,6 +29,8 @@ 2.6.0 7.3.2 9.0.10 
+ + 5.0.0