feat: excel parser

dev
gitea-관리자 1 year ago
parent e5799b86ea
commit 4cbd0ea1cc

@ -374,6 +374,28 @@
<scope>test</scope>
</dependency>
<!-- excel poi -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi</artifactId>
<version>${apache.poi.version}</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>${apache.poi.version}</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-scratchpad</artifactId>
<version>${apache.poi.version}</version>
</dependency>
<dependency>
<groupId>xml-apis</groupId>
<artifactId>xml-apis</artifactId>
<version>1.4.01</version>
</dependency>
</dependencies>
<build>

@ -0,0 +1,402 @@
package kr.xit.core.support.xlsx;
import static kr.xit.core.support.xlsx.XmlUtils.document;
import static kr.xit.core.support.xlsx.XmlUtils.searchForNodeList;
import static kr.xit.core.support.xlsx.impl.TempFileUtil.writeInputStreamToFile;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.nio.file.Files;
import java.security.GeneralSecurityException;
import java.util.Iterator;
import java.util.Objects;
import javax.xml.stream.XMLEventReader;
import javax.xml.stream.XMLStreamException;
import kr.xit.core.support.xlsx.exceptions.CloseException;
import kr.xit.core.support.xlsx.exceptions.MissingSheetException;
import kr.xit.core.support.xlsx.exceptions.OpenException;
import kr.xit.core.support.xlsx.exceptions.ReadException;
import kr.xit.core.support.xlsx.impl.StreamingSheetReader;
import kr.xit.core.support.xlsx.impl.StreamingWorkbook;
import kr.xit.core.support.xlsx.impl.StreamingWorkbookReader;
import kr.xit.core.support.xlsx.sst.BufferedStringsTable;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.poifs.crypt.Decryptor;
import org.apache.poi.poifs.crypt.EncryptionInfo;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.util.StaxHelper;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.model.SharedStringsTable;
import org.apache.poi.xssf.model.StylesTable;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
/**
* Streaming Excel workbook implementation. Most advanced features of POI are not supported. Use
* this only if your application can handle iterating through an entire workbook, row by row.
*/
public class StreamingReader implements Iterable<Row>, AutoCloseable {
private static final Logger log = LoggerFactory.getLogger(StreamingReader.class);
private File tmp;
private final StreamingWorkbookReader workbook;
public StreamingReader(StreamingWorkbookReader workbook) {
this.workbook = workbook;
}
/**
* Returns a new streaming iterator to loop through rows. This iterator is not guaranteed to
* have all rows in memory, and any particular iteration may trigger a load from disk to read in
* new data.
*
* @return the streaming iterator
* @deprecated StreamingReader is equivalent to the POI Workbook object rather than the Sheet
* object. This method will be removed in a future release.
*/
@Override
public Iterator<Row> iterator() {
return workbook.first().iterator();
}
/**
 * Closes the wrapped workbook reader and then removes the temporary file recorded in
 * {@code tmp}, if there is one.
 * <p>
 * The file removal runs in a {@code finally} block, so it is attempted even when closing the
 * workbook reader throws. A failed deletion is reported at WARN level instead of being
 * silently ignored; it never replaces an exception raised while closing the reader.
 *
 * @throws IOException declared for {@link AutoCloseable} callers; presumably raised by the
 *                     wrapped reader's {@code close()} (its implementation is not visible
 *                     here - confirm)
 */
@Override
public void close() throws IOException {
    try {
        workbook.close();
    } finally {
        if (tmp != null) {
            // Parameterized logging: no message is built unless DEBUG is enabled.
            log.debug("Deleting tmp file [{}]", tmp.getAbsolutePath());
            // delete() also returns false when the file is already gone; only warn
            // when the file is really still on disk.
            if (!tmp.delete() && tmp.exists()) {
                log.warn("Unable to delete tmp file [{}]", tmp.getAbsolutePath());
            }
        }
    }
}
public static Builder builder() {
return new Builder();
}
public static class Builder {
private int rowCacheSize = 10;
private int bufferSize = 1024;
private int sheetIndex = 0;
private int sstCacheSizeBytes = -1;
private String sheetName;
private String password;
public int getRowCacheSize() {
return rowCacheSize;
}
public int getBufferSize() {
return bufferSize;
}
/**
* @return The sheet index
* @deprecated This method will be removed in a future release.
*/
public int getSheetIndex() {
return sheetIndex;
}
/**
* @return The sheet name
* @deprecated This method will be removed in a future release.
*/
public String getSheetName() {
return sheetName;
}
/**
* @return The password to use to unlock this workbook
*/
public String getPassword() {
return password;
}
/**
* @return The size of the shared string table cache. If less than 0, no cache will be used
* and the entire table will be loaded into memory.
*/
public int getSstCacheSizeBytes() {
return sstCacheSizeBytes;
}
/**
* The number of rows to keep in memory at any given point.
* <p>
* Defaults to 10
* </p>
*
* @param rowCacheSize number of rows
* @return reference to current {@code Builder}
*/
public Builder rowCacheSize(int rowCacheSize) {
this.rowCacheSize = rowCacheSize;
return this;
}
/**
* The number of bytes to read into memory from the input resource.
* <p>
* Defaults to 1024
* </p>
*
* @param bufferSize buffer size in bytes
* @return reference to current {@code Builder}
*/
public Builder bufferSize(int bufferSize) {
this.bufferSize = bufferSize;
return this;
}
/**
* Which sheet to open. There can only be one sheet open for a single instance of
* {@code StreamingReader}. If more sheets need to be read, a new instance must be created.
* <p>
* Defaults to 0
* </p>
*
* @param sheetIndex index of sheet
* @return reference to current {@code Builder}
* @deprecated This method will be removed in a future release. Use
* {@link StreamingWorkbook#getSheetAt(int)} instead.
*/
public Builder sheetIndex(int sheetIndex) {
this.sheetIndex = sheetIndex;
return this;
}
/**
* Which sheet to open. There can only be one sheet open for a single instance of
* {@code StreamingReader}. If more sheets need to be read, a new instance must be created.
*
* @param sheetName name of sheet
* @return reference to current {@code Builder}
* @deprecated This method will be removed in a future release. Use
* {@link StreamingWorkbook#getSheet(String)} instead.
*/
public Builder sheetName(String sheetName) {
this.sheetName = sheetName;
return this;
}
/**
* For password protected files specify password to open file. If the password is incorrect
* a {@code ReadException} is thrown on {@code read}.
* <p>NULL indicates that no password should be used, this is the
* default value.</p>
*
* @param password to use when opening file
* @return reference to current {@code Builder}
*/
public Builder password(String password) {
this.password = password;
return this;
}
/**
 * <h1>!!! This option is experimental !!!</h1>
 * <p>
 * Sets the size of the Shared Strings Table (SST) cache. This option exists to accommodate
 * extremely large workbooks with millions of unique strings. Normally the SST is loaded
 * entirely into memory, but for large workbooks with high cardinality (i.e., very few
 * duplicate values) the SST may not fit into memory.
 * <p>
 * The default is {@code -1}, which loads the entire SST into memory. When the value is greater
 * than 0, {@code read(File)} creates a temporary cache file and obtains the table from
 * {@code BufferedStringsTable} instead. <strong>However</strong>, enabling this option causes
 * noticeable performance degradation, because memory is traded for disk access.
 * <p>
 * NOTE(review): the parameter name says bytes, while the previous documentation described the
 * value as a number of cached entries - confirm the unit against {@code BufferedStringsTable}.
 *
 * @param sstCacheSizeBytes size of the SST cache; values of 0 or less disable the cache
 * @return reference to current {@code Builder}
 */
public Builder sstCacheSizeBytes(int sstCacheSizeBytes) {
this.sstCacheSizeBytes = sstCacheSizeBytes;
return this;
}
/**
* Reads a given {@code InputStream} and returns a new instance of {@code Workbook}. Due to
* Apache POI limitations, a temporary file must be written in order to create a streaming
* iterator. This process will use the same buffer size as specified in
* {@link #bufferSize(int)}.
*
* @param is input stream to read in
* @return A {@link Workbook} that can be read from
* @throws ReadException if there is an issue reading the stream
*/
public Workbook open(InputStream is) {
StreamingWorkbookReader workbook = new StreamingWorkbookReader(this);
workbook.init(is);
return new StreamingWorkbook(workbook);
}
/**
* Reads a given {@code File} and returns a new instance of {@code Workbook}.
*
* @param file file to read in
* @return built streaming reader instance
* @throws OpenException if there is an issue opening the file
* @throws ReadException if there is an issue reading the file
*/
public Workbook open(File file) {
StreamingWorkbookReader workbook = new StreamingWorkbookReader(this);
workbook.init(file);
return new StreamingWorkbook(workbook);
}
/**
 * Copies the given {@code InputStream} to a temporary file (using the buffer size configured
 * through {@link #bufferSize(int)}) and opens that file with {@link #read(File)}. The
 * temporary file is remembered on the returned reader so that closing the reader deletes it.
 * <p>
 * If opening the copied file fails with a runtime exception, the temporary file is deleted
 * before the exception is rethrown.
 *
 * @param is input stream to read in
 * @return built streaming reader instance
 * @throws ReadException if the stream cannot be copied to the temporary file
 * @deprecated This method will be removed in a future release. Use
 * {@link Builder#open(InputStream)} instead
 */
public StreamingReader read(InputStream is) {
    final File spooled;
    try {
        spooled = writeInputStreamToFile(is, bufferSize);
    } catch (IOException copyFailure) {
        throw new ReadException("Unable to read input stream", copyFailure);
    }
    try {
        log.debug("Created temp file [" + spooled.getAbsolutePath() + "]");
        final StreamingReader streamingReader = read(spooled);
        streamingReader.tmp = spooled;
        return streamingReader;
    } catch (RuntimeException openFailure) {
        // Nobody will own the temp file once we rethrow, so remove it here.
        if (spooled != null) {
            spooled.delete();
        }
        throw openFailure;
    }
}
/**
 * Opens the given {@code File} and returns a {@code StreamingReader} over the sheet selected
 * with {@link #sheetName(String)} or, when no name is set, {@link #sheetIndex(int)}.
 * <p>
 * When a password is configured the file is decrypted first and the password is verified.
 * If anything fails after the package has been opened, the package is reverted and the
 * shared-strings cache file (if one was created) is deleted, so a failed call does not leak
 * file handles or temporary files.
 *
 * @param f file to read in
 * @return built streaming reader instance
 * @throws OpenException         if there is an I/O issue opening the file
 * @throws ReadException         if the workbook cannot be read, or the password is wrong
 * @throws MissingSheetException if the requested sheet does not exist
 * @deprecated This method will be removed in a future release. Use
 * {@link Builder#open(File)} instead
 */
public StreamingReader read(File f) {
    OPCPackage pkg = null;
    File sstCache = null;
    boolean handedOff = false;
    try {
        pkg = openPackage(f);
        XSSFReader reader = new XSSFReader(pkg);

        SharedStringsTable sst;
        if (sstCacheSizeBytes > 0) {
            sstCache = Files.createTempFile("", "").toFile();
            log.debug("Created sst cache file [{}]", sstCache.getAbsolutePath());
            sst = BufferedStringsTable.getSharedStringsTable(sstCache, sstCacheSizeBytes, pkg);
        } else {
            sst = reader.getSharedStringsTable();
        }
        StylesTable styles = reader.getStylesTable();
        boolean use1904Dates = uses1904Dates(reader);

        InputStream sheet = findSheet(reader);
        if (sheet == null) {
            // Report the selector that was actually used to look the sheet up.
            throw new MissingSheetException(sheetName != null
                    ? "Unable to find sheet with name [" + sheetName + "]"
                    : "Unable to find sheet at index [" + sheetIndex + "]");
        }
        XMLEventReader parser = StaxHelper.newXMLInputFactory().createXMLEventReader(sheet);
        StreamingReader result = new StreamingReader(
                new StreamingWorkbookReader(sst, sstCache, pkg,
                        new StreamingSheetReader(sst, styles, parser, use1904Dates, rowCacheSize),
                        this));
        // From here on the returned reader owns pkg and sstCache.
        handedOff = true;
        return result;
    } catch (IOException e) {
        throw new OpenException("Failed to open file", e);
    } catch (OpenXML4JException | XMLStreamException e) {
        throw new ReadException("Unable to read workbook", e);
    } catch (GeneralSecurityException e) {
        throw new ReadException("Unable to read workbook - Decryption failed", e);
    } finally {
        if (!handedOff) {
            releaseQuietly(pkg, sstCache);
        }
    }
}

/**
 * Opens the OPC package for {@code f}, decrypting it first when a password is configured.
 * Throws {@code ReadException} when the configured password does not match.
 */
private OPCPackage openPackage(File f)
        throws IOException, GeneralSecurityException, InvalidFormatException {
    if (password == null) {
        return OPCPackage.open(f);
    }
    // Based on: https://poi.apache.org/encryption.html
    // OPCPackage.open(InputStream) is expected to consume the whole stream before returning,
    // so the encrypted container can be closed once the package has been built.
    try (POIFSFileSystem poifs = new POIFSFileSystem(f)) {
        EncryptionInfo info = new EncryptionInfo(poifs);
        Decryptor d = Decryptor.getInstance(info);
        if (!d.verifyPassword(password)) {
            throw new ReadException("Unable to read workbook - Decryption failed");
        }
        try (InputStream decrypted = d.getDataStream(poifs)) {
            return OPCPackage.open(decrypted);
        }
    }
}

/**
 * Reads the {@code date1904} flag from the workbook properties; absent means {@code false}.
 */
private static boolean uses1904Dates(XSSFReader reader)
        throws IOException, InvalidFormatException {
    try (InputStream workbookData = reader.getWorkbookData()) {
        NodeList workbookPr = searchForNodeList(document(workbookData),
                "/ss:workbook/ss:workbookPr");
        if (workbookPr.getLength() != 1) {
            return false;
        }
        Node date1904 = workbookPr.item(0).getAttributes().getNamedItem("date1904");
        if (date1904 == null) {
            return false;
        }
        // xsd:boolean allows both "1" and "true" as the true value.
        String flag = date1904.getTextContent();
        return "1".equals(flag) || "true".equals(flag);
    }
}

/**
 * Best-effort cleanup after a failed open; never throws, so it cannot mask the root cause.
 */
private static void releaseQuietly(OPCPackage pkg, File sstCache) {
    if (pkg != null) {
        try {
            // revert() closes the package without trying to save it.
            pkg.revert();
        } catch (RuntimeException e) {
            log.warn("Unable to release workbook package", e);
        }
    }
    if (sstCache != null && !sstCache.delete() && sstCache.exists()) {
        log.warn("Unable to delete sst cache file [{}]", sstCache.getAbsolutePath());
    }
}
/**
 * Locates the worksheet stream to read. When a sheet name is configured, the name is resolved
 * to a position by scanning the {@code sheet} elements of the workbook data; otherwise the
 * configured sheet index is used. The sheet streams are then walked in order until that
 * position is reached; streams that are skipped on the way are closed.
 *
 * @param reader reader over the opened package
 * @return the stream of the selected sheet, or {@code null} when no sheet matches
 * @throws IOException            if the workbook or sheet data cannot be read
 * @throws InvalidFormatException if the workbook data is malformed
 * @deprecated This will be removed when the transition to the 1.x API is complete
 */
private InputStream findSheet(XSSFReader reader)
        throws IOException, InvalidFormatException {
    int index = sheetIndex;
    if (sheetName != null) {
        index = -1;
        // This file is separate from the worksheet data, and should be fairly small
        try (InputStream workbookData = reader.getWorkbookData()) {
            NodeList nl = searchForNodeList(document(workbookData),
                    "/ss:workbook/ss:sheets/ss:sheet");
            for (int i = 0; i < nl.getLength(); i++) {
                Node name = nl.item(i).getAttributes().getNamedItem("name");
                // A sheet element without a name attribute simply cannot match.
                if (name != null && Objects.equals(name.getTextContent(), sheetName)) {
                    index = i;
                    break;
                }
            }
        }
        if (index < 0) {
            return null;
        }
    }
    Iterator<InputStream> iter = reader.getSheetsData();
    int i = 0;
    while (iter.hasNext()) {
        InputStream is = iter.next();
        if (i++ == index) {
            // Log the resolved position, which differs from sheetIndex for name lookups.
            log.debug("Found sheet at index [{}]", index);
            return is;
        }
        // Not the requested sheet: release its stream before moving on.
        is.close();
    }
    return null;
}
}
}

@ -0,0 +1,94 @@
package kr.xit.core.support.xlsx;
import java.io.IOException;
import java.io.InputStream;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import javax.xml.XMLConstants;
import javax.xml.namespace.NamespaceContext;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;
import kr.xit.core.support.xlsx.exceptions.ParseException;
import org.apache.poi.ooxml.util.DocumentHelper;
import org.w3c.dom.Document;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;
/**
 * XML helpers used while reading workbook metadata: DOM parsing and XPath lookups in which the
 * prefix {@code ss} is bound to the SpreadsheetML main namespace.
 */
public class XmlUtils {

    /** Namespace URI bound to the {@code ss} prefix in every XPath evaluated here. */
    private static final String SPREADSHEETML_NS =
            "http://schemas.openxmlformats.org/spreadsheetml/2006/main";

    /**
     * Parses the given stream into a DOM document using POI's {@code DocumentHelper}.
     *
     * @param is stream holding the XML to parse; it is not closed by this method
     * @return the parsed document
     * @throws ParseException if the content cannot be read or is not well-formed XML
     */
    public static Document document(InputStream is) {
        try {
            return DocumentHelper.readDocument(is);
        } catch (SAXException | IOException e) {
            throw new ParseException(e);
        }
    }

    /**
     * Evaluates an XPath expression against the document and returns the matching nodes.
     *
     * @param document document to search
     * @param xpath    expression to evaluate; may use the {@code ss} prefix
     * @return the matching nodes
     * @throws ParseException if the expression is invalid or cannot be evaluated
     */
    public static NodeList searchForNodeList(Document document, String xpath) {
        try {
            XPath xp = XPathFactory.newInstance().newXPath();
            NamespaceContextImpl nc = new NamespaceContextImpl();
            nc.addNamespace("ss", SPREADSHEETML_NS);
            xp.setNamespaceContext(nc);
            return (NodeList) xp.compile(xpath)
                    .evaluate(document, XPathConstants.NODESET);
        } catch (XPathExpressionException e) {
            throw new ParseException(e);
        }
    }

    /**
     * Map-backed {@link NamespaceContext} with the {@code xml} and {@code xmlns} prefixes
     * pre-registered.
     */
    private static final class NamespaceContextImpl implements NamespaceContext {

        private final Map<String, String> urisByPrefix = new HashMap<>();
        private final Map<String, Set<String>> prefixesByURI = new HashMap<>();

        NamespaceContextImpl() {
            addNamespace(XMLConstants.XML_NS_PREFIX, XMLConstants.XML_NS_URI);
            addNamespace(XMLConstants.XMLNS_ATTRIBUTE, XMLConstants.XMLNS_ATTRIBUTE_NS_URI);
        }

        /** Binds {@code prefix} to {@code namespaceURI}, replacing any earlier binding. */
        void addNamespace(String prefix, String namespaceURI) {
            String previousUri = urisByPrefix.put(prefix, namespaceURI);
            if (previousUri != null && !previousUri.equals(namespaceURI)) {
                // Keep the reverse index in sync when a prefix is rebound.
                Set<String> stale = prefixesByURI.get(previousUri);
                if (stale != null) {
                    stale.remove(prefix);
                    if (stale.isEmpty()) {
                        prefixesByURI.remove(previousUri);
                    }
                }
            }
            Set<String> prefixes = prefixesByURI.get(namespaceURI);
            if (prefixes == null) {
                prefixes = new HashSet<>();
                prefixesByURI.put(namespaceURI, prefixes);
            }
            prefixes.add(prefix);
        }

        @Override
        public String getNamespaceURI(String prefix) {
            if (prefix == null) {
                throw new IllegalArgumentException("prefix cannot be null");
            }
            String uri = urisByPrefix.get(prefix);
            return uri != null ? uri : XMLConstants.NULL_NS_URI;
        }

        @Override
        public String getPrefix(String namespaceURI) {
            // The NamespaceContext contract asks for null, not an exception, when unbound.
            Iterator<String> prefixes = getPrefixes(namespaceURI);
            return prefixes.hasNext() ? prefixes.next() : null;
        }

        @Override
        public Iterator<String> getPrefixes(String namespaceURI) {
            if (namespaceURI == null) {
                throw new IllegalArgumentException("namespaceURI cannot be null");
            }
            Set<String> prefixes = prefixesByURI.get(namespaceURI);
            if (prefixes == null) {
                return Collections.<String>emptyIterator();
            }
            // Read-only view so callers cannot alter the bindings through Iterator.remove().
            return Collections.unmodifiableSet(prefixes).iterator();
        }
    }
}

@ -0,0 +1,20 @@
package kr.xit.core.support.xlsx.exceptions;
/**
 * Unchecked exception for failures that occur while closing a streaming workbook resource.
 * <p>
 * The cause-taking constructors accept any {@link Throwable}, so errors that are not
 * {@link Exception}s can be wrapped as well.
 */
public class CloseException extends RuntimeException {

    /** Creates an exception with neither a detail message nor a cause. */
    public CloseException() {
        super();
    }

    /**
     * @param msg detail message describing the failure
     */
    public CloseException(String msg) {
        super(msg);
    }

    /**
     * @param cause underlying failure; the detail message is derived from it
     */
    public CloseException(Throwable cause) {
        super(cause);
    }

    /**
     * @param msg   detail message describing the failure
     * @param cause underlying failure
     */
    public CloseException(String msg, Throwable cause) {
        super(msg, cause);
    }
}

@ -0,0 +1,20 @@
package kr.xit.core.support.xlsx.exceptions;
/**
 * Unchecked exception raised when a requested sheet cannot be found in the workbook.
 * <p>
 * The cause-taking constructors accept any {@link Throwable}, so errors that are not
 * {@link Exception}s can be wrapped as well.
 */
public class MissingSheetException extends RuntimeException {

    /** Creates an exception with neither a detail message nor a cause. */
    public MissingSheetException() {
        super();
    }

    /**
     * @param msg detail message identifying the sheet that was not found
     */
    public MissingSheetException(String msg) {
        super(msg);
    }

    /**
     * @param cause underlying failure; the detail message is derived from it
     */
    public MissingSheetException(Throwable cause) {
        super(cause);
    }

    /**
     * @param msg   detail message identifying the sheet that was not found
     * @param cause underlying failure
     */
    public MissingSheetException(String msg, Throwable cause) {
        super(msg, cause);
    }
}

@ -0,0 +1,20 @@
package kr.xit.core.support.xlsx.exceptions;
/**
 * Unchecked exception thrown by the streaming cell and row implementations for operations
 * they do not provide.
 * <p>
 * The cause-taking constructors accept any {@link Throwable}, so errors that are not
 * {@link Exception}s can be wrapped as well.
 */
public class NotSupportedException extends RuntimeException {

    /** Creates an exception with neither a detail message nor a cause. */
    public NotSupportedException() {
        super();
    }

    /**
     * @param msg detail message naming the unsupported operation
     */
    public NotSupportedException(String msg) {
        super(msg);
    }

    /**
     * @param cause underlying failure; the detail message is derived from it
     */
    public NotSupportedException(Throwable cause) {
        super(cause);
    }

    /**
     * @param msg   detail message naming the unsupported operation
     * @param cause underlying failure
     */
    public NotSupportedException(String msg, Throwable cause) {
        super(msg, cause);
    }
}

@ -0,0 +1,20 @@
package kr.xit.core.support.xlsx.exceptions;
/**
 * Unchecked exception raised when a workbook file cannot be opened.
 * <p>
 * The cause-taking constructors accept any {@link Throwable}, so errors that are not
 * {@link Exception}s can be wrapped as well.
 */
public class OpenException extends RuntimeException {

    /** Creates an exception with neither a detail message nor a cause. */
    public OpenException() {
        super();
    }

    /**
     * @param msg detail message describing the failure
     */
    public OpenException(String msg) {
        super(msg);
    }

    /**
     * @param cause underlying failure; the detail message is derived from it
     */
    public OpenException(Throwable cause) {
        super(cause);
    }

    /**
     * @param msg   detail message describing the failure
     * @param cause underlying failure
     */
    public OpenException(String msg, Throwable cause) {
        super(msg, cause);
    }
}

@ -0,0 +1,20 @@
package kr.xit.core.support.xlsx.exceptions;
/**
 * Unchecked exception raised when workbook XML cannot be parsed or queried.
 * <p>
 * The cause-taking constructors accept any {@link Throwable}, so errors that are not
 * {@link Exception}s can be wrapped as well.
 */
public class ParseException extends RuntimeException {

    /** Creates an exception with neither a detail message nor a cause. */
    public ParseException() {
        super();
    }

    /**
     * @param msg detail message describing the failure
     */
    public ParseException(String msg) {
        super(msg);
    }

    /**
     * @param cause underlying failure; the detail message is derived from it
     */
    public ParseException(Throwable cause) {
        super(cause);
    }

    /**
     * @param msg   detail message describing the failure
     * @param cause underlying failure
     */
    public ParseException(String msg, Throwable cause) {
        super(msg, cause);
    }
}

@ -0,0 +1,20 @@
package kr.xit.core.support.xlsx.exceptions;
/**
 * Unchecked exception raised when a workbook or input stream cannot be read, including
 * decryption failures.
 * <p>
 * The cause-taking constructors accept any {@link Throwable}, so errors that are not
 * {@link Exception}s can be wrapped as well.
 */
public class ReadException extends RuntimeException {

    /** Creates an exception with neither a detail message nor a cause. */
    public ReadException() {
        super();
    }

    /**
     * @param msg detail message describing the failure
     */
    public ReadException(String msg) {
        super(msg);
    }

    /**
     * @param cause underlying failure; the detail message is derived from it
     */
    public ReadException(Throwable cause) {
        super(cause);
    }

    /**
     * @param msg   detail message describing the failure
     * @param cause underlying failure
     */
    public ReadException(String msg, Throwable cause) {
        super(msg, cause);
    }
}

@ -0,0 +1,536 @@
package kr.xit.core.support.xlsx.impl;
import java.time.Instant;
import java.time.LocalDateTime;
import java.time.ZoneOffset;
import java.util.Calendar;
import java.util.Date;
import kr.xit.core.support.xlsx.exceptions.NotSupportedException;
import org.apache.poi.ss.formula.FormulaParseException;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.CellStyle;
import org.apache.poi.ss.usermodel.CellType;
import org.apache.poi.ss.usermodel.Comment;
import org.apache.poi.ss.usermodel.DateUtil;
import org.apache.poi.ss.usermodel.Hyperlink;
import org.apache.poi.ss.usermodel.RichTextString;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.util.CellAddress;
import org.apache.poi.ss.util.CellRangeAddress;
import org.apache.poi.xssf.usermodel.XSSFRichTextString;
public class StreamingCell implements Cell {
private static final Supplier NULL_SUPPLIER = () -> null;
private static final String FALSE_AS_STRING = "0";
private static final String TRUE_AS_STRING = "1";
private final Sheet sheet;
private int columnIndex;
private int rowIndex;
private final boolean use1904Dates;
private Supplier contentsSupplier = NULL_SUPPLIER;
private Object rawContents;
private String formula;
private String numericFormat;
private Short numericFormatIndex;
private String type;
private CellStyle cellStyle;
private Row row;
private boolean formulaType;
public StreamingCell(Sheet sheet, int columnIndex, int rowIndex, boolean use1904Dates) {
this.sheet = sheet;
this.columnIndex = columnIndex;
this.rowIndex = rowIndex;
this.use1904Dates = use1904Dates;
}
public void setContentSupplier(Supplier contentsSupplier) {
this.contentsSupplier = contentsSupplier;
}
public void setRawContents(Object rawContents) {
this.rawContents = rawContents;
}
public String getNumericFormat() {
return numericFormat;
}
public void setNumericFormat(String numericFormat) {
this.numericFormat = numericFormat;
}
public Short getNumericFormatIndex() {
return numericFormatIndex;
}
public void setNumericFormatIndex(Short numericFormatIndex) {
this.numericFormatIndex = numericFormatIndex;
}
public void setFormula(String formula) {
this.formula = formula;
}
public String getType() {
return type;
}
public void setType(String type) {
this.type = type;
}
public boolean isFormulaType() {
return formulaType;
}
public void setFormulaType(boolean formulaType) {
this.formulaType = formulaType;
}
@Override
public void setCellStyle(CellStyle cellStyle) {
this.cellStyle = cellStyle;
}
/* Supported */
/**
* Returns column index of this cell
*
* @return zero-based column index of a column in a sheet.
*/
@Override
public int getColumnIndex() {
return columnIndex;
}
/**
* Returns row index of a row in the sheet that contains this cell
*
* @return zero-based row index of a row in the sheet that contains this cell
*/
@Override
public int getRowIndex() {
return rowIndex;
}
/**
* Returns the Row this cell belongs to. Note that keeping references to cell rows around after
* the iterator window has passed <b>will</b> preserve them.
*
* @return the Row that owns this cell
*/
@Override
public Row getRow() {
return row;
}
/**
* Sets the Row this cell belongs to. Note that keeping references to cell rows around after the
* iterator window has passed <b>will</b> preserve them.
* <p>
* The row is not automatically set.
*
* @param row The row
*/
public void setRow(Row row) {
this.row = row;
}
/**
 * Returns the cell type, derived from the formula flag, the supplied content and the raw
 * type code stored on this cell.
 * <p>
 * A cell flagged as a formula is always {@link CellType#FORMULA}. Otherwise a cell with no
 * content or no type code is {@link CellType#BLANK}; type code {@code n} is numeric,
 * {@code s}, {@code inlineStr} and {@code str} are strings, {@code b} is boolean and
 * {@code e} is an error.
 *
 * @return the cell type
 * @throws UnsupportedOperationException if the type code is not one of the codes above
 */
@Override
public CellType getCellType() {
    if (formulaType) {
        return CellType.FORMULA;
    }
    if (contentsSupplier.getContent() == null || type == null) {
        return CellType.BLANK;
    }
    switch (type) {
        case "n":
            return CellType.NUMERIC;
        case "s":
        case "inlineStr":
        case "str":
            // "str" is always reported as STRING; the former second "str" branch that
            // returned FORMULA could never be reached and has been removed.
            return CellType.STRING;
        case "b":
            return CellType.BOOLEAN;
        case "e":
            return CellType.ERROR;
        default:
            throw new UnsupportedOperationException("Unsupported cell type '" + type + "'");
    }
}
/**
* Get the value of the cell as a string. For blank cells we return an empty string.
*
* @return the value of the cell as a string
*/
@Override
public String getStringCellValue() {
Object c = contentsSupplier.getContent();
return c == null ? "" : c.toString();
}
/**
 * Parses the raw cell contents as a {@code double}. A cell without raw contents yields
 * {@code 0.0}.
 *
 * @return the numeric value of the cell
 * @throws NumberFormatException if the raw contents are not a parsable <code>double</code>
 */
@Override
public double getNumericCellValue() {
    if (rawContents == null) {
        return 0.0;
    }
    final String rawText = (String) rawContents;
    return Double.parseDouble(rawText);
}
/**
 * Converts the numeric cell value to a {@link Date}, honouring the workbook's 1904 date
 * setting. String cells are rejected; a cell without raw contents yields {@code null}.
 *
 * @return the value of the cell as a date, or {@code null} when there are no raw contents
 * @throws IllegalStateException if {@link #getCellType()} reports a string cell
 * @throws NumberFormatException if the raw contents are not a parsable <code>double</code>
 */
@Override
public Date getDateCellValue() {
    if (CellType.STRING == getCellType()) {
        throw new IllegalStateException("Cell type cannot be CELL_TYPE_STRING");
    }
    if (rawContents == null) {
        return null;
    }
    final double excelSerial = getNumericCellValue();
    return DateUtil.getJavaDate(excelSerial, use1904Dates);
}
/**
 * Returns the cell value as a {@link LocalDateTime} in the JVM's default time zone.
 * <p>
 * The value is derived from {@link #getDateCellValue()}; when that method returns
 * {@code null} (a cell without raw contents) this method returns {@code null} too instead of
 * failing with a {@code NullPointerException}.
 *
 * @return the value of the cell as a local date-time, or {@code null} for an empty cell
 * @throws IllegalStateException if {@link #getCellType()} reports a string cell
 * @throws NumberFormatException if the raw contents are not a parsable <code>double</code>
 */
@Override
public LocalDateTime getLocalDateTimeCellValue() {
    final Date date = getDateCellValue();
    if (date == null) {
        return null;
    }
    // systemDefault() is the static method inherited from ZoneId: the JVM default zone.
    return LocalDateTime.ofInstant(Instant.ofEpochMilli(date.getTime()),
            ZoneOffset.systemDefault());
}
/**
 * Returns the cell value as a boolean. Blank cells yield {@code false}; boolean cells yield
 * {@code true} exactly when their raw contents equal {@code "1"}.
 *
 * @return the value of the cell as a boolean
 * @throws NotSupportedException if the cell is a formula cell
 * @throws IllegalStateException if the cell is of any other type
 */
@Override
public boolean getBooleanCellValue() {
    final CellType actualType = getCellType();
    if (actualType == CellType.BLANK) {
        return false;
    }
    if (actualType == CellType.BOOLEAN) {
        // equals(null) is false, so a missing raw value still reads as false.
        return TRUE_AS_STRING.equals(rawContents);
    }
    if (actualType == CellType.FORMULA) {
        throw new NotSupportedException();
    }
    throw typeMismatch(CellType.BOOLEAN, actualType, false);
}
/**
 * Returns the cell value wrapped in an {@link XSSFRichTextString}.
 * <p>
 * Blank cells produce an empty rich string and string cells produce their string value.
 * Every other cell type, formula cells included, is rejected.
 *
 * @return the value of the cell as a XSSFRichTextString
 * @throws NotSupportedException if the cell is neither blank nor a string cell
 */
@Override
public XSSFRichTextString getRichStringCellValue() {
    final CellType actualType = getCellType();
    if (actualType == CellType.BLANK) {
        return new XSSFRichTextString("");
    }
    if (actualType == CellType.STRING) {
        return new XSSFRichTextString(getStringCellValue());
    }
    throw new NotSupportedException();
}
@Override
public Sheet getSheet() {
return sheet;
}
/**
 * Builds (without throwing) the exception that reports a value being requested from a cell
 * of the wrong type.
 *
 * @param expectedType  the type the caller asked for
 * @param actualType    the type the cell really has
 * @param isFormulaCell whether the message should describe the cell as a formula cell
 * @return an {@link IllegalStateException} carrying the formatted message
 */
private static RuntimeException typeMismatch(CellType expectedType, CellType actualType,
        boolean isFormulaCell) {
    final StringBuilder text = new StringBuilder("Cannot get a ");
    text.append(getCellTypeName(expectedType));
    text.append(" value from a ");
    text.append(getCellTypeName(actualType));
    text.append(" ");
    if (isFormulaCell) {
        text.append("formula ");
    }
    text.append("cell");
    return new IllegalStateException(text.toString());
}
/**
* Used to help format error messages
*/
private static String getCellTypeName(CellType cellType) {
switch (cellType) {
case BLANK:
return "blank";
case STRING:
return "text";
case BOOLEAN:
return "boolean";
case ERROR:
return "error";
case NUMERIC:
return "numeric";
case FORMULA:
return "formula";
}
return "#unknown cell type (" + cellType + ")#";
}
/**
* @return the style of the cell
*/
@Override
public CellStyle getCellStyle() {
return this.cellStyle;
}
/**
* Return a formula for the cell, for example, <code>SUM(C4:E4)</code>
*
* @return a formula for the cell
* @throws IllegalStateException if the cell type returned by {@link #getCellType()} is not
* CELL_TYPE_FORMULA
*/
@Override
public String getCellFormula() {
if (!formulaType) {
throw new IllegalStateException("This cell does not have a formula");
}
return formula;
}
/**
 * Reports the type of the cached result of a formula cell. Only valid for formula cells.
 * <p>
 * A formula cell without content or without a type code is reported as
 * {@link CellType#BLANK}; otherwise the type code decides between {@link CellType#NUMERIC},
 * {@link CellType#STRING}, {@link CellType#BOOLEAN} and {@link CellType#ERROR}.
 *
 * @return the type of the cached formula result
 * @throws IllegalStateException         if this cell is not a formula cell
 * @throws UnsupportedOperationException if the type code is not recognised
 */
@Override
public CellType getCachedFormulaResultType() {
    if (!formulaType) {
        throw new IllegalStateException("Only formula cells have cached results");
    }
    if (contentsSupplier.getContent() == null || type == null) {
        return CellType.BLANK;
    }
    switch (type) {
        case "n":
            return CellType.NUMERIC;
        case "s":
        case "inlineStr":
        case "str":
            return CellType.STRING;
        case "b":
            return CellType.BOOLEAN;
        case "e":
            return CellType.ERROR;
        default:
            throw new UnsupportedOperationException("Unsupported cell type '" + type + "'");
    }
}
/* Not supported */
/**
* Not supported
*/
@Override
public void setCellType(CellType cellType) {
throw new NotSupportedException();
}
/**
* Not supported
*/
@Override
public void setCellValue(double value) {
throw new NotSupportedException();
}
/**
* Not supported
*/
@Override
public void setCellValue(Date value) {
throw new NotSupportedException();
}
/**
* Not supported
*/
@Override
public void setCellValue(LocalDateTime value) {
throw new NotSupportedException();
}
/**
* Not supported
*/
@Override
public void setCellValue(Calendar value) {
throw new NotSupportedException();
}
/**
* Not supported
*/
@Override
public void setCellValue(RichTextString value) {
throw new NotSupportedException();
}
/**
* Not supported
*/
@Override
public void setCellValue(String value) {
throw new NotSupportedException();
}
/**
* Not supported
*/
@Override
public void setCellFormula(String formula) throws FormulaParseException {
throw new NotSupportedException();
}
/**
* Not supported
*/
@Override
public void setCellValue(boolean value) {
throw new NotSupportedException();
}
/**
* Not supported
*/
@Override
public void setCellErrorValue(byte value) {
throw new NotSupportedException();
}
/**
* Not supported
*/
@Override
public byte getErrorCellValue() {
throw new NotSupportedException();
}
/**
* Not supported
*/
@Override
public void setAsActiveCell() {
throw new NotSupportedException();
}
/**
* Not supported
*/
@Override
public CellAddress getAddress() {
throw new NotSupportedException();
}
/**
* Not supported
*/
@Override
public void setCellComment(Comment comment) {
throw new NotSupportedException();
}
/**
* Not supported
*/
@Override
public Comment getCellComment() {
throw new NotSupportedException();
}
/**
* Not supported
*/
@Override
public void removeCellComment() {
throw new NotSupportedException();
}
/**
* Not supported
*/
@Override
public Hyperlink getHyperlink() {
throw new NotSupportedException();
}
/**
* Not supported
*/
@Override
public void setHyperlink(Hyperlink link) {
throw new NotSupportedException();
}
/**
* Not supported
*/
@Override
public void removeHyperlink() {
throw new NotSupportedException();
}
/**
* Not supported
*/
@Override
public CellRangeAddress getArrayFormulaRange() {
throw new NotSupportedException();
}
/**
* Not supported
*/
@Override
public boolean isPartOfArrayFormulaGroup() {
throw new NotSupportedException();
}
/**
* Not supported
*/
@Override
public void setBlank() {
throw new NotSupportedException();
}
/**
* Not supported
*/
@Override
public void removeFormula() throws IllegalStateException {
throw new NotSupportedException();
}
}

@ -0,0 +1,263 @@
package kr.xit.core.support.xlsx.impl;
import java.util.Iterator;
import java.util.Map;
import java.util.TreeMap;
import kr.xit.core.support.xlsx.exceptions.NotSupportedException;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.CellStyle;
import org.apache.poi.ss.usermodel.CellType;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
public class StreamingRow implements Row {
private final Sheet sheet;
private int rowIndex;
private boolean isHidden;
private TreeMap<Integer, Cell> cellMap = new TreeMap<>();
public StreamingRow(Sheet sheet, int rowIndex, boolean isHidden) {
this.sheet = sheet;
this.rowIndex = rowIndex;
this.isHidden = isHidden;
}
@Override
public Sheet getSheet() {
return sheet;
}
public Map<Integer, Cell> getCellMap() {
return cellMap;
}
public void setCellMap(TreeMap<Integer, Cell> cellMap) {
this.cellMap = cellMap;
}
/* Supported */
/**
* Get row number this row represents
*
* @return the row number (0 based)
*/
@Override
public int getRowNum() {
return rowIndex;
}
/**
* @return Cell iterator of the physically defined cells for this row.
*/
@Override
public Iterator<Cell> cellIterator() {
return cellMap.values().iterator();
}
/**
* @return Cell iterator of the physically defined cells for this row.
*/
@Override
public Iterator<Cell> iterator() {
return cellMap.values().iterator();
}
/**
* Get the cell representing a given column (logical cell) 0-based. If you ask for a cell that
* is not defined, you get a null.
*
* @param cellnum 0 based column number
* @return Cell representing that column or null if undefined.
*/
@Override
public Cell getCell(int cellnum) {
return cellMap.get(cellnum);
}
/**
 * Returns the column index of the last cell held by this row <b>PLUS ONE</b>.
 *
 * @return the column index of the cell stored under the highest key, plus one, or -1 if the
 * row does not contain any cells.
 */
@Override
public short getLastCellNum() {
    if (cellMap.isEmpty()) {
        return -1;
    }
    final int lastColumn = cellMap.lastEntry().getValue().getColumnIndex();
    return (short) (lastColumn + 1);
}
/**
* Get whether or not to display this row with 0 height
*
* @return - zHeight height is zero or not.
*/
@Override
public boolean getZeroHeight() {
return isHidden;
}
/**
* Gets the number of defined cells (NOT number of cells in the actual row!). That is to say if
* only columns 0,4,5 have values then there would be 3.
*
* @return int representing the number of defined cells in the row.
*/
@Override
public int getPhysicalNumberOfCells() {
return cellMap.size();
}
/**
 * Returns the lowest column key held by this row.
 *
 * @return the smallest key of the cell map as a short, or -1 if the row does not contain
 * any cells.
 */
@Override
public short getFirstCellNum() {
    if (cellMap.isEmpty()) {
        return -1;
    }
    final Integer firstColumn = cellMap.firstKey();
    return firstColumn.shortValue();
}
/**
 * Returns the cell at the given column, applying the given policy to missing or blank cells.
 * <ul>
 * <li>{@code CREATE_NULL_AS_BLANK}: a missing cell is replaced by a new blank
 * {@code StreamingCell} attached to this row; the new cell is not stored in the row.</li>
 * <li>{@code RETURN_BLANK_AS_NULL}: a missing or blank cell yields {@code null}.</li>
 * <li>any other policy: the stored cell is returned as is, {@code null} when missing.</li>
 * </ul>
 *
 * @param cellnum 0 based column number
 * @param policy  how to treat missing and blank cells
 * @return the cell for that column, subject to the policy
 */
@Override
public Cell getCell(int cellnum, MissingCellPolicy policy) {
    // No downcast: the map may hold any Cell implementation (see setCellMap).
    final Cell cell = cellMap.get(cellnum);
    if (policy == MissingCellPolicy.CREATE_NULL_AS_BLANK) {
        if (cell == null) {
            StreamingCell blank = new StreamingCell(sheet, cellnum, rowIndex, false);
            // Attach the owning row so getRow() on the synthesized cell is not null.
            blank.setRow(this);
            return blank;
        }
    } else if (policy == MissingCellPolicy.RETURN_BLANK_AS_NULL) {
        if (cell == null || cell.getCellType() == CellType.BLANK) {
            return null;
        }
    }
    return cell;
}
/* Not supported */
/**
* Not supported
*/
@Override
public Cell createCell(int column) {
throw new NotSupportedException();
}
/**
* Not supported
*/
@Override
public Cell createCell(int i, CellType cellType) {
throw new NotSupportedException();
}
/**
* Not supported
*/
@Override
public void removeCell(Cell cell) {
throw new NotSupportedException();
}
/**
* Not supported
*/
@Override
public void setRowNum(int rowNum) {
throw new NotSupportedException();
}
/**
* Not supported
*/
@Override
public void setHeight(short height) {
throw new NotSupportedException();
}
/**
* Not supported
*/
@Override
public void setZeroHeight(boolean zHeight) {
throw new NotSupportedException();
}
/**
* Not supported
*/
@Override
public void setHeightInPoints(float height) {
throw new NotSupportedException();
}
/**
* Not supported
*/
@Override
public short getHeight() {
throw new NotSupportedException();
}
/**
* Not supported
*/
@Override
public float getHeightInPoints() {
throw new NotSupportedException();
}
/**
* Not supported
*/
@Override
public boolean isFormatted() {
throw new NotSupportedException();
}
/**
* Not supported
*/
@Override
public CellStyle getRowStyle() {
throw new NotSupportedException();
}
/**
* Not supported
*/
@Override
public void setRowStyle(CellStyle style) {
throw new NotSupportedException();
}
/**
* Not supported
*/
@Override
public int getOutlineLevel() {
throw new NotSupportedException();
}
/**
* Not supported
*/
@Override
public void shiftCellsRight(int firstShiftColumnIndex, int lastShiftColumnIndex, int step) {
throw new NotSupportedException();
}
/**
* Not supported
*/
@Override
public void shiftCellsLeft(int firstShiftColumnIndex, int lastShiftColumnIndex, int step) {
throw new NotSupportedException();
}
}

@ -0,0 +1,434 @@
package kr.xit.core.support.xlsx.impl;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import javax.xml.namespace.QName;
import javax.xml.stream.XMLEventReader;
import javax.xml.stream.XMLStreamConstants;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.events.Attribute;
import javax.xml.stream.events.Characters;
import javax.xml.stream.events.EndElement;
import javax.xml.stream.events.StartElement;
import javax.xml.stream.events.XMLEvent;
import kr.xit.core.support.xlsx.exceptions.CloseException;
import kr.xit.core.support.xlsx.exceptions.ParseException;
import org.apache.poi.ss.usermodel.BuiltinFormats;
import org.apache.poi.ss.usermodel.DataFormatter;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.util.CellReference;
import org.apache.poi.xssf.model.SharedStringsTable;
import org.apache.poi.xssf.model.StylesTable;
import org.apache.poi.xssf.usermodel.XSSFCellStyle;
import org.apache.poi.xssf.usermodel.XSSFRichTextString;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public class StreamingSheetReader implements Iterable<Row> {
private static final Logger log = LoggerFactory.getLogger(StreamingSheetReader.class);
private final SharedStringsTable sst;
private final StylesTable stylesTable;
private final XMLEventReader parser;
private final DataFormatter dataFormatter = new DataFormatter();
private final Set<Integer> hiddenColumns = new HashSet<>();
private int lastRowNum;
private int currentRowNum;
private int firstColNum = 0;
private int currentColNum;
private int rowCacheSize;
private List<Row> rowCache = new ArrayList<>();
private Iterator<Row> rowCacheIterator;
private String lastContents;
private Sheet sheet;
private StreamingRow currentRow;
private StreamingCell currentCell;
private boolean use1904Dates;
/**
 * Creates a reader over one sheet's XML event stream. Nothing is read here; the arguments are
 * only stored.
 *
 * @param sst          shared strings table used to resolve cells of type "s"
 * @param stylesTable  styles table used to look up cell styles and number formats
 * @param parser       StAX event reader that supplies the sheet XML
 * @param use1904Dates flag handed to every cell this reader creates
 * @param rowCacheSize maximum number of rows buffered by one read of the stream
 */
public StreamingSheetReader(SharedStringsTable sst, StylesTable stylesTable,
XMLEventReader parser,
final boolean use1904Dates, int rowCacheSize) {
this.sst = sst;
this.stylesTable = stylesTable;
this.parser = parser;
this.use1904Dates = use1904Dates;
this.rowCacheSize = rowCacheSize;
}
/**
 * Sets the sheet that is passed to every row and cell created by this reader.
 *
 * @param sheet the owning sheet
 */
void setSheet(StreamingSheet sheet) {
this.sheet = sheet;
}
/**
 * Refills the row cache: consumes parser events until either {@code rowCacheSize} rows have
 * been collected or the stream is exhausted, then points {@code rowCacheIterator} at the new
 * batch.
 *
 * @return true if at least one row was read
 * @throws ParseException if the underlying XML stream cannot be read
 */
private boolean getRow() {
    try {
        rowCache.clear();
        for (;;) {
            // Check the cache first so the parser is not touched once the batch is full.
            if (rowCache.size() >= rowCacheSize || !parser.hasNext()) {
                break;
            }
            handleEvent(parser.nextEvent());
        }
        rowCacheIterator = rowCache.iterator();
        return rowCacheIterator.hasNext();
    } catch (XMLStreamException streamFailure) {
        throw new ParseException("Error reading XML stream", streamFailure);
    }
}
/**
 * Splits a cell reference such as {@code "AB12"} into its column letters and row digits.
 *
 * @param ref the cell reference taken from a cell's {@code r} attribute
 * @return a two-element array: column part at index 0, row part at index 1
 * @throws IllegalArgumentException if the reference contains no digit after its first character
 */
private String[] splitCellRef(String ref) {
    int splitPos = -1;
    // start at pos 1, since the first char is expected to always be a letter
    for (int i = 1; i < ref.length(); i++) {
        char c = ref.charAt(i);
        if (c >= '0' && c <= '9') {
            splitPos = i;
            break;
        }
    }
    if (splitPos < 0) {
        // Without this check substring(0, -1) would fail with an index error that does not
        // mention the offending reference.
        throw new IllegalArgumentException(
            "Invalid cell reference '" + ref + "': no row number found");
    }
    return new String[]{
        ref.substring(0, splitPos),
        ref.substring(splitPos)
    };
}
/**
 * Handles a single StAX event from the sheet XML.
 * <ul>
 * <li>Character data is appended to {@code lastContents}.
 * <li>Start and end elements are processed only when they belong to a spreadsheet "main"
 * namespace (see {@link #isSpreadsheetTag(QName)}); every such start element resets
 * {@code lastContents}.
 * </ul>
 *
 * @param event the event just read from the parser
 */
private void handleEvent(XMLEvent event) {
    int eventType = event.getEventType();
    if (eventType == XMLStreamConstants.CHARACTERS) {
        String data = event.asCharacters().getData();
        // lastContents stays null until the first spreadsheet element has been seen; plain
        // concatenation would then produce a literal "null" prefix.
        lastContents = (lastContents == null) ? data : lastContents + data;
    } else if (eventType == XMLStreamConstants.START_ELEMENT
        && isSpreadsheetTag(event.asStartElement().getName())) {
        handleStartElement(event.asStartElement());
        // Clear contents cache
        lastContents = "";
    } else if (eventType == XMLStreamConstants.END_ELEMENT
        && isSpreadsheetTag(event.asEndElement().getName())) {
        handleEndElement(event.asEndElement());
    }
}

/** Dispatches a spreadsheet start element to the handler for its local name. */
private void handleStartElement(StartElement startElement) {
    String tagLocalName = startElement.getName().getLocalPart();
    if ("row".equals(tagLocalName)) {
        startRow(startElement);
    } else if ("col".equals(tagLocalName)) {
        recordHiddenColumns(startElement);
    } else if ("c".equals(tagLocalName)) {
        startCell(startElement);
    } else if ("dimension".equals(tagLocalName)) {
        readDimension(startElement);
    } else if ("f".equals(tagLocalName)) {
        if (currentCell != null) {
            currentCell.setFormulaType(true);
        }
    }
}

/** Applies the state changes triggered by a spreadsheet end element. */
private void handleEndElement(EndElement endElement) {
    String tagLocalName = endElement.getName().getLocalPart();
    if ("v".equals(tagLocalName) || "t".equals(tagLocalName)) {
        // Guarded like the "f" handling: a value/text element outside of a cell is ignored
        // instead of failing with a NullPointerException.
        if (currentCell != null) {
            currentCell.setRawContents(unformattedContents());
            currentCell.setContentSupplier(formattedContents());
        }
    } else if ("row".equals(tagLocalName) && currentRow != null) {
        rowCache.add(currentRow);
        currentRowNum++;
    } else if ("c".equals(tagLocalName)) {
        // A cell can only be stored when both the cell and its row have been started.
        if (currentCell != null && currentRow != null) {
            currentRow.getCellMap().put(currentCell.getColumnIndex(), currentCell);
        }
        currentCell = null;
        currentColNum++;
    } else if ("f".equals(tagLocalName)) {
        if (currentCell != null) {
            currentCell.setFormula(lastContents);
        }
    }
}

/** Returns true when the element's {@code hidden} attribute is "1" or "true". */
private boolean hasHiddenFlag(StartElement element) {
    Attribute hiddenAttr = element.getAttributeByName(new QName("hidden"));
    return hiddenAttr != null
        && ("1".equals(hiddenAttr.getValue()) || "true".equals(hiddenAttr.getValue()));
}

/** Starts a new current row; its index comes from the {@code r} attribute when present. */
private void startRow(StartElement rowElement) {
    Attribute rowNumAttr = rowElement.getAttributeByName(new QName("r"));
    if (rowNumAttr != null) {
        // The attribute is 1-based, row indexes are 0-based.
        currentRowNum = Integer.parseInt(rowNumAttr.getValue()) - 1;
    }
    currentRow = new StreamingRow(sheet, currentRowNum, hasHiddenFlag(rowElement));
    currentColNum = firstColNum;
}

/** Records every column index covered by a hidden {@code col} element. */
private void recordHiddenColumns(StartElement colElement) {
    if (!hasHiddenFlag(colElement)) {
        return;
    }
    Attribute minAttr = colElement.getAttributeByName(new QName("min"));
    Attribute maxAttr = colElement.getAttributeByName(new QName("max"));
    if (minAttr == null || maxAttr == null) {
        // Without both bounds there is no range to record.
        return;
    }
    int min = Integer.parseInt(minAttr.getValue()) - 1;
    int max = Integer.parseInt(maxAttr.getValue()) - 1;
    for (int columnIndex = min; columnIndex <= max; columnIndex++) {
        hiddenColumns.add(columnIndex);
    }
}

/** Creates the current cell and sets its number format, type and style from the element. */
private void startCell(StartElement cellElement) {
    Attribute ref = cellElement.getAttributeByName(new QName("r"));
    if (ref != null) {
        String[] coord = splitCellRef(ref.getValue());
        currentColNum = CellReference.convertColStringToIndex(coord[0]);
        currentCell = new StreamingCell(sheet, currentColNum,
            Integer.parseInt(coord[1]) - 1, use1904Dates);
    } else {
        // No explicit reference: fall back to the running column/row counters.
        currentCell = new StreamingCell(sheet, currentColNum, currentRowNum,
            use1904Dates);
    }
    setFormatString(cellElement, currentCell);

    Attribute type = cellElement.getAttributeByName(new QName("t"));
    // A cell without a type attribute is treated as numeric.
    currentCell.setType(type != null ? type.getValue() : "n");

    Attribute style = cellElement.getAttributeByName(new QName("s"));
    if (style != null) {
        String indexStr = style.getValue();
        try {
            int index = Integer.parseInt(indexStr);
            currentCell.setCellStyle(stylesTable.getStyleAt(index));
        } catch (NumberFormatException nfe) {
            log.warn("Ignoring invalid style index {}", indexStr);
        }
    } else {
        currentCell.setCellStyle(stylesTable.getStyleAt(0));
    }
}

/** Derives {@code lastRowNum} and {@code firstColNum} from the {@code dimension} element. */
private void readDimension(StartElement dimensionElement) {
    Attribute refAttr = dimensionElement.getAttributeByName(new QName("ref"));
    String ref = refAttr != null ? refAttr.getValue() : null;
    if (ref == null) {
        return;
    }
    // ref is formatted as A1 or A1:F25. Take the last numbers of this string and use it as lastRowNum
    for (int i = ref.length() - 1; i >= 0; i--) {
        if (!Character.isDigit(ref.charAt(i))) {
            try {
                lastRowNum = Integer.parseInt(ref.substring(i + 1)) - 1;
            } catch (NumberFormatException ignored) {
                // No trailing row number; lastRowNum keeps its previous value.
            }
            break;
        }
    }
    // The leading letters of ref name the first column.
    for (int i = 0; i < ref.length(); i++) {
        if (!Character.isAlphabetic(ref.charAt(i))) {
            firstColNum = CellReference.convertColStringToIndex(
                ref.substring(0, i));
            break;
        }
    }
}
/**
 * Tells whether an element name lives in a namespace whose URI ends with {@code /main}, which
 * covers the SpreadsheetML main namespaces:
 * <ul>
 * <li>http://schemas.openxmlformats.org/spreadsheetml/2006/main
 * <li>http://purl.oclc.org/ooxml/spreadsheetml/main
 * </ul>
 * and excludes e.g. http://schemas.openxmlformats.org/drawingml/2006/spreadsheetDrawing.
 *
 * @param name the qualified element name to test
 * @return true if the namespace URI is non-null and ends with {@code /main}
 */
private boolean isSpreadsheetTag(QName name) {
    String namespaceUri = name.getNamespaceURI();
    if (namespaceUri == null) {
        return false;
    }
    return namespaceUri.endsWith("/main");
}
/**
 * Get the hidden state for a given column. If nothing has been read from the stream yet, the
 * first batch of rows is loaded so that {@code col} elements seen on the way are recorded.
 *
 * @param columnIndex - the column to query (0-based)
 * @return hidden - <code>false</code> if the column is visible
 */
boolean isColumnHidden(int columnIndex) {
    boolean nothingReadYet = (rowCacheIterator == null);
    if (nothingReadYet) {
        getRow();
    }
    return hiddenColumns.contains(columnIndex);
}
/**
 * Gets the index of the last row on the sheet, as derived from the {@code dimension} element.
 * If nothing has been read from the stream yet, the first batch of rows is loaded first.
 *
 * @return the value of {@code lastRowNum}
 */
int getLastRowNum() {
    boolean nothingReadYet = (rowCacheIterator == null);
    if (nothingReadYet) {
        getRow();
    }
    return lastRowNum;
}
/**
 * Reads the numeric format out of the styles table for this cell and stores the format index
 * and format string in the cell.
 * <p>
 * The style is taken from the element's {@code s} attribute; without that attribute style 0 is
 * used when the styles table has any styles. When no style can be resolved, including when the
 * {@code s} attribute is not a valid integer, both values are set to {@code null}.
 *
 * @param startElement the {@code c} start element
 * @param cell         the cell receiving the format information
 */
void setFormatString(StartElement startElement, StreamingCell cell) {
    Attribute cellStyle = startElement.getAttributeByName(new QName("s"));
    String cellStyleString = (cellStyle != null) ? cellStyle.getValue() : null;
    XSSFCellStyle style = null;
    if (cellStyleString != null) {
        try {
            style = stylesTable.getStyleAt(Integer.parseInt(cellStyleString));
        } catch (NumberFormatException nfe) {
            // An unparsable index must not abort parsing of the sheet; the cell simply gets
            // no number format.
            log.debug("No number format for invalid style index {}", cellStyleString);
        }
    } else if (stylesTable.getNumCellStyles() > 0) {
        style = stylesTable.getStyleAt(0);
    }
    if (style != null) {
        cell.setNumericFormatIndex(style.getDataFormat());
        String formatString = style.getDataFormatString();
        if (formatString != null) {
            cell.setNumericFormat(formatString);
        } else {
            // Fall back to the built-in format registered under the same index.
            cell.setNumericFormat(
                BuiltinFormats.getBuiltinFormat(cell.getNumericFormatIndex()));
        }
    } else {
        cell.setNumericFormatIndex(null);
        cell.setNumericFormat(null);
    }
}
/**
 * Builds the content supplier for the current cell from {@code lastContents}, choosing the
 * formatting by the current cell's type.
 *
 * @return supplier of the formatted content
 */
Supplier formattedContents() {
    String cellType = currentCell.getType();
    return getFormatterForType(cellType);
}
/**
 * Builds a content supplier for {@code lastContents} according to the given cell type and the
 * numeric format discovered for the current cell.
 *
 * @param type the cell type code ("s", "inlineStr", "str", "e", "n", ...)
 * @return supplier of the formatted content
 */
private Supplier getFormatterForType(String type) {
    switch (type) {
        case "s": { // index into the shared strings table
            if (lastContents.isEmpty()) {
                return new StringSupplier(lastContents);
            }
            int sharedIndex = Integer.parseInt(lastContents);
            return new StringSupplier(sst.getItemAt(sharedIndex).toString());
        }
        case "inlineStr": // string stored in the cell itself
        case "str":
            return new StringSupplier(new XSSFRichTextString(lastContents).toString());
        case "e": // error value
            return new StringSupplier("ERROR: " + lastContents);
        case "n": { // number
            boolean formattable =
                currentCell.getNumericFormat() != null && lastContents.length() > 0;
            if (!formattable) {
                return new StringSupplier(lastContents);
            }
            // Formatting is costly on large sheets, so it is deferred until the value is
            // requested; the inputs are captured now because the reader's state moves on.
            final String rawNumber = lastContents;
            final int formatIndex = currentCell.getNumericFormatIndex();
            final String formatString = currentCell.getNumericFormat();
            return new Supplier() {
                String formatted;

                @Override
                public Object getContent() {
                    if (formatted == null) {
                        formatted = dataFormatter.formatRawCellContents(
                            Double.parseDouble(rawNumber),
                            formatIndex,
                            formatString);
                    }
                    return formatted;
                }
            };
        }
        default:
            return new StringSupplier(lastContents);
    }
}
/**
 * Produces the raw text of the current cell from {@code lastContents}; no number formatting is
 * applied. Shared-string cells are resolved through the shared strings table.
 *
 * @return the unformatted cell content
 */
String unformattedContents() {
    final String cellType = currentCell.getType();
    switch (cellType) {
        case "s": { // index into the shared strings table
            if (lastContents.isEmpty()) {
                return lastContents;
            }
            int sharedIndex = Integer.parseInt(lastContents);
            return sst.getItemAt(sharedIndex).toString();
        }
        case "inlineStr": // string stored in the cell itself
            return new XSSFRichTextString(lastContents).toString();
        default:
            return lastContents;
    }
}
/**
 * Returns a new streaming iterator to loop through rows. This iterator is not guaranteed to
 * have all rows in memory, and any particular iteration may trigger a read of further data
 * from the underlying parser.
 * <p>
 * All iterators obtained from this reader operate on the reader's single row cache and parser,
 * so they share one position in the stream.
 *
 * @return the streaming iterator
 */
@Override
public Iterator<Row> iterator() {
return new StreamingRowIterator();
}
/**
 * Closes the underlying XML event reader.
 *
 * @throws CloseException if the parser reports an {@link XMLStreamException} while closing
 */
public void close() {
try {
parser.close();
} catch (XMLStreamException e) {
throw new CloseException(e);
}
}
/**
 * Iterator over the rows of the enclosing reader. It works on the reader's shared row cache
 * and refills that cache from the parser whenever the current batch is used up.
 */
class StreamingRowIterator implements Iterator<Row> {

    /** Loads the first batch of rows if the reader has not read anything yet. */
    public StreamingRowIterator() {
        if (rowCacheIterator == null) {
            hasNext();
        }
    }

    /**
     * @return true if the current batch still has rows, or a further batch could be read
     */
    @Override
    public boolean hasNext() {
        return (rowCacheIterator != null && rowCacheIterator.hasNext()) || getRow();
    }

    /**
     * Returns the next row, reading a further batch from the parser when the current batch is
     * exhausted, so that callers need not call {@link #hasNext()} first.
     *
     * @throws java.util.NoSuchElementException if the sheet has no more rows
     */
    @Override
    public Row next() {
        if (!hasNext()) {
            throw new java.util.NoSuchElementException();
        }
        return rowCacheIterator.next();
    }

    /**
     * Not supported.
     *
     * @throws UnsupportedOperationException always
     */
    @Override
    public void remove() {
        throw new UnsupportedOperationException("NotSupported");
    }
}
}

@ -0,0 +1,518 @@
package kr.xit.core.support.xlsx.impl;
import java.io.IOException;
import java.io.OutputStream;
import java.util.Iterator;
import java.util.List;
import kr.xit.core.support.xlsx.exceptions.MissingSheetException;
import org.apache.poi.ss.SpreadsheetVersion;
import org.apache.poi.ss.formula.EvaluationWorkbook;
import org.apache.poi.ss.formula.udf.UDFFinder;
import org.apache.poi.ss.usermodel.CellStyle;
import org.apache.poi.ss.usermodel.CreationHelper;
import org.apache.poi.ss.usermodel.DataFormat;
import org.apache.poi.ss.usermodel.Font;
import org.apache.poi.ss.usermodel.Name;
import org.apache.poi.ss.usermodel.PictureData;
import org.apache.poi.ss.usermodel.Row.MissingCellPolicy;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.SheetVisibility;
import org.apache.poi.ss.usermodel.Workbook;
public class StreamingWorkbook implements Workbook, AutoCloseable {
private final StreamingWorkbookReader reader;
/**
 * Creates a workbook view over the given reader; every supported operation delegates to it.
 *
 * @param reader the reader that holds the sheets and sheet properties
 */
public StreamingWorkbook(StreamingWorkbookReader reader) {
this.reader = reader;
}
/**
 * Searches the reader's sheet properties for a sheet with exactly the given name.
 *
 * @param name the sheet name to look for
 * @return the 0-based position of the first match, or -1 if there is none
 */
int findSheetByName(String name) {
    int position = 0;
    while (position < reader.getSheetProperties().size()) {
        String candidate = reader.getSheetProperties().get(position).get("name");
        if (candidate.equals(name)) {
            return position;
        }
        position++;
    }
    return -1;
}
/* Supported */
/**
 * {@inheritDoc}
 * <p>
 * Delegates to the iterator of the underlying reader.
 */
@Override
public Iterator<Sheet> iterator() {
return reader.iterator();
}
/**
 * {@inheritDoc}
 * <p>
 * Same as {@link #iterator()}.
 */
@Override
public Iterator<Sheet> sheetIterator() {
return iterator();
}
/**
 * {@inheritDoc}
 * <p>
 * The name is read from the "name" entry of the reader's sheet properties at the given index.
 */
@Override
public String getSheetName(int sheet) {
return reader.getSheetProperties().get(sheet).get("name");
}
/**
 * {@inheritDoc}
 * <p>
 * Delegates to {@code findSheetByName}, so the result is -1 when no sheet has that name.
 */
@Override
public int getSheetIndex(String name) {
return findSheetByName(name);
}
/**
 * {@inheritDoc}
 * <p>
 * The sheet is located by its name, so the result is -1 when no sheet with that name is known.
 *
 * @throws UnsupportedOperationException if the sheet is not a {@link StreamingSheet}
 */
@Override
public int getSheetIndex(Sheet sheet) {
    if (!(sheet instanceof StreamingSheet)) {
        throw new UnsupportedOperationException("Cannot use non-StreamingSheet sheets");
    }
    String sheetName = sheet.getSheetName();
    return findSheetByName(sheetName);
}
/**
 * {@inheritDoc}
 * <p>
 * Counts the sheets held by the underlying reader.
 */
@Override
public int getNumberOfSheets() {
return reader.getSheets().size();
}
/**
 * {@inheritDoc}
 * <p>
 * Returns the entry at the given position of the reader's sheet list.
 */
@Override
public Sheet getSheetAt(int index) {
return reader.getSheets().get(index);
}
/**
 * {@inheritDoc}
 *
 * @throws MissingSheetException if no sheet with the given name exists
 */
@Override
public Sheet getSheet(String name) {
    final int position = getSheetIndex(name);
    if (position != -1) {
        return reader.getSheets().get(position);
    }
    throw new MissingSheetException("Sheet '" + name + "' does not exist");
}
/**
 * {@inheritDoc}
 * <p>
 * True exactly when the sheet's "state" property equals {@code "hidden"}.
 */
@Override
public boolean isSheetHidden(int sheetIx) {
    String state = reader.getSheetProperties().get(sheetIx).get("state");
    return "hidden".equals(state);
}
/**
 * {@inheritDoc}
 * <p>
 * True exactly when the sheet's "state" property equals {@code "veryHidden"}.
 */
@Override
public boolean isSheetVeryHidden(int sheetIx) {
    String state = reader.getSheetProperties().get(sheetIx).get("state");
    return "veryHidden".equals(state);
}
/**
 * {@inheritDoc}
 * <p>
 * Closes the underlying reader.
 */
@Override
public void close() throws IOException {
reader.close();
}
/* Not supported */
/**
* Not supported
*/
@Override
public int getActiveSheetIndex() {
throw new UnsupportedOperationException();
}
/**
* Not supported
*/
@Override
public void setActiveSheet(int sheetIndex) {
throw new UnsupportedOperationException();
}
/**
* Not supported
*/
@Override
public int getFirstVisibleTab() {
throw new UnsupportedOperationException();
}
/**
* Not supported
*/
@Override
public void setFirstVisibleTab(int sheetIndex) {
throw new UnsupportedOperationException();
}
/**
* Not supported
*/
@Override
public void setSheetOrder(String sheetname, int pos) {
throw new UnsupportedOperationException();
}
/**
* Not supported
*/
@Override
public void setSelectedTab(int index) {
throw new UnsupportedOperationException();
}
/**
* Not supported
*/
@Override
public void setSheetName(int sheet, String name) {
throw new UnsupportedOperationException();
}
/**
* Not supported
*/
@Override
public Sheet createSheet() {
throw new UnsupportedOperationException();
}
/**
* Not supported
*/
@Override
public Sheet createSheet(String sheetname) {
throw new UnsupportedOperationException();
}
/**
* Not supported
*/
@Override
public Sheet cloneSheet(int sheetNum) {
throw new UnsupportedOperationException();
}
/**
* Not supported
*/
@Override
public void removeSheetAt(int index) {
throw new UnsupportedOperationException();
}
/**
* Not supported
*/
@Override
public Font createFont() {
throw new UnsupportedOperationException();
}
/**
* Not supported
*/
@Override
public Font findFont(boolean b, short i, short i1, String s, boolean b1, boolean b2, short i2,
byte b3) {
throw new UnsupportedOperationException();
}
@Override
public int getNumberOfFonts() {
throw new UnsupportedOperationException();
}
/**
* Not supported
*/
@Override
public int getNumberOfFontsAsInt() {
throw new UnsupportedOperationException();
}
/**
* Not supported
*/
@Override
public Font getFontAt(int i) {
throw new UnsupportedOperationException();
}
/**
* Not supported
*/
@Override
public CellStyle createCellStyle() {
throw new UnsupportedOperationException();
}
/**
* Not supported
*/
@Override
public int getNumCellStyles() {
throw new UnsupportedOperationException();
}
/**
* Not supported
*/
@Override
public CellStyle getCellStyleAt(int i) {
throw new UnsupportedOperationException();
}
/**
* Not supported
*/
@Override
public void write(OutputStream stream) throws IOException {
throw new UnsupportedOperationException();
}
/**
* Not supported
*/
@Override
public int getNumberOfNames() {
throw new UnsupportedOperationException();
}
/**
* Not supported
*/
@Override
public Name getName(String name) {
throw new UnsupportedOperationException();
}
/**
* Not supported
*/
@Override
public List<? extends Name> getNames(String s) {
throw new UnsupportedOperationException();
}
/**
* Not supported
*/
@Override
public List<? extends Name> getAllNames() {
throw new UnsupportedOperationException();
}
/**
* Not supported
*/
@Override
public Name createName() {
throw new UnsupportedOperationException();
}
/**
* Not supported
*/
@Override
public void removeName(Name name) {
throw new UnsupportedOperationException();
}
/**
* Not supported
*/
@Override
public int linkExternalWorkbook(String name, Workbook workbook) {
throw new UnsupportedOperationException();
}
/**
* Not supported
*/
@Override
public void setPrintArea(int sheetIndex, String reference) {
throw new UnsupportedOperationException();
}
/**
* Not supported
*/
@Override
public void setPrintArea(int sheetIndex, int startColumn, int endColumn, int startRow,
int endRow) {
throw new UnsupportedOperationException();
}
/**
* Not supported
*/
@Override
public String getPrintArea(int sheetIndex) {
throw new UnsupportedOperationException();
}
/**
* Not supported
*/
@Override
public void removePrintArea(int sheetIndex) {
throw new UnsupportedOperationException();
}
/**
* Not supported
*/
@Override
public MissingCellPolicy getMissingCellPolicy() {
throw new UnsupportedOperationException();
}
/**
* Not supported
*/
@Override
public void setMissingCellPolicy(MissingCellPolicy missingCellPolicy) {
throw new UnsupportedOperationException();
}
/**
* Not supported
*/
@Override
public DataFormat createDataFormat() {
throw new UnsupportedOperationException();
}
/**
* Not supported
*/
@Override
public int addPicture(byte[] pictureData, int format) {
throw new UnsupportedOperationException();
}
/**
* Not supported
*/
@Override
public List<? extends PictureData> getAllPictures() {
throw new UnsupportedOperationException();
}
/**
* Not supported
*/
@Override
public CreationHelper getCreationHelper() {
throw new UnsupportedOperationException();
}
/**
* Not supported
*/
@Override
public boolean isHidden() {
throw new UnsupportedOperationException();
}
/**
* Not supported
*/
@Override
public void setHidden(boolean hiddenFlag) {
throw new UnsupportedOperationException();
}
/**
* Not supported
*/
@Override
public void setSheetHidden(int sheetIx, boolean hidden) {
throw new UnsupportedOperationException();
}
/**
* Not supported
*/
@Override
public SheetVisibility getSheetVisibility(int i) {
throw new UnsupportedOperationException();
}
/**
* Not supported
*/
@Override
public void setSheetVisibility(int i, SheetVisibility sheetVisibility) {
throw new UnsupportedOperationException();
}
/**
* Not supported
*/
@Override
public void addToolPack(UDFFinder toopack) {
throw new UnsupportedOperationException();
}
/**
* Not supported
*/
@Override
public void setForceFormulaRecalculation(boolean value) {
throw new UnsupportedOperationException();
}
/**
* Not supported
*/
@Override
public boolean getForceFormulaRecalculation() {
throw new UnsupportedOperationException();
}
/**
* Not supported
*/
@Override
public SpreadsheetVersion getSpreadsheetVersion() {
throw new UnsupportedOperationException();
}
/**
* Not supported
*/
@Override
public int addOlePackage(byte[] bytes, String s, String s1, String s2) throws IOException {
throw new UnsupportedOperationException();
}
/**
 * Not supported. Unlike the other unsupported operations of this class, this one does not
 * throw: it returns {@code null}.
 *
 * @return always {@code null}
 */
@Override
public EvaluationWorkbook createEvaluationWorkbook() {
return null;
}
}

@ -0,0 +1,245 @@
package kr.xit.core.support.xlsx.impl;
import static java.util.Arrays.asList;
import static kr.xit.core.support.xlsx.XmlUtils.document;
import static kr.xit.core.support.xlsx.XmlUtils.searchForNodeList;
import static kr.xit.core.support.xlsx.impl.TempFileUtil.writeInputStreamToFile;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.net.URI;
import java.nio.file.Files;
import java.security.GeneralSecurityException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import javax.xml.stream.XMLEventReader;
import javax.xml.stream.XMLStreamException;
import kr.xit.core.support.xlsx.StreamingReader.Builder;
import kr.xit.core.support.xlsx.exceptions.OpenException;
import kr.xit.core.support.xlsx.exceptions.ReadException;
import kr.xit.core.support.xlsx.sst.BufferedStringsTable;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.poifs.crypt.Decryptor;
import org.apache.poi.poifs.crypt.EncryptionInfo;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.util.StaxHelper;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.eventusermodel.XSSFReader.SheetIterator;
import org.apache.poi.xssf.model.SharedStringsTable;
import org.apache.poi.xssf.model.StylesTable;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
public class StreamingWorkbookReader implements Iterable<Sheet>, AutoCloseable {
private static final Logger log = LoggerFactory.getLogger(StreamingWorkbookReader.class);
private final List<StreamingSheet> sheets;
private final List<Map<String, String>> sheetProperties = new ArrayList<>();
private final Builder builder;
private File tmp;
private File sstCache;
private OPCPackage pkg;
private SharedStringsTable sst;
private boolean use1904Dates = false;
/**
 * This constructor exists only so the StreamingReader can instantiate a StreamingWorkbook using
 * its own reader implementation. Do not use going forward.
 * <p>
 * The resulting reader holds exactly one sheet, created with a {@code null} name around the
 * given sheet reader.
 *
 * @param sst The SST data for this workbook
 * @param sstCache The backing cache file for the SST data
 * @param pkg The POI package that should be closed when this workbook is closed
 * @param reader A single streaming reader instance
 * @param builder The builder containing all options
 */
@Deprecated
public StreamingWorkbookReader(SharedStringsTable sst, File sstCache, OPCPackage pkg,
StreamingSheetReader reader, Builder builder) {
this.sst = sst;
this.sstCache = sstCache;
this.pkg = pkg;
this.sheets = asList(new StreamingSheet(null, reader));
this.builder = builder;
}
/**
 * Creates a reader without any sheets; one of the {@code init} methods must be called to open
 * a workbook and load its sheets.
 *
 * @param builder The builder containing all options
 */
public StreamingWorkbookReader(Builder builder) {
this.sheets = new ArrayList<>();
this.builder = builder;
}
/**
 * Returns the sheet reader of the first sheet in the sheet list.
 *
 * @return the reader of the sheet at position 0
 */
public StreamingSheetReader first() {
return sheets.get(0).getReader();
}
/**
 * Opens a workbook from a stream: the stream is first spooled to a temporary file, which is
 * then opened through {@link #init(File)}. The temporary file is remembered so that it can be
 * removed when this reader is closed, and is deleted right away if opening fails with a
 * runtime exception.
 *
 * @param is the workbook data
 * @throws ReadException if the stream cannot be copied to the temporary file
 */
public void init(InputStream is) {
    File spooled = null;
    try {
        spooled = writeInputStreamToFile(is, builder.getBufferSize());
        log.debug("Created temp file [" + spooled.getAbsolutePath() + "]");
        init(spooled);
        tmp = spooled;
    } catch (IOException copyFailure) {
        throw new ReadException("Unable to read input stream", copyFailure);
    } catch (RuntimeException openFailure) {
        if (spooled != null) {
            spooled.delete();
        }
        throw openFailure;
    }
}
/**
 * Opens the workbook stored in the given file and loads its sheets.
 * <p>
 * When the builder carries a password the file is treated as an encrypted OLE2 container: the
 * password is verified and the decrypted package is read from the decrypted stream. Otherwise
 * the file is opened directly as an OPC package. Afterwards the shared strings (file-backed
 * when an SST cache size is configured), the styles and the {@code date1904} workbook setting
 * are read, and one sheet reader is created per sheet.
 *
 * @param f the workbook file
 * @throws OpenException if the file cannot be opened or read
 * @throws ReadException if the workbook content cannot be parsed, or decryption fails
 *                       (including a wrong password)
 */
public void init(File f) {
    try {
        if (builder.getPassword() != null) {
            // Based on: https://poi.apache.org/encryption.html
            // Container and decrypted stream are closed once the package has been opened
            // from the stream.
            try (POIFSFileSystem poifs = new POIFSFileSystem(f)) {
                EncryptionInfo info = new EncryptionInfo(poifs);
                Decryptor d = Decryptor.getInstance(info);
                if (!d.verifyPassword(builder.getPassword())) {
                    // Reported through the GeneralSecurityException handler below.
                    throw new GeneralSecurityException("Password verification failed");
                }
                try (InputStream decrypted = d.getDataStream(poifs)) {
                    pkg = OPCPackage.open(decrypted);
                }
            }
        } else {
            pkg = OPCPackage.open(f);
        }
        XSSFReader reader = new XSSFReader(pkg);
        if (builder.getSstCacheSizeBytes() > 0) {
            sstCache = Files.createTempFile("", "").toFile();
            log.debug("Created sst cache file [{}]", sstCache.getAbsolutePath());
            sst = BufferedStringsTable.getSharedStringsTable(sstCache,
                builder.getSstCacheSizeBytes(), pkg);
        } else {
            sst = reader.getSharedStringsTable();
        }
        StylesTable styles = reader.getStylesTable();
        NodeList workbookPr;
        try (InputStream workbookData = reader.getWorkbookData()) {
            workbookPr = searchForNodeList(document(workbookData),
                "/ss:workbook/ss:workbookPr");
        }
        if (workbookPr.getLength() == 1) {
            final Node date1904 = workbookPr.item(0).getAttributes().getNamedItem("date1904");
            if (date1904 != null) {
                // Accept both spellings of a true boolean attribute, as the sheet reader
                // does for the "hidden" attribute.
                String flag = date1904.getTextContent();
                use1904Dates = "1".equals(flag) || "true".equals(flag);
            }
        }
        loadSheets(reader, sst, styles, builder.getRowCacheSize());
    } catch (IOException e) {
        throw new OpenException("Failed to open file", e);
    } catch (OpenXML4JException | XMLStreamException e) {
        throw new ReadException("Unable to read workbook", e);
    } catch (GeneralSecurityException e) {
        throw new ReadException("Unable to read workbook - Decryption failed", e);
    }
}
/**
 * Looks up the sheet names and creates one {@link StreamingSheet} with its own
 * {@link StreamingSheetReader} per distinct sheet part of the workbook.
 *
 * @param reader       the POI reader giving access to the sheet parts
 * @param sst          shared strings table handed to every sheet reader
 * @param stylesTable  styles table handed to every sheet reader
 * @param rowCacheSize row cache size handed to every sheet reader
 */
void loadSheets(XSSFReader reader, SharedStringsTable sst, StylesTable stylesTable,
    int rowCacheSize)
    throws IOException, InvalidFormatException, XMLStreamException {
    lookupSheetNames(reader);
    //Some workbooks have multiple references to the same sheet. Need to filter
    //them out before creating the XMLEventReader by keeping track of their URIs.
    //The sheets are listed in order, so we must keep track of insertion order.
    SheetIterator iter = (SheetIterator) reader.getSheetsData();
    Map<URI, InputStream> sheetStreams = new LinkedHashMap<>();
    while (iter.hasNext()) {
        InputStream is = iter.next();
        InputStream replaced = sheetStreams.put(iter.getSheetPart().getPartName().getURI(), is);
        if (replaced != null) {
            // The stream of an earlier reference to the same part is no longer reachable;
            // close it instead of leaking it.
            replaced.close();
        }
    }
    //Iterate over the loaded streams
    int i = 0;
    for (InputStream sheetStream : sheetStreams.values()) {
        XMLEventReader parser = StaxHelper.newXMLInputFactory()
            .createXMLEventReader(sheetStream);
        sheets.add(new StreamingSheet(sheetProperties.get(i++).get("name"),
            new StreamingSheetReader(sst, stylesTable, parser, use1904Dates, rowCacheSize)));
    }
}
/**
 * Rebuilds {@code sheetProperties} from the workbook XML: one map per {@code sheet} element,
 * holding its "name" and its "state" ("visible" when the element has no state attribute).
 *
 * @param reader the POI reader giving access to the workbook part
 */
void lookupSheetNames(XSSFReader reader) throws IOException, InvalidFormatException {
    sheetProperties.clear();
    NodeList nl;
    // Close the workbook part stream once the document has been parsed.
    try (InputStream workbookData = reader.getWorkbookData()) {
        nl = searchForNodeList(document(workbookData),
            "/ss:workbook/ss:sheets/ss:sheet");
    }
    for (int i = 0; i < nl.getLength(); i++) {
        Node sheetNode = nl.item(i);
        Map<String, String> props = new HashMap<>();
        props.put("name", sheetNode.getAttributes().getNamedItem("name").getTextContent());
        Node state = sheetNode.getAttributes().getNamedItem("state");
        props.put("state", state == null ? "visible" : state.getTextContent());
        sheetProperties.add(props);
    }
}
/**
 * @return the internal list of sheets (not a copy)
 */
List<? extends Sheet> getSheets() {
return sheets;
}
/**
 * @return the internal list of per-sheet property maps (not a copy); each map carries the
 *         keys "name" and "state"
 */
public List<Map<String, String>> getSheetProperties() {
return sheetProperties;
}
/**
 * @return an iterator over the sheets that does not support removal
 */
@Override
public Iterator<Sheet> iterator() {
return new StreamingSheetIterator(sheets.iterator());
}
/**
 * Releases everything this reader holds: closes every sheet reader, reverts the package, and
 * deletes the temporary workbook file and the shared strings cache file.
 * <p>
 * Cleanup continues when a sheet reader fails to close; the first such failure is rethrown at
 * the end with later ones attached as suppressed exceptions.
 *
 * @throws IOException if the shared strings cache cannot be closed
 */
@Override
public void close() throws IOException {
    RuntimeException firstFailure = null;
    try {
        for (StreamingSheet sheet : sheets) {
            try {
                sheet.getReader().close();
            } catch (RuntimeException e) {
                // Keep going so the remaining readers and the package are still released.
                if (firstFailure == null) {
                    firstFailure = e;
                } else {
                    firstFailure.addSuppressed(e);
                }
            }
        }
        // pkg is null when no workbook was ever opened successfully.
        if (pkg != null) {
            pkg.revert();
        }
    } finally {
        if (tmp != null) {
            log.debug("Deleting tmp file [{}]", tmp.getAbsolutePath());
            tmp.delete();
        }
        try {
            if (sst instanceof BufferedStringsTable) {
                ((BufferedStringsTable) sst).close();
            }
        } finally {
            // The cache file exists even when the workbook has no shared strings part, in
            // which case sst is null.
            if (sstCache != null) {
                log.debug("Deleting sst cache file [{}]", sstCache.getAbsolutePath());
                sstCache.delete();
            }
        }
    }
    if (firstFailure != null) {
        throw firstFailure;
    }
}
/**
 * Read-only view of a {@link StreamingSheet} iterator that exposes its elements as
 * {@link Sheet}.
 */
static class StreamingSheetIterator implements Iterator<Sheet> {

    private final Iterator<StreamingSheet> iterator;

    /**
     * @param iterator the iterator to wrap
     */
    public StreamingSheetIterator(Iterator<StreamingSheet> iterator) {
        this.iterator = iterator;
    }

    @Override
    public boolean hasNext() {
        return iterator.hasNext();
    }

    @Override
    public Sheet next() {
        return iterator.next();
    }

    /**
     * Not supported.
     *
     * @throws UnsupportedOperationException always
     */
    @Override
    public void remove() {
        throw new UnsupportedOperationException("NotSupported");
    }
}
}

@ -0,0 +1,15 @@
package kr.xit.core.support.xlsx.impl;
/**
 * {@link Supplier} that hands back the string it was constructed with.
 */
class StringSupplier implements Supplier {

    private final String content;

    /**
     * @param val the string to supply; stored as is
     */
    StringSupplier(String val) {
        content = val;
    }

    /**
     * @return the string given at construction time
     */
    @Override
    public Object getContent() {
        return content;
    }
}

@ -0,0 +1,6 @@
package kr.xit.core.support.xlsx.impl;
/**
 * Source of a cell's content. Implementations may hold the content directly or compute it when
 * asked.
 */
interface Supplier {
/**
 * @return the content held or computed by this supplier
 */
Object getContent();
}

@ -0,0 +1,24 @@
package kr.xit.core.support.xlsx.impl;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.file.Files;
/**
 * Helper for spooling an input stream to a temporary file.
 */
public class TempFileUtil {

    /**
     * Copies the whole stream into a newly created temporary file named {@code tmp-*.xlsx}.
     * The stream is always closed. If the copy fails, the partially written temporary file is
     * deleted before the failure is propagated.
     *
     * @param is         the data to copy; closed by this method
     * @param bufferSize size in bytes of the copy buffer; must be positive
     * @return the temporary file holding the stream's content
     * @throws IOException              if the file cannot be created or the copy fails
     * @throws IllegalArgumentException if {@code bufferSize} is not positive
     */
    public static File writeInputStreamToFile(InputStream is, int bufferSize) throws IOException {
        try (InputStream in = is) {
            if (bufferSize <= 0) {
                // A zero-length buffer would make read() return 0 forever.
                throw new IllegalArgumentException(
                    "bufferSize must be positive but was " + bufferSize);
            }
            File f = Files.createTempFile("tmp-", ".xlsx").toFile();
            try (FileOutputStream fos = new FileOutputStream(f)) {
                byte[] bytes = new byte[bufferSize];
                int read;
                while ((read = in.read(bytes)) != -1) {
                    fos.write(bytes, 0, read);
                }
            } catch (IOException | RuntimeException e) {
                // Do not leave a truncated file behind; the output stream is already closed.
                f.delete();
                throw e;
            }
            return f;
        }
    }
}

@ -0,0 +1,126 @@
package kr.xit.core.support.xlsx.sst;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.util.List;
import javax.xml.stream.XMLEventReader;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.events.XMLEvent;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.openxml4j.opc.PackagePart;
import org.apache.poi.ss.usermodel.RichTextString;
import org.apache.poi.util.StaxHelper;
import org.apache.poi.xssf.model.SharedStringsTable;
import org.apache.poi.xssf.usermodel.XSSFRelation;
import org.apache.poi.xssf.usermodel.XSSFRichTextString;
public class BufferedStringsTable extends SharedStringsTable implements AutoCloseable {
private final FileBackedList list;
/**
 * Creates a file-backed shared strings table from the package's shared strings part.
 *
 * @param tmp            file used as backing store
 * @param cacheSizeBytes cache size passed to the backing list
 * @param pkg            the package to search for a shared strings part
 * @return the table built from the first shared strings part, or {@code null} if the package
 *         has no such part
 */
public static BufferedStringsTable getSharedStringsTable(File tmp, int cacheSizeBytes,
    OPCPackage pkg)
    throws IOException {
    String sharedStringsType = XSSFRelation.SHARED_STRINGS.getContentType();
    List<PackagePart> candidates = pkg.getPartsByContentType(sharedStringsType);
    if (candidates.size() == 0) {
        return null;
    }
    return new BufferedStringsTable(candidates.get(0), tmp, cacheSizeBytes);
}
private BufferedStringsTable(PackagePart part, File file, int cacheSizeBytes)
throws IOException {
this.list = new FileBackedList(file, cacheSizeBytes);
readFrom(part.getInputStream());
}
@Override
public void readFrom(InputStream is) throws IOException {
try {
XMLEventReader xmlEventReader = StaxHelper.newXMLInputFactory()
.createXMLEventReader(is);
while (xmlEventReader.hasNext()) {
XMLEvent xmlEvent = xmlEventReader.nextEvent();
if (xmlEvent.isStartElement() && xmlEvent.asStartElement().getName().getLocalPart()
.equals("si")) {
list.add(parseCT_Rst(xmlEventReader));
}
}
} catch (XMLStreamException e) {
throw new IOException(e);
}
}
/**
* Parses a {@code <si>} String Item. Returns just the text and drops the formatting. See <a
* href="https://msdn.microsoft.com/en-us/library/documentformat.openxml.spreadsheet.sharedstringitem.aspx">xmlschema
* type {@code CT_Rst}</a>.
*/
private String parseCT_Rst(XMLEventReader xmlEventReader) throws XMLStreamException {
// Precondition: pointing to <si>; Post condition: pointing to </si>
StringBuilder buf = new StringBuilder();
XMLEvent xmlEvent;
while ((xmlEvent = xmlEventReader.nextTag()).isStartElement()) {
switch (xmlEvent.asStartElement().getName().getLocalPart()) {
case "t": // Text
buf.append(xmlEventReader.getElementText());
break;
case "r": // Rich Text Run
parseCT_RElt(xmlEventReader, buf);
break;
case "rPh": // Phonetic Run
case "phoneticPr": // Phonetic Properties
skipElement(xmlEventReader);
break;
default:
throw new IllegalArgumentException(
xmlEvent.asStartElement().getName().getLocalPart());
}
}
return buf.toString();
}
/**
* Parses a {@code <r>} Rich Text Run. Returns just the text and drops the formatting. See <a
* href="https://msdn.microsoft.com/en-us/library/documentformat.openxml.spreadsheet.run.aspx">xmlschema
* type {@code CT_RElt}</a>.
*/
private void parseCT_RElt(XMLEventReader xmlEventReader, StringBuilder buf)
throws XMLStreamException {
// Precondition: pointing to <r>; Post condition: pointing to </r>
XMLEvent xmlEvent;
while ((xmlEvent = xmlEventReader.nextTag()).isStartElement()) {
switch (xmlEvent.asStartElement().getName().getLocalPart()) {
case "t": // Text
buf.append(xmlEventReader.getElementText());
break;
case "rPr": // Run Properties
skipElement(xmlEventReader);
break;
default:
throw new IllegalArgumentException(
xmlEvent.asStartElement().getName().getLocalPart());
}
}
}
private void skipElement(XMLEventReader xmlEventReader) throws XMLStreamException {
// Precondition: pointing to start element; Post condition: pointing to end element
while (xmlEventReader.nextTag().isStartElement()) {
skipElement(xmlEventReader); // recursively skip over child
}
}
@Override
public RichTextString getItemAt(int idx) {
return new XSSFRichTextString(list.getAt(idx));
}
@Override
public void close() throws IOException {
super.close();
list.close();
}
}

@ -0,0 +1,108 @@
package kr.xit.core.support.xlsx.sst;
import java.io.File;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
/**
 * File-backed, append-only list of strings. Entries are appended to a binary packed file and
 * read back through an NIO channel that seeks to the entry's offset.
 * <p>
 * File entry format:
 * <ul>
 * <li>4 bytes: length of the entry payload (big-endian {@code int})</li>
 * <li><i>length</i> bytes: the string encoded as UTF-8</li>
 * </ul>
 * <p>
 * The offset of each entry is kept in memory in a {@code List<Long>}. Values loaded from the
 * file are cached up to an estimated total of {@code cacheSizeBytes}; items are evicted from the
 * cache with an LRU algorithm.
 */
public class FileBackedList implements AutoCloseable {

    /** File offset of every entry, in insertion order. */
    private final List<Long> pointers = new ArrayList<>();
    private final RandomAccessFile raf;
    private final FileChannel channel;
    private final LRUCache cache;
    /** Offset at which the next entry will be written. */
    private long filesize;

    /**
     * Opens (creating it if necessary) the backing file in read/write mode. New entries are
     * appended after whatever content the file already has.
     *
     * @param file           the backing file
     * @param cacheSizeBytes size budget, in bytes, of the in-memory cache
     * @throws IOException if the file cannot be opened or its length cannot be determined
     */
    public FileBackedList(File file, final int cacheSizeBytes) throws IOException {
        RandomAccessFile opened = new RandomAccessFile(file, "rw");
        long initialLength;
        try {
            initialLength = opened.length();
        } catch (IOException e) {
            // Do not leak the file handle when construction fails half-way.
            try {
                opened.close();
            } catch (IOException closeFailure) {
                e.addSuppressed(closeFailure);
            }
            throw e;
        }
        this.raf = opened;
        this.channel = opened.getChannel();
        this.filesize = initialLength;
        this.cache = new LRUCache(cacheSizeBytes);
    }

    /**
     * Appends a string to the end of the list.
     *
     * @param str the string to store
     * @throws RuntimeException wrapping the {@link IOException} if the write fails
     */
    public void add(String str) {
        try {
            writeToFile(str);
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Returns the string stored at the given position, from the cache when possible.
     *
     * @param index zero-based position of the entry
     * @return the stored string
     * @throws IndexOutOfBoundsException if no entry exists at {@code index}
     * @throws RuntimeException          wrapping the {@link IOException} if the read fails, or
     *                                   raised by the cache when the value exceeds its capacity
     */
    public String getAt(int index) {
        String s = cache.getIfPresent(index);
        if (s != null) {
            return s;
        }
        try {
            String val = readFromFile(pointers.get(index));
            cache.store(index, val);
            return val;
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /** Appends one length-prefixed entry at the current end of the data. */
    private void writeToFile(String str) throws IOException {
        synchronized (channel) {
            byte[] payload = str.getBytes(StandardCharsets.UTF_8);
            ByteBuffer header = ByteBuffer.allocate(Integer.BYTES);
            header.putInt(payload.length);
            header.flip();

            channel.position(filesize);
            writeFully(header);
            writeFully(ByteBuffer.wrap(payload));

            // Record the entry only after it has been written completely, so a failed write
            // never leaves behind a pointer to a truncated entry.
            pointers.add(filesize);
            filesize += Integer.BYTES + payload.length;
        }
    }

    /** Reads the length-prefixed entry that starts at {@code pointer}. */
    private String readFromFile(long pointer) throws IOException {
        synchronized (channel) {
            channel.position(pointer);
            //get length of entry
            ByteBuffer header = ByteBuffer.allocate(Integer.BYTES);
            readFully(header);
            header.flip();
            int length = header.getInt();
            //read entry
            ByteBuffer payload = ByteBuffer.allocate(length);
            readFully(payload);
            return new String(payload.array(), StandardCharsets.UTF_8);
        }
    }

    /** Writes until the buffer is drained; a single channel write may be partial. */
    private void writeFully(ByteBuffer source) throws IOException {
        while (source.hasRemaining()) {
            channel.write(source);
        }
    }

    /** Reads until the buffer is full; a single channel read may return fewer bytes. */
    private void readFully(ByteBuffer target) throws IOException {
        while (target.hasRemaining()) {
            if (channel.read(target) < 0) {
                throw new IOException("Unexpected end of backing file: "
                    + target.remaining() + " byte(s) missing");
            }
        }
    }

    /**
     * Closes the backing file (and with it the channel).
     *
     * @throws RuntimeException wrapping the {@link IOException} if closing fails
     */
    @Override
    public void close() {
        try {
            raf.close();
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }
}

@ -0,0 +1,47 @@
package kr.xit.core.support.xlsx.sst;
import java.util.Iterator;
import java.util.LinkedHashMap;
/**
 * Small least-recently-used cache of strings keyed by {@code int}, bounded by an estimated
 * memory footprint rather than by entry count. Not thread-safe.
 */
class LRUCache {

    /** Estimated size of all cached values; always equals the sum of their {@code strSize}. */
    private long sizeBytes;
    private final long capacityBytes;
    /** Insertion-ordered map; the eldest entry is the least recently used one. */
    private final LinkedHashMap<Integer, String> map = new LinkedHashMap<>();

    /**
     * @param capacityBytes maximum estimated size, in bytes, of all cached values together
     */
    LRUCache(long capacityBytes) {
        this.capacityBytes = capacityBytes;
    }

    /**
     * Looks up a value and, on a hit, marks it as the most recently used entry.
     *
     * @param key the entry key
     * @return the cached value, or {@code null} if the key is not cached
     */
    String getIfPresent(int key) {
        String s = map.remove(key);
        if (s != null) {
            // Re-inserting moves the entry to the young end of the insertion order.
            map.put(key, s);
        }
        return s;
    }

    /**
     * Caches a value, evicting least recently used entries until it fits. Storing a key that is
     * already cached replaces its value and makes it the most recently used entry.
     *
     * @param key the entry key
     * @param val the value to cache; must not be {@code null}
     * @throws RuntimeException if the value alone is larger than the cache capacity
     */
    void store(int key, String val) {
        long valSize = strSize(val);
        if (valSize > capacityBytes) {
            throw new RuntimeException("Insufficient cache space.");
        }
        // Release the space of a value being replaced. Without this the size accounting drifts
        // upwards on every re-store, which forces needless evictions and can eventually run the
        // eviction iterator past the end of the map.
        String previous = map.remove(key);
        if (previous != null) {
            sizeBytes -= strSize(previous);
        }
        Iterator<String> it = map.values().iterator();
        while (valSize + sizeBytes > capacityBytes && it.hasNext()) {
            String s = it.next();
            sizeBytes -= strSize(s);
            it.remove();
        }
        map.put(key, val);
        sizeBytes += valSize;
    }

    // just an estimation
    private static long strSize(String str) {
        long size = Integer.BYTES; // hashCode
        size += Character.BYTES * str.length(); // characters
        return size;
    }
}

@ -0,0 +1,452 @@
package kr.xit.core.support.xlsx;
import kr.xit.core.support.xlsx.exceptions.MissingSheetException;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.openxml4j.opc.PackageAccess;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.CellType;
import org.apache.poi.ss.usermodel.DateUtil;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
import java.text.SimpleDateFormat;
import java.time.LocalDateTime;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Date;
import java.util.GregorianCalendar;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Spliterator;
import java.util.Spliterators;
import java.util.stream.Collectors;
import java.util.stream.StreamSupport;
import static org.apache.poi.ss.usermodel.CellType.BOOLEAN;
import static org.apache.poi.ss.usermodel.CellType.NUMERIC;
import static org.apache.poi.ss.usermodel.CellType.STRING;
import static org.apache.poi.ss.usermodel.Row.MissingCellPolicy.CREATE_NULL_AS_BLANK;
import static org.apache.poi.ss.usermodel.Row.MissingCellPolicy.RETURN_BLANK_AS_NULL;
import static org.hamcrest.CoreMatchers.equalTo;
import static org.hamcrest.CoreMatchers.nullValue;
import static org.hamcrest.MatcherAssert.assertThat;
import static org.hamcrest.core.Is.is;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertNull;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.junit.jupiter.api.Assertions.fail;
/**
 * Tests for {@code StreamingReader}, driven by workbook fixtures under
 * {@code src/test/resources}. Workbooks are opened either from an {@link InputStream} or from a
 * {@link File}, and the cells returned by the streaming API are checked for type, value and
 * position.
 */
public class StreamingReaderTest {
// Pins the JVM default locale to English before any test runs.
// NOTE(review): presumably so that formatted cell strings are locale-independent - confirm.
@BeforeAll
public static void init() {
Locale.setDefault(Locale.ENGLISH);
}
/**
 * Opens {@code encrypted.xlsx} with the password {@code "test"} and prints every cell of the
 * first sheet.
 * NOTE(review): all assertions below are commented out, so this test only fails if opening or
 * iterating throws; the {@code OUTER} label is currently unused.
 */
@Test
public void testEncryption() throws Exception {
try(
InputStream is = new FileInputStream(new File("src/test/resources/encrypted.xlsx"));
Workbook wb = StreamingReader.builder().password("test").open(is)) {
OUTER:
for(Row r : wb.getSheetAt(0)) {
int i = 0;
for(Cell c : r) {
System.out.println(++i + " : "+ c.getStringCellValue());
//System.out.println(c.getRichStringCellValue().getString());
//assertEquals("Demo", c.getStringCellValue());
//assertEquals("Demo", c.getRichStringCellValue().getString());
//break OUTER;
}
}
}
}
/**
 * Reads every cell of the first sheet of {@code data_types.xlsx} and checks, row by row, the
 * number of cells, each cell type, and the string / numeric / date / boolean values.
 */
@Test
public void testTypes() throws Exception {
SimpleDateFormat df = new SimpleDateFormat("MM/dd/yyyy");
try(
InputStream is = new FileInputStream(new File("src/test/resources/data_types.xlsx"));
Workbook wb = StreamingReader.builder().open(is);
) {
// Materialise the sheet as a list of rows, each a list of its cells.
List<List<Cell>> obj = new ArrayList<>();
for(Row r : wb.getSheetAt(0)) {
List<Cell> o = new ArrayList<>();
for(Cell c : r) {
o.add(c);
}
obj.add(o);
}
assertEquals(7, obj.size());
List<Cell> row;
// Row 0: header ("Type", "Value").
row = obj.get(0);
assertEquals(2, row.size());
assertEquals(STRING, row.get(0).getCellType());
assertEquals(STRING, row.get(1).getCellType());
assertEquals("Type", row.get(0).getStringCellValue());
assertEquals("Type", row.get(0).getRichStringCellValue().getString());
assertEquals("Value", row.get(1).getStringCellValue());
assertEquals("Value", row.get(1).getRichStringCellValue().getString());
// Row 1: string value.
row = obj.get(1);
assertEquals(2, row.size());
assertEquals(STRING, row.get(0).getCellType());
assertEquals(STRING, row.get(1).getCellType());
assertEquals("string", row.get(0).getStringCellValue());
assertEquals("string", row.get(0).getRichStringCellValue().getString());
assertEquals("jib-jab", row.get(1).getStringCellValue());
assertEquals("jib-jab", row.get(1).getRichStringCellValue().getString());
// Row 2: integer value.
row = obj.get(2);
assertEquals(2, row.size());
assertEquals(STRING, row.get(0).getCellType());
assertEquals(NUMERIC, row.get(1).getCellType());
assertEquals("int", row.get(0).getStringCellValue());
assertEquals("int", row.get(0).getRichStringCellValue().getString());
assertEquals(10, row.get(1).getNumericCellValue(), 0);
// Row 3: floating-point value.
row = obj.get(3);
assertEquals(2, row.size());
assertEquals(STRING, row.get(0).getCellType());
assertEquals(NUMERIC, row.get(1).getCellType());
assertEquals("double", row.get(0).getStringCellValue());
assertEquals("double", row.get(0).getRichStringCellValue().getString());
assertEquals(3.14, row.get(1).getNumericCellValue(), 0);
// Row 4: date value, reported as a date-formatted NUMERIC cell.
row = obj.get(4);
assertEquals(2, row.size());
assertEquals(STRING, row.get(0).getCellType());
assertEquals(NUMERIC, row.get(1).getCellType());
assertEquals("date", row.get(0).getStringCellValue());
assertEquals("date", row.get(0).getRichStringCellValue().getString());
assertEquals(df.parse("1/1/2014"), row.get(1).getDateCellValue());
assertTrue(DateUtil.isCellDateFormatted(row.get(1)));
// Row 5: seven string cells.
row = obj.get(5);
assertEquals(7, row.size());
assertEquals(STRING, row.get(0).getCellType());
assertEquals(STRING, row.get(1).getCellType());
assertEquals(STRING, row.get(2).getCellType());
assertEquals(STRING, row.get(3).getCellType());
assertEquals(STRING, row.get(4).getCellType());
assertEquals(STRING, row.get(5).getCellType());
assertEquals(STRING, row.get(6).getCellType());
assertEquals("long", row.get(0).getStringCellValue());
assertEquals("long", row.get(0).getRichStringCellValue().getString());
assertEquals("ass", row.get(1).getStringCellValue());
assertEquals("ass", row.get(1).getRichStringCellValue().getString());
assertEquals("row", row.get(2).getStringCellValue());
assertEquals("row", row.get(2).getRichStringCellValue().getString());
assertEquals("look", row.get(3).getStringCellValue());
assertEquals("look", row.get(3).getRichStringCellValue().getString());
assertEquals("at", row.get(4).getStringCellValue());
assertEquals("at", row.get(4).getRichStringCellValue().getString());
assertEquals("it", row.get(5).getStringCellValue());
assertEquals("it", row.get(5).getRichStringCellValue().getString());
assertEquals("go", row.get(6).getStringCellValue());
assertEquals("go", row.get(6).getRichStringCellValue().getString());
// Row 6: boolean values (true, false).
row = obj.get(6);
assertEquals(3, row.size());
assertEquals(STRING, row.get(0).getCellType());
assertEquals(BOOLEAN, row.get(1).getCellType());
assertEquals(BOOLEAN, row.get(2).getCellType());
assertEquals("boolean", row.get(0).getStringCellValue());
assertEquals("boolean", row.get(0).getRichStringCellValue().getString());
assertEquals(true, row.get(1).getBooleanCellValue());
assertEquals(false, row.get(2).getBooleanCellValue());
}
}
/**
 * Checks that the date cell of {@code data_types.xlsx} (row 4, column 1) yields the year 2014
 * through both {@code getDateCellValue()} and {@code getLocalDateTimeCellValue()}, and that
 * asking a string cell for a date throws {@link IllegalStateException}.
 */
@Test
public void testGetDateCellValue() throws Exception {
try(
InputStream is = new FileInputStream("src/test/resources/data_types.xlsx");
Workbook wb = StreamingReader.builder().open(is);
) {
List<List<Cell>> obj = new ArrayList<>();
for(Row r : wb.getSheetAt(0)) {
List<Cell> o = new ArrayList<>();
for(Cell c : r) {
o.add(c);
}
obj.add(o);
}
Date dt = obj.get(4).get(1).getDateCellValue();
assertNotNull(dt);
final GregorianCalendar cal = new GregorianCalendar();
cal.setTime(dt);
// NOTE(review): arguments are in (actual, expected) order here; JUnit's assertEquals expects
// (expected, actual), which only affects the failure message.
assertEquals(cal.get(Calendar.YEAR), 2014);
// Verify LocalDateTime version is correct as well
LocalDateTime localDateTime = obj.get(4).get(1).getLocalDateTimeCellValue();
assertEquals(2014, localDateTime.getYear());
// The header cell holds a string, so requesting a date must be rejected.
try {
obj.get(0).get(0).getDateCellValue();
fail("Should have thrown IllegalStateException");
} catch(IllegalStateException e) { }
}
}
/**
 * Checks that {@code gaps.xlsx} yields three rows and that the third row reports 3 as its
 * first cell number.
 */
@Test
public void testGetFirstCellNum() throws Exception {
try(
InputStream is = new FileInputStream(new File("src/test/resources/gaps.xlsx"));
Workbook wb = StreamingReader.builder().open(is);
) {
List<List<Cell>> obj = new ArrayList<>();
List<Row> rows = new ArrayList<>();
for(Row r : wb.getSheetAt(0)) {
rows.add(r);
List<Cell> o = new ArrayList<>();
for(Cell c : r) {
o.add(c);
}
obj.add(o);
}
assertEquals(3, rows.size());
assertEquals(3, rows.get(2).getFirstCellNum());
}
}
/**
 * Checks that cells of {@code gaps.xlsx} keep their real row and column indexes: the iterated
 * cells of the first two rows are verified for type, value, column index and row index.
 */
@Test
public void testGaps() throws Exception {
try(
InputStream is = new FileInputStream(new File("src/test/resources/gaps.xlsx"));
Workbook wb = StreamingReader.builder().open(is);
) {
List<List<Cell>> obj = new ArrayList<>();
for(Row r : wb.getSheetAt(0)) {
List<Cell> o = new ArrayList<>();
for(Cell c : r) {
o.add(c);
}
obj.add(o);
}
assertEquals(3, obj.size());
List<Cell> row;
// First iterated row: sheet row 0, cells at columns 0 and 2.
row = obj.get(0);
assertEquals(2, row.size());
assertEquals(STRING, row.get(0).getCellType());
assertEquals(STRING, row.get(1).getCellType());
assertEquals("Dat", row.get(0).getStringCellValue());
assertEquals("Dat", row.get(0).getRichStringCellValue().getString());
assertEquals(0, row.get(0).getColumnIndex());
assertEquals(0, row.get(0).getRowIndex());
assertEquals("gap", row.get(1).getStringCellValue());
assertEquals("gap", row.get(1).getRichStringCellValue().getString());
assertEquals(2, row.get(1).getColumnIndex());
assertEquals(0, row.get(1).getRowIndex());
// Second iterated row: sheet row 6, cells at columns 0 and 6.
row = obj.get(1);
assertEquals(2, row.size());
assertEquals(STRING, row.get(0).getCellType());
assertEquals(STRING, row.get(1).getCellType());
assertEquals("guuurrrrrl", row.get(0).getStringCellValue());
assertEquals("guuurrrrrl", row.get(0).getRichStringCellValue().getString());
assertEquals(0, row.get(0).getColumnIndex());
assertEquals(6, row.get(0).getRowIndex());
assertEquals("!", row.get(1).getStringCellValue());
assertEquals("!", row.get(1).getRichStringCellValue().getString());
assertEquals(6, row.get(1).getColumnIndex());
assertEquals(6, row.get(1).getRowIndex());
}
}
/**
 * Checks the first two rows of {@code leadingZeroes.xlsx}: a NUMERIC cell read as 1 / "1", and
 * a STRING cell whose text "0002" keeps its leading zeroes while its numeric value is 2.
 */
@Test
public void testLeadingZeroes() throws Exception {
File f = new File("src/test/resources/leadingZeroes.xlsx");
try(Workbook wb = StreamingReader.builder().open(f)) {
Iterator<Row> iter = wb.getSheetAt(0).iterator();
// NOTE(review): the result of hasNext() is ignored - confirm whether the call is needed.
iter.hasNext();
Row r1 = iter.next();
assertEquals(1, r1.getCell(0).getNumericCellValue(), 0);
assertEquals("1", r1.getCell(0).getStringCellValue());
assertEquals(NUMERIC, r1.getCell(0).getCellType());
Row r2 = iter.next();
assertEquals(2, r2.getCell(0).getNumericCellValue(), 0);
assertEquals("0002", r2.getCell(0).getStringCellValue());
assertEquals("0002", r2.getCell(0).getRichStringCellValue().getString());
assertEquals(STRING, r2.getCell(0).getCellType());
}
}
/**
 * Checks, for {@code special_types.xlsx}, both the formatted string and the underlying date or
 * numeric value of cells with custom date and number formats. Column 0 of each row is skipped.
 * NOTE(review): the cells are inspected after the workbook has been closed - this relies on
 * cell values staying readable after close; confirm.
 */
@Test
public void testSpecialStyles() throws Exception {
File f = new File("src/test/resources/special_types.xlsx");
Map<Integer, List<Cell>> contents = new HashMap<>();
try(Workbook wb = StreamingReader.builder().open(f)) {
for(Row row : wb.getSheetAt(0)) {
contents.put(row.getRowNum(), new ArrayList<Cell>());
for(Cell c : row) {
if(c.getColumnIndex() > 0) {
contents.get(row.getRowNum()).add(c);
}
}
}
}
SimpleDateFormat df = new SimpleDateFormat("dd/MM/yyyy");
assertThat(contents.size(), equalTo(2));
// Row 0: dates in various display formats.
assertThat(contents.get(0).size(), equalTo(4));
assertThat(contents.get(0).get(0).getStringCellValue(), equalTo("Thu\", \"Dec 25\", \"14"));
assertThat(contents.get(0).get(0).getDateCellValue(), equalTo(df.parse("25/12/2014")));
assertThat(contents.get(0).get(1).getStringCellValue(), equalTo("02/04/15"));
assertThat(contents.get(0).get(1).getDateCellValue(), equalTo(df.parse("04/02/2015")));
assertThat(contents.get(0).get(2).getStringCellValue(), equalTo("14\". \"Mar\". \"2015"));
assertThat(contents.get(0).get(2).getDateCellValue(), equalTo(df.parse("14/03/2015")));
assertThat(contents.get(0).get(3).getStringCellValue(), equalTo("2015-05-05"));
assertThat(contents.get(0).get(3).getDateCellValue(), equalTo(df.parse("05/05/2015")));
// Row 1: numbers in various display formats.
assertThat(contents.get(1).size(), equalTo(4));
assertThat(contents.get(1).get(0).getStringCellValue(), equalTo("3.12"));
assertThat(contents.get(1).get(0).getNumericCellValue(), equalTo(3.12312312312));
assertThat(contents.get(1).get(1).getStringCellValue(), equalTo("1,023,042"));
assertThat(contents.get(1).get(1).getNumericCellValue(), equalTo(1023042.0));
assertThat(contents.get(1).get(2).getStringCellValue(), equalTo("-312,231.12"));
assertThat(contents.get(1).get(2).getNumericCellValue(), equalTo(-312231.12123145));
assertThat(contents.get(1).get(3).getStringCellValue(), equalTo("(132)"));
assertThat(contents.get(1).get(3).getNumericCellValue(), equalTo(-132.0));
}
/**
 * Checks that the first row returned for {@code data_types.xlsx} has row number 0.
 */
@Test
public void testFirstRowNumIs0() throws Exception {
File f = new File("src/test/resources/data_types.xlsx");
try(Workbook wb = StreamingReader.builder().open(f)) {
Row row = wb.getSheetAt(0).iterator().next();
assertThat(row.getRowNum(), equalTo(0));
}
}
/**
 * Reads {@code missing-r-attrs.xlsx} through {@code StreamingReader.builder().read(is)} and
 * checks the row numbers (0, 1, 6) and the values in columns 0 and 4 of the three rows, then
 * that no further row is available.
 * NOTE(review): the file name suggests the fixture lacks the "r" reference attributes - verify
 * against the fixture.
 */
@Test
public void testMissingRattrs() throws Exception {
try(
InputStream is = new FileInputStream(new File("src/test/resources/missing-r-attrs.xlsx"));
StreamingReader reader = StreamingReader.builder().read(is);
) {
Row row = reader.iterator().next();
assertEquals(0, row.getRowNum());
assertEquals("1", row.getCell(0).getStringCellValue());
assertEquals("5", row.getCell(4).getStringCellValue());
row = reader.iterator().next();
assertEquals(1, row.getRowNum());
assertEquals("6", row.getCell(0).getStringCellValue());
assertEquals("10", row.getCell(4).getStringCellValue());
row = reader.iterator().next();
assertEquals(6, row.getRowNum());
assertEquals("11", row.getCell(0).getStringCellValue());
assertEquals("15", row.getCell(4).getStringCellValue());
assertFalse(reader.iterator().hasNext());
}
}
/**
 * Checks that, for the first row of {@code blank_cells.xlsx}, the
 * {@code RETURN_BLANK_AS_NULL} policy returns a cell for column 0 and {@code null} for
 * column 1.
 */
@Test
public void testShouldReturnNullForMissingCellPolicy_RETURN_BLANK_AS_NULL() throws Exception {
try(
InputStream is = new FileInputStream(new File("src/test/resources/blank_cells.xlsx"));
Workbook wb = StreamingReader.builder().open(is);
) {
Row row = wb.getSheetAt(0).iterator().next();
assertNotNull(row.getCell(0, RETURN_BLANK_AS_NULL)); //Remain unchanged
assertNull(row.getCell(1, RETURN_BLANK_AS_NULL));
}
}
/**
 * Checks that, for the first row of {@code null_cell.xlsx}, column 1 is {@code null} by
 * default but a non-null cell is returned under the {@code CREATE_NULL_AS_BLANK} policy,
 * while the existing cell in column 0 is returned unchanged.
 */
@Test
public void testShouldReturnBlankForMissingCellPolicy_CREATE_NULL_AS_BLANK() throws Exception {
try(
InputStream is = new FileInputStream(new File("src/test/resources/null_cell.xlsx"));
Workbook wb = StreamingReader.builder().open(is);
) {
Row row = wb.getSheetAt(0).iterator().next();
assertEquals("B1 is Null ->", row.getCell(0, CREATE_NULL_AS_BLANK).getStringCellValue()); //Remain unchanged
assertEquals("B1 is Null ->", row.getCell(0, CREATE_NULL_AS_BLANK).getRichStringCellValue().getString()); //Remain unchanged
assertThat(row.getCell(1), is(nullValue()));
assertNotNull(row.getCell(1, CREATE_NULL_AS_BLANK));
}
}
// The last cell on this sheet should be NUMERIC, but a lingering "f" tag
// was getting attached to the last cell and turning it into a FORMULA.
@Test
public void testForumulaOutsideCellIgnored() throws Exception {
try(
InputStream is = new FileInputStream(new File("src/test/resources/formula_outside_cell.xlsx"));
Workbook wb = StreamingReader.builder().open(is);
) {
Iterator<Row> rows = wb.getSheetAt(0).iterator();
Cell cell = null;
// Walk the whole sheet so that "cell" ends up referencing the very last cell.
while(rows.hasNext()) {
Iterator<Cell> cells = rows.next().iterator();
while(cells.hasNext()) {
cell = cells.next();
}
}
assertNotNull(cell);
assertThat(cell.getCellType(), is(CellType.NUMERIC));
}
}
/**
 * Checks the first column of the first four rows of {@code formula_test.xlsx}: a plain STRING
 * cell followed by FORMULA cells whose cached result types are STRING, BOOLEAN and NUMERIC.
 */
@Test
public void testFormulaWithDifferentTypes() throws Exception {
try(
InputStream is = new FileInputStream(new File("src/test/resources/formula_test.xlsx"));
Workbook wb = StreamingReader.builder().open(is)
) {
Sheet sheet = wb.getSheetAt(0);
Iterator<Row> rowIterator = sheet.rowIterator();
Row next = rowIterator.next();
Cell cell = next.getCell(0);
assertThat(cell.getCellType(), is(CellType.STRING));
next = rowIterator.next();
cell = next.getCell(0);
assertThat(cell.getCellType(), is(CellType.FORMULA));
assertThat(cell.getCachedFormulaResultType(), is(CellType.STRING));
next = rowIterator.next();
cell = next.getCell(0);
assertThat(cell.getCellType(), is(CellType.FORMULA));
assertThat(cell.getCachedFormulaResultType(), is(CellType.BOOLEAN));
next = rowIterator.next();
cell = next.getCell(0);
assertThat(cell.getCellType(), is(CellType.FORMULA));
assertThat(cell.getCachedFormulaResultType(), is(CellType.NUMERIC));
}
}
}

@ -29,6 +29,8 @@
<proguard-plugin-version>2.6.0</proguard-plugin-version> <!-- 2.5.3 / 2.6.0 -->
<proguard-base-version>7.3.2</proguard-base-version> <!-- 7.2.2 / 7.3.1 / 7.3.2 -->
<proguard-core-version>9.0.10</proguard-core-version> <!-- 9.0.10 -->
<!-- excel poi -->
<apache.poi.version>5.0.0</apache.poi.version> <!-- 4.1.2 -->
</properties>
<repositories>

Loading…
Cancel
Save