diff --git a/src/main/java/cokr/xit/fims/cmmn/pdf/ExtractImageEngine.java b/src/main/java/cokr/xit/fims/cmmn/pdf/ExtractImageEngine.java
new file mode 100644
index 00000000..a7b9a6cd
--- /dev/null
+++ b/src/main/java/cokr/xit/fims/cmmn/pdf/ExtractImageEngine.java
@@ -0,0 +1,171 @@
+package cokr.xit.fims.cmmn.pdf;
+
+import java.awt.image.BufferedImage;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Base64;
+import java.util.List;
+
+import javax.imageio.ImageIO;
+
+import org.apache.pdfbox.contentstream.PDFStreamEngine;
+import org.apache.pdfbox.contentstream.operator.DrawObject;
+import org.apache.pdfbox.contentstream.operator.Operator;
+import org.apache.pdfbox.contentstream.operator.OperatorName;
+import org.apache.pdfbox.contentstream.operator.state.Concatenate;
+import org.apache.pdfbox.contentstream.operator.state.Restore;
+import org.apache.pdfbox.contentstream.operator.state.Save;
+import org.apache.pdfbox.contentstream.operator.state.SetGraphicsStateParameters;
+import org.apache.pdfbox.contentstream.operator.state.SetMatrix;
+import org.apache.pdfbox.cos.COSBase;
+import org.apache.pdfbox.cos.COSName;
+import org.apache.pdfbox.pdmodel.graphics.PDXObject;
+import org.apache.pdfbox.pdmodel.graphics.form.PDFormXObject;
+import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject;
+import org.apache.pdfbox.util.Matrix;
+
+/**
+ * Content-stream engine that collects the images drawn on a PDF page as
+ * Base64-encoded PNG strings.
+ *
+ * <p>Only images whose displayed width and height are both strictly between
+ * 30 mm and 120 mm are collected. Results are kept in a plain mutable list, so
+ * an instance should not be shared between threads.
+ */
+public class ExtractImageEngine extends PDFStreamEngine {
+
+    /** Lower bound (exclusive) of the displayed width/height, in millimeters. */
+    private static final float MIN_DISPLAYED_SIZE_MM = 30f;
+
+    /** Upper bound (exclusive) of the displayed width/height, in millimeters. */
+    private static final float MAX_DISPLAYED_SIZE_MM = 120f;
+
+    /** PDF user space units per inch (1 unit = 1/72 inch). */
+    private static final float UNITS_PER_INCH = 72f;
+
+    /** Millimeters per inch. */
+    private static final float MM_PER_INCH = 25.4f;
+
+    /** Base64-encoded PNG data of the images collected so far. */
+    private List<String> base64List = new ArrayList<>();
+
+    /**
+     * Registers the operators needed to track the graphics state and to
+     * descend into form XObjects.
+     *
+     * @throws IOException declared for backward compatibility with existing callers
+     */
+    public ExtractImageEngine() throws IOException {
+        addOperator(new Concatenate(this));
+        addOperator(new DrawObject(this));
+        addOperator(new SetGraphicsStateParameters(this));
+        addOperator(new Save(this));
+        addOperator(new Restore(this));
+        addOperator(new SetMatrix(this));
+    }
+
+    /**
+     * Returns the Base64-encoded PNG images collected so far.
+     *
+     * @return the live backing list, not a copy
+     */
+    public List<String> getBase64List() {
+        return base64List;
+    }
+
+    /**
+     * Discards the collected images by installing a fresh list. A list
+     * obtained earlier from {@link #getBase64List()} is left untouched.
+     */
+    public void clearBase64List() {
+        this.base64List = new ArrayList<>();
+    }
+
+    /**
+     * Intercepts the "Do" (draw object) operator to collect images and to
+     * descend into form XObjects; every other operator is handled by the
+     * superclass.
+     *
+     * @param operator The operation to perform.
+     * @param operands The list of arguments.
+     *
+     * @throws IOException If there is an error processing the operation.
+     */
+    @Override
+    protected void processOperator(Operator operator, List<COSBase> operands) throws IOException {
+        boolean drawsNamedObject = OperatorName.DRAW_OBJECT.equals(operator.getName())
+                && !operands.isEmpty()
+                && operands.get(0) instanceof COSName;
+        if (!drawsNamedObject) {
+            // Also covers a malformed "Do" (missing or non-name operand), which used to
+            // fail here with an IndexOutOfBounds/ClassCast exception.
+            super.processOperator(operator, operands);
+            return;
+        }
+
+        COSName objectName = (COSName) operands.get(0);
+        PDXObject xobject = getResources().getXObject(objectName);
+        if (xobject instanceof PDImageXObject) {
+            collectImage(objectName, (PDImageXObject) xobject);
+        } else if (xobject instanceof PDFormXObject) {
+            // Images may be nested inside a form XObject.
+            showForm((PDFormXObject) xobject);
+        }
+    }
+
+    /**
+     * Prints diagnostics for one image and stores it as Base64-encoded PNG when
+     * its displayed size falls inside the accepted window.
+     *
+     * @param objectName resource name of the image
+     * @param image      the image XObject being drawn
+     * @throws IOException if the image cannot be decoded or encoded
+     */
+    private void collectImage(COSName objectName, PDImageXObject image) throws IOException {
+        System.out.println("*******************************************************************");
+        System.out.println("Found image [" + objectName.getName() + "]");
+
+        Matrix ctm = getGraphicsState().getCurrentTransformationMatrix();
+        // position in user space units. 1 unit = 1/72 inch at 72 dpi
+        System.out.println("position in PDF = " + ctm.getTranslateX() + ", " + ctm.getTranslateY() + " in user space units");
+        // raw size in pixels
+        System.out.println("raw image size = " + image.getWidth() + ", " + image.getHeight() + " in pixels");
+
+        // displayed size in user space units
+        float widthUnits = ctm.getScalingFactorX();
+        float heightUnits = ctm.getScalingFactorY();
+        System.out.println("displayed size = " + widthUnits + ", " + heightUnits + " in user space units");
+
+        // displayed size in inches at 72 dpi rendering
+        float widthInch = widthUnits / UNITS_PER_INCH;
+        float heightInch = heightUnits / UNITS_PER_INCH;
+        System.out.println("displayed size = " + widthInch + ", " + heightInch + " in inches at 72 dpi rendering");
+
+        // displayed size in millimeters at 72 dpi rendering
+        float widthMm = widthInch * MM_PER_INCH;
+        float heightMm = heightInch * MM_PER_INCH;
+        System.out.println("displayed size = " + widthMm + ", " + heightMm + " in millimeters at 72 dpi rendering");
+
+        if (isAcceptedSize(widthMm) && isAcceptedSize(heightMm)) {
+            BufferedImage decoded = image.getImage();
+            ByteArrayOutputStream pngBytes = new ByteArrayOutputStream();
+            // ImageIO.write returns false when no writer accepts the image; skip it
+            // rather than storing an empty string.
+            if (ImageIO.write(decoded, "png", pngBytes)) {
+                base64List.add(Base64.getEncoder().encodeToString(pngBytes.toByteArray()));
+            }
+        }
+        System.out.println();
+    }
+
+    /**
+     * Tells whether a displayed dimension lies strictly inside the accepted window.
+     *
+     * @param sizeMm displayed width or height in millimeters
+     * @return {@code true} if the size is greater than the minimum and less than the maximum
+     */
+    private static boolean isAcceptedSize(float sizeMm) {
+        return sizeMm > MIN_DISPLAYED_SIZE_MM && sizeMm < MAX_DISPLAYED_SIZE_MM;
+    }
+}
diff --git a/src/main/java/cokr/xit/fims/cmmn/pdf/ExtractText.java b/src/main/java/cokr/xit/fims/cmmn/pdf/ExtractText.java
new file mode 100644
index 00000000..1fcd5d3b
--- /dev/null
+++ b/src/main/java/cokr/xit/fims/cmmn/pdf/ExtractText.java
@@ -0,0 +1,60 @@
+package cokr.xit.fims.cmmn.pdf;
+
+import java.io.IOException;
+import java.util.regex.Pattern;
+
+import org.apache.pdfbox.pdmodel.PDDocument;
+import org.apache.pdfbox.text.PDFTextStripper;
+
+/**
+ * Finds the electronic payment number in the text of a PDF page.
+ */
+public class ExtractText {
+
+    /** Matches a string made up of digits only (the empty string included). */
+    public static final String REGEXP_PATTERN_NUMBER = "^[\\d]*$";
+
+    /** Precompiled form of {@link #REGEXP_PATTERN_NUMBER}. */
+    private static final Pattern NUMBER_PATTERN = Pattern.compile(REGEXP_PATTERN_NUMBER);
+
+    /** Any line terminator; the text stripper separates lines with the platform's one. */
+    private static final Pattern LINE_BREAK = Pattern.compile("\\R");
+
+    /** Length of the digit-only line that is taken to be the payment number. */
+    private static final int EPAY_NO_LENGTH = 19;
+
+    /**
+     * Returns the electronic payment number printed on a page.
+     *
+     * <p>The page text is split into lines and the last line consisting of exactly
+     * 19 digits is returned.
+     *
+     * @param pdDocument the document to read
+     * @param pageNum    1-based number of the page to scan
+     * @return the payment number, or an empty string if none is found or the page
+     *         text cannot be extracted
+     */
+    public static String getExtractEpayNo(PDDocument pdDocument, int pageNum) {
+        String pageText;
+        try {
+            PDFTextStripper stripper = new PDFTextStripper();
+            stripper.setStartPage(pageNum);
+            stripper.setEndPage(pageNum);
+            pageText = stripper.getText(pdDocument);
+        } catch (IOException e) {
+            // Previously execution continued with a null text and failed with a
+            // NullPointerException; report the problem and fall back to "not found".
+            e.printStackTrace();
+            return "";
+        }
+
+        String epayNo = "";
+        for (String line : LINE_BREAK.split(pageText)) {
+            if (line.length() == EPAY_NO_LENGTH && NUMBER_PATTERN.matcher(line).matches()) {
+                epayNo = line; // no break: the last match on the page wins
+            }
+        }
+        return epayNo;
+    }
+
+}
diff --git a/src/main/java/cokr/xit/fims/cmmn/pdf/Extraction.java b/src/main/java/cokr/xit/fims/cmmn/pdf/Extraction.java
new file mode 100644
index 00000000..523f5dfe
--- /dev/null
+++ b/src/main/java/cokr/xit/fims/cmmn/pdf/Extraction.java
@@ -0,0 +1,28 @@
+package cokr.xit.fims.cmmn.pdf;
+
+import java.util.List;
+
+import lombok.Getter;
+import lombok.Setter;
+
+/**
+ * Data extracted from a single PDF page.
+ */
+@Getter
+@Setter
+public class Extraction {
+
+    /**
+     * Page number.
+     */
+    int page;
+    /**
+     * Electronic payment number.
+     */
+    String epayNo;
+    /**
+     * Base64 strings of the enforcement photos.
+     */
+    List<String> base64List;
+
+}
diff --git a/src/main/java/cokr/xit/fims/cmmn/pdf/PDFUtil.java b/src/main/java/cokr/xit/fims/cmmn/pdf/PDFUtil.java
new file mode 100644
index 00000000..62b5ef16
--- /dev/null
+++ b/src/main/java/cokr/xit/fims/cmmn/pdf/PDFUtil.java
@@ -0,0 +1,79 @@
+package cokr.xit.fims.cmmn.pdf;
+
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.UncheckedIOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.pdfbox.Loader;
+import org.apache.pdfbox.pdmodel.PDDocument;
+import org.apache.pdfbox.pdmodel.PDPage;
+import org.apache.pdfbox.pdmodel.PDPageTree;
+import org.springframework.util.ResourceUtils;
+
+/**
+ * Extracts the payment number and photos of every page of a PDF stored under
+ * the {@code sample/} classpath directory.
+ */
+public class PDFUtil {
+
+    /**
+     * Loads the PDF at {@code classpath:sample/} + {@code pdfName} and extracts one
+     * {@link Extraction} per page, in page order.
+     *
+     * @param pdfName file name of the PDF, relative to the {@code sample/} directory
+     * @return the extraction results, one entry per page
+     * @throws UncheckedIOException if the file cannot be found or read (previously
+     *         this surfaced as a NullPointerException)
+     */
+    public static List<Extraction> extract(String pdfName) {
+        File file;
+        try {
+            file = ResourceUtils.getFile("classpath:sample/" + pdfName);
+        } catch (FileNotFoundException e) {
+            throw new UncheckedIOException("PDF not found: classpath:sample/" + pdfName, e);
+        }
+
+        // try-with-resources: the document must be closed to release the underlying file.
+        try (PDDocument pdDocument = Loader.loadPDF(file)) {
+            return extractPages(pdDocument);
+        } catch (IOException e) {
+            throw new UncheckedIOException("Failed to read PDF: " + file, e);
+        }
+    }
+
+    /** Builds the per-page results for an already opened document. */
+    private static List<Extraction> extractPages(PDDocument pdDocument) {
+        PDPageTree pages = pdDocument.getPages();
+        int pageCount = pages.getCount();
+        List<Extraction> extractList = new ArrayList<>(pageCount);
+
+        for (int pageIndex = 0; pageIndex < pageCount; pageIndex++) {
+            int pageNumber = pageIndex + 1; // page numbers are 1-based
+            Extraction extraction = new Extraction();
+            extraction.setPage(pageNumber);
+            extraction.setEpayNo(ExtractText.getExtractEpayNo(pdDocument, pageNumber));
+            extraction.setBase64List(extractImages(pages.get(pageIndex)));
+            extractList.add(extraction);
+        }
+        return extractList;
+    }
+
+    /**
+     * Collects the photos of one page. Best effort: a page whose content stream
+     * cannot be processed yields an empty list instead of aborting the whole run.
+     */
+    private static List<String> extractImages(PDPage page) {
+        try {
+            ExtractImageEngine engine = new ExtractImageEngine();
+            engine.processPage(page);
+            return engine.getBase64List();
+        } catch (IOException e) {
+            e.printStackTrace();
+            return new ArrayList<>();
+        }
+    }
+
+}