PDF형식의 고지서에서 전자납부번호와 단속사진을 추출하는 class추가

main
이범준 1 year ago
parent 80c1bbaf46
commit 2b5ee6b014

@ -0,0 +1,120 @@
package cokr.xit.fims.cmmn.pdf;
import java.awt.image.BufferedImage;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Base64;
import java.util.List;
import javax.imageio.ImageIO;
import org.apache.pdfbox.contentstream.PDFStreamEngine;
import org.apache.pdfbox.contentstream.operator.DrawObject;
import org.apache.pdfbox.contentstream.operator.Operator;
import org.apache.pdfbox.contentstream.operator.OperatorName;
import org.apache.pdfbox.contentstream.operator.state.Concatenate;
import org.apache.pdfbox.contentstream.operator.state.Restore;
import org.apache.pdfbox.contentstream.operator.state.Save;
import org.apache.pdfbox.contentstream.operator.state.SetGraphicsStateParameters;
import org.apache.pdfbox.contentstream.operator.state.SetMatrix;
import org.apache.pdfbox.cos.COSBase;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.pdmodel.graphics.PDXObject;
import org.apache.pdfbox.pdmodel.graphics.form.PDFormXObject;
import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject;
import org.apache.pdfbox.util.Matrix;
/**
 * PDF content-stream engine that collects images drawn on a page as Base64-encoded PNG strings.
 *
 * <p>Only images whose displayed width and height (derived from the current transformation
 * matrix) both lie strictly between {@value #MIN_DISPLAY_SIZE_MM} mm and
 * {@value #MAX_DISPLAY_SIZE_MM} mm are collected. NOTE(review): the size window presumably
 * selects the enforcement photo and skips logos/barcodes - confirm with the notice layout.
 *
 * <p>Usage: create an instance, call {@code processPage(page)}, then read {@link #getBase64List()}.
 */
public class ExtractImageEngine extends PDFStreamEngine {

    /** PDF user space units per inch (1 unit = 1/72 inch). */
    private static final float USER_UNITS_PER_INCH = 72f;

    /** Millimeters per inch. */
    private static final float MM_PER_INCH = 25.4f;

    /** Exclusive lower bound of the displayed width/height, in millimeters. */
    private static final float MIN_DISPLAY_SIZE_MM = 30f;

    /** Exclusive upper bound of the displayed width/height, in millimeters. */
    private static final float MAX_DISPLAY_SIZE_MM = 120f;

    /** Base64-encoded PNG images collected so far. */
    private List<String> base64List = new ArrayList<String>();

    /**
     * Returns the list of Base64-encoded PNG images collected so far.
     *
     * @return the live internal list (not a copy)
     */
    public List<String> getBase64List() {
        return base64List;
    }

    /**
     * Discards the collected images by installing a fresh empty list.
     * A list previously obtained from {@link #getBase64List()} is left untouched.
     */
    public void clearBase64List() {
        this.base64List = new ArrayList<String>();
    }

    /**
     * Registers the content-stream operators this engine handles: matrix concatenation,
     * object drawing, graphics state parameters, save/restore and set-matrix.
     *
     * @throws IOException declared by the original signature; kept for caller compatibility
     */
    public ExtractImageEngine() throws IOException {
        addOperator(new Concatenate(this));
        addOperator(new DrawObject(this));
        addOperator(new SetGraphicsStateParameters(this));
        addOperator(new Save(this));
        addOperator(new Restore(this));
        addOperator(new SetMatrix(this));
    }

    /**
     * Handles one content-stream operation. Draw-object operations are intercepted: image
     * XObjects are inspected (and possibly collected), form XObjects are processed recursively.
     * Every other operation is delegated to the superclass.
     *
     * @param operator The operation to perform.
     * @param operands The list of arguments.
     *
     * @throws IOException If there is an error processing the operation.
     */
    @Override
    protected void processOperator(Operator operator, List<COSBase> operands) throws IOException {
        String operation = operator.getName();
        if (!OperatorName.DRAW_OBJECT.equals(operation)) {
            super.processOperator(operator, operands);
            return;
        }

        // A malformed draw-object operator (no operand, or a non-name operand) is skipped
        // instead of failing with IndexOutOfBoundsException / ClassCastException.
        if (operands.isEmpty() || !(operands.get(0) instanceof COSName)) {
            return;
        }

        COSName objectName = (COSName) operands.get(0);
        PDXObject xobject = getResources().getXObject(objectName);
        if (xobject instanceof PDImageXObject) {
            captureImage(objectName, (PDImageXObject) xobject);
        } else if (xobject instanceof PDFormXObject) {
            showForm((PDFormXObject) xobject);
        }
    }

    /**
     * Prints diagnostics for a drawn image and, when its displayed size is inside the accepted
     * window, stores it as a Base64-encoded PNG.
     *
     * @param objectName resource name of the image
     * @param image the image XObject being drawn
     * @throws IOException if the image cannot be decoded or encoded
     */
    private void captureImage(COSName objectName, PDImageXObject image) throws IOException {
        int imageWidth = image.getWidth();
        int imageHeight = image.getHeight();
        System.out.println("*******************************************************************");
        System.out.println("Found image [" + objectName.getName() + "]");

        Matrix ctmNew = getGraphicsState().getCurrentTransformationMatrix();
        float imageXScale = ctmNew.getScalingFactorX();
        float imageYScale = ctmNew.getScalingFactorY();

        // position in user space units. 1 unit = 1/72 inch at 72 dpi
        System.out.println("position in PDF = " + ctmNew.getTranslateX() + ", " + ctmNew.getTranslateY() + " in user space units");
        // raw size in pixels
        System.out.println("raw image size = " + imageWidth + ", " + imageHeight + " in pixels");
        // displayed size in user space units
        System.out.println("displayed size = " + imageXScale + ", " + imageYScale + " in user space units");

        // displayed size in inches at 72 dpi rendering
        imageXScale /= USER_UNITS_PER_INCH;
        imageYScale /= USER_UNITS_PER_INCH;
        System.out.println("displayed size = " + imageXScale + ", " + imageYScale + " in inches at 72 dpi rendering");

        // displayed size in millimeters at 72 dpi rendering
        imageXScale *= MM_PER_INCH;
        imageYScale *= MM_PER_INCH;
        System.out.println("displayed size = " + imageXScale + ", " + imageYScale + " in millimeters at 72 dpi rendering");

        if (isAcceptedSize(imageXScale) && isAcceptedSize(imageYScale)) {
            ByteArrayOutputStream baos = new ByteArrayOutputStream();
            BufferedImage buffImage = image.getImage();
            // ImageIO.write returns false when no writer accepts the image; in that case
            // nothing was written, so do not store an empty string.
            if (ImageIO.write(buffImage, "png", baos)) {
                base64List.add(Base64.getEncoder().encodeToString(baos.toByteArray()));
            }
        }
        System.out.println();
    }

    /** Tells whether a displayed dimension in millimeters is strictly inside the accepted window. */
    private static boolean isAcceptedSize(float sizeMm) {
        return sizeMm > MIN_DISPLAY_SIZE_MM && sizeMm < MAX_DISPLAY_SIZE_MM;
    }
}

@ -0,0 +1,41 @@
package cokr.xit.fims.cmmn.pdf;
import java.io.IOException;
import java.util.regex.Pattern;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.text.PDFTextStripper;
/**
 * Extracts the electronic payment number from the text of a PDF page.
 */
public class ExtractText {

    /** Regular expression matching a string made only of digits (or an empty string). */
    public static final String REGEXP_PATTERN_NUMBER = "^[\\d]*$";

    /** Compiled once instead of on every line. */
    private static final Pattern NUMBER_PATTERN = Pattern.compile(REGEXP_PATTERN_NUMBER);

    /** Number of digits a line must have to be taken as the electronic payment number. */
    private static final int EPAY_NO_LENGTH = 19;

    /**
     * Returns the electronic payment number found on the given page.
     *
     * <p>The page text is split into lines; a line that (after trimming surrounding whitespace)
     * consists of exactly 19 digits is taken as the number. When several lines qualify,
     * the last one wins.
     *
     * @param pdDocument the loaded PDF document
     * @param pageNum the page to read, passed as both start and end page of the text stripper
     * @return the 19-digit number, or an empty string when none is found or the text cannot be read
     */
    public static String getExtractEpayNo(PDDocument pdDocument, int pageNum) {
        PDFTextStripper stripper = new PDFTextStripper();
        stripper.setStartPage(pageNum);
        stripper.setEndPage(pageNum);

        String pageText;
        try {
            pageText = stripper.getText(pdDocument);
        } catch (IOException e) {
            e.printStackTrace();
            // Without text there is nothing to search; previously this path ended in a
            // NullPointerException on the split below.
            return "";
        }

        String epayNo = "";
        // "\\R" matches any line break (\r\n, \n, \r), so the result no longer depends on
        // the text using Windows line endings.
        for (String line : pageText.split("\\R")) {
            String candidate = line.trim();
            if (candidate.length() == EPAY_NO_LENGTH && NUMBER_PATTERN.matcher(candidate).matches()) {
                epayNo = candidate;
            }
        }
        return epayNo;
    }
}

@ -0,0 +1,25 @@
package cokr.xit.fims.cmmn.pdf;
import java.util.List;
import lombok.Getter;
import lombok.Setter;
/**
 * Holder for what was extracted from one PDF page: the page number, the electronic
 * payment number and the images found on the page.
 * Getters and setters are generated by the Lombok annotations below.
 */
@Getter
@Setter
public class Extraction {
/**
* Page number this result belongs to. PDFUtil.extract stores pageIndex + 1 here,
* so the value is 1-based.
*/
int page;
/**
* Electronic payment number as returned by ExtractText.getExtractEpayNo:
* a 19-digit numeric string, or an empty string when none was found.
*/
String epayNo;
/**
* Images collected from the page by ExtractImageEngine, each one a
* Base64-encoded PNG string.
*/
List<String> base64List;
}

@ -0,0 +1,73 @@
package cokr.xit.fims.cmmn.pdf;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.pdfbox.Loader;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDPageTree;
import org.springframework.util.ResourceUtils;
/**
 * Utility for extracting, page by page, the electronic payment number and the images
 * contained in a PDF notice.
 */
public class PDFUtil {

    /**
     * Loads the PDF named {@code pdfName} from the classpath folder {@code sample/} and returns
     * one {@link Extraction} per page, in page order.
     *
     * <p>Image extraction is best-effort: when it fails for a page, the stack trace is printed
     * and that page gets an empty image list.
     *
     * <p>NOTE(review): {@code pdfName} is concatenated into the resource location as-is; if it
     * can come from user input it should be validated by the caller.
     *
     * @param pdfName file name of the PDF relative to {@code classpath:sample/}
     * @return the per-page extraction results; empty when the document has no pages
     * @throws java.io.UncheckedIOException if the resource cannot be found, read or closed
     *         (previously these cases surfaced as a bare NullPointerException)
     */
    public static List<Extraction> extract(String pdfName) {
        List<Extraction> extractList = new ArrayList<Extraction>();

        File file;
        try {
            file = ResourceUtils.getFile("classpath:sample/" + pdfName);
        } catch (FileNotFoundException e) {
            throw new java.io.UncheckedIOException("PDF resource not found: sample/" + pdfName, e);
        }

        // try-with-resources guarantees the document is closed, which the previous version never did.
        try (PDDocument pdDocument = Loader.loadPDF(file)) {
            PDPageTree pages = pdDocument.getPages();
            int pageCount = pages.getCount();

            for (int pageIndex = 0; pageIndex < pageCount; pageIndex++) {
                int pageNumber = pageIndex + 1;

                Extraction extraction = new Extraction();
                extraction.setPage(pageNumber);
                extraction.setEpayNo(ExtractText.getExtractEpayNo(pdDocument, pageNumber));
                extraction.setBase64List(extractImages(pages.get(pageIndex)));

                extractList.add(extraction);
            }
        } catch (IOException e) {
            throw new java.io.UncheckedIOException("Failed to read PDF: sample/" + pdfName, e);
        }

        return extractList;
    }

    /** Collects the Base64-encoded images of one page; returns an empty list when extraction fails. */
    private static List<String> extractImages(PDPage page) {
        try {
            // A fresh engine per page, so its list holds this page's images only.
            ExtractImageEngine engine = new ExtractImageEngine();
            engine.processPage(page);
            return engine.getBase64List();
        } catch (IOException e) {
            e.printStackTrace();
            return new ArrayList<>();
        }
    }
}
Loading…
Cancel
Save