PDF형식의 고지서에서 전자납부번호와 단속사진을 추출하는 class추가
parent
80c1bbaf46
commit
2b5ee6b014
@ -0,0 +1,120 @@
|
||||
package cokr.xit.fims.cmmn.pdf;
|
||||
|
||||
import java.awt.image.BufferedImage;
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Base64;
|
||||
import java.util.List;
|
||||
|
||||
import javax.imageio.ImageIO;
|
||||
|
||||
import org.apache.pdfbox.contentstream.PDFStreamEngine;
|
||||
import org.apache.pdfbox.contentstream.operator.DrawObject;
|
||||
import org.apache.pdfbox.contentstream.operator.Operator;
|
||||
import org.apache.pdfbox.contentstream.operator.OperatorName;
|
||||
import org.apache.pdfbox.contentstream.operator.state.Concatenate;
|
||||
import org.apache.pdfbox.contentstream.operator.state.Restore;
|
||||
import org.apache.pdfbox.contentstream.operator.state.Save;
|
||||
import org.apache.pdfbox.contentstream.operator.state.SetGraphicsStateParameters;
|
||||
import org.apache.pdfbox.contentstream.operator.state.SetMatrix;
|
||||
import org.apache.pdfbox.cos.COSBase;
|
||||
import org.apache.pdfbox.cos.COSName;
|
||||
import org.apache.pdfbox.pdmodel.graphics.PDXObject;
|
||||
import org.apache.pdfbox.pdmodel.graphics.form.PDFormXObject;
|
||||
import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject;
|
||||
import org.apache.pdfbox.util.Matrix;
|
||||
|
||||
public class ExtractImageEngine extends PDFStreamEngine {
|
||||
|
||||
private List<String> base64List = new ArrayList<String>();
|
||||
|
||||
public List<String> getBase64List() {
|
||||
return base64List;
|
||||
}
|
||||
|
||||
public void clearBase64List() {
|
||||
this.base64List = new ArrayList<String>();
|
||||
}
|
||||
|
||||
/**
|
||||
* Default constructor.
|
||||
*
|
||||
* @throws IOException If there is an error loading text stripper properties.
|
||||
*/
|
||||
public ExtractImageEngine() throws IOException {
|
||||
addOperator(new Concatenate(this));
|
||||
addOperator(new DrawObject(this));
|
||||
addOperator(new SetGraphicsStateParameters(this));
|
||||
addOperator(new Save(this));
|
||||
addOperator(new Restore(this));
|
||||
addOperator(new SetMatrix(this));
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* This is used to handle an operation.
|
||||
*
|
||||
* @param operator The operation to perform.
|
||||
* @param operands The list of arguments.
|
||||
*
|
||||
* @throws IOException If there is an error processing the operation.
|
||||
*/
|
||||
@Override
|
||||
protected void processOperator( Operator operator, List<COSBase> operands) throws IOException {
|
||||
String operation = operator.getName();
|
||||
if (OperatorName.DRAW_OBJECT.equals(operation)) {
|
||||
COSName objectName = (COSName) operands.get( 0 );
|
||||
PDXObject xobject = getResources().getXObject( objectName );
|
||||
if( xobject instanceof PDImageXObject) {
|
||||
PDImageXObject image = (PDImageXObject)xobject;
|
||||
int imageWidth = image.getWidth();
|
||||
int imageHeight = image.getHeight();
|
||||
System.out.println("*******************************************************************");
|
||||
System.out.println("Found image [" + objectName.getName() + "]");
|
||||
|
||||
Matrix ctmNew = getGraphicsState().getCurrentTransformationMatrix();
|
||||
float imageXScale = ctmNew.getScalingFactorX();
|
||||
float imageYScale = ctmNew.getScalingFactorY();
|
||||
|
||||
// position in user space units. 1 unit = 1/72 inch at 72 dpi
|
||||
System.out.println("position in PDF = " + ctmNew.getTranslateX() + ", " + ctmNew.getTranslateY() + " in user space units");
|
||||
// raw size in pixels
|
||||
System.out.println("raw image size = " + imageWidth + ", " + imageHeight + " in pixels");
|
||||
// displayed size in user space units
|
||||
System.out.println("displayed size = " + imageXScale + ", " + imageYScale + " in user space units");
|
||||
// displayed size in inches at 72 dpi rendering
|
||||
imageXScale /= 72;
|
||||
imageYScale /= 72;
|
||||
System.out.println("displayed size = " + imageXScale + ", " + imageYScale + " in inches at 72 dpi rendering");
|
||||
// displayed size in millimeters at 72 dpi rendering
|
||||
imageXScale *= 25.4f;
|
||||
imageYScale *= 25.4f;
|
||||
System.out.println("displayed size = " + imageXScale + ", " + imageYScale + " in millimeters at 72 dpi rendering");
|
||||
|
||||
if(imageXScale > 30 && imageYScale > 30 && imageXScale < 120 && imageYScale < 120) {
|
||||
ByteArrayOutputStream baos = new ByteArrayOutputStream();
|
||||
BufferedImage buffImage = image.getImage();
|
||||
ImageIO.write(buffImage, "png", baos);
|
||||
byte[] bytesimage = baos.toByteArray();
|
||||
String imageStr = Base64.getEncoder().encodeToString(bytesimage);
|
||||
base64List.add(imageStr);
|
||||
}
|
||||
System.out.println();
|
||||
} else if(xobject instanceof PDFormXObject) {
|
||||
PDFormXObject form = (PDFormXObject)xobject;
|
||||
showForm(form);
|
||||
}
|
||||
} else {
|
||||
super.processOperator( operator, operands);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* This will print the usage for this document.
|
||||
*/
|
||||
private static void usage()
|
||||
{
|
||||
System.err.println( "Usage: java " + ExtractImageEngine.class.getName() + " <input-pdf>" );
|
||||
}
|
||||
}
|
@ -0,0 +1,41 @@
|
||||
package cokr.xit.fims.cmmn.pdf;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import org.apache.pdfbox.pdmodel.PDDocument;
|
||||
import org.apache.pdfbox.text.PDFTextStripper;
|
||||
|
||||
public class ExtractText {
|
||||
|
||||
public static final String REGEXP_PATTERN_NUMBER = "^[\\d]*$";
|
||||
|
||||
public static String getExtractEpayNo(PDDocument pdDocument, int pageNum) {
|
||||
String epayNo = "";
|
||||
|
||||
PDFTextStripper Tstripper = new PDFTextStripper();
|
||||
|
||||
Tstripper.setStartPage(pageNum);
|
||||
Tstripper.setEndPage(pageNum);
|
||||
String summaryText = null;
|
||||
|
||||
try {
|
||||
summaryText = Tstripper.getText(pdDocument);
|
||||
} catch (IOException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
|
||||
String[] textArr = summaryText.split("\r\n");
|
||||
|
||||
for(int i=0; i < textArr.length; i++) {
|
||||
boolean isNumber = Pattern.matches(REGEXP_PATTERN_NUMBER, textArr[i]);
|
||||
if(isNumber) {
|
||||
if(textArr[i].length() == 19) {
|
||||
epayNo = textArr[i];
|
||||
}
|
||||
}
|
||||
}
|
||||
return epayNo;
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,25 @@
|
||||
package cokr.xit.fims.cmmn.pdf;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
import lombok.Getter;
|
||||
import lombok.Setter;
|
||||
|
||||
@Getter
|
||||
@Setter
|
||||
public class Extraction {
|
||||
|
||||
/**
|
||||
* 페이지번호
|
||||
*/
|
||||
int page;
|
||||
/**
|
||||
* 전자납부번호
|
||||
*/
|
||||
String epayNo;
|
||||
/**
|
||||
* 단속사진 base64 문자열
|
||||
*/
|
||||
List<String> base64List;
|
||||
|
||||
}
|
@ -0,0 +1,73 @@
|
||||
package cokr.xit.fims.cmmn.pdf;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.pdfbox.Loader;
|
||||
import org.apache.pdfbox.pdmodel.PDDocument;
|
||||
import org.apache.pdfbox.pdmodel.PDPage;
|
||||
import org.apache.pdfbox.pdmodel.PDPageTree;
|
||||
import org.springframework.util.ResourceUtils;
|
||||
|
||||
public class PDFUtil {
|
||||
|
||||
public static List<Extraction> extract(String pdfName) {
|
||||
|
||||
List<Extraction> extractList = new ArrayList<Extraction>();
|
||||
|
||||
File file = null;
|
||||
try {
|
||||
file = ResourceUtils.getFile("classpath:sample/"+pdfName);
|
||||
} catch (FileNotFoundException e2) {
|
||||
e2.printStackTrace();
|
||||
}
|
||||
PDDocument pdDocument = null;
|
||||
try {
|
||||
pdDocument = Loader.loadPDF(file);
|
||||
} catch (IOException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
|
||||
|
||||
PDPageTree pages = pdDocument.getPages();
|
||||
int pageCount = pages.getCount();
|
||||
|
||||
int pageIndex = 0;
|
||||
|
||||
Extraction extraction = new Extraction();
|
||||
|
||||
for(;pageIndex < pageCount; pageIndex++) {
|
||||
|
||||
extraction = new Extraction();
|
||||
extraction.setPage(pageIndex+1);
|
||||
|
||||
String epayNo = ExtractText.getExtractEpayNo(pdDocument, pageIndex+1);
|
||||
|
||||
extraction.setEpayNo(epayNo);
|
||||
|
||||
|
||||
PDPage page = pages.get(pageIndex);
|
||||
List<String> b64imageList = new ArrayList<>();
|
||||
try {
|
||||
ExtractImageEngine printer = new ExtractImageEngine();
|
||||
printer.processPage(page);
|
||||
b64imageList = printer.getBase64List();
|
||||
printer.clearBase64List();
|
||||
|
||||
} catch (IOException e1) {
|
||||
e1.printStackTrace();
|
||||
}
|
||||
extraction.setBase64List(b64imageList);
|
||||
|
||||
|
||||
extractList.add(extraction);
|
||||
}
|
||||
|
||||
return extractList;
|
||||
}
|
||||
|
||||
|
||||
}
|
Loading…
Reference in New Issue