문자열 인코딩 관련 처리 기능 프로젝트이동

1 week ago · be7e818ce9
parent 3cf3339b21
commit be7e818ce9
2 changed files with 246 additions and 17 deletions
--- a/pom.xml
+++ b/pom.xml
@ -148,6 +148,12 @@
 			<version>0.0.1-SNAPSHOT</version>
 		</dependency>

+		<!-- Unicode and charset detection -->
+		<dependency>
+		    <groupId>com.ibm.icu</groupId>
+		    <artifactId>icu4j</artifactId>
+		    <version>75.1</version>
+		</dependency>
 	</dependencies>

 	<build>
--- a/src/main/java/cokr/xit/fims/cmmn/Hangul.java
+++ b/src/main/java/cokr/xit/fims/cmmn/Hangul.java
@ -0,0 +1,223 @@
+package cokr.xit.fims.cmmn;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+
+import org.apache.commons.io.IOUtils;
+
+import com.ibm.icu.text.CharsetDetector;
+import com.ibm.icu.text.CharsetMatch;
+
+public class Hangul {
+
+	/** Number of bytes counted for every character whose code is above 127. */
+	private int hangulIsNByte;
+
+	/**
+	 * Creates a helper that measures strings with a fixed byte width for non-ASCII characters.
+	 *
+	 * @param hangulIsNByte byte width to count for one character whose code is above 127
+	 */
+	public Hangul(int hangulIsNByte){
+		this.hangulIsNByte = hangulIsNByte;
+	}
+
+	/**
+	 * Returns the byte width configured for non-ASCII characters.
+	 *
+	 * @return the value that was passed to the constructor
+	 */
+	public int is() {
+		return hangulIsNByte;
+	}
+
+	/**
+	 * Computes the byte length of a string under this instance's fixed-width rule.
+	 *
+	 * Each character (UTF-16 code unit) with a code above 127 counts as {@link #is()} bytes;
+	 * every other character counts as one byte.
+	 *
+	 * @param str string to measure; may be {@code null}
+	 * @return the computed byte length, or 0 when {@code str} is {@code null}
+	 */
+	public int getByteLength(String str) {
+		if (str == null) {
+			return 0;
+		}
+
+		final int wideWidth = this.is();
+		int byteLen = 0;
+		for (int i = 0; i < str.length(); i++) {
+			// char is an unsigned 16-bit type, so it can never be negative:
+			// only the upper bound of the ASCII range has to be checked.
+			byteLen += (str.charAt(i) > 127) ? wideWidth : 1;
+		}
+
+		return byteLen;
+	}
+
+	/**
+	 * Left-pads a string until its byte length (see {@link #getByteLength(String)}) reaches
+	 * the requested size.
+	 *
+	 * {@code ch} is prepended once for every missing byte, so it is expected to be a single
+	 * one-byte character; a longer value makes the result exceed {@code byteLen}.
+	 *
+	 * @param str     string to pad; {@code null} is measured as 0 bytes and contributes no text
+	 * @param byteLen target length in bytes
+	 * @param ch      padding text prepended once per missing byte
+	 * @return the padded string, or {@code str} itself when it already has at least
+	 *         {@code byteLen} bytes
+	 */
+	public String lpadByte(String str, int byteLen, String ch) {
+		int padCount = byteLen - this.getByteLength(str);
+		if (padCount <= 0) {
+			// Nothing to add: hand back the input untouched.
+			return str;
+		}
+
+		// Build with a StringBuilder instead of repeated String concatenation in the loop.
+		StringBuilder padded = new StringBuilder();
+		for (int i = 0; i < padCount; i++) {
+			padded.append(ch);
+		}
+		// A null input must not leak the literal text "null" into the padded result.
+		if (str != null) {
+			padded.append(str);
+		}
+
+		return padded.toString();
+	}
+
+	/**
+	 * Right-pads a string until its byte length (see {@link #getByteLength(String)}) reaches
+	 * the requested size.
+	 *
+	 * {@code ch} is appended once for every missing byte, so it is expected to be a single
+	 * one-byte character; a longer value makes the result exceed {@code byteLen}.
+	 *
+	 * @param str     string to pad; {@code null} is measured as 0 bytes and contributes no text
+	 * @param byteLen target length in bytes
+	 * @param ch      padding text appended once per missing byte
+	 * @return the padded string, or {@code str} itself when it already has at least
+	 *         {@code byteLen} bytes
+	 */
+	public String rpadByte(String str, int byteLen, String ch) {
+		int padCount = byteLen - this.getByteLength(str);
+		if (padCount <= 0) {
+			// Nothing to add: hand back the input untouched.
+			return str;
+		}
+
+		// A null input must not leak the literal text "null" into the padded result.
+		StringBuilder padded = new StringBuilder(str == null ? "" : str);
+		for (int i = 0; i < padCount; i++) {
+			padded.append(ch);
+		}
+
+		return padded.toString();
+	}
+
+	/**
+	 * Extracts a substring addressed by byte positions instead of character indexes.
+	 *
+	 * Byte sizes are those reported by {@link #getByteLength(String)}. The result starts at
+	 * the first character whose cumulative byte count reaches {@code beginBytes} and ends
+	 * with the last character whose cumulative byte count does not exceed {@code endBytes};
+	 * a character that would cross {@code endBytes} is left out entirely.
+	 *
+	 * @param str        source string; may be {@code null}
+	 * @param beginBytes byte position of the first character to keep; negative values are
+	 *                   treated as 0
+	 * @param endBytes   byte position of the last byte to keep
+	 * @return the selected substring, or an empty string when {@code str} is {@code null} or
+	 *         empty, when {@code endBytes} is less than 1, or when {@code beginBytes} is never
+	 *         reached within the scanned range
+	 */
+	public String substringByBytes(String str, int beginBytes, int endBytes) {
+		if (str == null || str.isEmpty() || endBytes < 1) {
+			return "";
+		}
+		if (beginBytes < 0) {
+			beginBytes = 0;
+		}
+
+		int beginIndex = -1;
+		int endIndex = 0;
+		int curBytes = 0;
+
+		for (int i = 0; i < str.length(); i++) {
+			curBytes += this.getByteLength(str.substring(i, i + 1));
+
+			if (beginIndex == -1 && curBytes >= beginBytes) {
+				beginIndex = i;
+			}
+
+			if (curBytes > endBytes) {
+				// This character would cross the end limit, so it is excluded.
+				break;
+			}
+			endIndex = i + 1;
+		}
+
+		// beginBytes lies beyond the scanned bytes (past the end of the string or past
+		// endBytes): there is nothing to return, and substring(-1, ...) would throw.
+		if (beginIndex == -1) {
+			return "";
+		}
+
+		return str.substring(beginIndex, endIndex);
+	}
+
+	/**
+	 * Extracts the tail of a string starting at a byte position.
+	 *
+	 * Byte sizes are those reported by {@link #getByteLength(String)}. The result starts at
+	 * the first character whose cumulative byte count reaches {@code beginBytes}.
+	 *
+	 * @param str        source string; may be {@code null}
+	 * @param beginBytes byte position of the first character to keep; negative values are
+	 *                   treated as 0
+	 * @return the substring from that character to the end, or an empty string when
+	 *         {@code str} is {@code null} or empty, or when {@code beginBytes} is larger than
+	 *         the byte length of the whole string
+	 */
+	public String substringByBytes(String str, int beginBytes) {
+		if (str == null || str.isEmpty()) {
+			return "";
+		}
+		if (beginBytes < 0) {
+			beginBytes = 0;
+		}
+
+		int curBytes = 0;
+		for (int i = 0; i < str.length(); i++) {
+			curBytes += this.getByteLength(str.substring(i, i + 1));
+
+			if (curBytes >= beginBytes) {
+				// First character that reaches the requested position: no need to scan further.
+				return str.substring(i);
+			}
+		}
+
+		// The start position lies beyond the end of the string; the original code would call
+		// substring(-1) here and throw.
+		return "";
+	}
+
+	/**
+	 * Detects the encoding of the text file at the given path.
+	 *
+	 * Delegates to {@link #encodingDetect(File)}.
+	 *
+	 * @param path path of the file to inspect
+	 * @return the charset name reported by {@link #encodingDetect(File)}
+	 * @throws IOException if that call fails
+	 */
+	public String encodingDetect(String path) throws IOException {
+		return encodingDetect(new File(path));
+	}
+
+	/**
+	 * 텍스트파일 인코딩 확인
+	 *
+	 * @param file 파일
+	 * @return 캐릭터셋
+	 */
+	public String encodingDetect(File file) throws IOException {
+		CharsetDetector detector;
+	    CharsetMatch match;
+
+	    FileInputStream fis = null;
+	    try {
+	    	String result = "";
+
+		    fis = new FileInputStream(file);
+
+		    byte[] byteData = new byte[(int) file.length()];
+
+		    fis.read(byteData);
+			fis.close();
+		    detector = new CharsetDetector();
+
+		    detector.setText(byteData);
+		    match = detector.detect();
+
+		    System.out.println("encoding is \"" + match.getName() + "\"");
+
+		    if(match.getName().equals("UTF-8") || match.getName().equals("EUC-KR")) {
+		    	result = match.getName();
+		    } else {
+		    	result = "EUC-KR";
+		    }
+
+		    return result;
+	    }
+	    finally {
+	    	IOUtils.closeQuietly(fis);
+	    }
+	}
+}