package kr.ac.kaist.swrc.jhannanum.module.ma;

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.PrintWriter;
import java.io.Reader;
import java.io.Writer;
import java.util.LinkedList;
import java.util.StringTokenizer;

import kr.ac.kaist.swrc.jhannanum.module.Module;
import kr.ac.kaist.swrc.jhannanum.share.Code;
import kr.ac.kaist.swrc.jhannanum.share.TagSet;

/**
 * Morphological analysis module.
 *
 * <p>Reads lines from the configured reader, splits them into words on spaces and tabs,
 * and writes one analysis per word to the configured writer. A word found in the
 * pre-analyzed dictionary is answered from that dictionary; any other word is analyzed
 * with a {@link MorphemeChart} after its runs of ASCII letters and Chinese characters
 * have been swapped for fixed placeholders (and swapped back in the result).</p>
 *
 * <p>{@link #initialize(Reader, Writer, String)} must be called before {@link #run()}.</p>
 *
 * @author Sangwon Park
 */
public class MorphAnalyzer implements Module {
	/** Placeholder written in place of each run of Chinese characters before analysis. */
	private static final String CHI_REPLACE = "HAN_CHI";

	/** Placeholder written in place of each run of ASCII letters before analysis. */
	private static final String ENG_REPLACE = "HAN_ENG";

	/** Length of {@link #CHI_REPLACE}. */
	private static final int CHI_REPLACE_LEN = CHI_REPLACE.length();

	/** Length of {@link #ENG_REPLACE}. */
	private static final int ENG_REPLACE_LEN = ENG_REPLACE.length();

	private static final String MODULE_NAME = "MorphAnalyzer";

	/* Keys recognised in the configuration file (one "key=value" entry per line). */
	private static final String KEY_DIC_SYSTEM = "dic_system=";
	private static final String KEY_DIC_USER = "dic_user=";
	private static final String KEY_CONNECTIONS = "connections=";
	private static final String KEY_CONNECTIONS_NOT = "connections_not=";
	private static final String KEY_DIC_ANALYZED = "dic_analyzed=";
	private static final String KEY_TAGSET = "tagset=";

	private AnalyzedDic analyzedDic = null;
	private Trie systemDic = null;
	private Trie userDic = null;
	private NumberDic numDic = null;
	private TagSet tagSet = null;
	private Connection connection = null;
	private ConnectionNot connectionNot = null;

	private MorphemeChart chart = null;
	private Simti simti = null;

	private BufferedReader in = null;
	private PrintWriter out = null;

	/** Chinese character runs removed by {@link #preReplace(String)}, in order of appearance. */
	private LinkedList<String> chiReplacementList = null;

	/** ASCII letter runs removed by {@link #preReplace(String)}, in order of appearance. */
	private LinkedList<String> engReplacementList = null;

	/** File handed to {@link ConnectionNot}; value of the {@code connections_not=} entry. */
	private String fileConnectionsNot = "";

	/** File handed to {@link Connection}; value of the {@code connections=} entry. */
	private String fileConnections = "";

	/** File handed to {@link AnalyzedDic}; value of the {@code dic_analyzed=} entry. */
	private String fileDicAnalyzed = "";

	/** File read into the system dictionary trie; value of the {@code dic_system=} entry. */
	private String fileDicSystem = "";

	/** File read into the user dictionary trie; value of the {@code dic_user=} entry. */
	private String fileDicUser = "";

	/** File handed to {@link TagSet}; value of the {@code tagset=} entry. */
	private String fileTagSet = "";


	/**
	 * Returns the name of this module.
	 *
	 * @return the constant module name
	 */
	public String getName() {
		return MODULE_NAME;
	}

	/**
	 * Loads the configuration file, builds every resource the analyzer needs and
	 * attaches the input and output streams.
	 *
	 * @param in reader the words are read from, or {@code null} to leave the input unset
	 * @param out writer the analysis is written to, or {@code null} to leave the output unset
	 * @param configFile path of the configuration file listing the resource files
	 * @throws IOException if the configuration file cannot be read
	 */
	public void initialize(Reader in, Writer out, String configFile) throws IOException {
		loadConfig(configFile);

		tagSet = new TagSet();
		tagSet.init(fileTagSet, TagSet.TAG_SET_KAIST);

		connection = new Connection();
		connection.init(fileConnections, tagSet.getTagCount(), tagSet);

		connectionNot = new ConnectionNot();
		connectionNot.init(fileConnectionsNot, tagSet);

		analyzedDic = new AnalyzedDic();
		analyzedDic.readDic(fileDicAnalyzed);

		systemDic = new Trie(Trie.DEFAULT_TRIE_BUF_SIZE_SYS);
		systemDic.read_dic(fileDicSystem, tagSet);

		userDic = new Trie(Trie.DEFAULT_TRIE_BUF_SIZE_USER);
		userDic.read_dic(fileDicUser, tagSet);

		numDic = new NumberDic();
		simti = new Simti();
		simti.init();

		chart = new MorphemeChart(tagSet, connection, systemDic, userDic, numDic, simti);

		chiReplacementList = new LinkedList<String>();
		engReplacementList = new LinkedList<String>();

		setReader(in);
		setWriter(out);
	}

	/**
	 * Reads the resource file names from the configuration file.
	 *
	 * <p>Only lines that start (after trimming) with one of the known keys are used;
	 * everything else is ignored. The reader is always closed, even on failure.</p>
	 *
	 * @param configFile path of the configuration file
	 * @throws IOException if the file cannot be opened or read
	 */
	private void loadConfig(String configFile) throws IOException {
		BufferedReader brConfig = new BufferedReader(new InputStreamReader(new FileInputStream(configFile)));

		try {
			String line;
			while ((line = brConfig.readLine()) != null) {
				line = line.trim();

				if (line.startsWith(KEY_DIC_SYSTEM)) {
					fileDicSystem = line.substring(KEY_DIC_SYSTEM.length()).trim();
				} else if (line.startsWith(KEY_DIC_USER)) {
					fileDicUser = line.substring(KEY_DIC_USER.length()).trim();
				} else if (line.startsWith(KEY_CONNECTIONS)) {
					fileConnections = line.substring(KEY_CONNECTIONS.length()).trim();
				} else if (line.startsWith(KEY_CONNECTIONS_NOT)) {
					fileConnectionsNot = line.substring(KEY_CONNECTIONS_NOT.length()).trim();
				} else if (line.startsWith(KEY_DIC_ANALYZED)) {
					fileDicAnalyzed = line.substring(KEY_DIC_ANALYZED.length()).trim();
				} else if (line.startsWith(KEY_TAGSET)) {
					fileTagSet = line.substring(KEY_TAGSET.length()).trim();
				}
			}
		} finally {
			brConfig.close();
		}
	}

	/**
	 * Puts the original letter and Chinese character runs back in place of the
	 * placeholders inserted by {@link #preReplace(String)}, then empties both
	 * replacement lists.
	 *
	 * <p>The text is processed in a single left-to-right pass, so text that has just
	 * been restored is never searched for placeholders again. When a placeholder occurs
	 * more often than there are stored runs, the runs are reused from the first one
	 * (presumably because the analysis result repeats the word once per candidate —
	 * confirm against {@code MorphemeChart.getResult()}). A placeholder with no stored
	 * run at all is left unchanged.</p>
	 *
	 * @param str analysis result containing placeholders
	 * @return the text with the placeholders replaced by the original runs
	 */
	private String postReplace(String str) {
		StringBuilder restored = new StringBuilder(str.length());
		int engIdx = 0;
		int chiIdx = 0;
		int pos = 0;

		while (pos < str.length()) {
			int nextEng = str.indexOf(ENG_REPLACE, pos);
			int nextChi = str.indexOf(CHI_REPLACE, pos);

			if (nextEng == -1 && nextChi == -1) {
				break;
			}

			// The two placeholders cannot overlap, so the smaller index is the next one to handle.
			boolean engFirst = nextChi == -1 || (nextEng != -1 && nextEng < nextChi);
			int next = engFirst ? nextEng : nextChi;

			restored.append(str, pos, next);
			if (engFirst) {
				engIdx = appendOriginal(restored, engReplacementList, engIdx, ENG_REPLACE);
				pos = next + ENG_REPLACE_LEN;
			} else {
				chiIdx = appendOriginal(restored, chiReplacementList, chiIdx, CHI_REPLACE);
				pos = next + CHI_REPLACE_LEN;
			}
		}
		restored.append(str, pos, str.length());

		engReplacementList.clear();
		chiReplacementList.clear();

		return restored.toString();
	}

	/**
	 * Appends the stored run for one placeholder occurrence.
	 *
	 * @param dest buffer the restored text is appended to
	 * @param originals runs recorded by {@link #preReplace(String)}
	 * @param idx index of the run to use; wraps to 0 when it reaches the list size
	 * @param placeholder placeholder text, appended unchanged when no run is stored
	 * @return the index to use for the next occurrence of the same placeholder
	 */
	private static int appendOriginal(StringBuilder dest, LinkedList<String> originals, int idx, String placeholder) {
		if (originals.isEmpty()) {
			dest.append(placeholder);
			return idx;
		}

		int current = (idx == originals.size()) ? 0 : idx;
		dest.append(originals.get(current));
		return current + 1;
	}

	/**
	 * Replaces every maximal run of ASCII letters with {@link #ENG_REPLACE} and every
	 * maximal run of Chinese characters with {@link #CHI_REPLACE}, recording the removed
	 * runs in the replacement lists so {@link #postReplace(String)} can restore them.
	 *
	 * @param str word to prepare for analysis
	 * @return the word with letter and Chinese character runs replaced by placeholders
	 */
	private String preReplace(String str) {
		StringBuilder result = new StringBuilder(str.length());
		StringBuilder run = new StringBuilder();
		boolean engFlag = false;
		boolean chiFlag = false;

		for (int i = 0; i < str.length(); i++) {
			char c = str.charAt(i);

			if (isEnglishLetter(c)) {
				if (!engFlag) {
					// A letter directly after a Chinese run ends that run.
					if (chiFlag) {
						chiFlag = false;
						chiReplacementList.add(run.toString());
						run.setLength(0);
					}
					result.append(ENG_REPLACE);
					engFlag = true;
				}
				run.append(c);
			} else if (isChineseCharacter(c)) {
				if (!chiFlag) {
					// A Chinese character directly after a letter run ends that run.
					if (engFlag) {
						engFlag = false;
						engReplacementList.add(run.toString());
						run.setLength(0);
					}
					result.append(CHI_REPLACE);
					chiFlag = true;
				}
				run.append(c);
			} else {
				// Any other character is copied through and ends whichever run is open.
				result.append(c);
				if (engFlag) {
					engFlag = false;
					engReplacementList.add(run.toString());
					run.setLength(0);
				}
				if (chiFlag) {
					chiFlag = false;
					chiReplacementList.add(run.toString());
					run.setLength(0);
				}
			}
		}

		// Record a run that reaches the end of the word.
		if (engFlag) {
			engReplacementList.add(run.toString());
		}
		if (chiFlag) {
			chiReplacementList.add(run.toString());
		}
		return result.toString();
	}

	/**
	 * Tells whether the character is an ASCII letter.
	 *
	 * @param c character to test
	 * @return {@code true} for {@code a-z} and {@code A-Z}
	 */
	private static boolean isEnglishLetter(char c) {
		return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
	}

	/**
	 * Tells whether the character lies in one of the code point ranges treated as
	 * Chinese characters.
	 *
	 * <p>The result does not depend on whether a Chinese run is already open; the
	 * earlier inline condition required that for the U+F900-U+FAFF range only, because
	 * of operator precedence.</p>
	 *
	 * @param c character to test
	 * @return {@code true} if the character is in one of the ranges
	 */
	private static boolean isChineseCharacter(char c) {
		return (c >= 0x2E80 && c <= 0x2EFF)		// CJK Radicals Supplement
				|| (c >= 0x3400 && c <= 0x4DBF)	// CJK Unified Ideographs Extension A
				// NOTE(review): exclusive bound 0x9FBF kept as found; the block itself runs to U+9FFF — confirm.
				|| (c >= 0x4E00 && c < 0x9FBF)	// CJK Unified Ideographs
				|| (c >= 0xF900 && c <= 0xFAFF);	// CJK Compatibility Ideographs
	}

	/**
	 * Analyzes one word and writes the result to the output.
	 *
	 * <p>Words containing {@code BOS}, {@code EOS} or {@code EOF} are written as that
	 * marker followed by an empty line. A word found in the pre-analyzed dictionary is
	 * written as {@code #word} followed by one tab-indented line per {@code ^}-separated
	 * entry. Any other word is written as {@code " word"} followed by the chart result.</p>
	 *
	 * @param word word to analyze
	 */
	private void processWord(String word) {
		// NOTE(review): these are substring matches, so any word that merely contains
		// "BOS", "EOS" or "EOF" is treated as a marker — confirm against the upstream format.
		if (word.indexOf("BOS") != -1) {
			out.write("BOS\n\n");
		} else if (word.indexOf("EOS") != -1) {
			out.write("EOS\n\n");
		} else if (word.indexOf("EOF") != -1) {
			out.write("EOF\n\n");
		} else {
			String analysis = analyzedDic.get(word);

			if (analysis != null) {
				// The word is registered in the pre-analyzed dictionary.
				out.write("#" + word + '\n');

				StringTokenizer st = new StringTokenizer(analysis, "^");
				while (st.hasMoreTokens()) {
					out.write("\t" + st.nextToken() + '\n');
				}
			} else {
				// The word is not in the pre-analyzed dictionary: analyze it with the chart.
				out.write(" " + word + '\n');

				word = preReplace(word);

				// TODO: words containing '(' get no special handling here. The original
				// (unreadable) note appears to say that an earlier implementation handled
				// parentheses and this version does not — confirm.

				chart.init(Code.toTripleString(word));
				chart.analyze();
				out.write(postReplace(chart.getResult() + '\n'));
			}
		}
	}

	/**
	 * Reads the input line by line, analyzes every space- or tab-separated word and
	 * flushes the output after each word.
	 *
	 * @throws IllegalStateException if the reader or the writer has not been set
	 * @throws Exception if reading the input fails
	 */
	public void run() throws Exception {
		if (in == null || out == null) {
			throw new IllegalStateException(MODULE_NAME + ": reader and writer must be set before run()");
		}

		String line = null;

		while ((line = in.readLine()) != null) {
			StringTokenizer st = new StringTokenizer(line, " \t");
			while (st.hasMoreTokens()) {
				processWord(st.nextToken());
				out.flush();
			}
		}
	}

	/**
	 * Sets the reader the words are read from.
	 *
	 * @param in new input, wrapped in a {@link BufferedReader}; {@code null} clears the input
	 */
	public void setReader(Reader in) {
		if (in != null) {
			this.in = new BufferedReader(in);
		} else {
			this.in = null;
		}
	}

	/**
	 * Sets the writer the analysis is written to.
	 *
	 * @param out new output, wrapped in a {@link PrintWriter}; {@code null} clears the output
	 */
	public void setWriter(Writer out) {
		if (out != null) {
			this.out = new PrintWriter(out);
		} else {
			this.out = null;
		}
	}

	/**
	 * Does nothing; the reader and writer are left open.
	 */
	public void shutdown() {

	}
}
