ImplementationLexicalAnalyzer

/*
 * Zemucan: A Syntax Assistant for DB2
 * Copyright (C) 2009, 2010 Andres Gomez Casanova
 *
 * This file is part of Zemucan.
 *
 * Zemucan is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Zemucan is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this library; if not, see <http://www.gnu.org/licenses/>.
 *
 * Contact:
 * a n g o c a  at  y a h o o  dot  c o m
 * Cra. 45 No 61 - 31, Bogota, Colombia.
 *
 * Author:   $LastChangedBy: angoca $:
 * Date:     $LastChangedDate: 2011-03-06 22:24:44 -0500 (dom, 06 mar 2011) $:
 * Revision: $LastChangedRevision: 1915 $:
 * URL:      $HeadURL: https://zemucan.svn.sourceforge.net/svnroot/zemucan/branches/zemucan_v1/source-code/analyzers/src/main/java/name/angoca/zemucan/core/lexical/impl/ImplementationLexicalAnalyzer.java $:
 */
package name.angoca.zemucan.core.lexical.impl;

import java.util.ArrayList;
import java.util.List;
import java.util.StringTokenizer;

import name.angoca.zemucan.AbstractZemucanException;
import name.angoca.zemucan.GeneralException;
import name.angoca.zemucan.ParameterNullException;
import name.angoca.zemucan.core.lexical.api.AbstractLexicalAnalyzer;
import name.angoca.zemucan.core.lexical.model.Token;
import name.angoca.zemucan.core.syntactic.impl.ImplementationSyntacticAnalyzer;
import name.angoca.zemucan.core.syntactic.model.GraphAnswer;
import name.angoca.zemucan.grammarReader.api.GrammarReaderController;
import name.angoca.zemucan.interfaze.model.ReturnOptions;
import name.angoca.zemucan.tools.Base64;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * This is the implementation of the lexical analyzer. This class is not thread
 * safe, because access to the singleton instance is not fully synchronized.
 * <p>
 * <b>Control Version</b>
 * <p>
 * <ul>
 * <li>0.0.1 Class creation.</li>
 * <li>0.1.0</li>
 * <li>0.2.0</li>
 * <li>0.3.0 Recommendations from PMD.</li>
 * <li>0.3.1 Organized.</li>
 * <li>0.4.0 Invalid Graph Exception.</li>
 * <li>0.5.0 Destroy instance.</li>
 * <li>0.5.1 Logger messages.</li>
 * <li>0.6.0 Delimiters not set in a static way.</li>
 * <li>1.0.0 Moved to version 1.</li>
 * <li>1.1.0 Exception hierarchy changed.</li>
 * <li>1.1.1 Not synchronized, not thread safe.</li>
 * <li>1.2.0 throws and asserts.</li>
 * <li>1.2.1 Space after phrase.</li>
 * <li>1.2.2 Delimiters from grammar controller.</li>
 * <li>1.2.3 Synchronization.</li>
 * <li>1.3.0 Method renamed.</li>
 * <li>1.3.1 Clonable.</li>
 * <li>1.3.2 Assistance fix.</li>
 * <li>1.3.3 Ends with space.</li>
 * <li>1.4.0 Method deleted and javadoc for new methods.</li>
 * <li>1.4.1 Analysis improved, implementing 9 possible cases.</li>
 * </ul>
 *
 * @author Andres Gomez Casanova <a
 *         href="mailto:a n g o c a at y a h o o dot c o m" >(AngocA)</a>
 * @version 1.4.1 2010-08-08
 */
public final class ImplementationLexicalAnalyzer extends
		AbstractLexicalAnalyzer {

	/**
	 * Huevo's code: bytes of the phrase that triggers the easter egg. It is a
	 * constant, so the reference is final.
	 */
	private static final byte[] CODE = new byte[] { 97, 110, 103, 111, 99, 97 };
	/**
	 * Huevo: easter egg message, stored encoded in Base64. It is a constant,
	 * so the reference is final.
	 */
	private static final String HUEVO = "VGhpcyBzb2Z0d2FyZSB3YXMgd3JpdHRlbiBieSBBbmRyZXMgR29tZXogQ2FzYW5vdmEgKEFuZ29jQSksIGFuZCBpdCBpcyBkZWRpY2F0ZWQgdG8gbXkgbG92ZWx5IExpbGlhbmEgT3JqdWVsYSwgd2hvIGlzIHRoZSBnaXJsIHRoYXQgSSBsb3ZlIGZyb20gdGhlIGRlZXAgb2YgbXkgaGVhcnQhIFRoaXMgYXBwbGljYXRpb24gaGFzIGEgaGlnaCBxdWFsaXR5IGluIHNldmVyYWwgYXNwZWN0cyBiZWNhdXNlIExpbGkgaXMgbXkgaW5zcGlyYXRpb24u";
	/**
	 * The only instance of this object. It is null until the instance is
	 * created, and it is set back to null when the instance is destroyed.
	 */
	private static ImplementationLexicalAnalyzer instance;

	/**
	 * Logger.
	 */
	private static final Logger LOGGER = LoggerFactory
			.getLogger(ImplementationLexicalAnalyzer.class);

	/**
	 * Symbol that represents the separation of token with no meaning.
	 */
	private static final String WHITE_SPACE = " "; //$NON-NLS-1$

	/**
	 * Discards the current instance, if there is one. It is useful for
	 * testing purposes.
	 * <p>
	 * When an instance exists, the instance of the syntactic analyzer is
	 * destroyed too, and the delimiters held by the discarded instance are
	 * released before the reference to it is cleared.
	 */
	public static void destroyInstance() {
		ImplementationLexicalAnalyzer.LOGGER
				.debug("Destroying ImplementationLexicalAnalyzer instance"); //$NON-NLS-1$
		final ImplementationLexicalAnalyzer current = ImplementationLexicalAnalyzer.instance;
		if (current != null) {
			ImplementationSyntacticAnalyzer.destroyInstance();
			current.delimiters = null;
			ImplementationLexicalAnalyzer.instance = null;
		}

		assert ImplementationLexicalAnalyzer.instance == null;
	}

	/**
	 * Returns the only instance of this object, creating it the first time it
	 * is requested.
	 * <p>
	 * The check for an existing instance and its creation are performed while
	 * holding the lock of this class, so concurrent callers of this method
	 * cannot create two instances. Only this method takes that lock; the rest
	 * of the class does not, so the class as a whole must still be considered
	 * not thread safe. For background about the problems of checking outside
	 * the lock in the Singleton pattern in Java, see
	 * http://www.ibm.com/developerworks/java/library/j-dcl.html
	 *
	 * @return This object instanced.
	 * @throws AbstractZemucanException
	 *             When there is a problem creating the graph or a null
	 *             parameter.
	 */
	public static ImplementationLexicalAnalyzer/* ! */getInstance()
			throws AbstractZemucanException {
		final ImplementationLexicalAnalyzer ret;
		// The null check has to be done inside the lock: when it is done
		// outside, two threads can both see null and each of them creates
		// its own instance.
		synchronized (ImplementationLexicalAnalyzer.class) {
			if (ImplementationLexicalAnalyzer.instance == null) {
				ImplementationLexicalAnalyzer.LOGGER
						.debug("Creating ImplementationLexicalAnalyzer instance"); //$NON-NLS-1$
				ImplementationLexicalAnalyzer.instance = new ImplementationLexicalAnalyzer();
			}
			// Read the field once, inside the lock, and return that value.
			ret = ImplementationLexicalAnalyzer.instance;
		}

		assert ret != null;
		return ret;
	}

	/**
	 * Characters used to split a phrase in tokens. They are obtained from the
	 * grammar reader when the instance is created, and released when the
	 * instance is destroyed.
	 */
	private String delimiters;

	/**
	 * Private constructor (the instance is obtained via getInstance) that
	 * retrieves the delimiters of the tokens from the grammar reader.
	 *
	 * @throws AbstractZemucanException
	 *             If there is a null parameter, or when there is a problem
	 *             creating the graph.
	 */
	private ImplementationLexicalAnalyzer() throws AbstractZemucanException {
		// The superclass constructor without arguments is called implicitly.
		// Asking for the delimiters at this point is, according to the
		// original note, what makes the first analysis of a phrase faster,
		// because the grammar has to be read to answer.
		this.delimiters = GrammarReaderController.getInstance().getDelimiters();
	}

	/*
	 * (non-Javadoc)
	 *
	 * Splits the phrase in tokens, sends the tokens that are not a white
	 * space to the syntactic analyzer, and converts its answer in the options
	 * for the UI.
	 *
	 * @see
	 * name.angoca.zemucan.core.lexical.api.AbstractLexicalAnalyzer#analyzePhrase
	 * (java.lang.String)
	 */
	@Override
	public ReturnOptions/* ? */analyzePhrase(final String/* ! */phrase)
			throws AbstractZemucanException {
		assert phrase != null;

		// TODO v1.1 reduce the cyclomatic complexity.
		final Logger log = ImplementationLexicalAnalyzer.LOGGER;
		if (log.isDebugEnabled()) {
			log.debug("Entering to lexical analyzer: '{}'", //$NON-NLS-1$
					phrase);
		}

		// The trailing space is detected on the phrase as it was typed,
		// because the trimmed copy never has it.
		final boolean trailingSpace = phrase
				.endsWith(ImplementationLexicalAnalyzer.WHITE_SPACE);
		// Delete all the unnecessary spaces.
		final String trimmed = phrase.trim();

		// The delimiters are returned as tokens too (third parameter).
		final StringTokenizer scanner = new StringTokenizer(trimmed,
				this.delimiters, true);
		// Special answer that, when present, replaces the normal one at the
		// end of this method.
		final ReturnOptions special = this.pascua(phrase);

		// Scan the tokens and keep only the ones that are not a white space.
		final List<Token> tokens = new ArrayList<Token>(scanner.countTokens());
		while (scanner.hasMoreTokens()) {
			final String piece = scanner.nextToken();
			if (ImplementationLexicalAnalyzer.WHITE_SPACE.equals(piece)) {
				continue;
			}
			// FIXME v1.1 why true? Probably it will be necessary to create
			// a simple input token, and a more complex output token that
			// will carry more information. In this case the simple one
			// would be used.
			tokens.add(new Token(piece, true));
			log.debug("Token: {{}}", piece); //$NON-NLS-1$
		}

		final GraphAnswer answer = ImplementationSyntacticAnalyzer
				.getInstance().analyzeTokens(tokens, trailingSpace);

		// The guard prevents the creation of the strings when they are not
		// going to be logged.
		if (log.isDebugEnabled()) {
			log.debug("{} : {}", //$NON-NLS-1$
					answer.getPhrases().toString(), answer.getOptions()
							.toString());
		}

		// Process the syntax answer to create a set of options for the UI.
		// It is always computed, even when the special answer is returned.
		final ReturnOptions computed = this.processAnswer(trimmed,
				trailingSpace, tokens, answer);
		final ReturnOptions ret = (special != null) ? special : computed;

		assert ret != null;

		return ret;
	}

	/**
	 * Huevo de pascua (easter egg). When the phrase, exactly as it was typed,
	 * is equal to the code, it returns a special answer whose only option is
	 * the decoded message.
	 *
	 * @param phrase
	 *            User's phrase.
	 * @return The special answer, or null when the phrase is not the code.
	 * @throws ParameterNullException
	 *             Never.
	 */
	private ReturnOptions/* ? */pascua(final String/* ! */phrase)
			throws ParameterNullException {
		assert phrase != null;

		final String code;
		try {
			// The bytes of the code are ASCII characters. They are decoded
			// with an explicit charset, because the default charset of the
			// platform is not always compatible with ASCII, and in that case
			// the code would never match.
			code = new String(ImplementationLexicalAnalyzer.CODE, "US-ASCII"); //$NON-NLS-1$
		} catch (final java.io.UnsupportedEncodingException exception) {
			// US-ASCII is one of the charsets that every implementation of
			// the Java platform is required to support.
			throw new IllegalStateException(
					"US-ASCII charset is not available", exception); //$NON-NLS-1$
		}

		ReturnOptions ret = null;
		if (phrase.equals(code)) {
			final String decode = Base64
					.decodeString(ImplementationLexicalAnalyzer.HUEVO);
			// No phrases, and the decoded message as the only option.
			ret = new ReturnOptions(phrase, new String[] {},
					new String[] { decode });
		}

		return ret;
	}

	/**
	 * Builds the answer for the UI when the syntactic analyzer returned more
	 * than one phrase (cases 1, 2 and 3).
	 *
	 * @param newPhrase
	 *            User's phrase converted.
	 * @param tokens
	 *            List of tokens that represent the phrase.
	 * @param answer
	 *            Answer of the syntactic analyzer.
	 * @param sizeOptions
	 *            Quantity of options in the answer.
	 * @param setOfOptions
	 *            Not read; the options are always taken from the answer.
	 * @param setOfPhrases
	 *            Not read; the phrases are always taken from the answer.
	 * @return Phrase, phrases and options for the UI.
	 * @throws GeneralException
	 *             Declared for the creation of the answer.
	 * @throws ParameterNullException
	 *             Declared for the creation of the answer.
	 */
	private ReturnOptions phraseMultiple(String newPhrase,
			final List<Token> tokens, final GraphAnswer answer,
			final int sizeOptions, String[] setOfOptions, String[] setOfPhrases)
			throws GeneralException, ParameterNullException {
		String resultPhrase = newPhrase;
		if (sizeOptions == 0) {
			// There are multiple phrases and 0 options.
			ImplementationLexicalAnalyzer.LOGGER.debug("Case 3!");
			final String common = this.samePrefixForPhrases(answer
					.getPhrases());
			if (common.length() != 0) {
				// All phrases have the same prefix, then replace last token.
				resultPhrase = this.replaceLastToken(newPhrase, tokens, common);
			}
		} else if (sizeOptions == 1) {
			// There is just one option and multiple phrases.
			ImplementationLexicalAnalyzer.LOGGER.debug("Case 2!");
			// TODO v1.1 this is just to get to know this unknown case.
			throw new RuntimeException("Notify this to AngocA, case 2: "
					+ newPhrase);
		} else if (sizeOptions > 1) {
			// There are multiple options and multiple phrases, then show
			// all of them.
			ImplementationLexicalAnalyzer.LOGGER.debug("Case 1!");
		}

		final String[] options = this.returnCandidates(answer.getOptions());
		final String[] phrases = this.returnCandidates(answer.getPhrases());
		return new ReturnOptions(resultPhrase, phrases, options);
	}

	/**
	 * Builds the answer for the UI when the syntactic analyzer returned
	 * exactly one phrase (cases 4, 5 and 6).
	 *
	 * @param newPhrase
	 *            User's phrase converted.
	 * @param tokens
	 *            List of tokens that represent the phrase.
	 * @param answer
	 *            Answer of the syntactic analyzer.
	 * @param sizeOptions
	 *            Quantity of options in the answer.
	 * @param setOfOptions
	 *            Not read; the options are always taken from the answer.
	 * @param setOfPhrases
	 *            Phrases to use when none of the cases applies.
	 * @return Phrase, phrases and options for the UI.
	 * @throws GeneralException
	 *             Declared for the creation of the answer.
	 * @throws ParameterNullException
	 *             Declared for the creation of the answer.
	 */
	private ReturnOptions phraseOne(String newPhrase, final List<Token> tokens,
			final GraphAnswer answer, final int sizeOptions,
			String[] setOfOptions, String[] setOfPhrases)
			throws GeneralException, ParameterNullException {
		String resultPhrase = newPhrase;
		String[] phrases = setOfPhrases;
		if (sizeOptions == 0) {
			// There is just one phrase, then complete the command.
			ImplementationLexicalAnalyzer.LOGGER.debug("Case 6!");
			// Replace the last token with the phrase.
			final String completion = answer.getPhrases().get(0).getToken();
			resultPhrase = this.replaceLastToken(newPhrase, tokens, completion);
			phrases = new String[0];
		} else if (sizeOptions == 1) {
			// There is one phrase and one option, then add them.
			ImplementationLexicalAnalyzer.LOGGER.debug("Case 5!");
			phrases = this.returnCandidates(answer.getPhrases());
		} else if (sizeOptions > 1) {
			// There is one phrase and multiple options, then add them.
			ImplementationLexicalAnalyzer.LOGGER.debug("Case 4!");
			phrases = this.returnCandidates(answer.getPhrases());
		}

		final String[] options = this.returnCandidates(answer.getOptions());
		return new ReturnOptions(resultPhrase, phrases, options);
	}

	/**
	 * Builds the answer for the UI when the syntactic analyzer returned no
	 * phrases (cases 7, 8 and 9). The set of phrases of the answer is always
	 * empty.
	 *
	 * @param newPhrase
	 *            User's phrase converted.
	 * @param endsWithSpace
	 *            If the phrase finishes with an empty space.
	 * @param answer
	 *            Answer of the syntactic analyzer.
	 * @param sizeOptions
	 *            Quantity of options in the answer.
	 * @param setOfOptions
	 *            Options to use when none of the cases applies.
	 * @param setOfPhrases
	 *            Not read; the phrases are always an empty array.
	 * @return Phrase, phrases and options for the UI.
	 * @throws GeneralException
	 *             Declared for the creation of the answer.
	 * @throws ParameterNullException
	 *             Declared for the creation of the answer.
	 */
	private ReturnOptions phraseZero(String newPhrase,
			final boolean endsWithSpace, final GraphAnswer answer,
			final int sizeOptions, String[] setOfOptions, String[] setOfPhrases)
			throws GeneralException, ParameterNullException {
		if (sizeOptions > 1) {
			// There are multiple options and 0 phrases, then show all options
			// and add a space at the end.
			ImplementationLexicalAnalyzer.LOGGER.debug("Case 7");
			if (newPhrase.length() > 0) {
				newPhrase += " ";
			}
			newPhrase += this.samePrefixForPhrases(answer.getOptions());
			setOfOptions = this.returnCandidates(answer.getOptions());
		} else if (sizeOptions == 1) {
			// There is just one option.
			ImplementationLexicalAnalyzer.LOGGER.debug("Case 8!");
			// The separator is added only when there is something to
			// separate, in the same way as in case 7; otherwise an empty
			// phrase would be answered with a leading space.
			if (newPhrase.length() > 0) {
				newPhrase += " ";
			}
			if (answer.getOptions().get(0).isReservedWord()) {
				// The option is a reserved word, then concatenate the
				// option.
				ImplementationLexicalAnalyzer.LOGGER.debug("Case 8a");
				newPhrase += answer.getOptions().get(0).getToken();
				setOfOptions = new String[] {};
			} else {
				// The option is not a reserved word, then show option.
				ImplementationLexicalAnalyzer.LOGGER.debug("Case 8b");
				setOfOptions = this.returnCandidates(answer.getOptions());
			}
		} else if (sizeOptions == 0) {
			// There are not options nor phrases. This is due to a valid
			// command or an unknown or miswritten command.
			ImplementationLexicalAnalyzer.LOGGER.debug("Case 9!");
			// If the unknown command has a trailing space, it conserves it.
			if (endsWithSpace) {
				newPhrase += " ";
			}
			// The arrays are empty, no assistance.
			setOfOptions = new String[] {};
		}
		setOfPhrases = new String[] {};
		final ReturnOptions ret = new ReturnOptions(newPhrase, setOfPhrases,
				setOfOptions);
		return ret;
	}

	/**
	 * Converts the answer of the syntactic analyzer in the structure for the
	 * UI. The work is delegated according to the quantity of phrases in the
	 * answer: none, exactly one, or several.
	 *
	 * @param newPhrase
	 *            User's phrase converted.
	 * @param endsWithSpace
	 *            If the phrase finishes with an empty space.
	 * @param tokens
	 *            List of tokens that represent the phrase.
	 * @param answer
	 *            Answer of the syntactic analyzer.
	 * @return List of candidates for the UI.
	 * @throws AbstractZemucanException
	 *             If there is a problem retrieving the options, phrases or
	 *             creating the arrays.
	 */
	private ReturnOptions processAnswer(final String newPhrase,
			final boolean endsWithSpace, final List<Token> tokens,
			final GraphAnswer answer) throws AbstractZemucanException {
		final int sizePhrases = answer.getPhrases().size();
		final int sizeOptions = answer.getOptions().size();

		// The methods that receive the arrays create them; there is nothing
		// to give them at this point, so null is passed for both.
		if (sizePhrases == 0) {
			return this.phraseZero(newPhrase, endsWithSpace, answer,
					sizeOptions, null, null);
		}
		if (sizePhrases == 1) {
			return this.phraseOne(newPhrase, tokens, answer, sizeOptions,
					null, null);
		}
		if (sizePhrases > 1) {
			return this.phraseMultiple(newPhrase, tokens, answer,
					sizeOptions, null, null);
		}

		// Only reachable with a negative size.
		return null;
	}

	/**
	 * Changes the last token of the typed command for the given prefix. When
	 * the phrase has only one token, the result is just the prefix; otherwise
	 * it is the phrase up to the last occurrence of the last token, followed
	 * by the prefix.
	 *
	 * @param phrase
	 *            Current phrase.
	 * @param tokens
	 *            List of tokens that compose the phrase.
	 * @param prefix
	 *            Common prefix to the candidates.
	 * @return New phrase.
	 */
	String/* ! */replaceLastToken(final String/* ! */phrase,
			final List<Token>/* <!>! */tokens, final String/* ! */prefix) {
		assert phrase != null;
		assert tokens != null;
		boolean assertsEnabled = false;
		// Intentional side-effect! The flag changes only when the assertions
		// are enabled, and the list is traversed only in that case.
		assert assertsEnabled = true;
		if (assertsEnabled) {
			for (final Token element : tokens) {
				assert element != null;
			}
		}
		assert prefix != null;

		final int lastPosition = tokens.size() - 1;
		// Text of the last token of the given phrase.
		final String lastToken = tokens.get(lastPosition).getToken();
		// Position where the last occurrence of the last token starts.
		final int cutPosition = phrase.lastIndexOf(lastToken);
		// With a single token the prefix replaces the whole phrase; with
		// more, everything before the last token is kept.
		final String newPhrase = (lastPosition == 0) ? prefix : phrase
				.substring(0, cutPosition) + prefix;

		assert newPhrase != null;

		return newPhrase;
	}

	/**
	 * Creates an array with the text of each candidate, in the same order of
	 * the list.
	 *
	 * @param candidates
	 *            List of candidates.
	 * @return Array of candidates.
	 */
	private String[]/* [!]! */returnCandidates(
			final List<Token>/* <!>! */candidates) {
		assert candidates != null;
		boolean assertsEnabled = false;
		// Intentional side-effect! The flag changes only when the assertions
		// are enabled.
		assert assertsEnabled = true;
		if (assertsEnabled) {
			for (final Token candidate : candidates) {
				assert candidate != null;
			}
		}

		final String[] ret = new String[candidates.size()];
		int position = 0;
		for (final Token candidate : candidates) {
			ret[position] = candidate.getToken();
			position += 1;
		}

		assert ret != null;
		// The flag still has the value given at the beginning.
		if (assertsEnabled) {
			for (final String text : ret) {
				assert text != null;
			}
		}

		return ret;
	}

	/**
	 * Calculates the prefix of a set of candidates. That means that it
	 * returns the longest beginning that is shared by the text of all the
	 * given tokens.
	 *
	 * @param tokens
	 *            List of phrase candidates.
	 * @return Prefix. Empty if there are not common part between phrases, or
	 *         if the list has no candidates.
	 */
	private String/* ! */samePrefixForPhrases(final List<Token> /* ! */tokens) {
		assert tokens != null;
		boolean assertsEnabled = false;
		// Intentional side-effect!
		assert assertsEnabled = true;
		if (assertsEnabled) {
			for (final Token token : tokens) {
				assert token != null;
			}
		}

		// Without candidates there is no common part. This also prevents
		// asking for the first element of an empty list.
		if (tokens.isEmpty()) {
			return "";
		}

		final String example = tokens.get(0).getToken();
		// Quantity of leading characters of the example shared by all the
		// candidates checked so far.
		int common = example.length();
		for (final Token token : tokens) {
			final String candidate = token.getToken();
			final int limit = Math.min(common, candidate.length());
			int matched = 0;
			while ((matched < limit)
					&& (candidate.charAt(matched) == example.charAt(matched))) {
				matched += 1;
			}
			common = matched;
			if (common == 0) {
				// Nothing in common, the other candidates cannot change it.
				break;
			}
		}
		final String prefix = example.substring(0, common);

		assert prefix != null;

		return prefix;
	}
}