package tools;

/** Tokenizer that works for programming languages with `%` line comments.
  *
  * Splits the input into whitespace-separated words; each character in
  * `delimiters` is returned as a one-character token of its own. Text from
  * a `'%'` to the end of the line is skipped as a comment.
  *
  * @param in         stream of input characters
  * @param delimiters characters that each form a single-character token
  */
class Tokenizer(in: Iterator[Char], delimiters: String) extends Iterator[String] {

  /** Sentinel marking end of input (NUL, which never appears as a token char). */
  val EOI: Char = 0

  /** Is ch a comment-start character? */
  def isComment(ch: Char): Boolean = ch == '%'

  /** Advance the lookahead to the next character. */
  def nextChar(): Unit = {
    ch = fetchChar()
  }

  /** Fetch the next character from the input, or EOI when exhausted. */
  private def fetchChar(): Char =
    if (in.hasNext) in.next() else EOI

  /** The one-character lookahead currently examined. */
  private var ch: Char = fetchChar()

  /** Is ch one of the configured delimiter characters? */
  def isDelimiter(ch: Char): Boolean = delimiters.indexOf(ch) >= 0

  /** Does the iteration have any more strings to return? */
  def hasNext: Boolean = ch != EOI

  /** Reused buffer for accumulating the next token. */
  private val buf = new StringBuffer

  /** Return the next token, or "" once the input is exhausted. */
  def next(): String = {
    // Skip whitespace and comments before the next token.
    while (ch != EOI && (ch <= ' ' || isComment(ch))) {
      if (ch <= ' ') {
        nextChar()
      } else {
        // Eat EOPL-style comments: '%' up to (but not past) end of line.
        // BUG FIX: original used `ch != EOI || ch != '\n'`, which is true for
        // every character, so this loop never terminated once a comment began.
        while (ch != EOI && ch != '\n') nextChar()
      }
    }
    if (ch == EOI) {
      ""
    } else if (isDelimiter(ch)) {
      // A delimiter is a token by itself.
      val ret = ch.toString
      nextChar()
      ret
    } else {
      // Accumulate a word up to whitespace, EOI, a delimiter, or a comment.
      buf.setLength(0)
      buf.append(ch)
      nextChar()
      while (ch > ' ' && ch != EOI && !isDelimiter(ch) && !isComment(ch)) {
        buf.append(ch)
        nextChar()
      }
      buf.toString
    }
  }
}

/** Testing for the Tokenizer: tokenizes a sample input, one token per line. */
object TokenizerTest {
  def main(args: Array[String]): Unit = {
    val input = " +  (i, add1(v, x))  342  % a comment\n 541 + ( sub1 \n ( x) )\n"
    // `.iterator` replaces the long-removed `.elements` on strings.
    val tokens: Iterator[String] = new Tokenizer(input.iterator, "(),;")
    for (s <- tokens)
      Console.println(s)
  }
}
