I am trying to build a compiler using Java Code. I am stuck at the Lexical Analyzer Stage and am not getting the output I want.
The Code:
1)Lexical Analyzer
public class LexicalAnalyzer {
int lexemebegin;
int state;
Symbol tokenValue;
TokenType currentTokenType;
String Shura;
int lineNumber;
int position=-1;
String lexeme;
SymbolTable Symb;
Scanner scan;
public LexicalAnalyzer(SymbolTable hash)
{
Symb=hash;
}
public TokenType nextToken()
{
char c;
state = 0;
c = nextChar();
while (true)//there is a token
{
switch (state)
{
case 0: if (c=='<')
state=1;
else
if(c=='=')
{
tokenValue = Symb.get("=");
return currentTokenType = TokenType.EQ;
}
else
if(c=='>')
state=6;
else
fail();
break;
case 1:
{
c = nextChar ( );
if (c=='=')
{
tokenValue = Symb.get("<=");
return currentTokenType = TokenType.GE;
}
else
if (c=='>')
{
tokenValue = Symb.get("<>");
return currentTokenType = TokenType.NE;
}
else
{
retract();
tokenValue= Symb.get("<");
return currentTokenType = TokenType.LT;
}
}
case 6:
{
c = nextChar ( );
if (c=='=')
{
tokenValue= Symb.get(">=");
return currentTokenType = TokenType.GE;
}
else
{
retract();
tokenValue = Symb.get(">");
return currentTokenType = TokenType.GT;
}
}
case 9: if ((c>='A' && c<='Z')||(c>='a' && c<='z'))
{
lexeme = "" +c;
state = 10;
}
else
fail();
break;
case 10:
{
c = nextChar();
while ((c>='A' && c<='Z')||(c>='a' && c<='z')||(c>='0' && c<='9'))
{
lexeme = lexeme + c;
c = nextChar();
}
//retract();
if (Symb.containsKey(lexeme))
tokenValue = Symb.get(lexeme);
else
{
tokenValue=new Symbol(TokenType.ID,lineNumber,position,lexeme);
Symb.put(lexeme, tokenValue);
}
return currentTokenType= TokenType.ID;
}
case 12:
if (c==':')
state=13;
else
if (c==';')
{
tokenValue = Symb.get(";");
return currentTokenType = TokenType.SCOLON;
}
else
if (c=='.')
{
tokenValue = Symb.get(".");
return currentTokenType = TokenType.PERIOD;
}
else
if (c==',')
{
tokenValue = Symb.get(",");
return currentTokenType = TokenType.COMMA;
}
else
if (c=='(')
{
tokenValue = Symb.get("(");
return currentTokenType = TokenType.OP;
}
else
if (c==')')
{
tokenValue = Symb.get(")");
return currentTokenType = TokenType.CP;
}
else
fail();
break;
case 13:
{
c = nextChar();
if (c=='=')
{
tokenValue = Symb.get(":"+c);
return currentTokenType = TokenType.ASSIGN;
}
else
{
retract();
tokenValue= Symb.get(":");
return currentTokenType = TokenType.COLON;
}
}
case 21:
if (c=='+')
{
tokenValue = Symb.get("+");
return currentTokenType = TokenType.PLUS;
}
else
if (c=='-')
{
tokenValue = Symb.get("-");
return currentTokenType = TokenType.MINUS;
}
else
if (c=='*')
{
tokenValue = Symb.get("*");
return currentTokenType = TokenType.MULT;
}
else
if (c=='/')
{
tokenValue = Symb.get("/");
return currentTokenType = TokenType.DIV;
}
else
fail();
break;
case 26:
if (c>='0' && c<='9')
{
lexeme = "" + c;
state = 27;
}
else
fail();
break;
case 27:
{
c = nextChar();
while (c>='0' && c<='9')
{
lexeme = lexeme + c;
c = nextChar();
}
retract();
if (Symb.containsKey(lexeme))
tokenValue = Symb.get(lexeme);
else
{
tokenValue=new Symbol(TokenType.NUM,lineNumber,position,lexeme);
Symb.put(lexeme, tokenValue);
}
return currentTokenType = TokenType.NUM;
}
case 29:
if(c==' ')
state = 30;
else
return currentTokenType = TokenType.DELIM;
break;
case 30:
{
c = nextChar();
while (c==' ')
c = nextChar();
state = 0;
break;
}
}
}
}
//îçæéø àú òøê äèå÷ï
public Symbol GetTokenValue()
{
return this.tokenValue;
}
//÷åøà ùåøåú åî÷ãí àåúï
public char nextChar()
{
position++;
if (position >= Shura.length())//end on line
if(scan.hasNext())
{
Shura=scan.nextLine();
lineNumber++;
position=0;
return Shura.charAt(position);
}
else // end of file
{
currentTokenType = TokenType.FINISH;
return (' ');
}
else //middle of line
return Shura.charAt(position);
}
//î÷èéï àú äîé÷åí á1
public void retract()
{
position--;
}
//îùðä àú äòøê ùì äñèééè
public void fail()
{
switch (state)
{
case 0: state=9; break;
case 9: state=12; break;
case 12: state=21; break;
case 21: state=26; break;
case 26: state=29; break;
}
}
//îãôéñ àú äèå÷ï
public void PrintToken()
{
System.out.println(tokenValue.value.toString());
}
public void inputFileName(String fname)
{
try
{
scan=new Scanner(new File(fname));
Shura=scan.nextLine();
}
catch (Exception e)
{
System.out.println (e.getMessage());
}
}
}
2)Symbol Table:
enum TokenType {NUMBER, RESERVEDWORD, ID, LT, GT, EQ, LE, GE, NE, OP, CP, ASSIGN, PLUS, MINUS, MULT, DIV, DELIM, COMMA, COLON, PERIOD, SCOLON, NUM, FINISH}
public class SymbolTable
{
Hashtable <String, Symbol> hash;
public Symbol get (String str)
{
return hash.get(str);
}
public boolean containsKey(String lexeme)
{
return hash.containsKey(lexeme);
}
public void put (String lexeme,Symbol sym)
{
hash.put(lexeme, sym);
}
public SymbolTable ( )
{
hash = new Hashtable <String,Symbol> ();
//àéúçåì èáìú äòøáåì
Symbol semel = new Symbol(TokenType.RESERVEDWORD, 0, 0, "program");
hash.put("program", semel);
semel = new Symbol(TokenType.RESERVEDWORD, 0, 0, "var");
hash.put("var", semel);
semel = new Symbol(TokenType.RESERVEDWORD, 0, 0, "begin");
hash.put("begin", semel);
semel = new Symbol(TokenType.RESERVEDWORD, 0, 0, "end");
hash.put("end", semel);
semel = new Symbol(TokenType.RESERVEDWORD, 0, 0, "if");
hash.put("if", semel);
semel = new Symbol(TokenType.RESERVEDWORD, 0, 0, "then");
hash.put("then", semel);
semel = new Symbol(TokenType.RESERVEDWORD, 0, 0, "else");
hash.put("else", semel);
semel = new Symbol(TokenType.RESERVEDWORD, 0, 0, "while");
hash.put("while", semel);
semel = new Symbol(TokenType.RESERVEDWORD, 0, 0, "do");
hash.put("do", semel);
semel = new Symbol(TokenType.RESERVEDWORD, 0, 0, "integer");
hash.put("integer", semel);
semel = new Symbol(TokenType.RESERVEDWORD, 0, 0, "read");
hash.put("read", semel);
semel = new Symbol(TokenType.RESERVEDWORD, 0, 0, "write");
hash.put("write", semel);
semel = new Symbol(TokenType.RESERVEDWORD, 0, 0, "or");
hash.put("or", semel);
semel = new Symbol(TokenType.RESERVEDWORD, 0, 0, "and");
hash.put("and", semel);
semel = new Symbol(TokenType.RESERVEDWORD, 0, 0, "not");
hash.put("not", semel);
semel = new Symbol(TokenType.ASSIGN, 0, 0, ":=");
hash.put(":=", semel);
semel = new Symbol(TokenType.SCOLON, 0, 0, ";");
hash.put(";", semel);
semel = new Symbol(TokenType.PERIOD, 0, 0, ".");
hash.put(".", semel);
semel = new Symbol(TokenType.COMMA, 0, 0, ",");
hash.put(",", semel);
semel = new Symbol(TokenType.OP, 0, 0, "(");
hash.put("(", semel);
semel = new Symbol(TokenType.CP, 0, 0, ")");
hash.put(")", semel);
semel = new Symbol(TokenType.COLON, 0, 0, ":");
hash.put(":", semel);
semel = new Symbol(TokenType.LT, 0, 0, "<");
hash.put("<", semel);
semel = new Symbol(TokenType.GT, 0, 0, ">");
hash.put(">", semel);
semel = new Symbol(TokenType.LE, 0, 0, "<=");
hash.put("<=", semel);
semel = new Symbol(TokenType.GE, 0, 0, ">=");
hash.put(">=", semel);
semel = new Symbol(TokenType.EQ, 0, 0, "=");
hash.put("=", semel);
semel = new Symbol(TokenType.NE, 0, 0, "<>");
hash.put("<>", semel);
semel = new Symbol(TokenType.PLUS, 0, 0, "+");
hash.put("+", semel);
semel = new Symbol(TokenType.MINUS, 0, 0, "-");
hash.put("-", semel);
semel = new Symbol(TokenType.MULT, 0, 0, "*");
hash.put("*", semel);
semel = new Symbol(TokenType.DIV, 0, 0, "/");
hash.put("/", semel);
}
}
3) Class Symbol:
class Symbol
{
TokenType sym; //token type
int lineNumber;
int position;
Object value; //semantic value of token
Symbol (TokenType symb, int ln, int pos, Object val)
{
sym = symb;
lineNumber = ln;
position = pos;
value = val;
}
}
4)Main Class:
public class Run
{
public static void main(String[] args)
{
try
{
Scanner input = new Scanner (System.in);
String fname;
SymbolTable St = new SymbolTable();
LexicalAnalyzer la = new LexicalAnalyzer(St);
System.out.println("Type in the file name you wish to Compile");
fname = input.nextLine();
la.inputFileName(fname);
TokenType currentToken=la.nextToken();
while(currentToken != TokenType.FINISH)
{
la.PrintToken();
System.out.println();
currentToken=la.nextToken();
}
}
catch (Exception e)
{
System.out.println(e.getMessage());
}
}
}
The results of the code when run on a text file has a few words repeating themselves. And it doesn't stop running.
If anyone can give me a little direction on where to continue from here I would greatly appreciate it.
Thank You!!