I am having some problems with my java code, i am trying to write a C compiler and am currently working on developing a string tokenizer for the C programming language using java regex. my problem is first my program wont recognize multiline comments and second i cant figure out how to implement a correct order of operations (for example if i have the number 5.5 in my C code it stores both 5.5 as a float and stores 5 as an int). Ive been working on this for a couple days and cant seem to figure it out, here is my code for the tokenizer:
import java.io.*;
import java.util.*;
import java.util.regex.*;
public class tokenizer
{
static int count;
public static void main(String[] args)
{
Scanner scan = new Scanner(System.in);
String file;
System.out.println("Enter file name:");
file = scan.nextLine();
try
{
FileInputStream fstream = new FileInputStream(file);
DataInputStream in = new DataInputStream(fstream);
BufferedReader br = new BufferedReader(new InputStreamReader(in));
String line;
Pattern pint = Pattern.compile("int");
Pattern pd = Pattern.compile("double");
Pattern pc = Pattern.compile("char");
Pattern pif = Pattern.compile("if");
Pattern pw = Pattern.compile("while");
Pattern pv = Pattern.compile("void");
Pattern plb = Pattern.compile("\\{");
Pattern prb = Pattern.compile("\\}");
Pattern plp = Pattern.compile("\\(");
Pattern prp = Pattern.compile("\\)");
Pattern plr = Pattern.compile("\\[");
Pattern prr = Pattern.compile("\\]");
Pattern ps = Pattern.compile(";");
Pattern pp = Pattern.compile("\\+");
Pattern pm = Pattern.compile("-");
Pattern pt = Pattern.compile("\\*");
Pattern pe = Pattern.compile("=");
Pattern p = Pattern.compile("\\.");
Pattern pa = Pattern.compile("&&");
Pattern po = Pattern.compile("\\|\\|");
Pattern pn = Pattern.compile("!");
Pattern pg = Pattern.compile(">");
Pattern pl = Pattern.compile("<");
Pattern pge = Pattern.compile(">=");
Pattern ple = Pattern.compile("<=");
Pattern prs = Pattern.compile(">>");
Pattern pls = Pattern.compile("<<");
Pattern ptr = Pattern.compile("->");
Pattern per = Pattern.compile("%");
Pattern pne = Pattern.compile("!=");
Pattern pnu = Pattern.compile("[0-9]+");
Pattern pfu = Pattern.compile("[-+]?[0-9]*\\.[0-9]+");
Pattern pcm = Pattern.compile("//.*");
Pattern pmc = Pattern.compile("/\\*(?:.|[\\n\\r])*?\\*/");
Pattern pvar = Pattern.compile("[a-z]([a-z]|[0-1]|_)*");
for(count = 1;(line = br.readLine()) != null; count ++)
{
tokenizeProgram(pcm, line);
tokenizeProgram(pmc, line);
tokenizeProgram(pint, line);
tokenizeProgram(pd, line);
tokenizeProgram(pc, line);
tokenizeProgram(pif, line);
tokenizeProgram(pw, line);
tokenizeProgram(pv, line);
tokenizeProgram(ps, line);
tokenizeProgram(pp, line);
tokenizeProgram(pm, line);
tokenizeProgram(pt, line);
tokenizeProgram(pe, line);
tokenizeProgram(p, line);
tokenizeProgram(pa, line);
tokenizeProgram(po, line);
tokenizeProgram(pn, line);
tokenizeProgram(pg, line);
tokenizeProgram(pl, line);
tokenizeProgram(pge, line);
tokenizeProgram(ple, line);
tokenizeProgram(prs, line);
tokenizeProgram(pls, line);
tokenizeProgram(ptr, line);
tokenizeProgram(per, line);
tokenizeProgram(pne, line);
tokenizeProgram(pfu, line);
tokenizeProgram(pnu, line);
tokenizeProgram(pvar, line);
tokenizeProgram(plb, line);
tokenizeProgram(prb, line);
tokenizeProgram(plp, line);
tokenizeProgram(prp, line);
tokenizeProgram(plr, line);
tokenizeProgram(prr, line);
}
fstream.close();
in.close();
br.close();
}catch (Exception e)
{
System.err.println("Error: " + e.getMessage());
}
}
public static Vector<?> tokenizeProgram(Pattern pattern, String str)
{
Vector<Object> tokens = new Vector<Object>();
Matcher matcher = pattern.matcher(str);
while(matcher.find())
{
tokens.add(new Tokens(matcher.group(), pattern, count));
System.out.println(((Tokens) tokens.lastElement()).toString());
}
return tokens;
}
}
and Tokens:
import java.util.regex.Pattern;
public class Tokens
{
String token;
Pattern name;
int line;
public Tokens(String t, Pattern pattern, int l)
{
token = t;
name = pattern;
line = l;
}
public String getToken()
{
return token;
}
public String getTokenName()
{
if(name.toString().equals("int"))
return "INT";
else if(name.toString().equals("double"))
return "DOUBLE";
else if(name.toString().equals("char"))
return "CHAR";
else if(name.toString().equals("boolean"))
return "BOOLEAN";
else if(name.toString().equals("if"))
return "IF";
else if(name.toString().equals("while"))
return "WHILE";
else if(name.toString().equals("void"))
return "VOID";
else if(name.toString().equals("\\{"))
return "L_BRACE";
else if(name.toString().equals("\\}"))
return "R_BRACE";
else if(name.toString().equals("\\("))
return "L_PARA";
else if(name.toString().equals("\\)"))
return "R_PARA";
else if(name.toString().equals("\\["))
return "L_BRACKET";
else if(name.toString().equals("\\]"))
return "R_BRACKET";
else if(name.toString().equals(";"))
return "SEMI";
else if(name.toString().equals("\\+"))
return "PLUS";
else if(name.toString().equals("-"))
return "MINUS";
else if(name.toString().equals("\\*"))
return "MULT";
else if(name.toString().equals("="))
return "EQ";
else if(name.toString().equals("\\."))
return "PERIOD";
else if(name.toString().equals("&&"))
return "AND";
else if(name.toString().equals("\\|\\|"))
return "OR";
else if(name.toString().equals("!"))
return "NOT";
else if(name.toString().equals(">"))
return "GT";
else if(name.toString().equals("<"))
return "LT";
else if(name.toString().equals(">="))
return "GT_EQ";
else if(name.toString().equals("<="))
return "LT_EQ";
else if(name.toString().equals(">>"))
return "RT_SHIFT";
else if(name.toString().equals("<<"))
return "LFT_SHIFT";
else if(name.toString().equals("->"))
return "PTR";
else if(name.toString().equals("%"))
return "PERCENT";
else if(name.toString().equals("!="))
return "NOT_EQ";
else if(name.toString().equals("[0-9]+"))
return "INT_NUM";
else if(name.toString().equals("[-+]?[0-9]*\\.[0-9]+"))
return "FLOAT_NUM";
else if(name.toString().equals("//.*"))
return "SL_COMMENT";
else if(name.toString().equals("/\\*(?:.|[\\n\\r])*?\\*/"))
return "ML_COMMENT";
else if(name.toString().equals("[a-z]([a-z]|[0-1]|_)*"))
return "ID";
else
return "Trash";
}
public int getLineNumber()
{
return line;
}
public String toString()
{
return "(" + getToken() + ", " + getTokenName() + ", " + getLineNumber() + ")";
}
}