SyntacticAnalysis.java (7486B)
1 import java.io.File; 2 import java.io.FileNotFoundException; 3 import java.util.Scanner; 4 import java.util.StringTokenizer; 5 6 public class SyntacticAnalysis { 7 public static class Parser { 8 private StringBuilder buffer; 9 private LexicalAnalyzer lexer = new LexicalAnalyzer(); 10 11 private class LexicalAnalyzer { 12 // Only read tokens that *must* be read 13 public String read() { 14 if (buffer.length() == 0) return ""; 15 16 // Lexeme will be from beginning of buffer (location of current token) to where whitespace is 17 int i = buffer.indexOf(" "); 18 String lexeme = buffer.substring(0, i); 19 20 // Delete lexeme from buffer since it is now stored in variable 21 buffer.delete(0, i + 1); 22 23 //Print what tokens are read for debugging purposes 24 //System.out.println(lexeme); 25 26 return lexeme; 27 } 28 29 /* Peek without deleting when choice is an option 30 * E.g., A factor token can be identifier, integer constant, or expression 31 */ 32 public String lookAhead() { 33 int i = buffer.indexOf(" "); 34 String lexeme = buffer.substring(0, i); 35 36 return lexeme; 37 } 38 } 39 40 public Parser(File file) { 41 Scanner sc = null; 42 43 try { 44 sc = new Scanner(file); 45 buffer = new StringBuilder((int)file.length()); 46 47 while(sc.hasNext()) { 48 String token = sc.next(); 49 50 // Since token character sequence may contain characters such as ()+-*/=; 51 // We need to further break down token to separate special character and rest 52 // E.g., if(sum1<sum2) token will be further tokenized into if ( sum1 < sum2 ) 53 // This way spacing doesn't matter 54 StringTokenizer tokens = new StringTokenizer(token, "()+-*/=;", true); 55 56 while (tokens.hasMoreTokens()) 57 buffer.append(tokens.nextToken() + " "); 58 } 59 } catch (FileNotFoundException fnfe) { 60 System.out.print("File not found."); 61 fnfe.printStackTrace(); 62 } finally { 63 sc.close(); 64 } 65 } 66 67 // Returns program() since <program> -> program begin <statement_list> end is checked first 68 public boolean isValidCode() { 69 return program(); 70 } 71 72 private boolean program() { 73 // Every program must have program followed by begin 74 if (!lexer.read().toLowerCase().equals("program")) return false; 75 if (!lexer.read().toLowerCase().equals("begin")) return false; 76 77 if (!statementList()) return false; 78 79 // Every program must finish with end keyword 80 if (!lexer.read().toLowerCase().equals("end")) return false; 81 82 return true; 83 } 84 85 private boolean statementList() { 86 // At least one statement must be present in a statement list 87 if (!statement()) return false; 88 89 // Semicolon must be present if additional statements follow 90 while (lexer.lookAhead().equals(";")) { 91 lexer.read(); 92 93 if (!statement()) return false; 94 } 95 96 return true; 97 } 98 99 private boolean statement() { 100 // Check for assignment statement 101 // Not using variable() method because we do not want to delete token 102 if (lexer.lookAhead().matches("[a-zA-Z][a-zA-Z0-9]*") && !isReservedWord(lexer.lookAhead())) { 103 if (!assignmentStatement()) return false; 104 } else if (lexer.lookAhead().equals("if")) { // Check for if statement 105 if (!ifStatement()) return false; 106 } else if (lexer.lookAhead().equals("loop")) { // Check for loop statement 107 if (!loopStatement()) return false; 108 } else { 109 return false; // Can't be a statement if not any of the above 110 } 111 112 return true; 113 } 114 115 // <assignment_statement> -> <var> = <expr> 116 private boolean assignmentStatement() { 117 if (!variable()) return false; 118 if (!lexer.read().equals("=")) return false; 119 if (!expression()) return false; 120 121 return true; 122 } 123 124 // <if_statement> -> if (<logic_expr>) then <stmnt> 125 private boolean ifStatement() { 126 if (!lexer.read().equals("if")) return false; 127 if (!lexer.read().equals("(")) return false; 128 if (!logicExpression()) return false; 129 if (!lexer.read().equals(")")) return false; 130 if (!lexer.read().equals("then")) return false; 131 if (!statement()) return false; 132 133 return true; 134 } 135 136 // <loop_statement> -> loop (<logic_expr>) <statement> 137 private boolean loopStatement() { 138 if (!lexer.read().equals("loop")) return false; 139 if (!lexer.read().equals("(")) return false; 140 if (!logicExpression()) return false; 141 if (!lexer.read().equals(")")) return false; 142 if (!statement()) return false; 143 144 return true; 145 } 146 147 // <expr> -> <term> {(+|-) <term>} 148 private boolean expression() { 149 if (!term()) return false; 150 151 // Since + is a regex key symbol it must be escaped 152 while (lexer.lookAhead().matches("\\+|-")) { 153 lexer.read(); 154 155 if (!term()) return false; 156 } 157 158 return true; 159 } 160 161 // <logic_expr> -> <var> {(<|>) <var>} 162 private boolean logicExpression() { 163 if (!variable()) return false; 164 if (!(lexer.lookAhead().matches("<|>"))) return false; 165 166 lexer.read(); 167 168 if (!variable()) return false; 169 170 return true; 171 } 172 173 // <term> -> <factor> {(*|/) <factor>} 174 private boolean term() { 175 if (!factor()) return false; 176 177 // * is also a key symbol in regex 178 while (lexer.lookAhead().matches("\\*|/")) { 179 lexer.read(); 180 181 if (!factor()) return false; 182 } 183 184 return true; 185 } 186 187 // <factor> -> <identifier> | <int_const> | <expr> 188 private boolean factor() { 189 if (!identifier()) { 190 if (!integerConstant()) { 191 if (!lexer.lookAhead().equals("(")) return false; 192 193 lexer.read(); 194 195 if (!expression()) return false; 196 197 if (!lexer.lookAhead().equals(")")) return false; 198 199 lexer.read(); 200 } 201 202 return true; 203 } 204 205 return true; 206 } 207 208 // <var> -> <identifier> 209 private boolean variable() { 210 if (!identifier()) return false; 211 212 return true; 213 } 214 215 private boolean identifier() { 216 // Regular expression to determine valid identifier 217 // [a-zA-Z] means starts with any letter 218 // [a-zA-Z0-9]* means any amount of alphanumeric characters (0 - infinity) 219 if (!lexer.lookAhead().matches("[a-zA-Z][a-zA-Z0-9]*") && !isReservedWord(lexer.lookAhead())) return false; 220 221 lexer.read(); 222 223 return true; 224 } 225 226 // A keyword is a word that cannot be used as an identifier 227 // This method is just as a safety measure and should be a feature of code parsers 228 private boolean isReservedWord(String identifier) { 229 // Regex will recognize these as valid identifiers when they really aren't 230 final String[] reservedWords = {"program", "begin", "end", "if", "loop"}; 231 232 for (String rw: reservedWords) { 233 // Identifier is invalid if it is any of the above 234 if (identifier.toLowerCase().equals(rw)) return true; 235 } 236 237 return false; 238 } 239 240 // Maintaining consistency with methods for most symbols -- doesn't really need own method 241 private boolean integerConstant() { 242 // A regex to detect if string has only digits (i.e. is integer constant) 243 // \\d is escape sequence for digit and .* means any amount 244 if (!lexer.lookAhead().matches(".*\\d.*")) return false; 245 246 lexer.read(); 247 248 return true; 249 } 250 } 251 252 // Test method 253 public static void main(String[] args) { 254 Scanner sc = new Scanner(System.in); 255 256 System.out.print("\nEnter file name: "); 257 String name = sc.next(); 258 259 File file = new File(name); 260 261 Parser parser = new Parser(file); 262 263 if (parser.isValidCode()) 264 System.out.println("\nThere are no syntax errors in the program.\n"); 265 else 266 System.out.println("\nThe program contains one or more syntax errors.\n"); 267 } 268 }