I. Overview of Symbol Tables and Declaration Checking A. Goals ------------------------------------------ GOALS OF DECLARATION CHECKING Check that: 1. Each declaration has a 2. Each use of an identifier is ------------------------------------------ What should be done if the programming language doesn't have declarations? B. What is a symbol table 1. definition and data structures ------------------------------------------ SYMBOL TABLE def: a *symbol table* is Data Structures: ------------------------------------------ What kind of data structures/algorithms could be used for a symbol table? Which is easiest to implement? When used to check a program, which is more common: adding/inserting a new mapping (name, attributes) or looking up the attributes of a name? 2. Attributes ------------------------------------------ ATTRIBUTES def: an *attribute* is Examples of attributes: ------------------------------------------ a. operations ------------------------------------------ OPERATIONS FOR SYMBOL TABLES void initialize(); int size(); bool full(); bool defined(name); id_use *lookup(name); bool defined_in_current_scope(name); void insert(name, id_attrs); void enter_scope(); void leave_scope(); ------------------------------------------ 3. interaction with nested scopes a. A program can have multiple potential scopes ------------------------------------------ SYMBOL TABLES AND POTENTIAL SCOPES def: a declaration's *scope* is def: a *potential scope* is Grammar of a PL/0 program ::= . ::= { } ::= procedure ; ; Example: procedure p; var x; x := 2; begin x := 1; # is this use declared? call p end. Example: What's the final value of x? var x; procedure p; var x; x := 2; procedure q; x := 3; begin x := 1; call q; call p; end. ------------------------------------------ Is a block a potential scope? What is the final value of the global x in this program? b. strategies for nested scopes ------------------------------------------ STRATEGIES FOR NESTED SCOPES Needs: Approaches: - [Active Deletion] - [Stack-Based] - [Functional] - [Decorations in AST] ------------------------------------------ II. Declaration Checking A. goals ------------------------------------------ GOALS OF DECLARATION CHECKING Check that: 1. Each declaration has a unique name (in its potential scope) 2. Each use of an identifier is for a name that has already been declared (in a surrounding scope) ------------------------------------------ B. process ------------------------------------------ PROCESS OF DECLARATION CHECKING In each potential scope: In each statement, expression: ------------------------------------------ 1. example using the FLOAT language ------------------------------------------ EXAMPLE: FLOAT LANGUAGE See: http://www.cs.ucf.edu/~leavens/COP3402/ example-code/index.html#FloatCalc ------------------------------------------ a. ASTs ------------------------------------------ ASTs for the FLOAT language (1/3) // file ast.h typedef struct { file_location *file_loc; void *next; // for lists } generic_t; // empty ::= typedef struct { file_location *file_loc; } empty_t; // ident typedef struct ident_s { file_location *file_loc; struct ident_s *next; // for lists const char *name; } ident_t; // floating point numbers typedef struct { file_location *file_loc; const char *text; word_type value; } number_t; // tokens as ASTs typedef struct { file_location *file_loc; const char *text; int code; } token_t; // tokens as ASTs typedef struct { file_location *file_loc; const char *text; int code; } token_t; // kinds of expressions typedef enum { expr_bin_op, expr_ident, expr_number, expr_logical_not } expr_kind_e; // forward declaration for expressions struct expr_s; // expr ::= expr op expr // arithOp ::= + | - | * | / typedef struct { file_location *file_loc; struct expr_s *expr1; token_t op; struct expr_s *expr2; } binary_op_expr_t; // expr ::= expr arithOp expr | ident | number typedef struct expr_s { file_location *file_loc; expr_kind_e expr_kind; union expr_u { binary_op_expr_t binary; ident_t ident; number_t number; struct expr_s *logical_not; } data; } expr_t; // ... ------------------------------------------ ------------------------------------------ ASTs for the FLOAT language (2/3) // file ast.h (continued) // idents typedef struct { file_location *file_loc; ident_t *idents; } idents_t; // varType ::= float | bool typedef enum {float_type, bool_type } var_type_e; // varDecl ::= varType ident typedef struct var_decl_s { file_location *file_loc; struct var_decl_s *next; // for lists var_type_e var_type; idents_t idents; } var_decl_t; // var-decls ::= { varDecl } typedef struct { file_location *file_loc; var_decl_t *var_decls; } var_decls_t; // kinds of statements typedef enum { assign_stmt, begin_stmt, if_stmt, read_stmt, write_stmt } stmt_kind_e; // forward declaration struct stmt_s; // assignStmt ::= ident = expr typedef struct { file_location *file_loc; const char *name; struct expr_s *expr; } assign_stmt_t; // stmts ::= { stmts } typedef struct { file_location *file_loc; struct stmt_s *stmts; } stmts_t; // beginStmt ::= begin varDecls stmts typedef struct { file_location *file_loc; var_decls_t var_decls; stmts_t stmts; } begin_stmt_t; // ifStmt ::= if expr stmt typedef struct { file_location *file_loc; expr_t expr; struct stmt_s *body; } if_stmt_t; // readStmt ::= read ident typedef struct { file_location *file_loc; const char *name; } read_stmt_t; // writeStmt ::= write expr typedef struct { file_location *file_loc; expr_t expr; } write_stmt_t; // stmt ::= assignStmt | beginStmt // | ifStmt | readStmt | writeStmt typedef struct stmt_s { file_location *file_loc; struct stmt_s *next; // for lists stmt_kind_e stmt_kind; union { assign_stmt_t assign_stmt; begin_stmt_t begin_stmt; if_stmt_t if_stmt; read_stmt_t read_stmt; write_stmt_t write_stmt; } data; } stmt_t; ------------------------------------------ ------------------------------------------ ASTs for the FLOAT language (3/3) // file ast.h // ... // program ::= varDecls stmt typedef struct block_s { file_location *file_loc; var_decls_t var_decls; stmt_t stmt; } program_t; // The AST type used by bison typedef union AST_u { generic_t generic; var_decls_t var_decls; var_decl_t var_decl; idents_t idents; stmt_t stmt; assign_stmt_t assign_stmt; begin_stmt_t begin_stmt; if_stmt_t if_stmt; read_stmt_t read_stmt; write_stmt_t write_stmt; stmts_t stmts; expr_t expr; binary_op_expr_t binary_op_expr; token_t token; number_t number; ident_t ident; empty_t empty; } AST; // file parser_types.h // ... typedef AST YYSTYPE; // ... ------------------------------------------ b. attributes (type id_attrs) ------------------------------------------ THE TYPE ID_ATTRS FOR ATTRIBUTES // file id_attrs.h // attributes of identifiers typedef struct { file_location file_loc; var_type_e var_type; // num. of variable decls before // this one in this scope unsigned int offset_count; } id_attrs; // Allocate and return an id_attrs struct // with field file_loc set to floc, // var_type set to vt, // and offset_count set to ofst_cnt. // This should never return NULL. extern id_attrs *id_attrs_loc_create( file_location floc, var_type_e vt, unsigned int ofst_cnt); ------------------------------------------ c. symbol table ------------------------------------------ SYMBOL TABLE API // file symtab.h // Maximum nesting of potential scopes #define MAX_NESTING 100 // initialize the symbol table extern void symtab_initialize(); // Return the number of scopes // currently in the symbol table. extern unsigned int symtab_size(); // Return the current scope's // count of variables declared extern unsigned int symtab_scope_loc_count(); // Return the current scope's size // (the number of declared ids). extern unsigned int symtab_scope_size(); // Is the current scope full? extern bool symtab_scope_full(); // Return the current nesting level // (num. of symtab_enter_scope() calls // - num. of symtab_leave_scope() calls extern unsigned int symtab_current_nesting_level(); // Is the symbol table itself full? extern bool symtab_full(); // Is name declared? // (this looks back through all scopes) extern bool symtab_declared(const char *name); // Is name declared in the current scope? // (this only looks in the current scope) extern bool symtab_declared_in_current_scope( const char *name); // Requires: attrs != NULL && // !symtab_declared_in_current_scope(name) // Add an association from the given name // to the given attributes extern void symtab_insert( const char *name, id_attrs *attrs); // Requires: !symtab_full() // Start a new scope (for a block) extern void symtab_enter_scope(); // Requires: !symtab_empty() extern void symtab_leave_scope(); // If name is declared, return // an id_use pointer for it, otherwise // return NULL if name isn't declared extern id_use *symtab_lookup( const char *name); ------------------------------------------ d. identifier uses (type id_use) ------------------------------------------ THE TYPE ID_USE // file id_use.h // An id_use struct gives information from // looking up name in the symbol table: // the attributes (id_attrs *), // and the number of lexical levels out // from the current scope, // where the name was declared. typedef struct { id_attrs *attrs; unsigned int levelsOutward; } id_use; // Requires: attrs != NULL // Allocate and return an id_use struct // containing attrs and levelsOut. extern id_use *id_use_create( id_attrs *attrs, unsigned int levelsOut); ------------------------------------------ e. scope mappings (type scope_t) ------------------------------------------ SCOPE MAPPING API // file scope.h // Maximum number of declarations // that can be stored in a scope #define MAX_SCOPE_SIZE 4096 typedef struct { const char *id; id_attrs *attrs; } scope_assoc_t; // Invariant: 0 <= size < MAX_SCOPE_SIZE; typedef struct scope_s { unsigned int size; // num. of associations in this scope unsigned int loc_count; scope_assoc_t *entries[MAX_SCOPE_SIZE]; } scope_t; // Allocate and return a fresh scope_t // (Bail with an error if no space) extern scope_t *scope_create(); // The num. of non-procedure // declartions that have been added to s extern unsigned int scope_loc_count( scope_t *s); // Return the number of declared identifier // associations in s extern unsigned int scope_size(scope_t *s); // Is the current scope full? extern bool scope_full(scope_t *s); // Is the given name declared in s? extern bool scope_declared(scope_t *s, const char *name); // Requires: attrs != NULL && // !scope_declared(name); // Add an association from name to attrs, // store the next_loc_offset value into // attrs->loc_offset, then increase // the next_loc_offset for s by 1. extern void scope_insert(scope_t *s, const char *name, id_attrs *attrs); // Return (a pointer to) the attributes // of the given name in s // or NULL if name is not declared in s extern id_attrs *scope_lookup(scope_t *s, const char *name); ------------------------------------------ f. Building the symbol table i. programs ------------------------------------------ BUILDING THE SYMBOL TABLE Abstract Syntax of FLOAT: program ::= varDecls stmt // in file ast.h typedef struct block_s { file_location *file_loc; var_decls_t var_decls; stmt_t stmt; } program_t; // In file scope_check.c // ... #include "ast.h" #include "symtab.h" // ... // Build the symbol table for prog // and check for duplicate declarations // or uses of undeclared identifiers void scope_check_program(program_t prog) { symtab_enter_scope(); scope_check_varDecls(prog.var_decls); scope_check_stmt(prog.stmt); symtab_leave_scope(); } ------------------------------------------ ------------------------------------------ WALKING THE ASTS: UNPARSER VS. SCOPE_CHECK // file unparser.c // ... #include "ast.h" // ... // Unparse the given program AST // with output going to the file out void unparseProgram(FILE *out, program_t prog) { unparseVarDecls(out, prog.var_decls, 0); unparseStmt(out, prog.stmt, 0); } // In file scope_check.c // ... #include "ast.h" #include "symtab.h" // ... // Build the symbol table for prog // and check for duplicate declarations // or uses of undeclared identifiers void scope_check_program(program_t prog) { symtab_enter_scope(); scope_check_varDecls(prog.var_decls); scope_check_stmt(prog.stmt); symtab_leave_scope(); } ------------------------------------------ Why are both of these similar? ii. variable declarations (1). handling the lists ------------------------------------------ LISTS OF VARIABLE DECLARATIONS // file unparser.c // ... // Unparse the var_decls_t vds to out // with the given nesting level // (note that the list is empty, // then nothing is printed) void unparseVarDecls(FILE *out, var_decls_t vds, int level) { var_decl_t *vdp = vds.var_decls; while (vdp != NULL) { unparseVarDecl(out, *vdp, level); vdp = vdp->next; } } // file scope_check.c // build the symbol table // and check the declarations in vds void scope_check_varDecls(var_decls_t vds) { } ------------------------------------------ (2). handling each variable declaration ------------------------------------------ INDIVIDUAL VARIABLE DECLARATIONS (1/2) // Abstract syntax: // varDecl ::= varType idents // varType ::= float | bool // file ast.h // ... // varType ::= float | bool typedef enum {float_type, bool_type } var_type_e; // varDecl ::= varType ident typedef struct var_decl_s { file_location *file_loc; struct var_decl_s *next; // for lists var_type_e var_type; idents_t idents; } var_decl_t; // file unparser.c // ... // Unparse a single var-decl, vd, to out, // indented for the given nesting level void unparseVarDecl(FILE *out, var_decl_t vd, int level) { indent(out, level); fprintf(out, "%s ", vt2str(vd.var_type)); unparseIdents(out, vd.idents); semiAndNewline(out); } // Add declarations for the names in vd, // reporting duplicate declarations void scope_check_varDecl(var_decl_t vd) { } ------------------------------------------ ------------------------------------------ INDIVIDUAL VARIABLE DECLARATIONS (2/2) // file unparser.c // ... // unparse the identififers in idents, // separated by commas, to out void unparseIdents(FILE *out, idents_t idents) { ident_t *idp = idents.idents; bool printed_any = false; while (idp != NULL) { if (printed_any) { fprintf(out, ", "); } unparseIdent(out, *idp); printed_any = true; idp = idp->next; } } // in scope_check.c // ... // Add declarations for the names in ids // to current scope as type vt // reporting any duplicate declarations void scope_check_idents(idents_t ids, var_type_e vt) { } ------------------------------------------ ------------------------------------------ DECLARING A SINGLE VARIABLE // file unparser.c // ... // Unparse the given identifier to out void unparseIdent(FILE *out, ident_t id) { fprintf(out, "%s", id.name); } // file scope_check.c // ... // Add declaration for id // to current scope as type vt // reporting if it's a duplicate declaration void scope_check_declare_ident(ident_t id, var_type_e vt) { } ------------------------------------------ g. Checking for undeclared identifiers ------------------------------------------ CHECKING FOR UNDECLARED IDENTIFIERS Where would they be? So need to: ------------------------------------------ i. undeclared identifiers in statments (1). ASTs for statements ------------------------------------------ CHECKING STATEMENTS FOR UNDECLARED IDs // file ast.h // ... // kinds of statements typedef enum {assign_stmt, begin_stmt, if_stmt, read_stmt, write_stmt } stmt_kind_e; // forward declaration struct stmt_s; // assignStmt ::= ident = expr typedef struct { file_location *file_loc; const char *name; struct expr_s *expr; } assign_stmt_t; // stmts ::= { stmt } typedef struct { file_location *file_loc; struct stmt_s *stmts; } stmts_t; // beginStmt ::= begin varDecls stmts typedef struct { file_location *file_loc; var_decls_t var_decls; stmts_t stmts; } begin_stmt_t; // ... // stmt ::= assignStmt | beginStmt | ifStmt | readStmt | writeStmt typedef struct stmt_s { file_location *file_loc; struct stmt_s *next; // for lists stmt_kind_e stmt_kind; union { assign_stmt_t assign_stmt; begin_stmt_t begin_stmt; if_stmt_t if_stmt; read_stmt_t read_stmt; write_stmt_t write_stmt; } data; } stmt_t; ------------------------------------------ (2). checking statements ------------------------------------------ DECLARATION CHECKING OF STATEMENTS (1/2) // file unparser.c // ... // Unparse stmt to out, // indented for the given level. void unparseStmt(FILE *out, stmt_t stmt, int indentLevel) { switch (stmt.stmt_kind) { case assign_stmt: unparseAssignStmt(out, stmt.data.assign_stmt, indentLevel); break; case begin_stmt: unparseBeginStmt(out, stmt.data.begin_stmt, indentLevel); break; case if_stmt: // ... } } ------------------------------------------ ------------------------------------------ DECLARATION CHECKING OF STATEMENTS (2/2) // check the statement to make sure that // all idenfifiers used have been declared // (if not, then produce an error) void scope_check_stmt(stmt_t stmt) { } ------------------------------------------ ------------------------------------------ DECLARATION CHECKING ASSIGNMENT STATEMENTS // file unparser.c // ... // Unparse stmt to out // with indentation level given by level, void unparseAssignStmt(FILE *out, assign_stmt_t stmt, int level) { indent(out, level); fprintf(out, "%s = ", stmt.name); unparseExpr(out, *(stmt.expr)); semiAndNewline(out); } // check the statement for // undeclared identifiers void scope_check_assignStmt( assign_stmt_t stmt) { } ------------------------------------------ ------------------------------------------ CHECKING AN INDIVIDUAL IDENTIFIER // check that name has been declared, // if so, then return an id_use for it // otherwise, produce an error id_use *scope_check_ident_declared( file_location floc, const char *name) { } ------------------------------------------ ------------------------------------------ DECLARATION CHECKING OF BEGIN STATEMENTS // file unparser.c // ... // Unparse stmt to out // with indentation level given by level, void unparseBeginStmt(FILE *out, begin_stmt_t stmt, int level) { indent(out, level); fprintf(out, "{\n"); unparseVarDecls(out, stmt.var_decls, level+1); unparseStmts(out, stmt.stmts,level+1); indent(out, level); fprintf(out, "}\n"); } // file scope_check.c // ... // check the statement for // duplicate declarations and for // undeclared identifiers void scope_check_beginStmt(begin_stmt_t stmt) { } ------------------------------------------ Does the order matter in the scope checking? How do we know when to call symtab_enter_scope and symtab_leave_scope? h. checking expressions ------------------------------------------ DECLARATION CHECKING EXPRESSIONS (1/3) // file ast.h // ... // kinds of expressions typedef enum {expr_bin_op, expr_ident, expr_number, expr_logical_not } expr_kind_e; // forward declaration for expressions struct expr_s; // expr ::= expr op expr // op ::= == | != | < | <= | + | - | * | / typedef struct { file_location *file_loc; struct expr_s *expr1; token_t op; struct expr_s *expr2; } binary_op_expr_t; // expr ::= expr op expr | ident | number typedef struct expr_s { file_location *file_loc; expr_kind_e expr_kind; union expr_u { binary_op_expr_t binary; ident_t ident; number_t number; struct expr_s *logical_not; } data; } expr_t; ------------------------------------------ ------------------------------------------ DECLARATION CHECKING EXPRESSIONS (2/3) // file unparser.c // ... // Unparse the expression exp to out // adding parentheses to indicate nesting void unparseExpr(FILE *out, expr_t exp) { switch (exp.expr_kind) { case expr_bin_op: unparseBinaryOpExpr(out, exp.data.binary); break; case expr_ident: unparseIdent(out, exp.data.ident); break; case expr_number: unparseNumber(out, exp.data.number); break; case expr_logical_not: unparseLogicalNotExpr(out, *(exp.data.logical_not)); break; default: bail_with_error(/* ... */); break; } } ------------------------------------------ ------------------------------------------ DECLARATION CHECKING EXPRESSIONS (3/3) // file scope_check.c // ... // check the expresion to make sure that // all idenfifiers used have been declared // (if not, then produce an error) void scope_check_expr(expr_t exp) { } ------------------------------------------ What should be done for numbers? What should be done for logical not expressions? ------------------------------------------ DECLARATION CHECKING FOR BINARY OP EXPRs // check that all identifiers used in exp // have been declared // (if not, then produce an error) void scope_check_binary_op_expr(binary_op_expr_t exp) { scope_check_expr(*(exp.expr1)); // (note: no identifiers can occur in the operator) scope_check_expr(*(exp.expr2)); } ------------------------------------------ Is there anything you want look at to see what to do? Is there anything that should be done for the operator? ------------------------------------------ DECLARATION CHECKING FOR IDENTIFIER EXPRs // check id to make sure that // all it has been declared // (if not, then produce an error) void scope_check_ident_expr(ident_t id) { } ------------------------------------------ Is there anything you want look at to see what to do? 2. Summary of Declaration Checking ------------------------------------------ SUMMARY OF DECLARATION CHECKING Key concepts/data: Algorithm: ------------------------------------------