编译原理之词法分析和语法分析
来源:互联网 发布:康耐视视觉检测软件 编辑:程序博客网 时间:2024/06/09 20:25
这个程序只花了一天写成,很多方面没有顾及到,但对于理解基本的实验原理和完成实验作业还是有点帮助的。代码实现了基于有限自动机的词法分析,并采用递归下降分析法按照 EBNF 文法实现语法分析、生成中间代码。(来自 SDU)
lexAnalysis.h
- /*
- * lexAnalysis.h
- *
- * Created on: 2014-12-2
- * Author: liuqiushan
- */
- #ifndef LEXANALYSIS_H_
- #define LEXANALYSIS_H_
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
/*
 * Token codes produced by the lexical analyser.
 * The numeric value of every code is written out so the mapping from
 * name to integer is visible at a glance.
 */
typedef enum _SYM
{
    /* keywords */
    _CONST     = 0,
    _VAR       = 1,
    _procedure = 2,
    _begin     = 3,
    _end       = 4,
    _if        = 5,
    _then      = 6,

    /* lexemes that carry text */
    _ID        = 7,   /* identifier, e.g. the 'a' in 'var a;' */
    _INT       = 8,   /* integer literal, e.g. the '10' in 'const a = 10;' */

    /* operators and punctuation */
    _ASSIGN    = 9,   /* '='  */
    _PLUS      = 10,  /* '+'  */
    _SUB       = 11,  /* '-'  */
    _STAR      = 12,  /* '*'  */
    _DIV       = 13,  /* '/'  */
    _LESS      = 14,  /* '<'  */
    _MORE      = 15,  /* '>'  */
    _LESSEQ    = 16,  /* '<=' */
    _MOREEQ    = 17,  /* '>=' */
    _DH        = 18,  /* ','  */
    _MD        = 19,  /* ':=' */
    _LEFT      = 20,  /* '('  */
    _RIGHT     = 21,  /* ')'  */
    _JH        = 22,  /* '#'  */
    _FH        = 23   /* ';'  */
} SYM;
- SYM getSYM(char* str);
- void appendStr(char* str, char* c, int p_str);
- int isABC(char* c);
- int isNumber(char* c);
- #endif /* LEXANALYSIS_H_ */
LexAnalysis.c
- /*
- * LexAnalysis.c
- *
- * Created on: 2014-12-2
- * Author: liuqiushan
- */
- #include "lexAnalysis.h"
- SYM getSYM(char* _str)
- {
- SYM temp = _ID; // default identifier
- if(!strcasecmp(_str, "CONST"))
- temp = _CONST;
- else if(!strcasecmp(_str, "VAR"))
- temp = _VAR;
- else if(!strcasecmp(_str, "procedure"))
- temp = _procedure;
- else if(!strcasecmp(_str, "begin"))
- temp = _begin;
- else if(!strcasecmp(_str, "end"))
- temp = _end;
- else if(!strcasecmp(_str, "if"))
- temp = _if;
- else if(!strcasecmp(_str, "then"))
- temp = _then;
- return temp;
- }
/*
 * Store the character that c points at into position p_str of str.
 * No terminator is written and no bounds are checked; both are left to
 * the caller.
 */
void appendStr(char str[], char* c, int p_str )
{
    char *slot = str + p_str;

    *slot = *c;
}
/*
 * Test whether the character c points at is an ASCII letter.
 *
 * c: pointer to the character to examine; NULL is treated as "not a letter".
 *
 * Returns 1 for 'a'..'z' and 'A'..'Z', 0 otherwise.
 * The range test relies on letters being contiguous, which holds for ASCII.
 */
int isABC(char* c)
{
    char ch;

    if (c == NULL)
        return 0;

    ch = *c;
    return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z');
}
/*
 * Test whether the character c points at is a decimal digit.
 *
 * c: pointer to the character to examine; NULL is treated as "not a digit".
 *
 * Returns 1 for '0'..'9', 0 otherwise. The C standard guarantees that the
 * ten digit characters are contiguous, so a range test is exact.
 */
int isNumber(char* c)
{
    char ch;

    if (c == NULL)
        return 0;

    ch = *c;
    return ch >= '0' && ch <= '9';
}
synAnalysis.h
- /*
- * synAnalysis.h
- *
- * Created on: 2014-12-2
- * Author: liuqiushan
- */
- #ifndef SYNANALYSIS_H_
- #define SYNANALYSIS_H_
/* Kind of entry stored in the constant/variable/procedure table. */
typedef enum _KIND
{
    PRO = 0,  /* procedure */
    CON = 1,  /* constant  */
    VAR = 2   /* variable  */
} CVPKind;
/*
 * PL/0 object instruction
 *
 * An instruction has three fields:
 *
 *     +-------+-------+-------+
 *     |   f   |   l   |   a   |
 *     +-------+-------+-------+
 *
 *   f  function code (one of the values below)
 *   l  difference between the level where a variable is declared and the
 *      level where it is used
 *   a  operand; its meaning depends on f (see each code)
 */
typedef enum _FunctionCode
{
    LIT = 0,  /* push a constant on the stack; a is the constant */
    LOD = 1,  /* push a variable on the stack; a is its address */
    STO = 2,  /* pop the stack top into a variable, e.g. for c := b + 10 */
    CAL = 3,  /* not described here; conventionally "call" in PL/0 */
    INT = 4,  /* create the data area; a is its size */
    JMP = 5,  /* not described here; conventionally "jump" in PL/0 */
    JPC = 6,  /* not described here; conventionally "conditional jump" in PL/0 */
    OPR = 7   /* operation selected by a, applied to the top two stack entries */
} FunctionCode;
/* Operation selector carried in the a field of an OPR instruction. */
typedef enum _OPR
{
    ADD   = 1,   /* + */
    SUB   = 2,   /* - */
    MUL   = 3,   /* * */
    DIV   = 4,   /* / */
    GT    = 5,
    LT    = 6,
    GE    = 7,
    LE    = 8,
    UE    = 9,
    EQ    = 10,
    WRITE = 11,
    READ  = 12,
    MINUS = 13
} OPRTYPE;
- typedef struct _TableItem
- {
- char* name;
- CVPKind kind;
- int level;
- char* address;
- }CVPTableItem;
- typedef struct _Instruct
- {
- FunctionCode f;
- int l;
- int a;
- }ObjectInstruct;
- #endif /* SYNANALYSIS_H_ */
SynAnalysis.c
- /*
- ============================================================================
- Name : SynAnalysis.c
- Author : liuqiushan
- Date : Dec. 2, 2014 18:16
- Version : 1.0
- Copyright : Shandong University
- Description : Syntax Analysis
- ============================================================================
- */
- /*
- * Expression EBNF
- *--------------------------------------------
- *| Expression -> Term { Addop Term } |
- *| Addop -> "+" | "-" |
- *| Term -> Factor { Mulop Factor } |
- *| Mulop -> "*" | "/" |
- *| Factor -> ID | NUM | "(" Expression ")" |
- *--------------------------------------------
- */
- /*
- * Pl/0 Statements to be analyzed
- *--------------------------------------------
- *| const a=10 |
- *| var b,c |
- *| procedure p |
- *| begin |
- *| c:=b+a |
- *| end |
- *--------------------------------------------
- */
- #include "lexAnalysis.h"
- #include "synAnalysis.h"
- // Data Structure
/* One source statement together with its bookkeeping. */
struct Statement
{
    char* _statement;       /* text of the statement */
    int _length;            /* number of characters in the statement */
    int _locate_pointer;    /* read position; starts at the first character */
};
typedef struct Statement P_Statement;
- // Methods
- void putsStatements(void);
- void initStatements(void);
- void binding(P_Statement* p_s, char* statement);
- void lexical_Analysis(char* a, int len_a);
- void printLexResults(void);
- SYM getNextSYMToken();
- char* getNextID();
- char* getNextNumber();
- void levelProcess(int level);
- void processConstant(int level);
- int processVariable(int level);
- void processProcedure(int level);
- void strcpyCVPTable(CVPTableItem* cvp, char* name, char* address);
- void printSynResults(void);
- int str2int(char *str);
- void GEN(FunctionCode f, int l, int a);
- void Sentence(int level);
- void EBNFExpression(int level);
- int getIndexCVP(char* id);
- void Term(int level);
- void Factor(int level);
- /* Global Variables */
- // Information about the statements set
- #define statementNum 6
- char* statements[statementNum] = {
- "const a=10",
- "var b,c",
- "procedure p",
- "begin",
- "c:=b+a",
- "end"
- };
- // Contain those statements to be analyzed
- P_Statement statementSet[statementNum];
- // For lexical analysis
- #define MAX 100
- SYM array_SYM[MAX]; // SYM array for lexical analysis
- int p_a_SYM = 0; // SYM array location pointer
- char array_Id[MAX][MAX]; // identifier array for lexical analysis
- int p_a_Id = 0; // identifier array location pointer
- char array_Num[MAX][MAX]; // number array for lexical analysis
- int p_a_Num = 0; // number array location pointer
- // Token acquired during lexical analysis
- char str[MAX] = ""; // current token to be analyzed
- int p_str = 0; // token inner location pointer
- /* For Syntax Analysis */
- // Token acquired during syntax analysis
- SYM _currentSYMToken;
- int currentPointer = 0;
- char* _currentID;
- int currentPointerID = 0;
- char* _currentNumber;
- int currentPointerNumber = 0;
- int level = 0; //It indicates which level is under processing, and also the level is just the main level.
- CVPTableItem cvptable[MAX]; // used to record every element, and we need the table to show which elements (including constant, variable and procedure) are in the program
- int p_table = 0; // the appending pointer of cvptable
- int addrVar = 3; // the initial address for the first variable is 3
- // the following two are used for the address of procedure. In fact up to now, I do not their meaning.
- int indexW = 0;
- int offset = 1;
- ObjectInstruct CODE[MAX]; // recording the generated intermediate code
- int p_code = 0; // the appending pointer of CODE
- OPRTYPE type_opr; // Expression operation type
- // Main
- int main(void) {
- puts("====================Initial=====================");
- putsStatements(); // print which statements need to be analyzed
- initStatements(); // initial all the statements to be analyzed with the struct P_Statement
- // start lexical analysis
- puts("================Lexical Analysis================");
- int i;
- for (i = 0; i < statementNum; i++) {
- lexical_Analysis(statementSet[i]._statement, statementSet[i]._length);
- }
- array_SYM[p_a_SYM] = -1;// end with -1
- // print the results of lexical analysis
- printLexResults();
- // Now let us start Syntax Analysis, just enjoy it
- puts("================Syntax Analysis================");
- _currentSYMToken = getNextSYMToken();
- levelProcess(level);
- //print the results of syntax analysis
- printSynResults();
- return EXIT_SUCCESS;
- }
- // Implementation
- void putsStatements(void)
- {
- puts("Analyzing the following statements: ");
- puts("");
- int i;
- for (i = 0; i < statementNum; i++)
- {
- puts(statements[i]);
- }
- }
- void initStatements(void)
- {
- int i;
- for (i = 0; i < statementNum; i++)
- binding(&statementSet[i], statements[i]);
- }
- void binding(P_Statement* p_s, char* statement)
- {
- // calculate the length of statement;
- int len = strlen(statement);
- p_s->_statement = malloc(len*sizeof(char));
- strcpy(p_s->_statement, statement);
- p_s->_length = len;
- p_s->_locate_pointer = 0;
- // testing
- // printf("%s\n", p_s->_statement);
- // printf("%d\n", p_s->_length);
- }
- // Lexical Analysis Main Program
- void lexical_Analysis(char* a, int len_a)
- {
- int i;
- for (i = 0; i < len_a; i++) {
- // Only get one character each time
- if (a[i]==' ')
- continue;
- // Finite Automation Method
- if (isABC(&a[i])) // If the first character is letter
- {
- while (isABC(&a[i])||isNumber(&a[i]))
- {
- appendStr(str, &a[i], p_str);
- p_str++;
- i++;
- if (i == len_a)
- break;
- }
- i--;
- SYM code = getSYM(str);
- //either identifier or keyword
- if (code == _ID)
- {
- array_SYM[p_a_SYM] = _ID;
- p_a_SYM++;
- strcpy(array_Id[p_a_Id], str);
- p_a_Id++;
- } else { // keyword or reserved word of Pl/0
- array_SYM[p_a_SYM] = code;
- p_a_SYM++;
- }
- // before next one, clean them
- memset(str, 0, sizeof(str));
- p_str = 0;
- } else if (isNumber(&a[i])) // If the first character is number
- {
- while (isNumber(&a[i]))
- {
- appendStr(str, &a[i], p_str);
- p_str++;
- i++;
- if (i == len_a)
- break;
- }
- i--;
- array_SYM[p_a_SYM] = _INT;
- p_a_SYM++;
- strcpy(array_Num[p_a_Num], str);
- p_a_Num++;
- // before next one, clean them
- memset(str, 0, sizeof(str));
- p_str = 0;
- } else if (a[i] == '=') {
- array_SYM[p_a_SYM] = _ASSIGN;
- p_a_SYM++;
- } else if (a[i] == ',') {
- array_SYM[p_a_SYM] = _DH;
- p_a_SYM++;
- } else if (a[i] == ':') {
- i++;
- if (a[i] == '=')
- {
- array_SYM[p_a_SYM] = _MD;
- p_a_SYM++;
- }
- } else if (a[i] == '+') {
- array_SYM[p_a_SYM] = _PLUS;
- p_a_SYM++;
- }
- }
- // print the results of lexical analysis
- //printLexResults();
- }
- void printLexResults(void)
- {
- int i;
- puts("============Lexical Analysis Results================");
- printf("SYM: [ ");
- for (i = 0; i < p_a_SYM; i++)
- {
- switch(array_SYM[i])
- {
- case 0: printf("_CONST "); break;
- case 1: printf("_VAR "); break;
- case 2: printf("_procedure "); break;
- case 3: printf("_begin "); break;
- case 4: printf("_end "); break;
- case 5: printf("_if "); break;
- case 6: printf("_then "); break;
- case 7: printf("_ID "); break;
- case 8: printf("_INT "); break;
- case 9: printf("_ASSIGN "); break;
- case 10: printf("_PLUS "); break;
- case 11: printf("_SUB "); break;
- case 12: printf("_STAR "); break;
- case 13: printf("_DIV "); break;
- case 14: printf("_LESS "); break;
- case 15: printf("_MORE "); break;
- case 16: printf("_LESSEQ "); break;
- case 17: printf("_MOREEQ "); break;
- case 18: printf("_DH "); break;
- case 19: printf("_MD "); break;
- case 20: printf("_LEFT "); break;
- case 21: printf("_RIGHT "); break;
- case 22: printf("_JH "); break;
- case 23: printf("_FH "); break;
- }
- }
- printf("]\n");
- printf("ID: [ ");
- for (i = 0; i < p_a_Id; i++)
- printf("%s ", array_Id[i]);
- printf("]\n");
- printf("NUM: [ ");
- for (i = 0; i < p_a_Num; i++)
- printf("%s ", array_Num[i]);
- printf("]\n");
- }
- SYM getNextSYMToken()
- {
- return array_SYM[currentPointer++];
- }
- char* getNextID()
- {
- return array_Id[currentPointerID++];
- }
- char* getNextNumber()
- {
- return array_Num[currentPointerNumber++];
- }
- void levelProcess(int level)
- {
- int countVar = 0; // record the number of variables in one level
- if (_currentSYMToken != -1)
- {
- if (_currentSYMToken == _CONST) // Constant
- {
- processConstant(level);
- if ((_currentSYMToken=getNextSYMToken())==-1)
- return;
- }
- if (_currentSYMToken == _VAR) // variables follow constant in our example
- {
- countVar = processVariable(level);
- }
- if (_currentSYMToken == _procedure) // procedure follows variables in our example
- {
- _currentSYMToken = getNextSYMToken(); // get the identifier of the procedure
- processProcedure(level);
- }
- }
- GEN(INT, 0, 3+countVar); // Create the specified number area. Because we designed addrVar 3, so we plus 3
- Sentence(level);
- GEN(OPR, 0, 0); // quit the data area
- }
- void processConstant(int level)
- {
- if ((_currentSYMToken=getNextSYMToken())!=-1&&_currentSYMToken==_ID) // a
- if ((_currentSYMToken=getNextSYMToken())!=-1&&_currentSYMToken==_ASSIGN) // =
- if ((_currentSYMToken=getNextSYMToken())!=-1&&_currentSYMToken==_INT) // 10
- if ((_currentID=getNextID())!=NULL) // get the identifier
- if ((_currentNumber=getNextNumber())!=NULL) // get the number
- {
- strcpyCVPTable(&cvptable[p_table], _currentID, _currentNumber);
- cvptable[p_table].kind = CON;
- cvptable[p_table].level = level;
- // Do not forget to add p_table
- p_table++;
- }
- }
- int processVariable(int level)
- {
- int count = 0; // record the number of variables
- if ((_currentSYMToken=getNextSYMToken())!=-1&&_currentSYMToken==_ID)
- if ((_currentID=getNextID())!=NULL) // get the identifier
- {
- count++;
- char str[MAX];
- sprintf(str, "%d", addrVar);
- strcpyCVPTable(&cvptable[p_table], _currentID, str);
- cvptable[p_table].kind = VAR;
- cvptable[p_table].level = level;
- addrVar++;
- // Do not forget to add p_table
- p_table++;
- }
- // Maybe we could meet such var a,b
- while((_currentSYMToken=getNextSYMToken())!=-1&&_currentSYMToken==_DH)
- {
- if ((_currentSYMToken=getNextSYMToken())!=-1&&_currentSYMToken==_ID)
- if ((_currentID=getNextID())!=NULL) // get the identifier
- {
- count++;
- char str[MAX];
- sprintf(str, "%d", addrVar);
- strcpyCVPTable(&cvptable[p_table], _currentID, str);
- cvptable[p_table].kind = VAR;
- cvptable[p_table].level = level;
- addrVar++;
- // Do not forget to add p_table
- p_table++;
- }
- }
- return count;
- }
- void processProcedure(int level)
- {
- if (_currentSYMToken!=-1&&_currentSYMToken==_ID)
- if ((_currentID=getNextID())!=NULL) // get the identifier of the procedure
- {
- char str[MAX];
- int temp = indexW + offset;
- sprintf(str, "%d", temp);
- strcpyCVPTable(&cvptable[p_table], _currentID, str);
- cvptable[p_table].kind = PRO;
- cvptable[p_table].level = level;
- // Do not forget to add p_table
- p_table++;
- }
- // IMPORTANT PHASE: SUBLEVEL
- _currentSYMToken = getNextSYMToken(); // in our example it should be begin
- levelProcess(level + 1); // Recursive-descent Parsing
- }
- void strcpyCVPTable(CVPTableItem* cvp, char* name, char* address)
- {
- int len_N = strlen(name);
- int len_A = strlen(address);
- cvp->name = (char*)malloc(len_N*sizeof(char));
- cvp->address = (char*)malloc(len_A*sizeof(char));
- strcpy(cvp->name, name);
- strcpy(cvp->address, address);
- }
- void printSynResults(void)
- {
- int i;
- puts("============Syntax Analysis Results================");
- puts("The Table of Constant, Variable and Procedure");
- puts("Name\tKind\tLevel\tAddress\t");
- for (i = 0; i < p_table; i++)
- {
- //printf("%s\t%d\t%d\t%s\n", cvptable[i].name, cvptable[i].kind, cvptable[i].level, cvptable[i].address);
- printf("%s\t", cvptable[i].name);
- switch(cvptable[i].kind) {
- case CON: printf("CON\t"); break;
- case VAR: printf("VAR\t"); break;
- case PRO: printf("PRO\t"); break;
- }
- printf("%d\t%s\n", cvptable[i].level, cvptable[i].address);
- }
- puts("");
- puts("Intermediate Code");
- puts("No.\tFunction Code\tLevel Difference\tDisplacement");
- for (i = 0; i < p_code; i++)
- {
- printf("%d\t", i);
- switch(CODE[i].f)
- {
- case LIT: printf("LIT\t"); break;
- case INT: printf("INT\t"); break;
- case LOD: printf("LOD\t"); break;
- case STO: printf("STO\t"); break;
- case CAL: printf("CAL\t"); break;
- case JMP: printf("JMP\t"); break;
- case JPC: printf("JPC\t"); break;
- case OPR: printf("OPR\t"); break;
- }
- printf("\t%d\t\t\t%d\n", CODE[i].l, CODE[i].a);
- }
- }
/*
 * Convert the leading run of decimal digits in str to an int.
 *
 * str: NUL-terminated text; NULL is accepted.
 *
 * Returns the value of the leading digits, 0 when there are none (NULL,
 * empty text, or text that starts with a non-digit), and INT_MAX when the
 * value does not fit in an int. Signs and leading blanks are not accepted.
 */
int str2int(char *str)
{
    int v = 0;

    if (str == NULL)
        return 0;

    while (*str >= '0' && *str <= '9') {
        int digit = *str - '0';

        // stop before 10 * v + digit would overflow
        if (v > (INT_MAX - digit) / 10)
            return INT_MAX;
        v = 10 * v + digit;
        str++;
    }
    return v;
}
- // To generate the object instruct
- void GEN(FunctionCode f, int l, int a)
- {
- CODE[p_code].f = f;
- CODE[p_code].l = l;
- CODE[p_code].a = a;
- p_code++;
- }
- void Sentence(int level)
- {
- // Here I just implement begin and Assignment i.e. _MD(:=)
- if (_currentSYMToken != -1)
- {
- if (_currentSYMToken ==_begin)
- {
- _currentSYMToken = getNextSYMToken();
- Sentence(level);// recurse with the sub-sentence of the sentence (i.e. nest), but they are in the same level
- while(_currentSYMToken != -1)
- {
- if (_currentSYMToken == _end)
- {
- _currentSYMToken = getNextSYMToken(); // in our example, this is the end of SYM table. it should be -1
- return;
- }
- }
- } else if (_currentSYMToken == _ID) // Assignment
- {
- _currentID = getNextID(); // I omit the variables valid checking, but we sill need to find it in cvptable
- int cvpIndex = getIndexCVP(_currentID); // Just find its index in cvptable
- if((_currentSYMToken=getNextSYMToken())!=-1&&_currentSYMToken==_MD) // assignment symbol
- {
- _currentSYMToken = getNextSYMToken();
- EBNFExpression(level);
- GEN(STO, level-cvptable[cvpIndex].level, str2int(cvptable[cvpIndex].address));
- return; // assignment over
- }
- }
- }
- }
- void EBNFExpression(int level)
- {
- Term(level); // the level + and -
- // Here we implement + and -
- while((_currentSYMToken=getNextSYMToken())!=-1)
- {
- if (_currentSYMToken == _SUB || _currentSYMToken == _PLUS)
- {
- if (_currentSYMToken == _SUB)
- type_opr = SUB;
- else
- type_opr = ADD;
- _currentSYMToken = getNextSYMToken(); // next term
- Term(level);
- GEN(OPR, 0, type_opr);
- }
- }
- }
- int getIndexCVP(char* id)
- {
- int i;
- for (i = 0; i < p_table; i++)
- {
- if(!strcmp(cvptable[i].name, id))
- break;
- }
- return i;
- }
/*
 * Compile a term. The "*" and "/" operators are not implemented, so a term
 * is exactly one factor.
 */
void Term(int level)
{
    Factor(level);
}
- void Factor(int level)
- {
- // In fact, we omit '(' and ')', but it is easy to implement it by recursing with EBNFExpression
- if(_currentSYMToken!=-1&&_currentSYMToken==_ID)
- {
- _currentID = getNextID();
- int cvpIndex = getIndexCVP(_currentID); // Just find its index in cvptable
- if(cvptable[cvpIndex].kind == VAR) // type checking
- {
- GEN(LOD, level-cvptable[cvpIndex].level, str2int(cvptable[cvpIndex].address));
- } else if (cvptable[cvpIndex].kind == CON) {
- GEN(LIT, 0, str2int(cvptable[cvpIndex].address));
- }
- }
- }
实验结果:
0 0
- 编译原理之词法分析和语法分析
- 编译原理之词法分析和语法分析
- 编译原理之词法分析、语法分析、语义分析
- 编译原理之词法分析、语法分析、语义分析
- 编译原理之词法分析、语法分析、语义分析
- 编译原理-简单计算器(词法分析/语法分析)-含源码
- 编译原理 之 词法分析
- 编译原理之词法分析
- 编译原理之词法分析
- 编译原理之词法分析
- 编译原理之词法分析
- 编译原理之词法分析
- 编译原理之词法分析
- 17.词法分析和语法分析
- 编译原理之语法分析
- 编译原理-词法分析-语法分析-语义分析生成中间代码-python版
- 编译原理实验 之 词法分析
- 编译原理之词法分析xml
- Using dispatch groups to wait for multiple web services
- MongoDB下载和安装
- LINUX 驱动学习 -设备模型之别人的理解(2)
- MyBatis_3.4.0学习系列(一)
- Djang建立项目
- 编译原理之词法分析和语法分析
- mySql实用小常识
- 自定义控件三部曲之绘图篇(十七)——为Bitmap添加阴影并封装控件
- spark学习 —— 入门
- agrc argv解释
- 关于CoordinatorLayout与Behavior的一点分析
- 加了transform:translateX后position:fixed没效果
- Java小程序——汽车租赁
- java Socket和ServerSocket多线程编程