// 词法分析程序 C++/C
//
// 来源:互联网 发布:照片文件恢复软件 编辑:程序博客网 时间:2024/06/09 21:35

//c++程序

#include <stdio.h>
#include <ctype.h>
#include <string.h>
#include <conio.h>
#include <stdlib.h>

//Size limits used by the tables and buffers below.
#define KEYWORD_LEN 32        //number of reserved words
#define STR_MAX_LEN 300        //maximum length of an identifier
#define PRO_MAX_LEN 20480    //maximum length of the source program
#define STB_MAX_LEN 1000    //maximum capacity of the symbol table
#define CTB_MAX_LEN 1000    //maximum capacity of the constant table

//Token kinds. Values 1..KEYWORD_LEN are the reserved-word codes returned by
//Reserve(), so the remaining kinds are numbered after them.
#define ERROR    0                //error
#define ID        (KEYWORD_LEN+1)    //identifier
#define CONST    (KEYWORD_LEN+2)    //constant
#define OPERAT    (KEYWORD_LEN+3)    //operator
#define DIVIDE    (KEYWORD_LEN+4)    //delimiter

int  errorLine=0;                    //line counter maintained by GetChar/Retract, printed by ProcError
char proBuffer[PRO_MAX_LEN] = "";    //global buffer holding the program text
char ch;                            //the character most recently read
char wordget[STR_MAX_LEN];            //text of the current identifier or constant
int  point = 0;                        //current read position in the source program
char signTab[STB_MAX_LEN][STR_MAX_LEN];    //symbol table
int  pointSTB = 0;                    //next free slot in the symbol table
char constTab[CTB_MAX_LEN][STR_MAX_LEN];    //constant table
int  pointCTB = 0;                    //next free slot in the constant table
char kwTab[KEYWORD_LEN][10]={        //reserved-word table: the C language has 32 reserved words (keywords)
        "auto",        "break",    "case",        "char",        "const",
        "continue",    "default",    "do",        "double",    "else",
        "enum",        "extern",    "float",    "for",        "goto",
        "if",        "int",        "long",        "register",    "return",
        "short",    "signed",    "sizeof",    "static",    "struct",
        "switch",    "typedef",    "union",    "unsigned",    "void",
        "volatile",    "while"};
char errorTab[][50]={                //error message table, indexed by the id passed to ProcError
    /*0*/"未知错误",    /*1*/"非法的字符",    /*2*/"不正确的字符常量表达",
    /*3*/"不正确的字符串表达",    /*4*/"不正确的数字表达",    /*5*/"注释丢失'*/'"};

//(kind, value) pair describing one token; filled in by wordAnalyse.
typedef struct signDuality
{
    int kind;     //ERROR, ID, CONST, OPERAT, DIVIDE, or a reserved-word code from Reserve()
    int value;    //index into signTab/constTab, 0 for errors, -1 when the token has no table entry
}*pDualistic, Dualistic;
void pretreatment();    //预处理
void ProcError(int id);    //错误
bool GetChar();            //获得一个字符不包括结束标记
bool GetBC();            //获得一个非空白字符
void Concat(char *str);    //将ch连接到str后
int  Reserve(char *str);    //对str字符串查找保留字表 若是一个保留字-返回其编码 否则返回

0
void Retract();            //将搜索指示器回调一个字符位置
int  InsertId(char *str);//将str串以标识符插入符号表,并返回符号表指针
int  InsertConst(char *str);            //将str串以常数插入符号表,并返回常数表指针
bool wordAnalyse(pDualistic pDu);    //词法分析 true正常


//预处理 将缓冲区内的源代码去掉注释和无效空格
void pretreatment()
{int lines=0;
    char tmp[PRO_MAX_LEN];    //先将处理结果保存到临时空间
    int tmpp = 0;             //这个临时空间的末尾指针
    bool flg;
    char tmpc;
    //去掉注释先
    //注释有两种 一种是// 另一种是/**/
    point = 0;
    do
    {
        flg = GetChar();

        if(ch == '/')
        {
            flg = GetChar();
            switch(ch)
            {
            case '/':
                do
                {
                    flg = GetChar();                   
                }while(!(ch == '/n' || flg == false));//注释一直到行尾或文件结束
                if(ch == '/n')
                    Retract();    //归还换行
                break;
            case '*':
                do
                {
                    flg = GetChar();
                    tmpc = ch;
                    //为了保证出错处理程序能正确定位出错位置 保留注释中的换行
                    if(tmpc == '/n')
                        tmp[tmpp++] = tmpc;
                    flg = GetChar();
                    Retract();    //归还一个字符
                }while(flg && !(flg && tmpc == '*' && ch == '/'));
                flg = GetChar();
                if (!flg)
                {
                    ProcError(5);
                }
                break;
            default:             //不是任何一种注释
                Retract();
                Retract();
                GetChar();
                tmp[tmpp++] = ch;
                flg = GetChar();
                tmp[tmpp++] = ch;
            }
        }
        else
        {
            tmp[tmpp++] = ch;
        }
    }while(flg);
    tmp[tmpp] = '/0';
    strcpy(proBuffer,tmp);
}

//错误
void ProcError(int id)
{
    printf("/nError:第%d行,%s/n",errorLine, errorTab[id]);
}

//获得一个字符
bool GetChar()
{
    if(point < PRO_MAX_LEN && proBuffer[point] != '/0')
    {//如果当前下标合法且当前字符为结束标记则取字符增游标
        ch = proBuffer[point++];
        if (ch == '/n')
            errorLine ++;
        return true;
    }
    ch = '/0';
    return false;

//获得一个非空白字符
bool GetBC()
{
    do
    {
        if(!GetChar())    //获取字符失败
        {
            ch = '/0';
            return false;
        }
    }while(isspace(ch));    //直到获得一个非空白字符
    return true;
}

//将ch连接到str后
void Concat(char *str)
{
    int i;
    for(i=0; str[i]; ++i);
    str[i] = ch;
    str[i+1] = '/0';
}

//对str字符串查找保留字表 若是一个保留字-返回其编码 否则返回0
int Reserve(char *str)
{
    int i;
    for(i=0; i<KEYWORD_LEN; ++i)    //从保留字表中查找str串
    {
        if(0 == strcmp(kwTab[i], str))
            return i+1;    //注意,这里加一原因是0值被错误标记占用
    }
    return 0;
}

//将搜索指示器回调一个字符位置
void Retract()///char *ch
{
    if(proBuffer[point] == '/n' && errorLine > 0)
        errorLine --;
    point --;
}

//将str串以标识符插入符号表,并返回符号表指针
int InsertId(char *str)
{
    int i;
    for(i=0; i < pointSTB; ++i)
        if(0 == strcmp(signTab[i], str))
            return i;
    strcpy(signTab[pointSTB++], str);
    return (pointSTB-1);
}

//将str串以常数插入常量表,并返回常数表指针
int InsertConst(char *str)
{
    int i;
    for(i=0; i < pointCTB; ++i)
        if(0 == strcmp(constTab[i], str))
            return i;
    strcpy(constTab[pointCTB++], str);
    return (pointCTB-1);
}

//词法分析 false--分析结束
bool wordAnalyse(pDualistic pDu)
{
    int  code, value;
    char judge;    //这里有个技巧 借用此变量巧妙的运用SWITCH结构
    int  i = 0;    //辅助

    GetBC();
    judge = ch;
    if (isalpha(ch) || ch == '_')    judge='L';
    if (isdigit(ch))    judge='D';
    switch(judge)
    {
    case 'L':
        while(isalnum(ch) || ch == '_')
        {    //标识符
            wordget[i++] = ch;
            GetChar();
        }
        wordget[i] = '/0';
        Retract();    //回退一个字符
        code = Reserve(wordget);
        if(code == 0)
        {
            value = InsertId(wordget);
            pDu->kind  = ID;
            pDu->value = value;
        }
        else
        {
            pDu->kind = code;
            pDu->value = -1;
        }
        return true;
    case 'D':
        while(isdigit(ch))
        {
            wordget[i++] = ch;
            GetChar();
        }
        wordget[i] = '/0';
        Retract();
        value = InsertConst(wordget);
        pDu->kind = CONST;
        pDu->value= value;
        return true;
//( ) [ ] . , !  != ~ sizeof < << <= > >> >= = ==  & && &= |

 || |= ?: + ++ +=
// - -> -- -= * *= / /= % %= >>= <<= ^ ^=
    case '"':
        //字符串常量
        do
        {
            wordget[i++] = ch;
            GetChar();
        }while(ch != '"' && ch != '/0');
        wordget[i++] = ch;    wordget[i] = '/0';
        if(ch == '/0')
        {
            printf("%s",wordget);
            ProcError(3);
            pDu->kind = ERROR;
            pDu->value = 0;
        }
        else
        {
            value = InsertConst(wordget);
            pDu->kind = CONST;
            pDu->value = value;
        }
        return true;
        //字符常量
    case '/'':
        wordget[i++] = ch;    // '
        GetChar();
        wordget[i++] = ch;

        if(ch == '//')    // '/n'
        {//如果是转义字符则要多接收一个字符
            GetChar();            // ch = '
            wordget[i++] = ch;
        }
        GetChar();
        wordget[i++] = ch;
        wordget[i] = '/0';
        if(ch != '/'')
        {//'/b'
            printf("%s",wordget);
            ProcError(2);
            pDu->kind = ERROR;
            pDu->value = 0;
        }
        else
        {
            value = InsertConst(wordget);
            pDu->kind = CONST;
            pDu->value = value;
        }
        return true;
    case '(':
    case ')':
    case '[':
    case ']':
    case '.':
    case ',':
    case '~':
    case '?':
    case ':':
    case ';':
    case '{':
    case '}':
    case '#':
        wordget[i++] = ch;    wordget[i] = '/0';
        pDu->kind = DIVIDE; //界符
        pDu->value = -1;
        return true;
    case '!':
        //!=
        wordget[i++] = ch;
        GetChar();
        if (ch=='=') wordget[i++] = ch;
        else Retract();
        wordget[i]='/0';
        break;
    case '<':
        // << <=
        wordget[i++] = ch;
        GetChar();
        if (ch == '<' || ch == '=') wordget[i++] = ch;
        else Retract();
        wordget[i]='/0';
        break;
    case '>':
        // >> >=
        wordget[i++] = ch;
        GetChar();
        if (ch == '>' || ch == '=') wordget[i++] = ch;
        else Retract();
        wordget[i]='/0';
        break;
    case '=':
        // ==
        wordget[i++] = ch;
        GetChar();
        if (ch == '=') wordget[i++] = ch;
        else Retract();
        wordget[i]='/0';
        break;
    case '&':
        // && &=
        wordget[i++] = ch;
        GetChar();
        if (ch == '&' || ch == '=') wordget[i++] = ch;
        else Retract();
        wordget[i]='/0';
        break;
    case '|':
        // || |=
        wordget[i++] = ch;
        GetChar();
        if (ch == '|' || ch == '=') wordget[i++] = ch;
        else Retract();
        wordget[i]='/0';
        break;
    case '+':
        // ++ +=
        wordget[i++] = ch;
        GetChar();
        if (ch == '+' || ch == '=') wordget[i++] = ch;
        else Retract();
        wordget[i]='/0';
        break;
    case '-':
        // -- -= ->
        wordget[i++] = ch;
        GetChar();
        if (ch == '-' || ch == '=' || ch == '>') wordget[i++] = ch;
        else Retract();
        wordget[i]='/0';
        break;
    case '*':
        // ** *=
        wordget[i++] = ch;
        GetChar();
        if (ch == '*' || ch == '=') wordget[i++] = ch;
        else Retract();
        wordget[i]='/0';
        break;
    case '/':
        // /=
        wordget[i++] = ch;
        GetChar();
        if (ch == '=') wordget[i++] = ch;
        else Retract();
        wordget[i]='/0';
        break;
    case '%':
        // %=
        wordget[i++] = ch;
        GetChar();
        if (ch == '=') wordget[i++] = ch;
        else Retract();
        wordget[i]='/0';
        break;
    case '^':
        // ^=
        wordget[i++] = ch;
        GetChar();
        if (ch == '=') wordget[i++] = ch;
        else Retract();
        wordget[i]='/0';
        break;
    case '/0':
        return false;
    default:
        ProcError(1);
        return false;
    }
    pDu->kind = OPERAT;
    return true;
}

int main()
{
    Dualistic  tmp;
    pDualistic ptmp = &tmp;
    FILE *fin, *fout;
    int i;
    char c;
 char filename[20];
    printf("源代码读入/n");
 //scanf("%s",filename);
 //将源程序读入缓冲区
    if ((fin=fopen("Test.txt","r")) == NULL)
    {
        printf("Cannot open infile/n");
        return 0;
    }
    i = 0;
    //c = fgetc(fin);
 while((c = fgetc(fin)) != EOF)
    {
        if(i >= PRO_MAX_LEN-1)
        {
            printf("/n程序代码太长,无法处理/a");
            return 0;
        }
        proBuffer[i++] = c;
    }
    fclose(fin);    //关闭文件
    proBuffer[i++] = '/0';
    printf("/n***************************/n源代码读入成功,源代码如下:/n%s",proBuffer);
    printf("/n按任意键继续/n");   
 getch();
    //预处理
    printf("/n预处理/n");
    pretreatment();
    printf("/n***************************/n预处理成功,去掉注释后的源代码为:/n%s*",proBuffer);
    printf("/n按任意键继续/n");   
 getch();
    printf("/n词法分析/n");
    point = 0;
    //词法分析
    if ((fout=fopen("Result.txt","wb")) == NULL)
    {
        printf("建立文件Result.txt失败。/n");
        return 0;
    }
    i = 0;
    errorLine = 0;    //错误行归零
    do
    {
        if(i++ > PRO_MAX_LEN)//防止遇到BUG 导致程序死循环无限写文件
            break;
        if(!wordAnalyse(ptmp))
        {
            break;
        }
        if (ptmp->value == -1)
            fprintf(fout, "<%3d,  ->/t",ptmp->kind);
        else
            fprintf(fout, "<%3d,%3d>/t",ptmp->kind, ptmp->value);
        switch(ptmp->kind)
        {
        case ERROR:
            fprintf(fout, "(出  错:%s)",wordget);
            break;
        case ID:
            fprintf(fout, "(标识符:%s)",wordget);
            break;
        case CONST:
            fprintf(fout, "(常  量:%s)",wordget);
            break;
        case OPERAT:
            fprintf(fout, "(运算符:%s)",wordget);
            break;
        case DIVIDE:
            fprintf(fout, "(界  符:%s)",wordget);
            break;
        default:;
        }
        if(ptmp->kind >= 1 && ptmp->kind <= KEYWORD_LEN)
            fprintf(fout, "(关键字:%s)",kwTab[ptmp->kind-1]);
        fprintf(fout, "/r/n");       
    }while(1);
    fclose(fout);

    printf("写回常量表和标识符表/n");
    //常量表
    if ((fout=fopen("Const.txt","wb")) == NULL)
    {
        printf("建立文件Const.txt失败。/n");
        return 0;
    }
    for(i = 0; i < pointCTB; ++i)
        fprintf(fout, "%3d %s/r/n",i, constTab[i]);
    fclose(fout);
    //标识符表
    if ((fout=fopen("Sign.txt","wb")) == NULL)
    {
        printf("建立文件Sign.txt失败。/n");
        return 0;
    }
    for(i = 0; i < pointSTB; ++i)
        fprintf(fout, "%3d %s/r/n",i, signTab[i]);
    fclose(fout);
    printf("/n写入完毕/n按任意键继续/n");  
 getch();

    return 0;
}

 

// 原创粉丝点击