嵌入式单片机设计吧 关注:23贴子:95
  • 2回复贴,共1

C语言编译器——词法分析器代码

只看楼主收藏回复

简单实现了一个词法分析器,可以解析下面两种形式的语句(本楼代码中内置的测试输入是 age >= 45):
money > 45
money >= 45
/*
 * Lexer states. The value stored in new_token.type is also used as an index
 * into token_string when a finished token is printed.
 */
enum token_enum_states {
    Init = 0,   /* no token in progress */
    Id,         /* identifier, e.g. abc_ */
    IntLiteral, /* integer literal, e.g. 1123 */
    GT,         /* > */
    GE,         /* >= */
    ERR
} token_enum_state = Init;

/* Signature of the callback that appends one character to the current token. */
typedef void Fun_append(char ch);

/* The token being assembled: its kind, its text and the append callback. */
typedef struct token {
    enum token_enum_states type;
    char text[20];
    Fun_append *append;
} Token;

Token new_token;

/*
 * Number of characters currently stored in new_token.text.
 * Named text_len rather than index so it cannot clash with the POSIX
 * index() function if <strings.h> is ever pulled in.
 */
static unsigned int text_len = 0;

/*
 * Append ch to new_token.text.
 * A character that would leave no room for the terminating NUL is dropped,
 * so an over-long token is truncated instead of overflowing the buffer.
 */
void my_append(char ch)
{
    if (text_len + 1 >= sizeof new_token.text) {
        return;
    }
    new_token.text[text_len++] = ch;
}

/* Reset the token text to an empty, fully zeroed string. */
void init_index()
{
    unsigned int i;

    text_len = 0;
    for (i = 0; i < sizeof new_token.text; i++) {
        new_token.text[i] = 0;
    }
}
#define bool char
#define TRUE 1
#define FALSE 0

/* TRUE when ch is in 'a'..'z', in 'A'..'Z', or is an underscore. */
bool is_alpha(char ch)
{
    if (ch == '_') {
        return TRUE;
    }
    if (ch >= 'A' && ch <= 'Z') {
        return TRUE;
    }
    return (ch >= 'a' && ch <= 'z') ? TRUE : FALSE;
}

/* TRUE when ch is in '0'..'9'. */
bool is_digit(char ch)
{
    return (ch < '0' || ch > '9') ? FALSE : TRUE;
}
/*
 * Printable names of the token kinds, in the same order as
 * enum token_enum_states so a kind can be used directly as an index.
 */
const char* const token_string[] = {
    "Init",
    "Id",         /* abc_ */
    "IntLiteral", /* 1123 */
    "GT",         /* > */
    "GE",         /* >= */
    "ERR",        /* present so that indexing with ERR stays inside the table */
};
enum token_enum_states initToken(char ch){
if(new_token.type != Init){
printf("%s %s\r\n",token_string[new_token.type],new_token.text);
}
init_index();
if(is_alpha(ch)){//第一个字符是字母
new_token.type = Id;
new_token.append(ch);
}else if(is_digit(ch)){//第一个字符是数字
new_token.type = IntLiteral;
new_token.append(ch);
}else if(ch == '>'){
new_token.type = GT;
new_token.append(ch);
}else{
new_token.type = Init;
}
return new_token.type;
}
/*
 * Return the next character of the built-in input text.
 *
 * When the text is exhausted the function returns (char)-1. The read
 * position is not advanced past the terminator, so further calls keep
 * returning (char)-1 instead of reading beyond the array.
 */
char get_token_string()
{
    /* static const: the text is built once, not copied onto the stack per call. */
    static const char my_tokens[] = "age >= 45";
    static int tokens_index = 0;
    char ch = my_tokens[tokens_index];

    if (ch == 0) {
        return -1;
    }
    tokens_index++;
    return ch;
}
/*
 * Drive the lexer: fetch characters one at a time and feed them to the
 * state machine until the end-of-input marker has been processed.
 *
 * The marker itself is pushed through the switch before the loop exits, so
 * the last pending token is flushed (printed) by initToken.
 * Returns 0 on completion.
 */
int main()
{
    char ch;

    new_token.append = &my_append;
    for (;;) {
        ch = get_token_string();
        switch (token_enum_state) {
        case Id:
            if (is_alpha(ch) || is_digit(ch)) {
                new_token.append(ch);
            } else {
                token_enum_state = initToken(ch);
            }
            break;
        case IntLiteral:
            if (is_digit(ch)) {
                new_token.append(ch);
            } else {
                token_enum_state = initToken(ch);
            }
            break;
        case GT:
            if (ch == '=') {
                new_token.type = GE;
                new_token.append(ch);
                /* Move to GE so a further '=' is not absorbed into this token. */
                token_enum_state = GE;
            } else {
                token_enum_state = initToken(ch);
            }
            break;
        case Init:
        case GE:
        default:
            /* Complete tokens, and any unexpected state, restart from ch. */
            token_enum_state = initToken(ch);
            break;
        }
        /*
         * get_token_string signals end of input with -1 converted to char.
         * Comparing against (char)-1 also works where plain char is unsigned,
         * unlike a "< 0" test.
         */
        if (ch == (char)-1) {
            return 0;
        }
    }
}


IP属地:北京1楼2022-08-12 21:03回复
    #define Id_LENGTH 20

    /*
     * Lexer states. The value stored in new_token.type is also used as an
     * index into token_string when a finished token is printed.
     */
    enum token_enum_states {
        Init = 0,   /* no token in progress */
        Id,         /* identifier, e.g. abc_ */
        IntLiteral, /* integer literal, e.g. 1123 */
        GT,         /* > */
        GE,         /* >= */
        ERR
    } token_enum_state = Init;

    /* Signature of the callback that appends one character to the current token. */
    typedef void Fun_append(char ch);

    /* The token being assembled: its kind, its text and the append callback. */
    typedef struct token {
        enum token_enum_states type;
        char text[Id_LENGTH];
        Fun_append *append;
    } Token;

    Token new_token;

    /*
     * Number of characters currently stored in new_token.text.
     * Named text_len rather than index so it cannot clash with the POSIX
     * index() function if <strings.h> is ever pulled in.
     */
    static unsigned int text_len = 0;

    /*
     * Append ch to new_token.text.
     * At most Id_LENGTH - 1 characters are kept so the text always ends in a
     * NUL; extra characters are dropped instead of overflowing the buffer.
     */
    void my_append(char ch)
    {
        if (text_len >= Id_LENGTH - 1) {
            return;
        }
        new_token.text[text_len] = ch;
        text_len++;
    }

    /* Reset the token text to an empty, fully zeroed string. */
    void init_index()
    {
        unsigned int i;

        text_len = 0;
        for (i = 0; i < Id_LENGTH; i++) {
            new_token.text[i] = 0;
        }
    }
    #define bool char
    #define TRUE 1
    #define FALSE 0

    /* TRUE when ch is an underscore or lies in 'a'..'z' or 'A'..'Z'. */
    bool is_alpha(char ch)
    {
        bool lower = (ch >= 'a' && ch <= 'z');
        bool upper = (ch >= 'A' && ch <= 'Z');

        if (lower || upper || ch == '_') {
            return TRUE;
        }
        return FALSE;
    }

    /* TRUE when ch lies in '0'..'9'. */
    bool is_digit(char ch)
    {
        if (ch < '0') {
            return FALSE;
        }
        if (ch > '9') {
            return FALSE;
        }
        return TRUE;
    }
    /*
     * Printable names of the token kinds, in the same order as
     * enum token_enum_states so a kind can be used directly as an index.
     */
    const char* const token_string[] = {
        "Init",
        "Id",         /* abc_ */
        "IntLiteral", /* 1123 */
        "GT",         /* > */
        "GE",         /* >= */
        "ERR",        /* present so that indexing with ERR stays inside the table */
    };
    /*
     * Finish the token in progress and start a new one from ch.
     *
     * If a token is pending (type != Init) its kind and text are printed
     * first. The text buffer is then cleared and ch is classified: a letter
     * or underscore starts an Id, a digit starts an IntLiteral, '>' starts a
     * GT, and anything else leaves the lexer in Init with ch discarded.
     *
     * Returns the kind that was stored in new_token.type.
     */
    enum token_enum_states initToken(char ch)
    {
        if (new_token.type != Init) {
            /*
             * Pass the array itself (it decays to char *). "&new_token.text"
             * has type char (*)[Id_LENGTH], which does not match %s.
             */
            printf("%s %s\r\n", token_string[new_token.type], new_token.text);
        }
        init_index();

        if (is_alpha(ch)) {
            new_token.type = Id;
        } else if (is_digit(ch)) {
            new_token.type = IntLiteral;
        } else if (ch == '>') {
            new_token.type = GT;
        } else {
            new_token.type = Init;
            return new_token.type;
        }

        /* Every recognised first character becomes part of the token text. */
        new_token.append(ch);
        return new_token.type;
    }
    /*
     * Return the next character of the built-in input text.
     *
     * When the text is exhausted the function returns (char)-1. The read
     * position is not advanced past the terminator, so further calls keep
     * returning (char)-1 instead of reading beyond the array.
     */
    char get_token_string()
    {
        /* static const: the text is built once, not copied onto the stack per call. */
        static const char my_tokens[] = "money > 45;age >= 45;";
        /* Alternative input for a quick check: "money >= 45" */
        static int tokens_index = 0;
        char ch = my_tokens[tokens_index];

        if (ch == 0) {
            return -1;
        }
        tokens_index++;
        return ch;
    }
    /*
     * Drive the lexer: fetch characters one at a time and feed them to the
     * state machine until the end-of-input marker has been processed.
     *
     * The marker itself is pushed through the switch before the loop exits,
     * so the last pending token is flushed (printed) by initToken.
     * Returns 0 on completion.
     */
    int main()
    {
        char ch;

        new_token.append = &my_append;
        for (;;) {
            ch = get_token_string();
            switch (token_enum_state) {
            case Id:
                if (is_alpha(ch) || is_digit(ch)) {
                    new_token.append(ch);
                } else {
                    token_enum_state = initToken(ch);
                }
                break;
            case IntLiteral:
                if (is_digit(ch)) {
                    new_token.append(ch);
                } else {
                    token_enum_state = initToken(ch);
                }
                break;
            case GT:
                if (ch == '=') {
                    new_token.type = GE;
                    new_token.append(ch);
                    /* Move to GE so a further '=' is not absorbed into this token. */
                    token_enum_state = GE;
                } else {
                    token_enum_state = initToken(ch);
                }
                break;
            case Init:
            case GE:
            default:
                /* Complete tokens, and any unexpected state, restart from ch. */
                token_enum_state = initToken(ch);
                break;
            }
            /*
             * get_token_string signals end of input with -1 converted to
             * char. Comparing against (char)-1 also works where plain char
             * is unsigned, unlike a "< 0" test.
             */
            if (ch == (char)-1) {
                return 0;
            }
        }
    }


    IP属地:北京2楼2022-08-12 21:14
    回复
      加入了对赋值符号 =、加减乘除运算符以及关键字 char 的识别
      #include<stdio.h>
      //===========================================================================================
      /*
       * Printable names of the lexer states, in the same order as
       * enum token_enum_states so a state can be used directly as an index.
       * Every enumerator has an entry: new_token.type can hold CHAR_H while a
       * token is being flushed, so the table must not be shorter than the enum.
       */
      const char* const token_string[] = {
          "Init",
          "Id",         /* abc_ */
          "IntLiteral", /* 1123 */
          "GT",         /* > */
          "GE",         /* >= */
          "Assignment", /* = */
          "Plus",       /* + */
          "Minus",      /* - */
          "Star",       /* * */
          "Slash",      /* / */
          "CHAR",       /* char */
          "ERR",
          "CHAR_H",     /* "c" seen, expecting 'h' */
          "CHAR_A",     /* "ch" seen, expecting 'a' */
          "CHAR_R",     /* "cha" seen, expecting 'r' */
      };
      #define Id_LENGTH 20

      /*
       * Lexer states. The value stored in new_token.type is also used as an
       * index into token_string when a finished token is printed.
       * CHAR_H, CHAR_A and CHAR_R are intermediate states used while the
       * keyword "char" is being matched letter by letter.
       */
      enum token_enum_states {
          Init = 0,   /* no token in progress */
          Id,         /* identifier, e.g. abc_ */
          IntLiteral, /* integer literal, e.g. 1123 */
          GT,         /* > */
          GE,         /* >= */
          Assignment, /* = */
          Plus,       /* + */
          Minus,      /* - */
          Star,       /* * */
          Slash,      /* / */
          CHAR,       /* keyword char */
          ERR,
          CHAR_H,     /* 'c' read, waiting for 'h' */
          CHAR_A,     /* "ch" read, waiting for 'a' */
          CHAR_R,     /* "cha" read, waiting for 'r' */
      } token_enum_state = Init;

      /* Signature of the callback that appends one character to the current token. */
      typedef void Fun_append(char ch);

      /* The token being assembled: its kind, its text and the append callback. */
      typedef struct token {
          enum token_enum_states type;
          char text[Id_LENGTH];
          Fun_append *append;
      } Token;

      Token new_token;

      /*
       * Number of characters currently stored in new_token.text.
       * Named text_len rather than index so it cannot clash with the POSIX
       * index() function if <strings.h> is ever pulled in.
       */
      static unsigned int text_len = 0;

      /*
       * Append ch to new_token.text.
       * At most Id_LENGTH - 1 characters are kept so the text always ends in
       * a NUL; extra characters are dropped instead of overflowing the buffer.
       */
      void my_append(char ch)
      {
          if (text_len >= Id_LENGTH - 1) {
              return;
          }
          new_token.text[text_len] = ch;
          text_len++;
      }

      /* Reset the token text to an empty, fully zeroed string. */
      void init_index()
      {
          unsigned int i;

          text_len = 0;
          for (i = 0; i < Id_LENGTH; i++) {
              new_token.text[i] = 0;
          }
      }
      #define bool char
      #define TRUE 1
      #define FALSE 0

      /* TRUE when ch is in 'a'..'z', in 'A'..'Z', or is an underscore. */
      bool is_alpha(char ch)
      {
          if (ch >= 'a' && ch <= 'z') {
              return TRUE;
          }
          if (ch >= 'A' && ch <= 'Z') {
              return TRUE;
          }
          return (ch == '_') ? TRUE : FALSE;
      }

      /* TRUE when ch is in '0'..'9'. */
      bool is_digit(char ch)
      {
          bool in_range = (ch >= '0' && ch <= '9');

          return in_range ? TRUE : FALSE;
      }
      /*
       * Print the pending token as "<kind name> <text>".
       * Nothing is printed while no token is in progress (type == Init).
       */
      void save_tokens()
      {
          if (new_token.type == Init) {
              return;
          }
          /*
           * Pass the array itself (it decays to char *). "&new_token.text"
           * has type char (*)[Id_LENGTH], which does not match %s.
           */
          printf("%s %s\r\n", token_string[new_token.type], new_token.text);
      }
      enum token_enum_states initToken(char ch){
      save_tokens();
      init_index();
      if(is_alpha(ch)){//第一个字符是字母
      if(ch == 'c'){
      new_token.type = CHAR_H;
      }else{
      new_token.type = Id;
      }
      new_token.append(ch);
      }else if(is_digit(ch)){//第一个字符是数字
      new_token.type = IntLiteral;
      new_token.append(ch);
      }else if(ch == '>'){
      new_token.type = GT;
      new_token.append(ch);
      }else if(ch == '='){
      new_token.type = Assignment;
      new_token.append(ch);
      }else if(ch == '+'){
      new_token.type = Plus;
      new_token.append(ch);
      }else if(ch == '-'){
      new_token.type = Minus;
      new_token.append(ch);
      }else if(ch == '*'){
      new_token.type = Star;
      new_token.append(ch);
      }else if(ch == '/'){
      new_token.type = Slash;
      new_token.append(ch);
      }else{
      new_token.type = Init;
      }
      return new_token.type;
      }
      /*
       * Return the next character of the built-in input text.
       *
       * When the text is exhausted the function returns (char)-1. The read
       * position is not advanced past the terminator, so further calls keep
       * returning (char)-1 instead of reading beyond the array.
       */
      char get_token_string()
      {
          /*
           * static const: the text is built once, not copied onto the stack
           * per call. The six spaces at the start of each piece reproduce the
           * indentation that the original backslash-continued literal carried
           * into the string, so the characters delivered are unchanged.
           */
          static const char my_tokens[] =
              "      money > 45;"
              "      age >= 45;"
              "      char abc = 1+2-3*4/5;"
              "      ";
          /* Alternative input for a quick check: "money >= 45" */
          static int tokens_index = 0;
          char ch = my_tokens[tokens_index];

          if (ch == 0) {
              return -1;
          }
          tokens_index++;
          return ch;
      }
      int Lexical_analyzer(){
      char ch ;
      new_token.append = &my_append;
      while(1){
      ch = get_token_string();
      switch (token_enum_state){
      case Init:
      token_enum_state = initToken(ch);
      break;
      case Id:
      if(is_alpha(ch) || is_digit(ch)){
      new_token.append(ch);
      }else{
      token_enum_state = initToken(ch);
      }
      break;
      case GT:
      if(ch == '='){
      new_token.type = GE;
      new_token.append(ch);
      }else{
      token_enum_state = initToken(ch);
      }
      break;
      case GE:
      token_enum_state = initToken(ch);
      break;
      case IntLiteral:
      if(is_digit(ch)){
      new_token.append(ch);
      }else{
      token_enum_state = initToken(ch);
      }
      break;
      case Assignment:
      case Plus:
      case Minus:
      case Star:
      case Slash:
      token_enum_state = initToken(ch);
      break;
      case CHAR_H:
      if (ch == 'h') {
      token_enum_state = CHAR_A;
      new_token.append(ch);
      }else if(is_alpha(ch) || is_digit(ch)){
      new_token.type = Id;
      new_token.append(ch);
      token_enum_state = Id;
      }else{
      token_enum_state = initToken(ch);
      }
      break;
      case CHAR_A:
      if (ch == 'a') {
      token_enum_state = CHAR_R;
      new_token.append(ch);
      }else if(is_alpha(ch) || is_digit(ch)){
      new_token.type = Id;
      new_token.append(ch);
      token_enum_state = Id;
      }else{
      token_enum_state = initToken(ch);
      }
      break;
      case CHAR_R:
      if (ch == 'r') {
      token_enum_state = CHAR;
      new_token.append(ch);
      }else if(is_alpha(ch) || is_digit(ch)){
      new_token.type = Id;
      new_token.append(ch);
      token_enum_state = Id;
      }else{
      token_enum_state = initToken(ch);
      }
      break;
      case CHAR:
      if(is_alpha(ch) || is_digit(ch)){
      new_token.type = Id;
      new_token.append(ch);
      token_enum_state = Id;
      }else{
      new_token.type = CHAR;
      token_enum_state = initToken(ch);
      }
      break;
      }
      if(ch < 0){
      return 1;
      }
      }
      }
      /* Program entry point: run the lexer once; its result is not used. */
      int main()
      {
          (void)Lexical_analyzer();
          return 0;
      }


      IP属地:北京3楼2022-08-13 10:41
      回复