c++enumslex

Enum not giving the correct value


I've been making a simple interpreter for my command line project, however I recently just stumbled upon a problem which I don't know the cause of.

Here are the codes:

// cls.h
#pragma once
#include <iostream>
#include <string>
#include <iomanip>

enum id {undef, kwd, idt, opr, val};    // Ignore idt

extern std::string keywords[];          // Declaring the keywords array
extern std::string operators[];         // Declaring the operators array

class Token
{
    id ID;
    std::string value;
public:
    Token() {
        ID = undef;
        value = "";
    }

    void SetID(id ID) {
        this->ID = ID;
    }

    void SetValue(std::string value) {
        this->value = value;
    }

    id GetID() {
        return ID;
    }

    std::string GetValue() {
        return value;
    }
};

std::string UserInput();
void Lexical(Token *token, int nstr, std::string str, int ntok);


// cls.cpp

#include <iostream>

// Definitions of the keywords and operators arrays
std::string keywords[] = {
                             "echo",
                             "exit"
                         };

std::string operators[] = {
                             "+",
                             "-",
                             "/",
                             "%" 
                          };


// input.cpp

#include "cls.h"

std::string UserInput()             // Function to take user's input
{
    std::string input;
    std::getline(std::cin, input);

    return input;
}


// lex.cpp

#include "cls.h"

// To check whether a token is a keyword
static bool aKeyword(std::string str)
{
    for (int i = 0; i < 10; i++) {
        if (keywords[i] == str)
            return true;
    }

    return false;
}

// To check whether a token is an operator
static bool aOperator(std::string str)
{
    for (int i = 0; i < 10; i++) {
        if (operators[i] == str)
            return true;
    }

    return false;
}

void Lexical(Token *token, int ntok, std::string str, int nstr)
{
    // Working the tokens' values
    int idx = 0;
    bool fspace = false;
    std::string plh[256];

    for (int i = 0; i < nstr; i++) {
        if ((str.at(i) == ' ') && (fspace == false)) {
            idx++;
            fspace = true;
            continue;
        } else
        if ((str.at(i) == ' ') && (fspace == true)) {
            continue;
        }

        plh[idx].append(&(str.at(i)), 1);
        fspace = false;
    }

    for (int i = 0; i < ntok; i++) {
        (token + i)->SetValue(plh[i]); 
    }

    // Working the tokens' IDs
    // Checking whether a token is a keyword, an operator or a value
    for (int i = 0; i < ntok; i++) {
        if (aKeyword((token + i)->GetValue()) == true) {
            (token + i)->SetID(kwd);
        } else
        if (aOperator((token + i)->GetValue()) == true) {       // <--- The problem
            (token + i)->SetID(opr);
        } else {
            (token + i)->SetID(val);
        }
    }

}


// init.cpp

#include "cls.h"

int main()
{
    std::string usinput;
    Token token[256];

    std::cout << "User input: ";
    usinput = UserInput();          // Taking user's input

    Lexical(token, 256, usinput, (int) usinput.size());   // Calling the lexical function

    // Outputting the tokens' values and IDs (The IDs is in the parentheses)
    for (int i = 0; i < 10; i++) {
        std::cout << "Token[" << i << "] = "
                  << token[i].GetValue()
                  << std::setfill(' ') 
                  << std::setw(15 - (token[i].GetValue()).size())
                  << "(" << token[i].GetID() << ")"
                  << std::endl;
    }

    return 0;
}

The problem appears in the Lexical function when it tries to check and classify the tokens with the ID's keyword (kwd), operator (opr), or value (val). In the following definition of enum id

enum id {undef, kwd, idt, opr, val};

It is clear that opr should have the value 3 (for operator), however when I tried to run the program and saw the outputs, it instead gave the value 1 (for keyword)

benanthony@DESKTOP-QI9Q4LV:~/codes/CLine$ make
g++ init.cpp lex.cpp input.cpp cls.cpp -Wall -Wextra -o CLine
benanthony@DESKTOP-QI9Q4LV:~/codes/CLine$ ./CLine
User input: Hello there + - Rand - Lett - echo yes
Token[0] = Hello         (4)
Token[1] = there         (4)
Token[2] = +             (1)
Token[3] = -             (1)
Token[4] = Rand          (4)
Token[5] = -             (1)
Token[6] = Lett          (4)
Token[7] = -             (1)
Token[8] = echo          (1)
Token[9] = yes           (4)

The + and - in token[2], token[3], token[5], and token[7] should've had the ID value 3.

I've tried solving this by messing around like changing the names of the ID's but it still didn't work.

This seems like a simple problem but I still don't know how to solve it. Is there something that I missed or is there any mistake that I made?


Solution

  • The issue is caused by Undefined Behaviour in your aKeyword and aOperator methods. you are comparing strings that are outside your array, i < 10, if you fix these you'll get the expected results:

    // To check whether a token is a keyword
    static bool aKeyword(std::string str)
    {
        for (int i = 0; i < 2; i++) {
            if (keywords[i] == str)
                return true;
        }
    
        return false;
    }
    
    // To check whether a token is an operator
    static bool aOperator(std::string str)
    {
        for (int i = 0; i < 4; i++) {
            if (operators[i] == str)
                return true;
        }
    
        return false;
    }