#include #include #include "Lexer.hpp" namespace dumb { const std::vector Lexer::Keywords = { "int", "float", "string" }; dumb::Lexer::Lexer(std::filesystem::path sourceFile) : line(1), column(1), pointer(0) { std::ifstream file(sourceFile); std::stringstream buffer; buffer << file.rdbuf(); this->sourceCode = buffer.str(); std::cout << this->sourceCode << std::endl; } void Lexer::tokenize() { while (this->current() != Lexer::EoF) { if (this->current() == Lexer::EOL) { this->consume(); this->column = 1; this->line++; continue; } else if (std::isspace(this->current())) { this->consume(); continue; } else if (this->current() == '/' && this->next() == '/') { while (this->current() != Lexer::EOL && this->current() != Lexer::EoF) this->consume(); continue; } else if (this->current() == '/' && this->next() == '*') { char last = this->current(); while (true) { const char& cur = this->consume(); if (cur == '/' && last == '*' || cur == Lexer::EoF) break; last = cur; } continue; } else if (this->current() == ';') { this->tokenStream.emplace_back(Token::Type::SEMICOLON, std::string(1, this->current()), this->line, this->column); this->consume(); continue; } else if (this->current() == ',') { this->tokenStream.emplace_back(Token::Type::COMMA, std::string(1, this->current()), this->line, this->column); this->consume(); continue; } else if (std::isalpha(this->current())) { this->readIdentifier(); continue; } else if (Lexer::IsOperator(this->current())) { this->readOperator(); continue; } else if (this->current() == '"') { this->readStringLiteral(); continue; } else if (std::isdigit(this->current()) || this->current() == '.') { this->readNumber(); continue; } else { std::cout << "Unexpected token '" << this->current() << "' on line " << this->line << " column " << this->column << std::endl; exit(1); } } for (auto& token : this->tokenStream) { std::cout << "type: " << static_cast(token.type) << ", data: " << token.data << ", line: " << token.line << ", column: " << token.column << std::endl; } } void Lexer::readIdentifier() { size_t line = this->line; size_t column = this->column; this->buffer << this->consume(); while (std::isalnum(this->current())) this->buffer << this->consume(); std::string data = this->buffer.str(); Token::Type type = Lexer::IsKeyword(data) ? Token::Type::KEYWORD : Token::Type::IDENTIFIER; this->tokenStream.emplace_back(type, data, line, column); this->column += data.length(); this->resetBuffer(); } void Lexer::readOperator() { this->tokenStream.emplace_back(Token::Type::OPERATOR, std::string(1, this->current()), this->line, this->column); this->consume(); } void Lexer::readStringLiteral() { size_t line = this->line; size_t column = this->column; this->consume(); while (this->current() != '"') this->buffer << this->consume(); std::string data = this->buffer.str(); this->consume(); this->tokenStream.emplace_back(Token::Type::STRING_LITERAL, data, line, column); this->resetBuffer(); } void Lexer::readNumber() { size_t line = this->line; size_t column = this->column; bool floatingPoint = this->current() == '.'; this->buffer << this->consume(); while (std::isdigit(this->current()) || this->current() == '.') { const char& cur = this->consume(); if (cur == '.' && floatingPoint) { std::cout << "Unexpected token '" << this->current() << "' on line " << this->line << " column " << this->column << std::endl; exit(1); } floatingPoint = cur == '.'; this->buffer << cur; } Token::Type type = floatingPoint ? Token::Type::FLOAT_LITERAL : Token::Type::INTEGER_LITERAL; std::string data = this->buffer.str(); this->tokenStream.emplace_back(type, data, line, column); this->resetBuffer(); } }