You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
139 lines
3.9 KiB
139 lines
3.9 KiB
#include <cctype>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <sstream>

#include "Lexer.hpp"
|
|
|
|
namespace dumb {
|
|
|
|
/// Reserved type names of the language.
/// NOTE(review): presumably the table consulted by Lexer::IsKeyword() — confirm in Lexer.hpp.
const std::vector<std::string> Lexer::Keywords{"int", "float", "string"};
|
|
|
|
/// Builds a lexer over the contents of `sourceFile`.
///
/// The whole file is read into `sourceCode` up front, and the cursor state
/// (`line`, `column`, `pointer`) is initialised to 1, 1 and 0 respectively.
/// If the file cannot be opened a diagnostic is printed and the process exits
/// with status 1, matching how the rest of the lexer reports fatal errors.
///
/// @param sourceFile Path of the source file to tokenize.
dumb::Lexer::Lexer(std::filesystem::path sourceFile) : line(1), column(1), pointer(0) {
    std::ifstream file(sourceFile);
    if (!file.is_open()) {
        // Without this check a missing/unreadable file silently lexes as empty input.
        std::cout << "Unable to open source file '" << sourceFile.string() << "'" << std::endl;
        std::exit(1);
    }

    // Named `contents` rather than `buffer` so it does not shadow the member
    // `buffer` that the read* helpers accumulate lexemes in.
    std::stringstream contents;
    contents << file.rdbuf();
    this->sourceCode = contents.str();

    // Debug echo of the loaded source (kept from the original implementation).
    std::cout << this->sourceCode << std::endl;
}
|
|
|
|
void Lexer::tokenize() {
|
|
while (this->current() != Lexer::EoF) {
|
|
if (this->current() == Lexer::EOL) {
|
|
this->consume();
|
|
this->column = 1;
|
|
this->line++;
|
|
continue;
|
|
} else if (std::isspace(this->current())) {
|
|
this->consume();
|
|
continue;
|
|
} else if (this->current() == '/' && this->next() == '/') {
|
|
while (this->current() != Lexer::EOL && this->current() != Lexer::EoF)
|
|
this->consume();
|
|
continue;
|
|
} else if (this->current() == '/' && this->next() == '*') {
|
|
char last = this->current();
|
|
while (true) {
|
|
const char& cur = this->consume();
|
|
if (cur == '/' && last == '*' || cur == Lexer::EoF)
|
|
break;
|
|
last = cur;
|
|
}
|
|
continue;
|
|
} else if (this->current() == ';') {
|
|
this->tokenStream.emplace_back(Token::Type::SEMICOLON, std::string(1, this->current()), this->line, this->column);
|
|
this->consume();
|
|
continue;
|
|
} else if (this->current() == ',') {
|
|
this->tokenStream.emplace_back(Token::Type::COMMA, std::string(1, this->current()), this->line, this->column);
|
|
this->consume();
|
|
continue;
|
|
} else if (std::isalpha(this->current())) {
|
|
this->readIdentifier();
|
|
continue;
|
|
} else if (Lexer::IsOperator(this->current())) {
|
|
this->readOperator();
|
|
continue;
|
|
} else if (this->current() == '"') {
|
|
this->readStringLiteral();
|
|
continue;
|
|
} else if (std::isdigit(this->current()) || this->current() == '.') {
|
|
this->readNumber();
|
|
continue;
|
|
} else {
|
|
std::cout << "Unexpected token '" << this->current() << "' on line " << this->line << " column " << this->column << std::endl;
|
|
exit(1);
|
|
}
|
|
|
|
}
|
|
|
|
for (auto& token : this->tokenStream) {
|
|
std::cout << "type: " << static_cast<int>(token.type) << ", data: " << token.data << ", line: " << token.line << ", column: " << token.column << std::endl;
|
|
}
|
|
}
|
|
|
|
void Lexer::readIdentifier() {
|
|
size_t line = this->line;
|
|
size_t column = this->column;
|
|
|
|
this->buffer << this->consume();
|
|
|
|
while (std::isalnum(this->current()))
|
|
this->buffer << this->consume();
|
|
|
|
std::string data = this->buffer.str();
|
|
Token::Type type = Lexer::IsKeyword(data) ? Token::Type::KEYWORD : Token::Type::IDENTIFIER;
|
|
|
|
this->tokenStream.emplace_back(type, data, line, column);
|
|
this->column += data.length();
|
|
this->resetBuffer();
|
|
}
|
|
|
|
/// Emits the current character as a single-character OPERATOR token located at
/// the current line/column, then consumes that character.
void Lexer::readOperator() {
    const std::string symbol(1, this->current());
    this->tokenStream.emplace_back(Token::Type::OPERATOR, symbol, this->line, this->column);
    this->consume();
}
|
|
|
|
/// Reads a double-quoted string literal and appends a STRING_LITERAL token whose
/// data is the text between the quotes (quotes excluded), positioned at the
/// opening quote.
///
/// Reaching end of input before the closing quote prints a diagnostic and exits
/// with status 1; previously the loop never terminated in that case.
void Lexer::readStringLiteral() {
    const size_t startLine = this->line;
    const size_t startColumn = this->column;

    this->consume();  // opening quote

    while (this->current() != '"') {
        if (this->current() == Lexer::EoF) {
            std::cout << "Unterminated string literal starting on line " << startLine << " column " << startColumn << std::endl;
            std::exit(1);
        }

        const char ch = this->consume();
        if (ch == Lexer::EOL) {
            // A literal may span lines; keep the position bookkeeping in step
            // with tokenize()'s own EOL handling.
            this->column = 1;
            this->line++;
        }
        this->buffer << ch;
    }

    const std::string data = this->buffer.str();
    this->consume();  // closing quote

    this->tokenStream.emplace_back(Token::Type::STRING_LITERAL, data, startLine, startColumn);
    this->resetBuffer();
}
|
|
|
|
void Lexer::readNumber() {
|
|
size_t line = this->line;
|
|
size_t column = this->column;
|
|
bool floatingPoint = this->current() == '.';
|
|
|
|
this->buffer << this->consume();
|
|
|
|
while (std::isdigit(this->current()) || this->current() == '.') {
|
|
const char& cur = this->consume();
|
|
if (cur == '.' && floatingPoint) {
|
|
std::cout << "Unexpected token '" << this->current() << "' on line " << this->line << " column " << this->column << std::endl;
|
|
exit(1);
|
|
}
|
|
floatingPoint = cur == '.';
|
|
this->buffer << cur;
|
|
}
|
|
Token::Type type = floatingPoint ? Token::Type::FLOAT_LITERAL : Token::Type::INTEGER_LITERAL;
|
|
|
|
std::string data = this->buffer.str();
|
|
this->tokenStream.emplace_back(type, data, line, column);
|
|
this->resetBuffer();
|
|
}
|
|
|
|
} |