// a dumb compiler
// You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
//
// 139 lines
// 3.9 KiB

#include <fstream>
#include <iostream>
#include "Lexer.hpp"
namespace dumb {
/// Reserved words of the language: `int`, `float` and `string`.
/// Presumably the list consulted by Lexer::IsKeyword (defined elsewhere) — confirm.
const std::vector<std::string> Lexer::Keywords{"int", "float", "string"};
/// Loads the whole source file into memory so it can be tokenized.
///
/// Starts the cursor at line 1, column 1, offset 0, reads the complete file
/// into `sourceCode` and echoes it to stdout (existing debug output, kept).
/// If the file cannot be opened, an error is printed and the process exits
/// with status 1 instead of silently lexing an empty source.
///
/// @param sourceFile Path of the file to read.
dumb::Lexer::Lexer(std::filesystem::path sourceFile) : line(1), column(1), pointer(0) {
    std::ifstream file(sourceFile);
    if (!file.is_open()) {
        std::cout << "Could not open source file '" << sourceFile.string() << "'" << std::endl;
        exit(1);
    }

    // Named `contents` so it does not shadow the member `buffer` used by the read* helpers.
    std::stringstream contents;
    contents << file.rdbuf();
    this->sourceCode = contents.str();
    std::cout << this->sourceCode << std::endl;
}
void Lexer::tokenize() {
while (this->current() != Lexer::EoF) {
if (this->current() == Lexer::EOL) {
this->consume();
this->column = 1;
this->line++;
continue;
} else if (std::isspace(this->current())) {
this->consume();
continue;
} else if (this->current() == '/' && this->next() == '/') {
while (this->current() != Lexer::EOL && this->current() != Lexer::EoF)
this->consume();
continue;
} else if (this->current() == '/' && this->next() == '*') {
char last = this->current();
while (true) {
const char& cur = this->consume();
if (cur == '/' && last == '*' || cur == Lexer::EoF)
break;
last = cur;
}
continue;
} else if (this->current() == ';') {
this->tokenStream.emplace_back(Token::Type::SEMICOLON, std::string(1, this->current()), this->line, this->column);
this->consume();
continue;
} else if (this->current() == ',') {
this->tokenStream.emplace_back(Token::Type::COMMA, std::string(1, this->current()), this->line, this->column);
this->consume();
continue;
} else if (std::isalpha(this->current())) {
this->readIdentifier();
continue;
} else if (Lexer::IsOperator(this->current())) {
this->readOperator();
continue;
} else if (this->current() == '"') {
this->readStringLiteral();
continue;
} else if (std::isdigit(this->current()) || this->current() == '.') {
this->readNumber();
continue;
} else {
std::cout << "Unexpected token '" << this->current() << "' on line " << this->line << " column " << this->column << std::endl;
exit(1);
}
}
for (auto& token : this->tokenStream) {
std::cout << "type: " << static_cast<int>(token.type) << ", data: " << token.data << ", line: " << token.line << ", column: " << token.column << std::endl;
}
}
void Lexer::readIdentifier() {
size_t line = this->line;
size_t column = this->column;
this->buffer << this->consume();
while (std::isalnum(this->current()))
this->buffer << this->consume();
std::string data = this->buffer.str();
Token::Type type = Lexer::IsKeyword(data) ? Token::Type::KEYWORD : Token::Type::IDENTIFIER;
this->tokenStream.emplace_back(type, data, line, column);
this->column += data.length();
this->resetBuffer();
}
/// Emits a single-character OPERATOR token for the character under the cursor
/// (tagged with the current line and column) and then steps past it.
void Lexer::readOperator() {
    const std::string symbol(1, this->current());
    this->tokenStream.emplace_back(Token::Type::OPERATOR, symbol, this->line, this->column);
    this->consume();
}
/// Reads a double-quoted string literal starting at the opening quote.
///
/// The quotes are consumed but not stored; everything between them becomes the
/// data of a STRING_LITERAL token positioned at the opening quote. No escape
/// sequences are interpreted. Reaching the end of input before the closing
/// quote prints an error and exits with status 1 (previously the loop never
/// terminated in that case).
void Lexer::readStringLiteral() {
    size_t startLine = this->line;
    size_t startColumn = this->column;

    this->consume(); // opening quote
    while (this->current() != '"') {
        if (this->current() == Lexer::EoF) {
            std::cout << "Unterminated string literal starting on line " << startLine << " column " << startColumn << std::endl;
            exit(1);
        }
        const char ch = this->consume();
        if (ch == Lexer::EOL) {
            // Keep the position in sync for literals spanning several lines,
            // mirroring the end-of-line bookkeeping done in tokenize().
            this->column = 1;
            this->line++;
        }
        this->buffer << ch;
    }

    const std::string data = this->buffer.str();
    this->consume(); // closing quote
    this->tokenStream.emplace_back(Token::Type::STRING_LITERAL, data, startLine, startColumn);
    this->resetBuffer();
}
void Lexer::readNumber() {
size_t line = this->line;
size_t column = this->column;
bool floatingPoint = this->current() == '.';
this->buffer << this->consume();
while (std::isdigit(this->current()) || this->current() == '.') {
const char& cur = this->consume();
if (cur == '.' && floatingPoint) {
std::cout << "Unexpected token '" << this->current() << "' on line " << this->line << " column " << this->column << std::endl;
exit(1);
}
floatingPoint = cur == '.';
this->buffer << cur;
}
Token::Type type = floatingPoint ? Token::Type::FLOAT_LITERAL : Token::Type::INTEGER_LITERAL;
std::string data = this->buffer.str();
this->tokenStream.emplace_back(type, data, line, column);
this->resetBuffer();
}
}