a dumb compiler
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

130 lines
2.6 KiB

#pragma once
#include <algorithm>
#include <cstddef>
#include <filesystem>
#include <ostream>
#include <sstream>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
namespace dumb {
class Compiler;
/**
 * A single lexical token: its category, its text and a line/column pair.
 *
 * Plain aggregate-like value type; all members are public.
 */
struct Token {
    /**
     * Category of a token.
     *
     * The enumerator order is part of the contract: the underlying values
     * run from 0 (KEYWORD) upward in declaration order.
     */
    enum class Type {
        // Words
        KEYWORD,
        IDENTIFIER,

        // Operators
        OPERATOR_PLUS,
        OPERATOR_MINUS,
        OPERATOR_ASTERISK,
        OPERATOR_SLASH,
        OPERATOR_EQUALS,
        OPERATOR_DOT,
        OPERATOR_COLON,
        OPERATOR_ARROW,
        OPERATOR_DOUBLE_COLON,
        OPERATOR_AMPERSAND,
        OPERATOR_PIPE,

        // Literals
        INTEGER_LITERAL,
        FLOAT_LITERAL,
        STRING_LITERAL,

        // Punctuation
        SEMICOLON,
        COMMA,
        OPEN_PAREN,
        CLOSE_PAREN,
        OPEN_BRACE,
        CLOSE_BRACE,
        OPEN_BRACKET,
        CLOSE_BRACKET
    };

    /// Defined in another translation unit.
    /// NOTE(review): presumably maps each Type (by underlying value) to a
    /// printable name - confirm against the definition.
    static const std::vector<std::string> ReverseType;

    Type type;         ///< Category of this token.
    std::string data;  ///< Text carried by this token.
    size_t line;       ///< Line value supplied at construction.
    size_t column;     ///< Column value supplied at construction.

    /**
     * Builds a token from its four components.
     *
     * @param tokenType    category stored in `type`
     * @param text         taken by value and moved into `data`
     * @param lineNumber   stored in `line`
     * @param columnNumber stored in `column`
     */
    Token(Type tokenType, std::string text, size_t lineNumber, size_t columnNumber)
        : type{tokenType},
          data{std::move(text)},
          line{lineNumber},
          column{columnNumber} {}
};
class Lexer {
public:
typedef std::vector<Token> TokenStream;
private:
Compiler& compiler;
std::string sourceCode;
TokenStream tokenStream;
std::stringstream buffer;
size_t line;
size_t column;
size_t pointer;
static const std::vector<std::string> Keywords;
static const std::unordered_map<std::string, Token::Type> Operators;
void resetBuffer() {
this->buffer.str("");
this->buffer.clear();
}
const char& current() const {
return this->peek(0);
}
const char& next() const {
return this->peek(1);
}
const char& peek(size_t amount = 0) const {
size_t address = this->pointer + amount;
return address < this->sourceCode.size() ? this->sourceCode.at(address) : Lexer::EoF;
}
const char& consume() {
const char& current = this->peek();
this->pointer++;
this->column++;
return current;
}
static bool IsOperator(const char& car) {
switch (car) {
case '+':
case '-':
case '*':
case '/':
case '=':
case ':':
case '.':
case '&':
case '|':
return true;
default:
return false;
}
}
static bool IsKeyword(const std::string& identifier) {
return std::any_of(Lexer::Keywords.begin(), Lexer::Keywords.end(), [&identifier](std::string keyword){
return keyword == identifier;
});
}
void readIdentifier();
void readOperator();
void readStringLiteral();
void readNumber();
void readBaseNumber();
public:
constexpr static char EoF = '\0';
constexpr static char EOL = '\n';
explicit Lexer(Compiler& compiler, std::filesystem::path sourceFile);
const TokenStream& tokenize();
};
}
/// Stream-insertion operator for a whole token stream; declared at global
/// scope (outside namespace `dumb`) and defined in another translation unit.
/// NOTE(review): the output format is not visible from this header.
std::ostream& operator <<(std::ostream& os, const dumb::Lexer::TokenStream& tokenStream);