You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
130 lines
2.6 KiB
130 lines
2.6 KiB
#pragma once
|
|
|
|
#include <algorithm>
#include <filesystem>
#include <sstream>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
|
|
|
|
namespace dumb {
|
|
// Forward declaration only: this header refers to Compiler solely by
// reference (see Lexer), so the full definition is not needed here.
class Compiler;
|
|
|
|
/// A single lexical token: its category, its text payload and the
/// line/column values it was created with.
struct Token {
    /// Category of a token. Enumerators keep their declaration order, so
    /// their underlying values run from 0 (KEYWORD) to 23 (CLOSE_BRACKET).
    enum class Type {
        KEYWORD,
        IDENTIFIER,
        OPERATOR_PLUS,
        OPERATOR_MINUS,
        OPERATOR_ASTERISK,
        OPERATOR_SLASH,
        OPERATOR_EQUALS,
        OPERATOR_DOT,
        OPERATOR_COLON,
        OPERATOR_ARROW,
        OPERATOR_DOUBLE_COLON,
        OPERATOR_AMPERSAND,
        OPERATOR_PIPE,
        INTEGER_LITERAL,
        FLOAT_LITERAL,
        STRING_LITERAL,
        SEMICOLON,
        COMMA,
        OPEN_PAREN,
        CLOSE_PAREN,
        OPEN_BRACE,
        CLOSE_BRACE,
        OPEN_BRACKET,
        CLOSE_BRACKET
    };

    /// Declared here, defined in another translation unit.
    /// NOTE(review): presumably indexed by the underlying value of Type to
    /// obtain a printable name -- confirm against the definition.
    static const std::vector<std::string> ReverseType;

    Type type;         ///< Category of this token.
    std::string data;  ///< Text payload stored for this token.
    size_t line;       ///< Line value supplied at construction.
    size_t column;     ///< Column value supplied at construction.

    /// Builds a token from its four components.
    /// @param type   Category of the token.
    /// @param data   Text payload; taken by value and moved into the member.
    /// @param line   Line value to store.
    /// @param column Column value to store.
    Token(Type type, std::string data, size_t line, size_t column)
        : type(type), data(std::move(data)), line(line), column(column) {}
};
|
|
|
|
class Lexer {
|
|
public:
|
|
typedef std::vector<Token> TokenStream;
|
|
private:
|
|
Compiler& compiler;
|
|
std::string sourceCode;
|
|
TokenStream tokenStream;
|
|
std::stringstream buffer;
|
|
size_t line;
|
|
size_t column;
|
|
size_t pointer;
|
|
|
|
static const std::vector<std::string> Keywords;
|
|
static const std::unordered_map<std::string, Token::Type> Operators;
|
|
|
|
void resetBuffer() {
|
|
this->buffer.str("");
|
|
this->buffer.clear();
|
|
}
|
|
|
|
const char& current() const {
|
|
return this->peek(0);
|
|
}
|
|
|
|
const char& next() const {
|
|
return this->peek(1);
|
|
}
|
|
|
|
const char& peek(size_t amount = 0) const {
|
|
size_t address = this->pointer + amount;
|
|
return address < this->sourceCode.size() ? this->sourceCode.at(address) : Lexer::EoF;
|
|
}
|
|
|
|
const char& consume() {
|
|
const char& current = this->peek();
|
|
this->pointer++;
|
|
this->column++;
|
|
|
|
return current;
|
|
}
|
|
|
|
static bool IsOperator(const char& car) {
|
|
switch (car) {
|
|
case '+':
|
|
case '-':
|
|
case '*':
|
|
case '/':
|
|
case '=':
|
|
case ':':
|
|
case '.':
|
|
case '&':
|
|
case '|':
|
|
return true;
|
|
default:
|
|
return false;
|
|
}
|
|
}
|
|
|
|
static bool IsKeyword(const std::string& identifier) {
|
|
return std::any_of(Lexer::Keywords.begin(), Lexer::Keywords.end(), [&identifier](std::string keyword){
|
|
return keyword == identifier;
|
|
});
|
|
}
|
|
|
|
void readIdentifier();
|
|
void readOperator();
|
|
void readStringLiteral();
|
|
void readNumber();
|
|
void readBaseNumber();
|
|
public:
|
|
constexpr static char EoF = '\0';
|
|
constexpr static char EOL = '\n';
|
|
explicit Lexer(Compiler& compiler, std::filesystem::path sourceFile);
|
|
|
|
const TokenStream& tokenize();
|
|
};
|
|
|
|
}
|
|
|
|
// Stream-insertion operator for a token stream; declared here at global
// scope and defined in another translation unit (output format not visible
// from this header).
// NOTE(review): TokenStream is an alias for std::vector<dumb::Token>, so
// argument-dependent lookup considers namespaces std and dumb, not the
// global namespace; callers depend on ordinary unqualified lookup reaching
// this declaration.
std::ostream& operator <<(std::ostream& os, const dumb::Lexer::TokenStream& tokenStream);
|
|
|
|
|