#pragma once

#include <algorithm>
#include <cstddef>
#include <filesystem>
#include <ostream>
#include <sstream>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

namespace dumb {

class Compiler;

/// A single lexical token: its category, its text payload and the
/// line/column position recorded for it.
struct Token {
    /// Token categories. The enumerator order is kept exactly as declared,
    /// since other tables may depend on the underlying values.
    enum class Type {
        KEYWORD,
        IDENTIFIER,
        OPERATOR_PLUS,
        OPERATOR_MINUS,
        OPERATOR_ASTERISK,
        OPERATOR_SLASH,
        OPERATOR_EQUALS,
        OPERATOR_DOT,
        OPERATOR_COLON,
        OPERATOR_ARROW,
        OPERATOR_DOUBLE_COLON,
        OPERATOR_AMPERSAND,
        OPERATOR_PIPE,
        INTEGER_LITERAL,
        FLOAT_LITERAL,
        STRING_LITERAL,
        SEMICOLON,
        COMMA,
        OPEN_PAREN,
        CLOSE_PAREN,
        OPEN_BRACE,
        CLOSE_BRACE,
        OPEN_BRACKET,
        CLOSE_BRACKET
    };

    // NOTE(review): element type reconstructed as std::string (presumably a
    // printable name per Type, indexed by enumerator value) -- confirm
    // against the out-of-line definition.
    static const std::vector<std::string> ReverseType;

    Type type;
    std::string data;
    size_t line;
    size_t column;

    /// Builds a token; `data` is taken by value and moved into place.
    Token(Type type, std::string data, size_t line, size_t column)
        : type(type), data(std::move(data)), line(line), column(column) {}
};

/// Turns source text into a stream of Token objects.
/// Only the small cursor helpers are defined inline; the read*() routines,
/// the constructor and tokenize() are defined elsewhere.
class Lexer {
public:
    using TokenStream = std::vector<Token>;

private:
    Compiler& compiler;
    std::string sourceCode;
    TokenStream tokenStream;
    std::stringstream buffer;
    size_t line;
    size_t column;
    size_t pointer;

    static const std::vector<std::string> Keywords;
    // NOTE(review): key/value types reconstructed (operator spelling ->
    // token type) -- confirm against the out-of-line definition.
    static const std::unordered_map<std::string, Token::Type> Operators;

    /// Empties the scratch buffer and clears its stream state flags.
    void resetBuffer() {
        this->buffer.str("");
        this->buffer.clear();
    }

    /// Character at the cursor (same as peek(0)).
    const char& current() const { return this->peek(0); }

    /// Character one position after the cursor (same as peek(1)).
    const char& next() const { return this->peek(1); }

    /// Character `amount` positions after the cursor, or Lexer::EoF when that
    /// position lies at or beyond the end of the source text.
    /// The returned reference points either into `sourceCode` or at the
    /// static EoF constant.
    const char& peek(size_t amount = 0) const {
        const size_t address = this->pointer + amount;
        if (address < this->sourceCode.size()) {
            return this->sourceCode.at(address);
        }
        return Lexer::EoF;
    }

    /// Returns the character at the cursor, then advances both `pointer` and
    /// `column` by one. `line` is not modified here.
    const char& consume() {
        const char& current = this->peek();
        this->pointer++;
        this->column++;
        return current;
    }

    /// True when `car` is one of: + - * / = : . & |
    static bool IsOperator(const char& car) {
        switch (car) {
            case '+':
            case '-':
            case '*':
            case '/':
            case '=':
            case ':':
            case '.':
            case '&':
            case '|':
                return true;
            default:
                return false;
        }
    }

    /// True when `identifier` equals one of the entries of Lexer::Keywords.
    /// Uses std::find so the keywords are compared in place instead of being
    /// copied into a by-value lambda parameter on every comparison.
    static bool IsKeyword(const std::string& identifier) {
        return std::find(Lexer::Keywords.begin(), Lexer::Keywords.end(), identifier)
               != Lexer::Keywords.end();
    }

    void readIdentifier();
    void readOperator();
    void readStringLiteral();
    void readNumber();
    void readBaseNumber();

public:
    /// Sentinel returned by peek() when reading past the end of the source.
    constexpr static char EoF = '\0';
    constexpr static char EOL = '\n';

    explicit Lexer(Compiler& compiler, std::filesystem::path sourceFile);

    const TokenStream& tokenize();
};

}  // namespace dumb

std::ostream& operator <<(std::ostream& os, const dumb::Lexer::TokenStream& tokenStream);