Compare commits

..

5 Commits

Author SHA1 Message Date
Michael Ochmann 2686964d3e fixes and improvements: 3 years ago
Michael Ochmann e45abf48ba fixed additional consumption of token if operator 3 years ago
Michael Ochmann 8614e08569 added parser class 3 years ago
Michael Ochmann a1391708b2 now lexing keywords, operator `::` and operator `->` 3 years ago
Michael Ochmann 8366eadb62 made source dynamic 3 years ago
  1. 2
      CMakeLists.txt
  2. 12
      src/Compiler.cpp
  3. 4
      src/Compiler.hpp
  4. 214
      src/Lexer.cpp
  5. 31
      src/Lexer.hpp
  6. 8
      src/Parser.cpp
  7. 18
      src/Parser.hpp
  8. 10
      tests/test.dmb

@ -5,6 +5,6 @@ set(CMAKE_CXX_STANDARD 20)
file(GLOB_RECURSE SRC "src/*.cpp") file(GLOB_RECURSE SRC "src/*.cpp")
add_executable(dumb main.cpp ${SRC} src/Lexer.cpp src/Lexer.hpp) add_executable(dumb main.cpp ${SRC})
target_include_directories(dumb PUBLIC src) target_include_directories(dumb PUBLIC src)

@ -1,10 +1,18 @@
#include <iostream>
#include "Compiler.hpp" #include "Compiler.hpp"
namespace dumb { namespace dumb {
Compiler::Compiler(ArgumentList file) : sourceFile(std::filesystem::current_path() / std::filesystem::path(file.at(0))), Compiler::Compiler(ArgumentList file) : sourceFile(std::filesystem::current_path() / std::filesystem::path(file.at(0))),
lexer(std::make_unique<Lexer>(this->sourceFile)) { lexer(std::make_unique<Lexer>(*this, this->sourceFile)) {
this->lexer->tokenize();
const Lexer::TokenStream& tokenStream = this->lexer->tokenize();
this->parser = std::make_unique<Parser>(*this, tokenStream);
}
void Compiler::reportError(std::string message, Token &token) {
std::cout << message << '\n';
} }
} }

@ -6,6 +6,7 @@
#include <memory> #include <memory>
#include "Lexer.hpp" #include "Lexer.hpp"
#include "Parser.hpp"
namespace dumb { namespace dumb {
typedef std::vector<std::string> ArgumentList; typedef std::vector<std::string> ArgumentList;
@ -14,8 +15,11 @@ namespace dumb {
private: private:
std::filesystem::path sourceFile; std::filesystem::path sourceFile;
std::unique_ptr<Lexer> lexer; std::unique_ptr<Lexer> lexer;
std::unique_ptr<Parser> parser;
public: public:
explicit Compiler(ArgumentList file); explicit Compiler(ArgumentList file);
void reportError(std::string message, Token& token);
}; };
} }

@ -1,28 +1,85 @@
#include <fstream> #include <fstream>
#include <iostream> #include <iostream>
#include <numeric>
#include "Lexer.hpp" #include "Lexer.hpp"
namespace dumb { namespace dumb {
// Printable names for the Token::Type enumerators.
//
// The TokenStream stream operator looks a name up with
// ReverseType.at(static_cast<int>(token.type)), i.e. by the numeric value of the
// enumerator. The entries therefore have to be listed in exactly the same order as
// the enumerators of Token::Type; adding or reordering an enumerator requires the
// matching change here.
const std::vector<std::string> Token::ReverseType = {
"KEYWORD",
"IDENTIFIER",
"OPERATOR_PLUS",
"OPERATOR_MINUS",
"OPERATOR_ASTERISK",
"OPERATOR_SLASH",
"OPERATOR_EQUALS",
"OPERATOR_DOT",
"OPERATOR_COLON",
"OPERATOR_ARROW",
"OPERATOR_DOUBLE_COLON",
"OPERATOR_AMPERSAND",
"OPERATOR_PIPE",
"INTEGER_LITERAL",
"FLOAT_LITERAL",
"STRING_LITERAL",
"SEMICOLON",
"COMMA",
"OPEN_PAREN",
"CLOSE_PAREN",
"OPEN_BRACE",
"CLOSE_BRACE",
"OPEN_BRACKET",
"CLOSE_BRACKET"
};
const std::vector<std::string> Lexer::Keywords = { const std::vector<std::string> Lexer::Keywords = {
"int", "int",
"float", "float",
"string" "u8",
"u16",
"u32",
"u64",
"s8",
"s16",
"s32",
"s64",
"string",
"while",
"for",
"if",
"else",
"this",
"namespace"
};
const std::unordered_map<std::string, Token::Type> Lexer::Operators = {
{"+", Token::Type::OPERATOR_PLUS},
{"-", Token::Type::OPERATOR_MINUS},
{"/", Token::Type::OPERATOR_SLASH},
{"*", Token::Type::OPERATOR_ASTERISK},
{"=", Token::Type::OPERATOR_EQUALS},
{":", Token::Type::OPERATOR_COLON},
{".", Token::Type::OPERATOR_DOT},
{"&", Token::Type::OPERATOR_AMPERSAND},
{"|", Token::Type::OPERATOR_PIPE}
}; };
dumb::Lexer::Lexer(std::filesystem::path sourceFile) : line(1), column(1), pointer(0) { dumb::Lexer::Lexer(Compiler& compiler, std::filesystem::path sourceFile) : compiler(compiler), line(1), column(1), pointer(0) {
std::ifstream file(sourceFile); std::ifstream file(sourceFile);
std::stringstream buffer; std::stringstream buffer;
buffer << file.rdbuf(); buffer << file.rdbuf();
this->sourceCode = buffer.str(); this->sourceCode = buffer.str();
std::cout << this->sourceCode << std::endl;
} }
void Lexer::tokenize() { const Lexer::TokenStream& Lexer::tokenize() {
while (this->current() != Lexer::EoF) { while (this->current() != Lexer::EoF) {
if (this->current() == Lexer::EOL) { if (this->current() == Lexer::EOL) {
this->consume(); this->consume();
@ -40,6 +97,10 @@ namespace dumb {
char last = this->current(); char last = this->current();
while (true) { while (true) {
const char& cur = this->consume(); const char& cur = this->consume();
if (cur == Lexer::EOL) {
this->line++;
this->column = 1;
}
if (cur == '/' && last == '*' || cur == Lexer::EoF) if (cur == '/' && last == '*' || cur == Lexer::EoF)
break; break;
last = cur; last = cur;
@ -56,25 +117,54 @@ namespace dumb {
} else if (std::isalpha(this->current())) { } else if (std::isalpha(this->current())) {
this->readIdentifier(); this->readIdentifier();
continue; continue;
} else if (this->current() == '0' && (this->next() == 'x' || this->next() == 'b') && std::isalnum(this->peek(2))) {
this->readBaseNumber();
continue;
} else if (std::isdigit(this->current()) || this->current() == '.') {
this->readNumber();
continue;
} else if (Lexer::IsOperator(this->current())) { } else if (Lexer::IsOperator(this->current())) {
this->readOperator(); this->readOperator();
continue; continue;
} else if (this->current() == '"') { } else if (this->current() == '"') {
this->readStringLiteral(); this->readStringLiteral();
continue; continue;
} else if (std::isdigit(this->current()) || this->current() == '.') {
this->readNumber();
continue;
} else { } else {
std::cout << "Unexpected token '" << this->current() << "' on line " << this->line << " column " << this->column << std::endl; Token::Type type;
exit(1);
switch (this->current()) {
case '{':
type = Token::Type::OPEN_BRACE;
break;
case '}':
type = Token::Type::CLOSE_BRACE;
break;
case '(':
type = Token::Type::OPEN_PAREN;
break;
case ')':
type = Token::Type::CLOSE_PAREN;
break;
case '[':
type = Token::Type::OPEN_BRACKET;
break;
case ']':
type = Token::Type::CLOSE_BRACKET;
break;
default:
std::cout << "Unexpected token '" << this->current() << "' on line " << this->line << " column " << this->column << std::endl;
exit(1);
}
std::string value = std::string(1, this->current());
this->tokenStream.emplace_back(type, value, this->line, this->column);
this->consume();
} }
} }
for (auto& token : this->tokenStream) { std::cout << this->tokenStream << std::endl;
std::cout << "type: " << static_cast<int>(token.type) << ", data: " << token.data << ", line: " << token.line << ", column: " << token.column << std::endl;
} return this->tokenStream;
} }
void Lexer::readIdentifier() { void Lexer::readIdentifier() {
@ -90,13 +180,33 @@ namespace dumb {
Token::Type type = Lexer::IsKeyword(data) ? Token::Type::KEYWORD : Token::Type::IDENTIFIER; Token::Type type = Lexer::IsKeyword(data) ? Token::Type::KEYWORD : Token::Type::IDENTIFIER;
this->tokenStream.emplace_back(type, data, line, column); this->tokenStream.emplace_back(type, data, line, column);
this->column += data.length();
this->resetBuffer(); this->resetBuffer();
} }
void Lexer::readOperator() { void Lexer::readOperator() {
this->tokenStream.emplace_back(Token::Type::OPERATOR, std::string(1, this->current()), this->line, this->column); Token::Type type;
this->consume();
if (this->current() == '-' && this->next() == '>') {
type = Token::Type::OPERATOR_ARROW;
this->consume();
this->consume();
}
else if (this->current() == ':' && this->next() == ':') {
type = Token::Type::OPERATOR_DOUBLE_COLON;
this->consume();
this->consume();
}
else {
if (Lexer::Operators.find(std::string(1, this->current())) == Lexer::Operators.end()) {
std::cout << "unknown operator '" << this->current() << "' on line " << this->line << " column " << this->column << "." << std::endl;
exit(1);
}
type = Lexer::Operators.at(std::string(1, this->current()));
this->consume();
}
this->tokenStream.emplace_back(type, "", this->line, this->column);
} }
void Lexer::readStringLiteral() { void Lexer::readStringLiteral() {
@ -126,7 +236,7 @@ namespace dumb {
std::cout << "Unexpected token '" << this->current() << "' on line " << this->line << " column " << this->column << std::endl; std::cout << "Unexpected token '" << this->current() << "' on line " << this->line << " column " << this->column << std::endl;
exit(1); exit(1);
} }
floatingPoint = cur == '.'; floatingPoint = floatingPoint || cur == '.';
this->buffer << cur; this->buffer << cur;
} }
Token::Type type = floatingPoint ? Token::Type::FLOAT_LITERAL : Token::Type::INTEGER_LITERAL; Token::Type type = floatingPoint ? Token::Type::FLOAT_LITERAL : Token::Type::INTEGER_LITERAL;
@ -136,4 +246,74 @@ namespace dumb {
this->resetBuffer(); this->resetBuffer();
} }
/**
 * Reads a prefixed integer literal (`0x...` hexadecimal or `0b...` binary) and appends it
 * to the token stream as an INTEGER_LITERAL whose data is the value written in decimal.
 *
 * The caller is expected to invoke this only when the current character is '0' and the
 * next one is the base marker 'x' or 'b'.
 *
 * The digits are folded into a size_t directly. Parsing with std::stoi would return an
 * int and throw std::out_of_range for anything above INT_MAX (e.g. `0xFFFFFFFF`), which
 * would end the program with an uncaught exception. A literal that does not fit into
 * size_t is reported like every other lexer error: message on stdout, then exit(1).
 *
 * NOTE(review): unlike readIdentifier(), this function does not advance this->column
 * itself; whether consume() does so is not visible from here - confirm.
 */
void Lexer::readBaseNumber() {
	// the token is reported at the position where the literal starts
	const size_t line = this->line;
	const size_t column = this->column;

	this->consume();                   // leading '0'
	const char kind = this->consume(); // base marker

	size_t radix = 0;
	switch (kind) {
		case 'x':
			radix = 16;
			break;
		case 'b':
			radix = 2;
			break;
		default:
			std::cout << "Unexpected token '" << this->current() << "' on line " << this->line << " column " << this->column << std::endl;
			exit(1);
	}

	const size_t maxValue = static_cast<size_t>(-1);
	size_t value = 0; // stays 0 when no digit follows the prefix

	while (true) {
		const char c = this->current();
		size_t digit;

		if (c >= '0' && c <= '9')
			digit = static_cast<size_t>(c - '0');
		else if (c >= 'a' && c <= 'f')
			digit = static_cast<size_t>(c - 'a') + 10;
		else if (c >= 'A' && c <= 'F')
			digit = static_cast<size_t>(c - 'A') + 10;
		else
			break;

		// a character that is a digit in some base but not in this one ends the literal;
		// the alphanumeric check below then rejects it
		if (digit >= radix)
			break;

		// value * radix + digit must not wrap around
		if (value > (maxValue - digit) / radix) {
			std::cout << "Integer literal out of range on line " << line << " column " << column << std::endl;
			exit(1);
		}

		value = value * radix + digit;
		this->consume();
	}

	// a letter or digit glued to the literal (e.g. `0b012`, `0xFFg`) is an error;
	// the cast keeps std::isalnum defined for characters with a negative char value
	if (std::isalnum(static_cast<unsigned char>(this->current()))) {
		std::cout << "Unexpected token '" << this->current() << "' on line " << this->line << " column " << this->column << std::endl;
		exit(1);
	}

	this->tokenStream.emplace_back(Token::Type::INTEGER_LITERAL, std::to_string(value), line, column);
	this->resetBuffer();
}
}
// Writes a token stream to `os` as an aligned table, one token per line:
//   <type name> <data in angle brackets, if any> <line>:<column>
// Returns `os` so the call can be chained.
//
// NOTE(review): std::setw / std::setfill are declared in <iomanip>, which is not among
// the includes visible in this view - confirm it is pulled in somewhere.
std::ostream& operator <<(std::ostream& os, const dumb::Lexer::TokenStream& tokenStream) {
size_t maxLength = 0;
size_t maxDataLength = 0;
size_t maxLine = 0;
// width of the type column: longest type name ...
for (auto& string : dumb::Token::ReverseType) {
if (string.length() > maxLength)
maxLength = string.length();
}
// ... plus 4 characters of spacing
maxLength += 4;
// one pass over the tokens to find the longest data string and the highest line number
for (auto& token : tokenStream) {
if (token.data.length() > maxDataLength)
maxDataLength = token.data.length();
if (token.line > maxLine)
maxLine = token.line;
}
// leaves room for the surrounding '<' and '>' plus spacing
maxDataLength += 6;
// from here on maxLine is the number of digits of the highest line number,
// used as the width of the right-aligned line column
maxLine = std::to_string(maxLine).length();
for (auto& token : tokenStream) {
// the table is indexed by the numeric enumerator value; at() throws if it is out of range
const std::string& type = dumb::Token::ReverseType.at(static_cast<int>(token.type));
// tokens without data get an empty cell instead of "<>"
std::string data = token.data.empty() ? "" : "<" + token.data + ">";
// setw only affects the next inserted value, so it is set again before each column
os << std::left << std::setfill(' ') << std::setw(maxLength);
os << type << std::setw(maxDataLength);
os << data << std::setw(maxLine) << std::right;
os << token.line << ":" << std::left << std::setw(0) << token.column << '\n';
}
return os;
} }

@ -4,14 +4,26 @@
#include <string> #include <string>
#include <sstream> #include <sstream>
#include <vector> #include <vector>
#include <unordered_map>
namespace dumb { namespace dumb {
class Compiler;
struct Token { struct Token {
enum class Type { enum class Type {
KEYWORD, KEYWORD,
IDENTIFIER, IDENTIFIER,
OPERATOR, OPERATOR_PLUS,
OPERATOR_MINUS,
OPERATOR_ASTERISK,
OPERATOR_SLASH,
OPERATOR_EQUALS,
OPERATOR_DOT,
OPERATOR_COLON,
OPERATOR_ARROW,
OPERATOR_DOUBLE_COLON,
OPERATOR_AMPERSAND,
OPERATOR_PIPE,
INTEGER_LITERAL, INTEGER_LITERAL,
FLOAT_LITERAL, FLOAT_LITERAL,
STRING_LITERAL, STRING_LITERAL,
@ -25,6 +37,8 @@ namespace dumb {
CLOSE_BRACKET CLOSE_BRACKET
}; };
static const std::vector<std::string> ReverseType;
Type type; Type type;
std::string data; std::string data;
size_t line; size_t line;
@ -34,15 +48,19 @@ namespace dumb {
}; };
class Lexer { class Lexer {
public:
typedef std::vector<Token> TokenStream;
private: private:
Compiler& compiler;
std::string sourceCode; std::string sourceCode;
std::vector<Token> tokenStream; TokenStream tokenStream;
std::stringstream buffer; std::stringstream buffer;
size_t line; size_t line;
size_t column; size_t column;
size_t pointer; size_t pointer;
static const std::vector<std::string> Keywords; static const std::vector<std::string> Keywords;
static const std::unordered_map<std::string, Token::Type> Operators;
void resetBuffer() { void resetBuffer() {
this->buffer.str(""); this->buffer.str("");
@ -79,6 +97,8 @@ namespace dumb {
case '=': case '=':
case ':': case ':':
case '.': case '.':
case '&':
case '|':
return true; return true;
default: default:
return false; return false;
@ -95,13 +115,16 @@ namespace dumb {
void readOperator(); void readOperator();
void readStringLiteral(); void readStringLiteral();
void readNumber(); void readNumber();
void readBaseNumber();
public: public:
constexpr static char EoF = '\0'; constexpr static char EoF = '\0';
constexpr static char EOL = '\n'; constexpr static char EOL = '\n';
explicit Lexer(std::filesystem::path sourceFile); explicit Lexer(Compiler& compiler, std::filesystem::path sourceFile);
void tokenize(); const TokenStream& tokenize();
}; };
} }
std::ostream& operator <<(std::ostream& os, const dumb::Lexer::TokenStream& tokenStream);

@ -0,0 +1,8 @@
#include "Parser.hpp"
namespace dumb {

	/**
	 * Creates a parser for an existing token stream.
	 *
	 * Both arguments are stored as references, not copied, so they have to stay alive
	 * for as long as the parser is used. The `pointer` index starts at 0.
	 *
	 * @param compiler    the compiler this parser belongs to
	 * @param tokenStream the tokens to work on
	 */
	Parser::Parser(Compiler& compiler, const Lexer::TokenStream& tokenStream)
		: compiler(compiler),
		  tokenStream(tokenStream),
		  pointer(0)
	{}

}

@ -0,0 +1,18 @@
#pragma once
#include "Lexer.hpp"
namespace dumb {

	class Compiler;

	/**
	 * Holds a token stream together with the compiler it belongs to.
	 *
	 * Only references are kept: neither the compiler nor the token stream is owned
	 * or copied, so both must outlive the parser. In this view the class declares
	 * nothing but its constructor.
	 */
	class Parser {
		// declaration order is the initialization order used by the constructor
		Compiler& compiler;
		const Lexer::TokenStream& tokenStream;
		size_t pointer;

	public:
		/**
		 * @param compiler    the compiler this parser belongs to
		 * @param tokenStream the tokens to work on; stored by reference
		 */
		Parser(Compiler& compiler, const Lexer::TokenStream& tokenStream);
	};

}

@ -1,4 +1,4 @@
foo : int = 55; foo : int = 55 + 2;
myFloat : float = 3.1415; myFloat : float = 3.1415;
// this is a comment // this is a comment
@ -8,4 +8,12 @@ stupid block comment
that goes for ages that goes for ages
*/ */
binary := 0b01110;
hex := 0xFF;
bar : string = "lol"; // more comments bar : string = "lol"; // more comments
main :: (args : int, argv : string[]) -> int {
return 0;
}
Loading…
Cancel
Save