commit 61121a9c2bb2a34b3ee08aeb271350e6f912ef86
Author: Michael Ochmann
Date:   Thu Feb 17 01:50:07 2022 +0100

    initial commit

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..7d0b6a8
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,5 @@
+.idea
+.vscode
+.DS_Store
+
+build/**
diff --git a/CMakeLists.txt b/CMakeLists.txt
new file mode 100644
index 0000000..949f20f
--- /dev/null
+++ b/CMakeLists.txt
@@ -0,0 +1,12 @@
+cmake_minimum_required(VERSION 3.21)
+project(dumb)
+
+set(CMAKE_CXX_STANDARD 20)
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
+
+# src/Lexer.cpp is already matched by the glob, so it is not listed again below.
+file(GLOB_RECURSE SRC "src/*.cpp")
+
+add_executable(dumb main.cpp ${SRC})
+
+target_include_directories(dumb PUBLIC src)
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..55df0db
--- /dev/null
+++ b/README.md
@@ -0,0 +1,2 @@
+# dumb
+– a compiler of sorts
diff --git a/main.cpp b/main.cpp
new file mode 100644
index 0000000..d713871
--- /dev/null
+++ b/main.cpp
@@ -0,0 +1,24 @@
+#include <iostream>
+#include <memory>
+
+#include "Compiler.hpp"
+
+/**
+ * Entry point: hands the command line arguments (without the program name)
+ * to the compiler, which lexes the file named by the first argument.
+ */
+int main(int args, const char* argv[]) {
+    dumb::ArgumentList arguments;
+    if (args > 1)
+        arguments.assign(argv + 1, argv + args);
+
+    // Compiler reads arguments.at(0); bail out early instead of letting it throw.
+    if (arguments.empty()) {
+        std::cerr << "usage: dumb <source file>" << std::endl;
+        return 1;
+    }
+
+    std::unique_ptr<dumb::Compiler> app = std::make_unique<dumb::Compiler>(arguments);
+
+    return 0;
+}
diff --git a/src/Compiler.cpp b/src/Compiler.cpp
new file mode 100644
index 0000000..9dc279a
--- /dev/null
+++ b/src/Compiler.cpp
@@ -0,0 +1,12 @@
+#include "Compiler.hpp"
+
+
+namespace dumb {
+
+    // Resolves the first argument against the current working directory, then
+    // lexes that file right away. Throws std::out_of_range if `file` is empty.
+    Compiler::Compiler(ArgumentList file) : sourceFile(std::filesystem::current_path() / std::filesystem::path(file.at(0))),
+        lexer(std::make_unique<Lexer>(this->sourceFile)) {
+        this->lexer->tokenize();
+    }
+}
diff --git a/src/Compiler.hpp b/src/Compiler.hpp
new file mode 100644
index 0000000..daa3166
--- /dev/null
+++ b/src/Compiler.hpp
@@ -0,0 +1,24 @@
+#pragma once
+
+#include <filesystem>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "Lexer.hpp"
+
+namespace dumb {
+    /** Command line arguments, excluding the program name. */
+    using ArgumentList = std::vector<std::string>;
+
+    /** Owns the lexer for one source file and runs it on construction. */
+    class Compiler {
+        private:
+            std::filesystem::path sourceFile;
+            std::unique_ptr<Lexer> lexer;
+        public:
+            explicit Compiler(ArgumentList file);
+    };
+
+}
+
diff --git a/src/Lexer.cpp b/src/Lexer.cpp
new file mode 100644
index 0000000..4613fef
--- /dev/null
+++ b/src/Lexer.cpp
@@ -0,0 +1,186 @@
+#include <cctype>
+#include <cstdlib>
+#include <fstream>
+#include <iostream>
+#include <optional>
+#include <sstream>
+#include <string>
+#include <utility>
+
+#include "Lexer.hpp"
+
+namespace dumb {
+
+    namespace {
+
+        /** Prints a positioned diagnostic to stderr and ends the process with status 1. */
+        [[noreturn]] void fail(const std::string& message, size_t line, size_t column) {
+            std::cerr << message << " on line " << line << " column " << column << std::endl;
+            std::exit(1);
+        }
+
+        // The <cctype> predicates are undefined for negative char values, so
+        // every call goes through unsigned char.
+        bool isSpace(char character) { return std::isspace(static_cast<unsigned char>(character)) != 0; }
+        bool isAlpha(char character) { return std::isalpha(static_cast<unsigned char>(character)) != 0; }
+        bool isAlnum(char character) { return std::isalnum(static_cast<unsigned char>(character)) != 0; }
+        bool isDigit(char character) { return std::isdigit(static_cast<unsigned char>(character)) != 0; }
+
+        /** Token type of a one-character punctuation token, or nullopt for anything else. */
+        std::optional<Token::Type> punctuationType(char character) {
+            switch (character) {
+                case ';': return Token::Type::SEMICOLON;
+                case ',': return Token::Type::COMMA;
+                case '(': return Token::Type::OPEN_PAREN;
+                case ')': return Token::Type::CLOSE_PAREN;
+                case '{': return Token::Type::OPEN_BRACE;
+                case '}': return Token::Type::CLOSE_BRACE;
+                case '[': return Token::Type::OPEN_BRACKET;
+                case ']': return Token::Type::CLOSE_BRACKET;
+                default:  return std::nullopt;
+            }
+        }
+
+    }
+
+    const std::vector<std::string> Lexer::Keywords = {
+        "int",
+        "float",
+        "string"
+    };
+
+    /** Loads the whole file into memory; exits with status 1 if it cannot be opened. */
+    Lexer::Lexer(std::filesystem::path sourceFile) : line(1), column(1), pointer(0) {
+        std::ifstream file(sourceFile);
+        if (!file) {
+            std::cerr << "Could not open source file '" << sourceFile.string() << "'" << std::endl;
+            std::exit(1);
+        }
+
+        std::stringstream contents;
+        contents << file.rdbuf();
+
+        this->sourceCode = contents.str();
+
+        std::cout << this->sourceCode << std::endl;
+    }
+
+    /**
+     * Lexes the whole source into tokenStream and prints every token.
+     * Whitespace and comments produce no tokens; a character that starts no
+     * known token ends the process with a diagnostic.
+     */
+    void Lexer::tokenize() {
+        while (this->current() != Lexer::EoF) {
+            const char character = this->current();
+
+            if (isSpace(character)) {
+                // consume() also advances the line counter on a newline.
+                this->consume();
+            } else if (character == '/' && this->next() == '/') {
+                // Line comment: stop in front of the newline, which is then skipped as whitespace.
+                while (this->current() != Lexer::EOL && this->current() != Lexer::EoF)
+                    this->consume();
+            } else if (character == '/' && this->next() == '*') {
+                this->skipBlockComment();
+            } else if (const auto punctuation = punctuationType(character)) {
+                this->tokenStream.emplace_back(*punctuation, std::string(1, character), this->line, this->column);
+                this->consume();
+            } else if (isAlpha(character)) {
+                this->readIdentifier();
+            } else if (isDigit(character) || (character == '.' && isDigit(this->next()))) {
+                // Checked before operators, because IsOperator() accepts '.' as well.
+                this->readNumber();
+            } else if (Lexer::IsOperator(character)) {
+                this->readOperator();
+            } else if (character == '"') {
+                this->readStringLiteral();
+            } else {
+                fail(std::string("Unexpected token '") + character + "'", this->line, this->column);
+            }
+        }
+
+        for (const auto& token : this->tokenStream) {
+            std::cout << "type: " << static_cast<int>(token.type) << ", data: " << token.data << ", line: " << token.line << ", column: " << token.column << std::endl;
+        }
+    }
+
+    /** Skips a block comment; an unterminated comment runs to the end of the input. */
+    void Lexer::skipBlockComment() {
+        // Step over the opener first so that "/*/" is not taken for a complete comment.
+        this->consume();
+        this->consume();
+
+        while (this->current() != Lexer::EoF) {
+            if (this->current() == '*' && this->next() == '/') {
+                this->consume();
+                this->consume();
+                return;
+            }
+            this->consume();
+        }
+    }
+
+    /** Reads a letter followed by letters and digits; emits KEYWORD or IDENTIFIER. */
+    void Lexer::readIdentifier() {
+        const size_t startLine = this->line;
+        const size_t startColumn = this->column;
+
+        // consume() already advances the column, so no extra adjustment is made afterwards.
+        std::string data(1, this->consume());
+        while (isAlnum(this->current()))
+            data += this->consume();
+
+        const Token::Type type = Lexer::IsKeyword(data) ? Token::Type::KEYWORD : Token::Type::IDENTIFIER;
+        this->tokenStream.emplace_back(type, std::move(data), startLine, startColumn);
+    }
+
+    /** Emits the current character as a one-character OPERATOR token. */
+    void Lexer::readOperator() {
+        this->tokenStream.emplace_back(Token::Type::OPERATOR, std::string(1, this->current()), this->line, this->column);
+        this->consume();
+    }
+
+    /** Reads a double-quoted string without escape handling; the token data excludes the quotes. */
+    void Lexer::readStringLiteral() {
+        const size_t startLine = this->line;
+        const size_t startColumn = this->column;
+        this->consume();
+
+        std::string data;
+        while (this->current() != '"') {
+            // A missing closing quote would otherwise spin forever at the end of the input.
+            if (this->current() == Lexer::EoF)
+                fail("Unterminated string literal starting", startLine, startColumn);
+            data += this->consume();
+        }
+        this->consume();
+
+        this->tokenStream.emplace_back(Token::Type::STRING_LITERAL, std::move(data), startLine, startColumn);
+    }
+
+    /**
+     * Reads digits with at most one '.'; emits FLOAT_LITERAL if a '.' was seen
+     * and INTEGER_LITERAL otherwise. A second '.' ends the process with a diagnostic.
+     */
+    void Lexer::readNumber() {
+        const size_t startLine = this->line;
+        const size_t startColumn = this->column;
+        bool floatingPoint = false;
+        std::string data;
+
+        while (isDigit(this->current()) || this->current() == '.') {
+            if (this->current() == '.') {
+                if (floatingPoint)
+                    fail("Unexpected token '.'", this->line, this->column);
+                // Sticky once set; resetting it on the next digit would type 3.1415 as an integer.
+                floatingPoint = true;
+            }
+            data += this->consume();
+        }
+
+        const Token::Type type = floatingPoint ? Token::Type::FLOAT_LITERAL : Token::Type::INTEGER_LITERAL;
+        this->tokenStream.emplace_back(type, std::move(data), startLine, startColumn);
+    }
+
+}
diff --git a/src/Lexer.hpp b/src/Lexer.hpp
new file mode 100644
index 0000000..cc1fba1
--- /dev/null
+++ b/src/Lexer.hpp
@@ -0,0 +1,125 @@
+#pragma once
+
+#include <algorithm>
+#include <cstddef>
+#include <filesystem>
+#include <string>
+#include <utility>
+#include <vector>
+
+namespace dumb {
+
+    /** A lexical unit plus the 1-based line and column where it starts. */
+    struct Token {
+        enum class Type {
+            KEYWORD,
+            IDENTIFIER,
+            OPERATOR,
+            INTEGER_LITERAL,
+            FLOAT_LITERAL,
+            STRING_LITERAL,
+            SEMICOLON,
+            COMMA,
+            OPEN_PAREN,
+            CLOSE_PAREN,
+            OPEN_BRACE,
+            CLOSE_BRACE,
+            OPEN_BRACKET,
+            CLOSE_BRACKET
+        };
+
+        Type type;
+        std::string data;
+        size_t line;
+        size_t column;
+
+        Token(Type type, std::string data, size_t line, size_t column) : type(type), data(std::move(data)), line(line), column(column) {}
+    };
+
+    /** Splits the contents of one source file into a stream of tokens. */
+    class Lexer {
+        private:
+            std::string sourceCode;
+            std::vector<Token> tokenStream;
+            size_t line;
+            size_t column;
+            size_t pointer;
+
+            static const std::vector<std::string> Keywords;
+
+            /** The character at the read position, or EoF past the end. */
+            char current() const {
+                return this->peek(0);
+            }
+
+            /** The character after the read position, or EoF past the end. */
+            char next() const {
+                return this->peek(1);
+            }
+
+            /** Looks `amount` characters ahead without consuming anything. */
+            char peek(size_t amount = 0) const {
+                const size_t address = this->pointer + amount;
+                return address < this->sourceCode.size() ? this->sourceCode[address] : Lexer::EoF;
+            }
+
+            /**
+             * Returns the current character and steps past it, keeping line and
+             * column in sync. Every newline is counted here, so newlines inside
+             * block comments and string literals are tracked too. At the end of
+             * the input nothing advances and EoF is returned.
+             */
+            char consume() {
+                const char consumed = this->peek();
+                if (this->pointer >= this->sourceCode.size())
+                    return consumed;
+
+                this->pointer++;
+                if (consumed == Lexer::EOL) {
+                    this->line++;
+                    this->column = 1;
+                } else {
+                    this->column++;
+                }
+
+                return consumed;
+            }
+
+            /** True for the single characters that are lexed as OPERATOR tokens. */
+            static bool IsOperator(const char& car) {
+                switch (car) {
+                    case '+':
+                    case '-':
+                    case '*':
+                    case '/':
+                    case '=':
+                    case ':':
+                    case '.':
+                        return true;
+                    default:
+                        return false;
+                }
+            }
+
+            /** True if the identifier is one of the reserved type names in Keywords. */
+            static bool IsKeyword(const std::string& identifier) {
+                return std::any_of(Lexer::Keywords.begin(), Lexer::Keywords.end(), [&identifier](const std::string& keyword) {
+                    return keyword == identifier;
+                });
+            }
+
+            void skipBlockComment();
+            void readIdentifier();
+            void readOperator();
+            void readStringLiteral();
+            void readNumber();
+        public:
+            constexpr static char EoF = '\0';
+            constexpr static char EOL = '\n';
+            explicit Lexer(std::filesystem::path sourceFile);
+
+            void tokenize();
+    };
+
+}
+
diff --git a/tests/test.dmb b/tests/test.dmb
new file mode 100644
index 0000000..0f469f9
--- /dev/null
+++ b/tests/test.dmb
@@ -0,0 +1,11 @@
+foo : int = 55;
+myFloat : float = 3.1415;
+
+// this is a comment
+
+/* this is a
+stupid block comment
+that goes for ages
+*/
+
+bar : string = "lol"; // more comments
\ No newline at end of file