initial commit

master
Michael Ochmann 3 years ago
commit 61121a9c2b
  1. 5
      .gitignore
  2. 10
      CMakeLists.txt
  3. 2
      README.md
  4. 11
      main.cpp
  5. 10
      src/Compiler.cpp
  6. 22
      src/Compiler.hpp
  7. 139
      src/Lexer.cpp
  8. 107
      src/Lexer.hpp
  9. 11
      tests/test.dmb

5
.gitignore vendored

@ -0,0 +1,5 @@
.idea
.vscode
.DS_Store
build/**

@ -0,0 +1,10 @@
# Build configuration for the `dumb` compiler executable.
cmake_minimum_required(VERSION 3.21)
project(dumb)

# The sources are written against C++20; fail at configure time instead of
# silently falling back to an older standard.
set(CMAKE_CXX_STANDARD 20)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

# Collect every translation unit below src/. CONFIGURE_DEPENDS re-runs the glob
# at build time so newly added files are picked up without a manual re-configure.
file(GLOB_RECURSE SRC CONFIGURE_DEPENDS "src/*.cpp")

# src/Lexer.cpp is already matched by the glob, so it is not listed a second time.
add_executable(dumb main.cpp ${SRC})
target_include_directories(dumb PUBLIC src)

@ -0,0 +1,2 @@
# dumb
– a compiler of sorts

@ -0,0 +1,11 @@
#include <iostream>
#include <memory>

#include "Compiler.hpp"

/**
 * Program entry point.
 *
 * Collects the command-line arguments (without the program name) into a
 * dumb::ArgumentList and constructs a dumb::Compiler from them.
 *
 * @param args number of entries in `argv`
 * @param argv argument vector; argv[1] is used as the source file
 * @return 0 on success, 1 when no source file was given
 */
int main(int args, const char* argv[]) {
    // Without this guard the compiler would call at(0) on an empty list and
    // terminate with an uncaught std::out_of_range.
    if (args < 2) {
        std::cerr << "usage: " << (args > 0 ? argv[0] : "dumb") << " <source-file>" << '\n';
        return 1;
    }

    const dumb::ArgumentList arguments(argv + 1, argv + args);
    // The compiler only lives for the duration of main, so it needs no heap allocation.
    dumb::Compiler app(arguments);
    return 0;
}

@ -0,0 +1,10 @@
#include "Compiler.hpp"
namespace dumb {
/**
 * Builds the path of the source file from the first argument, joined onto the
 * current working directory, creates a Lexer for it and tokenizes it right away.
 *
 * @param file argument list; element 0 is the source file (at() throws
 *             std::out_of_range when the list is empty)
 */
Compiler::Compiler(ArgumentList file)
    : sourceFile(std::filesystem::current_path() / std::filesystem::path(file.at(0))),
      lexer(std::make_unique<Lexer>(sourceFile))
{
    lexer->tokenize();
}
}

@ -0,0 +1,22 @@
#pragma once
#include <vector>
#include <string>
#include <filesystem>
#include <memory>
#include "Lexer.hpp"
namespace dumb {
/// List of command-line arguments, one string per argument.
using ArgumentList = std::vector<std::string>;

/**
 * Owns the path of the source file and the Lexer that processes it.
 */
class Compiler {
public:
    /// @param file argument list whose first element names the source file
    explicit Compiler(ArgumentList file);

private:
    // Declared before `lexer` on purpose: the constructor initializes the
    // lexer from this path, and members are initialized in declaration order.
    std::filesystem::path sourceFile;
    std::unique_ptr<Lexer> lexer;
};
}

@ -0,0 +1,139 @@
#include <fstream>
#include <iostream>
#include "Lexer.hpp"
namespace dumb {
// Words that readIdentifier() classifies as KEYWORD rather than IDENTIFIER.
const std::vector<std::string> Lexer::Keywords = {"int", "float", "string"};
/**
 * Loads the complete contents of `sourceFile` into `sourceCode` and positions
 * the lexer at line 1, column 1, offset 0.
 *
 * If the file cannot be opened, an error is printed and the process exits with
 * status 1 (the same error style tokenize() uses), instead of lexing an empty
 * string as if it were the source.
 *
 * @param sourceFile path of the file to read
 */
dumb::Lexer::Lexer(std::filesystem::path sourceFile) : line(1), column(1), pointer(0) {
    std::ifstream file(sourceFile);
    if (!file.is_open()) {
        std::cout << "Could not open source file '" << sourceFile.string() << "'" << std::endl;
        exit(1);
    }

    // Named `contents` so it does not shadow the `buffer` member.
    std::stringstream contents;
    contents << file.rdbuf();
    this->sourceCode = contents.str();

    // Debug aid: echo the loaded source.
    std::cout << this->sourceCode << std::endl;
}
/**
 * Walks over `sourceCode` and appends one Token per lexeme to `tokenStream`.
 *
 * Whitespace, `//` line comments and block comments are skipped. Any character
 * that starts no known lexeme prints an "Unexpected token" message and exits
 * the process with status 1. After lexing, every token is printed for debugging.
 */
void Lexer::tokenize() {
    while (this->current() != Lexer::EoF) {
        const char cur = this->current();
        // The <cctype> classifiers are undefined for negative values, which a
        // plain char can hold for non-ASCII bytes; classify via unsigned char.
        const auto curByte = static_cast<unsigned char>(cur);

        if (cur == Lexer::EOL) {
            this->consume();
            this->column = 1;
            this->line++;
        } else if (std::isspace(curByte)) {
            this->consume();
        } else if (cur == '/' && this->next() == '/') {
            // Line comment: stop in front of the newline so the branch above
            // performs the line/column bookkeeping.
            while (this->current() != Lexer::EOL && this->current() != Lexer::EoF)
                this->consume();
        } else if (cur == '/' && this->next() == '*') {
            // Consume the opener first, so that its '*' cannot pair with a
            // directly following '/' (the text "/*/" does not close a comment).
            this->consume();
            this->consume();
            char last = Lexer::EoF;
            while (this->current() != Lexer::EoF) {
                const char inside = this->consume();
                if (inside == Lexer::EOL) {
                    // Keep positions correct for tokens after a multi-line comment.
                    this->line++;
                    this->column = 1;
                } else if (inside == '/' && last == '*') {
                    break;
                }
                last = inside;
            }
        } else if (cur == ';') {
            this->tokenStream.emplace_back(Token::Type::SEMICOLON, std::string(1, cur), this->line, this->column);
            this->consume();
        } else if (cur == ',') {
            this->tokenStream.emplace_back(Token::Type::COMMA, std::string(1, cur), this->line, this->column);
            this->consume();
        } else if (std::isalpha(curByte)) {
            this->readIdentifier();
        } else if (std::isdigit(curByte)
                   || (cur == '.' && std::isdigit(static_cast<unsigned char>(this->next())))) {
            // Checked before the operator branch: '.' is also an operator, so a
            // literal such as ".5" would otherwise never reach readNumber().
            this->readNumber();
        } else if (Lexer::IsOperator(cur)) {
            this->readOperator();
        } else if (cur == '"') {
            this->readStringLiteral();
        } else {
            std::cout << "Unexpected token '" << cur << "' on line " << this->line << " column " << this->column << std::endl;
            exit(1);
        }
    }

    // Debug aid: dump the resulting token stream.
    for (const auto& token : this->tokenStream) {
        std::cout << "type: " << static_cast<int>(token.type) << ", data: " << token.data << ", line: " << token.line << ", column: " << token.column << std::endl;
    }
}
void Lexer::readIdentifier() {
size_t line = this->line;
size_t column = this->column;
this->buffer << this->consume();
while (std::isalnum(this->current()))
this->buffer << this->consume();
std::string data = this->buffer.str();
Token::Type type = Lexer::IsKeyword(data) ? Token::Type::KEYWORD : Token::Type::IDENTIFIER;
this->tokenStream.emplace_back(type, data, line, column);
this->column += data.length();
this->resetBuffer();
}
/**
 * Emits the current character as a single-character OPERATOR token located at
 * the current line/column, then steps past it.
 */
void Lexer::readOperator() {
    const size_t startLine = this->line;
    const size_t startColumn = this->column;
    const char symbol = this->consume();
    this->tokenStream.emplace_back(Token::Type::OPERATOR, std::string(1, symbol), startLine, startColumn);
}
/**
 * Reads a double-quoted string literal starting at the current position and
 * appends it (without the quotes) to `tokenStream` as STRING_LITERAL. The token
 * carries the position of the opening quote. Escape sequences are not
 * interpreted.
 *
 * A literal that is still open at the end of input prints an error and exits
 * the process with status 1.
 */
void Lexer::readStringLiteral() {
    const size_t startLine = this->line;
    const size_t startColumn = this->column;

    this->consume(); // opening quote

    std::string data;
    while (this->current() != '"') {
        // Without this check an unterminated literal never leaves the loop:
        // current() keeps returning EoF once the input is exhausted.
        if (this->current() == Lexer::EoF) {
            std::cout << "Unterminated string literal starting on line " << startLine << " column " << startColumn << std::endl;
            exit(1);
        }
        const char inside = this->consume();
        if (inside == Lexer::EOL) {
            // Keep positions correct for tokens after a multi-line literal.
            this->line++;
            this->column = 1;
        }
        data += inside;
    }

    this->consume(); // closing quote
    this->tokenStream.emplace_back(Token::Type::STRING_LITERAL, std::move(data), startLine, startColumn);
}
void Lexer::readNumber() {
size_t line = this->line;
size_t column = this->column;
bool floatingPoint = this->current() == '.';
this->buffer << this->consume();
while (std::isdigit(this->current()) || this->current() == '.') {
const char& cur = this->consume();
if (cur == '.' && floatingPoint) {
std::cout << "Unexpected token '" << this->current() << "' on line " << this->line << " column " << this->column << std::endl;
exit(1);
}
floatingPoint = cur == '.';
this->buffer << cur;
}
Token::Type type = floatingPoint ? Token::Type::FLOAT_LITERAL : Token::Type::INTEGER_LITERAL;
std::string data = this->buffer.str();
this->tokenStream.emplace_back(type, data, line, column);
this->resetBuffer();
}
}

@ -0,0 +1,107 @@
#pragma once
#include <algorithm>
#include <filesystem>
#include <sstream>
#include <string>
#include <vector>
namespace dumb {
/**
 * One lexeme: its category, its text and the position where it starts.
 */
struct Token {
    /// Token categories. The enumerator order is significant because the
    /// underlying integer values are printed when the token stream is dumped.
    enum class Type {
        KEYWORD, IDENTIFIER, OPERATOR,
        INTEGER_LITERAL, FLOAT_LITERAL, STRING_LITERAL,
        SEMICOLON, COMMA,
        OPEN_PAREN, CLOSE_PAREN,
        OPEN_BRACE, CLOSE_BRACE,
        OPEN_BRACKET, CLOSE_BRACKET
    };

    Type type;        ///< category of the lexeme
    std::string data; ///< text of the lexeme
    size_t line;      ///< line on which the lexeme starts
    size_t column;    ///< column at which the lexeme starts

    /// Stores all four fields; `data` is taken by value and moved into place.
    Token(Type type, std::string data, size_t line, size_t column)
        : type{type},
          data{std::move(data)},
          line{line},
          column{column}
    {
    }
};
/**
 * Turns the text of a source file into a stream of Tokens.
 *
 * The lexer keeps the whole source in memory and tracks a read offset
 * (`pointer`) together with the line/column used to position tokens.
 */
class Lexer {
private:
    std::string sourceCode;         // complete text being lexed
    std::vector<Token> tokenStream; // tokens produced so far
    std::stringstream buffer;       // scratch buffer for the read*() helpers
    size_t line;
    size_t column;
    size_t pointer;                 // offset of the current character in sourceCode

    static const std::vector<std::string> Keywords;

    /// Empties the scratch buffer and clears its stream state flags.
    void resetBuffer() {
        this->buffer.str("");
        this->buffer.clear();
    }

    /// Character at the read offset, or EoF past the end of the source.
    const char& current() const {
        return this->peek(0);
    }

    /// Character one past the read offset, or EoF past the end of the source.
    const char& next() const {
        return this->peek(1);
    }

    /// Character `amount` positions after the read offset, or EoF when that
    /// position lies beyond the end of the source.
    const char& peek(size_t amount = 0) const {
        const size_t address = this->pointer + amount;
        // The range was just checked, so unchecked indexing is sufficient.
        return address < this->sourceCode.size() ? this->sourceCode[address] : Lexer::EoF;
    }

    /// Returns the current character and advances both the read offset and the
    /// column by one. Line tracking is left to the caller.
    const char& consume() {
        const char& current = this->peek();
        this->pointer++;
        this->column++;
        return current;
    }

    /// True when `car` is one of the single-character operators + - * / = : .
    static bool IsOperator(const char& car) {
        switch (car) {
            case '+':
            case '-':
            case '*':
            case '/':
            case '=':
            case ':':
            case '.':
                return true;
            default:
                return false;
        }
    }

    /// True when `identifier` is listed in Keywords.
    static bool IsKeyword(const std::string& identifier) {
        // std::find compares in place; no per-element string copy is made.
        return std::find(Lexer::Keywords.begin(), Lexer::Keywords.end(), identifier) != Lexer::Keywords.end();
    }

    void readIdentifier();
    void readOperator();
    void readStringLiteral();
    void readNumber();

public:
    constexpr static char EoF = '\0'; ///< sentinel returned past the end of the source
    constexpr static char EOL = '\n'; ///< line terminator

    /// Reads `sourceFile` into memory.
    explicit Lexer(std::filesystem::path sourceFile);

    /// Lexes the loaded source into the token stream.
    void tokenize();
};
}

@ -0,0 +1,11 @@
foo : int = 55;
myFloat : float = 3.1415;
// this is a comment
/* this is a
stupid block comment
that goes for ages
*/
bar : string = "lol"; // more comments
Loading…
Cancel
Save