commit
61121a9c2b
9 changed files with 317 additions and 0 deletions
@ -0,0 +1,5 @@ |
||||
.idea |
||||
.vscode |
||||
.DS_Store |
||||
|
||||
build/** |
@ -0,0 +1,10 @@ |
||||
cmake_minimum_required(VERSION 3.21)
project(dumb)

# Request C++20 and fail at configure time if the compiler cannot provide it,
# instead of silently falling back to an older standard.
set(CMAKE_CXX_STANDARD 20)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

# Collect every translation unit under src/. CONFIGURE_DEPENDS makes the build
# re-check the glob so newly added files are picked up without a manual re-run.
file(GLOB_RECURSE SRC CONFIGURE_DEPENDS "src/*.cpp")

# src/Lexer.cpp is already part of ${SRC}; listing it again is redundant.
add_executable(dumb main.cpp ${SRC})

target_include_directories(dumb PUBLIC src)
@ -0,0 +1,11 @@ |
||||
#include <memory> |
||||
|
||||
#include "Compiler.hpp" |
||||
|
||||
int main(int args, const char* argv[]) { |
||||
dumb::ArgumentList arguments(argv + 1, argv + args); |
||||
|
||||
std::unique_ptr<dumb::Compiler> app = std::make_unique<dumb::Compiler>(arguments); |
||||
|
||||
return 0; |
||||
} |
@ -0,0 +1,10 @@ |
||||
#include "Compiler.hpp" |
||||
|
||||
|
||||
namespace dumb { |
||||
|
||||
// Joins the first argument onto the current working directory, creates a
// Lexer for the resulting path and tokenizes it straight away.
// file.at(0) throws std::out_of_range when the argument list is empty.
Compiler::Compiler(ArgumentList file)
    : sourceFile(std::filesystem::current_path() / file.at(0)),
      lexer(std::make_unique<Lexer>(sourceFile)) {
    lexer->tokenize();
}
||||
} |
@ -0,0 +1,22 @@ |
||||
#pragma once |
||||
|
||||
#include <vector> |
||||
#include <string> |
||||
#include <filesystem> |
||||
#include <memory> |
||||
|
||||
#include "Lexer.hpp" |
||||
|
||||
namespace dumb { |
||||
typedef std::vector<std::string> ArgumentList; |
||||
|
||||
class Compiler { |
||||
private: |
||||
std::filesystem::path sourceFile; |
||||
std::unique_ptr<Lexer> lexer; |
||||
public: |
||||
explicit Compiler(ArgumentList file); |
||||
}; |
||||
|
||||
} |
||||
|
@ -0,0 +1,139 @@ |
||||
#include <fstream> |
||||
#include <iostream> |
||||
|
||||
#include "Lexer.hpp" |
||||
|
||||
namespace dumb { |
||||
|
||||
// Reserved words: an identifier equal to one of these is emitted as a KEYWORD
// token instead of an IDENTIFIER token.
const std::vector<std::string> Lexer::Keywords{"int", "float", "string"};
||||
|
||||
/**
 * Loads the complete contents of `sourceFile` into memory and positions the
 * lexer at line 1, column 1, offset 0.
 *
 * If the file cannot be opened, an error is printed and the process exits
 * with status 1, matching how the lexer reports its other errors; a missing
 * file is therefore never lexed as if it were empty.
 *
 * @param sourceFile path of the file to read.
 */
dumb::Lexer::Lexer(std::filesystem::path sourceFile) : line(1), column(1), pointer(0) {
    std::ifstream file(sourceFile);
    if (!file.is_open()) {
        std::cout << "Unable to open source file '" << sourceFile.string() << "'" << std::endl;
        exit(1);
    }

    std::stringstream contents;
    contents << file.rdbuf();
    this->sourceCode = contents.str();

    // Echo the loaded source to stdout.
    std::cout << this->sourceCode << std::endl;
}
||||
|
||||
void Lexer::tokenize() { |
||||
while (this->current() != Lexer::EoF) { |
||||
if (this->current() == Lexer::EOL) { |
||||
this->consume(); |
||||
this->column = 1; |
||||
this->line++; |
||||
continue; |
||||
} else if (std::isspace(this->current())) { |
||||
this->consume(); |
||||
continue; |
||||
} else if (this->current() == '/' && this->next() == '/') { |
||||
while (this->current() != Lexer::EOL && this->current() != Lexer::EoF) |
||||
this->consume(); |
||||
continue; |
||||
} else if (this->current() == '/' && this->next() == '*') { |
||||
char last = this->current(); |
||||
while (true) { |
||||
const char& cur = this->consume(); |
||||
if (cur == '/' && last == '*' || cur == Lexer::EoF) |
||||
break; |
||||
last = cur; |
||||
} |
||||
continue; |
||||
} else if (this->current() == ';') { |
||||
this->tokenStream.emplace_back(Token::Type::SEMICOLON, std::string(1, this->current()), this->line, this->column); |
||||
this->consume(); |
||||
continue; |
||||
} else if (this->current() == ',') { |
||||
this->tokenStream.emplace_back(Token::Type::COMMA, std::string(1, this->current()), this->line, this->column); |
||||
this->consume(); |
||||
continue; |
||||
} else if (std::isalpha(this->current())) { |
||||
this->readIdentifier(); |
||||
continue; |
||||
} else if (Lexer::IsOperator(this->current())) { |
||||
this->readOperator(); |
||||
continue; |
||||
} else if (this->current() == '"') { |
||||
this->readStringLiteral(); |
||||
continue; |
||||
} else if (std::isdigit(this->current()) || this->current() == '.') { |
||||
this->readNumber(); |
||||
continue; |
||||
} else { |
||||
std::cout << "Unexpected token '" << this->current() << "' on line " << this->line << " column " << this->column << std::endl; |
||||
exit(1); |
||||
} |
||||
|
||||
} |
||||
|
||||
for (auto& token : this->tokenStream) { |
||||
std::cout << "type: " << static_cast<int>(token.type) << ", data: " << token.data << ", line: " << token.line << ", column: " << token.column << std::endl; |
||||
} |
||||
} |
||||
|
||||
void Lexer::readIdentifier() { |
||||
size_t line = this->line; |
||||
size_t column = this->column; |
||||
|
||||
this->buffer << this->consume(); |
||||
|
||||
while (std::isalnum(this->current())) |
||||
this->buffer << this->consume(); |
||||
|
||||
std::string data = this->buffer.str(); |
||||
Token::Type type = Lexer::IsKeyword(data) ? Token::Type::KEYWORD : Token::Type::IDENTIFIER; |
||||
|
||||
this->tokenStream.emplace_back(type, data, line, column); |
||||
this->column += data.length(); |
||||
this->resetBuffer(); |
||||
} |
||||
|
||||
// Emits the current character as a one-character OPERATOR token positioned
// at the current line/column, then steps past it.
void Lexer::readOperator() {
    const std::string symbol(1, this->current());
    this->tokenStream.emplace_back(Token::Type::OPERATOR, symbol, this->line, this->column);
    this->consume();
}
||||
|
||||
/**
 * Reads a double-quoted string literal starting at the opening quote and
 * appends a STRING_LITERAL token whose data is the text between the quotes.
 * There is no escape handling: the literal ends at the next '"'.
 *
 * Newlines inside the literal are kept in the data and advance `line` /
 * reset `column`. If the input ends before the closing quote, an error is
 * printed and the process exits with status 1 instead of looping forever.
 */
void Lexer::readStringLiteral() {
    const size_t startLine = this->line;
    const size_t startColumn = this->column;

    this->consume(); // opening quote

    while (this->current() != '"') {
        if (this->current() == Lexer::EoF) {
            // Without this check current() would keep returning EoF and the
            // loop would never terminate.
            std::cout << "Unterminated string literal starting on line " << startLine << " column " << startColumn << std::endl;
            exit(1);
        }

        const char cur = this->consume();
        if (cur == Lexer::EOL) {
            this->line++;
            this->column = 1;
        }
        this->buffer << cur;
    }

    this->consume(); // closing quote

    this->tokenStream.emplace_back(Token::Type::STRING_LITERAL, this->buffer.str(), startLine, startColumn);
    this->resetBuffer();
}
||||
|
||||
void Lexer::readNumber() { |
||||
size_t line = this->line; |
||||
size_t column = this->column; |
||||
bool floatingPoint = this->current() == '.'; |
||||
|
||||
this->buffer << this->consume(); |
||||
|
||||
while (std::isdigit(this->current()) || this->current() == '.') { |
||||
const char& cur = this->consume(); |
||||
if (cur == '.' && floatingPoint) { |
||||
std::cout << "Unexpected token '" << this->current() << "' on line " << this->line << " column " << this->column << std::endl; |
||||
exit(1); |
||||
} |
||||
floatingPoint = cur == '.'; |
||||
this->buffer << cur; |
||||
} |
||||
Token::Type type = floatingPoint ? Token::Type::FLOAT_LITERAL : Token::Type::INTEGER_LITERAL; |
||||
|
||||
std::string data = this->buffer.str(); |
||||
this->tokenStream.emplace_back(type, data, line, column); |
||||
this->resetBuffer(); |
||||
} |
||||
|
||||
} |
@ -0,0 +1,107 @@ |
||||
#pragma once |
||||
|
||||
#include <algorithm>
#include <cstddef>
#include <filesystem>
#include <sstream>
#include <string>
#include <utility>
#include <vector>
||||
|
||||
namespace dumb { |
||||
|
||||
// One lexical unit: its category, its text and the source position recorded
// for it by the lexer.
struct Token {
    // Token categories. The enumerator order fixes the numeric values that
    // the lexer prints when it dumps the token stream.
    enum class Type {
        KEYWORD,
        IDENTIFIER,
        OPERATOR,
        INTEGER_LITERAL,
        FLOAT_LITERAL,
        STRING_LITERAL,
        SEMICOLON,
        COMMA,
        OPEN_PAREN,
        CLOSE_PAREN,
        OPEN_BRACE,
        CLOSE_BRACE,
        OPEN_BRACKET,
        CLOSE_BRACKET
    };

    Type type;         // category of the token
    std::string data;  // text of the token
    size_t line;       // line number recorded for the token
    size_t column;     // column number recorded for the token

    // Stores the given values; `data` is taken by value and moved into place.
    Token(Type type, std::string data, size_t line, size_t column)
        : type{type},
          data{std::move(data)},
          line{line},
          column{column} {}
};
||||
|
||||
class Lexer { |
||||
private: |
||||
std::string sourceCode; |
||||
std::vector<Token> tokenStream; |
||||
std::stringstream buffer; |
||||
size_t line; |
||||
size_t column; |
||||
size_t pointer; |
||||
|
||||
static const std::vector<std::string> Keywords; |
||||
|
||||
void resetBuffer() { |
||||
this->buffer.str(""); |
||||
this->buffer.clear(); |
||||
} |
||||
|
||||
const char& current() const { |
||||
return this->peek(0); |
||||
} |
||||
|
||||
const char& next() const { |
||||
return this->peek(1); |
||||
} |
||||
|
||||
const char& peek(size_t amount = 0) const { |
||||
size_t address = this->pointer + amount; |
||||
return address < this->sourceCode.size() ? this->sourceCode.at(address) : Lexer::EoF; |
||||
} |
||||
|
||||
const char& consume() { |
||||
const char& current = this->peek(); |
||||
this->pointer++; |
||||
this->column++; |
||||
|
||||
return current; |
||||
} |
||||
|
||||
static bool IsOperator(const char& car) { |
||||
switch (car) { |
||||
case '+': |
||||
case '-': |
||||
case '*': |
||||
case '/': |
||||
case '=': |
||||
case ':': |
||||
case '.': |
||||
return true; |
||||
default: |
||||
return false; |
||||
} |
||||
} |
||||
|
||||
static bool IsKeyword(const std::string& identifier) { |
||||
return std::any_of(Lexer::Keywords.begin(), Lexer::Keywords.end(), [&identifier](std::string keyword){ |
||||
return keyword == identifier; |
||||
}); |
||||
} |
||||
|
||||
void readIdentifier(); |
||||
void readOperator(); |
||||
void readStringLiteral(); |
||||
void readNumber(); |
||||
public: |
||||
constexpr static char EoF = '\0'; |
||||
constexpr static char EOL = '\n'; |
||||
explicit Lexer(std::filesystem::path sourceFile); |
||||
|
||||
void tokenize(); |
||||
}; |
||||
|
||||
} |
||||
|
@ -0,0 +1,11 @@ |
||||
foo : int = 55; |
||||
myFloat : float = 3.1415; |
||||
|
||||
// this is a comment |
||||
|
||||
/* this is a |
||||
stupid block comment |
||||
that goes for ages |
||||
*/ |
||||
|
||||
bar : string = "lol"; // more comments |
Loading…
Reference in new issue