initial commit

4 years ago · 61121a9c2b
commit 61121a9c2b
9 changed files with 317 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1,5 @@
 .idea
 .vscode
 .DS_Store
 build/**
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -0,0 +1,10 @@
 # Build script for the `dumb` compiler executable.
 cmake_minimum_required(VERSION 3.21)
 project(dumb)
 set(CMAKE_CXX_STANDARD 20)
 # Collect every translation unit under src/. CONFIGURE_DEPENDS asks the
 # build system to re-check the glob so newly added files are picked up.
 file(GLOB_RECURSE SRC CONFIGURE_DEPENDS "src/*.cpp")
 # ${SRC} already contains src/Lexer.cpp, so it is not listed a second time;
 # headers are found through the include directory below.
 add_executable(dumb main.cpp ${SRC})
 target_include_directories(dumb PUBLIC src)
--- a/README.md
+++ b/README.md
@ -0,0 +1,2 @@
 # dumb
 – a compiler of sorts
--- a/main.cpp
+++ b/main.cpp
@ -0,0 +1,11 @@
 #include <iostream>
 #include <memory>
 #include "Compiler.hpp"
 /**
  * Program entry point.
  *
  * Forwards every command-line argument after the program name to
  * dumb::Compiler, whose constructor uses the first one as the source file
  * and tokenizes it.
  *
  * @param args number of entries in argv (including the program name)
  * @param argv argument strings; argv[0] is the program name
  * @return 0 on success, 1 when no source file was given
  */
 int main(int args, const char* argv[]) {
 	// The compiler reads arguments.at(0); without this guard a missing
 	// argument surfaces as an uncaught std::out_of_range.
 	if (args < 2) {
 		std::cerr << "usage: " << (args > 0 ? argv[0] : "dumb") << " <source-file>" << std::endl;
 		return 1;
 	}
 	dumb::ArgumentList arguments(argv + 1, argv + args);
 	// Constructing the compiler runs the lexer; no heap allocation is needed
 	// for an object that lives exactly as long as main().
 	dumb::Compiler app(arguments);
 	return 0;
 }
--- a/src/Compiler.cpp
+++ b/src/Compiler.cpp
@ -0,0 +1,10 @@
 #include "Compiler.hpp"
 namespace dumb {
 	namespace {
 		/// Joins `name` onto the current working directory.
 		std::filesystem::path resolveAgainstCwd(const std::string& name) {
 			return std::filesystem::current_path() / std::filesystem::path(name);
 		}
 	}

 	/**
 	 * Builds the compiler for one source file and lexes it right away.
 	 *
 	 * The first entry of `file` is resolved against the current working
 	 * directory. It is read with `at(0)`, so an empty list throws
 	 * std::out_of_range.
 	 */
 	Compiler::Compiler(ArgumentList file)
 			: sourceFile(resolveAgainstCwd(file.at(0))),
 			  lexer(std::make_unique<Lexer>(sourceFile)) {
 		lexer->tokenize();
 	}
 }
--- a/src/Compiler.hpp
+++ b/src/Compiler.hpp
@ -0,0 +1,22 @@
 #pragma once
 #include <vector>
 #include <string>
 #include <filesystem>
 #include <memory>
 #include "Lexer.hpp"
 namespace dumb {
 	/// List of command-line argument strings handed to the compiler.
 	using ArgumentList = std::vector<std::string>;

 	/// Owns the path of the source file and the Lexer that processes it.
 	class Compiler {
 		public:
 			/// Builds the compiler from an argument list; defined in Compiler.cpp.
 			explicit Compiler(ArgumentList file);

 		private:
 			// Keep `sourceFile` ahead of `lexer`: members are initialized in
 			// declaration order and the lexer is constructed from this path.
 			std::filesystem::path sourceFile;
 			std::unique_ptr<Lexer> lexer;
 	};
 }
--- a/src/Lexer.cpp
+++ b/src/Lexer.cpp
@ -0,0 +1,139 @@
 #include <fstream>
 #include <iostream>
 #include "Lexer.hpp"
 namespace dumb {
 	// Reserved words: an identifier equal to one of these is emitted as a KEYWORD token.
 	const std::vector<std::string> Lexer::Keywords{"int", "float", "string"};
 	/**
 	 * Loads the whole source file into memory and places the cursor at
 	 * line 1, column 1, offset 0.
 	 *
 	 * If the file cannot be opened, a message is printed and the process
 	 * exits with status 1 (the same error style tokenize() uses) instead of
 	 * silently lexing an empty input.
 	 *
 	 * @param sourceFile path of the file to read
 	 */
 	dumb::Lexer::Lexer(std::filesystem::path sourceFile) : line(1), column(1), pointer(0) {
 		std::ifstream file(sourceFile);
 		if (!file.is_open()) {
 			std::cout << "Unable to open source file '" << sourceFile.string() << "'" << std::endl;
 			exit(1);
 		}
 		// Deliberately not called `buffer`, which would shadow the member of that name.
 		std::stringstream contents;
 		contents << file.rdbuf();
 		this->sourceCode = contents.str();
 		// Echo the loaded source to stdout.
 		std::cout << this->sourceCode << std::endl;
 	}
 	/**
 	 * Splits the loaded source into tokens, appends them to the token stream
 	 * and finally prints every token to stdout.
 	 *
 	 * Whitespace, `//` line comments and block comments are skipped. A
 	 * character that starts no known token prints an "Unexpected token"
 	 * message and exits the process with status 1.
 	 */
 	void Lexer::tokenize() {
 		while (this->current() != Lexer::EoF) {
 			const char cur = this->current();
 			// <cctype> classifiers are undefined for negative values, so
 			// bytes >= 0x80 must be widened through unsigned char first.
 			const auto ucur = static_cast<unsigned char>(cur);

 			if (cur == Lexer::EOL) {
 				this->consume();
 				this->column = 1;
 				this->line++;
 			} else if (std::isspace(ucur)) {
 				this->consume();
 			} else if (cur == '/' && this->next() == '/') {
 				// Line comment: stop in front of the newline so the EOL branch counts it.
 				while (this->current() != Lexer::EOL && this->current() != Lexer::EoF)
 					this->consume();
 			} else if (cur == '/' && this->next() == '*') {
 				// Skip the opener first so that "/*/" is not taken for a closed comment.
 				this->consume();
 				this->consume();
 				// An unterminated comment simply runs to the end of the input.
 				while (this->current() != Lexer::EoF) {
 					if (this->current() == '*' && this->next() == '/') {
 						this->consume();
 						this->consume();
 						break;
 					}
 					// Keep line/column accurate across multi-line comments.
 					const bool newline = this->current() == Lexer::EOL;
 					this->consume();
 					if (newline) {
 						this->column = 1;
 						this->line++;
 					}
 				}
 			} else if (cur == ';') {
 				this->tokenStream.emplace_back(Token::Type::SEMICOLON, std::string(1, cur), this->line, this->column);
 				this->consume();
 			} else if (cur == ',') {
 				this->tokenStream.emplace_back(Token::Type::COMMA, std::string(1, cur), this->line, this->column);
 				this->consume();
 			} else if (std::isalpha(ucur)) {
 				this->readIdentifier();
 			} else if (std::isdigit(ucur) || (cur == '.' && std::isdigit(static_cast<unsigned char>(this->next())))) {
 				// Checked before operators: '.' is also an operator, which used
 				// to make a leading-dot number such as ".5" unreachable here.
 				this->readNumber();
 			} else if (Lexer::IsOperator(cur)) {
 				this->readOperator();
 			} else if (cur == '"') {
 				this->readStringLiteral();
 			} else {
 				std::cout << "Unexpected token '" << this->current() << "' on line " << this->line << " column " << this->column << std::endl;
 				exit(1);
 			}
 		}
 		// Dump of the resulting token stream.
 		for (const auto& token : this->tokenStream) {
 			std::cout << "type: " << static_cast<int>(token.type) << ", data: " << token.data << ", line: " << token.line << ", column: " << token.column << '\n';
 		}
 		std::cout.flush();
 	}
 	void Lexer::readIdentifier() {
 		size_t line   = this->line;
 		size_t column = this->column;
 		this->buffer << this->consume();
 		while (std::isalnum(this->current()))
 			this->buffer << this->consume();
 		std::string data = this->buffer.str();
 		Token::Type type = Lexer::IsKeyword(data) ? Token::Type::KEYWORD : Token::Type::IDENTIFIER;
 		this->tokenStream.emplace_back(type, data, line, column);
 		this->column += data.length();
 		this->resetBuffer();
 	}
 	/// Emits the single character under the cursor as an OPERATOR token and steps past it.
 	void Lexer::readOperator() {
 		// Remember where the operator sits before consume() moves the column.
 		const size_t startLine   = this->line;
 		const size_t startColumn = this->column;
 		const std::string symbol(1, this->consume());
 		this->tokenStream.emplace_back(Token::Type::OPERATOR, symbol, startLine, startColumn);
 	}
 	/**
 	 * Reads a double-quoted string literal starting at the cursor and appends
 	 * a STRING_LITERAL token holding the text between the quotes (there is no
 	 * escape processing).
 	 *
 	 * If the input ends before the closing quote, an error is printed and the
 	 * process exits with status 1.
 	 */
 	void Lexer::readStringLiteral() {
 		const size_t startLine   = this->line;
 		const size_t startColumn = this->column;
 		this->consume(); // opening quote
 		while (this->current() != '"') {
 			// current() keeps returning EoF past the end of the input, so
 			// without this check an unterminated literal never stops.
 			if (this->current() == Lexer::EoF) {
 				std::cout << "Unterminated string literal starting on line " << startLine << " column " << startColumn << std::endl;
 				exit(1);
 			}
 			const char cur = this->consume();
 			this->buffer << cur;
 			// Keep positions accurate when a literal spans several lines.
 			if (cur == Lexer::EOL) {
 				this->column = 1;
 				this->line++;
 			}
 		}
 		const std::string data = this->buffer.str();
 		this->consume(); // closing quote
 		this->tokenStream.emplace_back(Token::Type::STRING_LITERAL, data, startLine, startColumn);
 		this->resetBuffer();
 	}
 	void Lexer::readNumber() {
 		size_t line          = this->line;
 		size_t column        = this->column;
 		bool   floatingPoint = this->current() == '.';
 		this->buffer << this->consume();
 		while (std::isdigit(this->current()) || this->current() == '.') {
 			const char& cur = this->consume();
 			if (cur == '.' && floatingPoint) {
 				std::cout << "Unexpected token '" << this->current() << "' on line " << this->line << " column " << this->column << std::endl;
 				exit(1);
 			}
 			floatingPoint = cur == '.';
 			this->buffer << cur;
 		}
 		Token::Type type = floatingPoint ? Token::Type::FLOAT_LITERAL : Token::Type::INTEGER_LITERAL;
 		std::string data = this->buffer.str();
 		this->tokenStream.emplace_back(type, data, line, column);
 		this->resetBuffer();
 	}
 }
--- a/src/Lexer.hpp
+++ b/src/Lexer.hpp
@ -0,0 +1,107 @@
 #pragma once
 #include <filesystem>
 #include <string>
 #include <sstream>
 #include <vector>
 namespace dumb {
 	/// One lexical unit: its category, its text, and the source position recorded for it.
 	struct Token {
 		/// Token category. The enumerator order fixes the integer values
 		/// (KEYWORD is 0, CLOSE_BRACKET is 13), so do not reorder.
 		enum class Type {
 			KEYWORD,
 			IDENTIFIER,
 			OPERATOR,
 			INTEGER_LITERAL,
 			FLOAT_LITERAL,
 			STRING_LITERAL,
 			SEMICOLON,
 			COMMA,
 			OPEN_PAREN,
 			CLOSE_PAREN,
 			OPEN_BRACE,
 			CLOSE_BRACE,
 			OPEN_BRACKET,
 			CLOSE_BRACKET
 		};

 		Type type;        ///< category of the token
 		std::string data; ///< text carried by the token
 		size_t line;      ///< line number recorded for the token
 		size_t column;    ///< column number recorded for the token

 		/// Stores all four fields; `data` is taken by value and moved into place.
 		Token(Type type, std::string data, size_t line, size_t column)
 			: type{type},
 			  data{std::move(data)},
 			  line{line},
 			  column{column} {}
 	};
 	/**
 	 * Character-level lexer over a source file held fully in memory.
 	 *
 	 * The cursor is the triple (pointer, line, column): `pointer` is the byte
 	 * offset into `sourceCode`; `line` and `column` are used for token
 	 * positions and error messages.
 	 */
 	class Lexer {
 		private:
 			std::string sourceCode;         // entire input text
 			std::vector<Token> tokenStream; // tokens produced so far
 			std::stringstream buffer;       // scratch space for the token being read
 			size_t line;
 			size_t column;
 			size_t pointer;
 			static const std::vector<std::string> Keywords;

 			/// Empties the scratch buffer and clears its stream state flags.
 			void resetBuffer() {
 				this->buffer.str("");
 				this->buffer.clear();
 			}
 			/// Character under the cursor, or EoF past the end of the input.
 			char current() const {
 				return this->peek(0);
 			}
 			/// Character right after the cursor, or EoF past the end of the input.
 			char next() const {
 				return this->peek(1);
 			}
 			/// Character `amount` positions ahead of the cursor, or EoF when out of range.
 			char peek(size_t amount = 0) const {
 				const size_t address = this->pointer + amount;
 				return address < this->sourceCode.size() ? this->sourceCode[address] : Lexer::EoF;
 			}
 			/// Returns the character under the cursor and advances both the
 			/// offset and the column by one (also when already at the end).
 			char consume() {
 				const char current = this->peek();
 				this->pointer++;
 				this->column++;
 				return current;
 			}
 			/// True for the single-character operators the lexer recognizes.
 			static bool IsOperator(char car) {
 				switch (car) {
 					case '+':
 					case '-':
 					case '*':
 					case '/':
 					case '=':
 					case ':':
 					case '.':
 						return true;
 					default:
 						return false;
 				}
 			}
 			/// True when `identifier` equals one of the entries in Keywords.
 			static bool IsKeyword(const std::string& identifier) {
 				// A plain loop needs no <algorithm> (which this header never
 				// included) and does not copy each keyword.
 				for (const std::string& keyword : Lexer::Keywords) {
 					if (keyword == identifier)
 						return true;
 				}
 				return false;
 			}
 			void readIdentifier();
 			void readOperator();
 			void readStringLiteral();
 			void readNumber();
 		public:
 			/// Sentinel returned when reading past the end of the input.
 			constexpr static char EoF = '\0';
 			/// Line terminator recognized by the lexer.
 			constexpr static char EOL = '\n';
 			/// Reads `sourceFile` into memory; defined in Lexer.cpp.
 			explicit Lexer(std::filesystem::path sourceFile);
 			/// Tokenizes the loaded source; defined in Lexer.cpp.
 			void tokenize();
 	};
 }
--- a/tests/test.dmb
+++ b/tests/test.dmb
@ -0,0 +1,11 @@
 foo : int = 55;
 myFloat : float = 3.1415;
 // this is a comment
 /* this is a
 stupid block comment
 that goes for ages
 */
 bar : string = "lol"; // more comments