diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..e73f25f --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,31 @@ +cmake_minimum_required(VERSION 3.10) + +project("OMGL") + +# set(CMAKE_CXX_FLAGS "-O3") +# set(CMAKE_CXX_FLAGS "-O0 --coverage -ftest-coverage -fprofile-arcs") + +find_package(GTest REQUIRED) +find_package(Threads REQUIRED) + +include_directories( + "include" + ${GTEST_INCLUDE_DIRS} + ) + +set(CMAKE_CXX_STANDARD 17) + +set(SOURCE_FILES + src/preprocessor.cpp + src/lexer.cpp + src/syntax_tree.cpp + ) + +set(TEST_FILES + ) + +add_executable(OMGL src/main.cpp ${SOURCE_FILES}) + +# add_executable(Test tests/MainTest.cpp ${TEST_FILES} ${SOURCE_FILES}) + +# target_link_libraries(Test ${GTEST_LIBRARIES} Threads::Threads) diff --git a/README.md b/README.md index b42ea78..6938c2b 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,39 @@ # Oh my god programming language Питонисты плакали когда увидели это! + +Реализован язык с типами int string. Я могу создавать переменные в любой степени вложенности, они корректно убираются со стека. Также корректно работает приоритет операций. 
Пример кода на моем языке:
+```c++
+string s = "abacaba ";
+for (int i = 0; i < 5; i += 1) {
+    string t = "";
+    for (int j = 0; j < 3; j += 1) {
+        t += "t";
+    }
+    s += t;
+    print(s);
+}
+
+int x = (1 + 2) * 3;
+print(x, x + 1, x + 2);
+print(7777);
+
+while (x != 0) {
+    x -= 1;
+}
+
+print(x);
+```
+
+Результат работы:
+```text
+abacaba ttt
+abacaba tttttt
+abacaba ttttttttt
+abacaba tttttttttttt
+abacaba ttttttttttttttt
+9 10 11
+7777
+0
+END
+```
\ No newline at end of file
diff --git a/include/lexer.hpp b/include/lexer.hpp
new file mode 100644
index 0000000..268c5e6
--- /dev/null
+++ b/include/lexer.hpp
@@ -0,0 +1,48 @@
+#pragma once
+#include <list>
+#include <string>
+#include <utility>
+
+// Splits program text into a list of tokens.
+class Lexer {
+ public:
+  // One token: its kind and, for words and string literals, the raw text.
+  struct LexerToken {
+    enum class Type {
+      None, Word, Semicolon, CurlyOpenBracket, CurlyCloseBracket,
+      RoundOpenBracket, RoundCloseBracket, Period, Commo, Plus, Minus, Star,
+      Slash, Equal, ExclamationMark, PlusEqual, MinusEqual, StarEqual,
+      SlashEqual, EqualEqual, ExclamationMarkEqual, PlusPlus, MinusMinus,
+      LAngle, RAngle, LAngleEqual, RAngleEqual, LArrow, RArrow, FOR, IF, ELSE,
+      WHILE, StringLiteral
+    };
+    LexerToken() = default;
+    // `info` is taken by value so one constructor serves lvalue and rvalue
+    // callers and the text is moved, not copied, into the token.
+    LexerToken(Type type, std::string info) : type(type), info(std::move(info)) {}
+
+    Type type = Type::None;
+    std::string info;
+  };
+  using LexerTokenList = std::list<LexerToken>;
+
+  // Tokenizes the given text; tokens are appended to those already collected.
+  void ParseText(std::string&&);
+  void ParseText(const std::string&);
+
+  // Returns a copy of all tokens collected so far.
+  LexerTokenList GetTokens() const;
+
+ private:
+  // Emits the token being accumulated (if any) and resets the lexer state.
+  void PushToken();
+  LexerTokenList tokens;
+
+  LexerToken::Type current_state = LexerToken::Type::None;
+  std::string current_info;
+};
+
+using LexerTokenList = Lexer::LexerTokenList;
+
+namespace std {
+// Human-readable name of a token type (used for diagnostics).
+std::string to_string(Lexer::LexerToken::Type);
+}
diff --git a/include/preprocessor.hpp b/include/preprocessor.hpp
new file mode 100644
index 0000000..f04ec9c
--- /dev/null
+++ b/include/preprocessor.hpp
@@ -0,0 +1,5 @@
+#pragma once
+#include <string>
+
+// Returns the program text with `//` and `/* */` comments removed.
+std::string Preprocessor(std::string&& s);
+std::string Preprocessor(const std::string& s);
diff --git a/include/syntax_tree.hpp b/include/syntax_tree.hpp
new file mode 100644
index 0000000..784ddc4
--- /dev/null
+++ b/include/syntax_tree.hpp
@@ -0,0 +1,491 @@
+#pragma once
+#include <cstddef>
+#include <cstdlib>
+#include <memory>
+#include <stdexcept>
+#include <string>
+#include <utility>
+#include <variant>
+#include <vector>
+#include "lexer.hpp"
+
+struct Node;
+struct Variable;
+struct VariableInStack;
+struct Expression;
+
+// Base class of every syntax-tree element. Siblings are chained through
+// `next`/`previous`; `parent` points at the enclosing node.
+struct Node {
+  Node* parent = nullptr;
+  Node* next = nullptr;
+  Node* previous = nullptr;
+
+  // Links the given node right after / right before this one.
+  void Insert(Node*);
+  void InsertBefore(Node*);
+  // Executes the node against the interpreter's value stack.
+  virtual void Run(std::vector<std::shared_ptr<VariableInStack>>& stack) = 0;
+  virtual ~Node() {}
+};
+
+enum class Operation {
+  None,
+  Plus,
+  Minus,
+  Star,
+  Slash,
+  Equal,
+  ExclamationMark,
+  PlusEqual,
+  MinusEqual,
+  StarEqual,
+  SlashEqual,
+  EqualEqual,
+  ExclamationMarkEqual,
+  PlusPlus,
+  MinusMinus,
+  LAngle,
+  RAngle,
+  LAngleEqual,
+  RAngleEqual,
+  Value
+};
+
+// Associativity of an operator: left-to-right (L) or right-to-left (R).
+enum class PriorityType {
+  L, R
+};
+
+// Type tag of a runtime value.
+struct TypeVariable {
+  enum ID {
+    none = 0, type_int = 1, type_string = 2
+  };
+
+  TypeVariable() {}
+  TypeVariable(ID id) : id(id) {}
+  ID id = ID::none;
+  size_t size = 0;
+};
+
+// Pops `count` values from the stack when run.
+struct DeallocateStack : public Node {
+  DeallocateStack(size_t count) : count(count) {}
+  void Run(std::vector<std::shared_ptr<VariableInStack>>& stack) override {
+    for (size_t i = 0; i < count; ++i) {
+      stack.pop_back();
+    }
+  }
+  size_t count;
+};
+
+// Same runtime effect as DeallocateStack; kept as a distinct node type.
+struct HiddenDeallocateStack : public Node {
+  HiddenDeallocateStack(size_t count) : count(count) {}
+  void Run(std::vector<std::shared_ptr<VariableInStack>>& stack) override {
+    for (size_t i = 0; i < count; ++i) {
+      stack.pop_back();
+    }
+  }
+  size_t count;
+};
+
+// A runtime value: a type tag plus heap storage (`memory`) that holds an
+// `int` or a `std::string`, depending on `type_variable.id`.
+struct VariableInStack {
+  VariableInStack(TypeVariable type_variable) : type_variable(type_variable) {}
+  // The object owns `memory`, so copies are deep and the destructor
+  // releases the storage.
+  VariableInStack(const VariableInStack& other) : type_variable(other.type_variable) {
+    CopyValueFrom(other);
+  }
+  VariableInStack& operator=(const VariableInStack& other) {
+    if (this != &other) {
+      Clear();
+      type_variable = other.type_variable;
+      CopyValueFrom(other);
+    }
+    return *this;
+  }
+  ~VariableInStack() { Clear(); }
+
+  // Applies the binary operation `op` with `*this` as the left operand and
+  // `*another` as the right operand, storing the outcome in `*result`.
+  //  - Assignment operators (=, +=, -=, *=, /=) also update `*this`.
+  //  - Comparisons store an int (0 or 1).
+  //  - Any other operation leaves `*result` untouched.
+  // Throws std::logic_error on mismatched operand types, on operators that
+  // strings do not support, and on integer division by zero.
+  void CallOperator(std::shared_ptr<VariableInStack>& another,
+                    std::shared_ptr<VariableInStack>& result, Operation op) {
+    switch (op) {
+      case Operation::Equal:
+      case Operation::PlusEqual:
+      case Operation::MinusEqual:
+      case Operation::StarEqual:
+      case Operation::SlashEqual:
+        Assign(*another, *result, op);
+        break;
+      case Operation::EqualEqual:
+      case Operation::ExclamationMarkEqual:
+      case Operation::LAngle:
+      case Operation::RAngle:
+      case Operation::LAngleEqual:
+      case Operation::RAngleEqual:
+        Compare(*another, *result, op);
+        break;
+      case Operation::Plus:
+      case Operation::Minus:
+      case Operation::Star:
+      case Operation::Slash:
+        Arithmetic(*another, *result, op);
+        break;
+      default:
+        break;
+    }
+  }
+
+  // Handles =, +=, -=, *= and /=.
+  void Assign(VariableInStack& source, VariableInStack& result, Operation op) {
+    const TypeVariable::ID source_type = source.type_variable.id;
+    if (type_variable.id != TypeVariable::ID::none && type_variable.id != source_type) {
+      throw std::logic_error("error variable convert");
+    }
+    if (result.type_variable.id != TypeVariable::ID::none &&
+        result.type_variable.id != source_type) {
+      throw std::logic_error("error variable convert");
+    }
+    if (op == Operation::Equal) {
+      // On self-assignment the storage must be kept: re-allocating it would
+      // destroy the very value being read.
+      if (this != &source) {
+        Retype(source_type);
+      }
+    } else if (memory == nullptr) {
+      // A compound assignment needs an existing left-hand value.
+      throw std::logic_error("variable has no value");
+    }
+    result.Retype(source_type);
+
+    if (source_type == TypeVariable::ID::type_int) {
+      int& target = AsInt();
+      // Copied first so that `a op= a` reads the old value of `a`.
+      const int value = source.AsInt();
+      switch (op) {
+        case Operation::Equal:
+          target = value;
+          break;
+        case Operation::PlusEqual:
+          target += value;
+          break;
+        case Operation::MinusEqual:
+          target -= value;
+          break;
+        case Operation::StarEqual:
+          target *= value;
+          break;
+        default:  // Operation::SlashEqual
+          if (value == 0) {
+            throw std::logic_error("division by zero");
+          }
+          target /= value;
+          break;
+      }
+      result.AsInt() = target;
+    } else if (source_type == TypeVariable::ID::type_string) {
+      std::string& target = AsString();
+      if (op == Operation::Equal) {
+        if (this != &source) {
+          target = source.AsString();
+        }
+      } else if (op == Operation::PlusEqual) {
+        target += source.AsString();
+      } else {
+        throw std::logic_error("invalid operation string");
+      }
+      result.AsString() = target;
+    }
+  }
+
+  // Handles ==, !=, <, >, <= and >=; the result is always an int.
+  void Compare(VariableInStack& other, VariableInStack& result, Operation op) {
+    const TypeVariable::ID lhs_type = type_variable.id;
+    const TypeVariable::ID rhs_type = other.type_variable.id;
+    bool outcome = false;
+    if (lhs_type == TypeVariable::ID::type_int && rhs_type == TypeVariable::ID::type_int) {
+      outcome = CompareValues(AsInt(), other.AsInt(), op);
+    } else if (lhs_type == TypeVariable::ID::type_string &&
+               rhs_type == TypeVariable::ID::type_string) {
+      outcome = CompareValues(AsString(), other.AsString(), op);
+    } else {
+      throw std::logic_error("error variable convert");
+    }
+    result.Retype(TypeVariable::ID::type_int);
+    result.AsInt() = outcome;
+  }
+
+  // Handles +, -, * and / (strings support only +).
+  void Arithmetic(VariableInStack& other, VariableInStack& result, Operation op) {
+    const TypeVariable::ID lhs_type = type_variable.id;
+    const TypeVariable::ID rhs_type = other.type_variable.id;
+    if (lhs_type == TypeVariable::ID::type_int && rhs_type == TypeVariable::ID::type_int) {
+      const int lhs = AsInt();
+      const int rhs = other.AsInt();
+      int value = 0;
+      switch (op) {
+        case Operation::Plus:
+          value = lhs + rhs;
+          break;
+        case Operation::Minus:
+          value = lhs - rhs;
+          break;
+        case Operation::Star:
+          value = lhs * rhs;
+          break;
+        default:  // Operation::Slash
+          if (rhs == 0) {
+            throw std::logic_error("division by zero");
+          }
+          value = lhs / rhs;
+          break;
+      }
+      result.Retype(TypeVariable::ID::type_int);
+      result.AsInt() = value;
+    } else if (lhs_type == TypeVariable::ID::type_string &&
+               rhs_type == TypeVariable::ID::type_string) {
+      if (op != Operation::Plus) {
+        throw std::logic_error("invalid operation string");
+      }
+      std::string value = AsString() + other.AsString();
+      result.Retype(TypeVariable::ID::type_string);
+      result.AsString() = std::move(value);
+    } else {
+      throw std::logic_error("error variable convert");
+    }
+  }
+
+  template <class T>
+  static bool CompareValues(const T& lhs, const T& rhs, Operation op) {
+    switch (op) {
+      case Operation::EqualEqual:
+        return lhs == rhs;
+      case Operation::ExclamationMarkEqual:
+        return lhs != rhs;
+      case Operation::LAngle:
+        return lhs < rhs;
+      case Operation::RAngle:
+        return lhs > rhs;
+      case Operation::LAngleEqual:
+        return lhs <= rhs;
+      default:  // Operation::RAngleEqual
+        return lhs >= rhs;
+    }
+  }
+
+  // Typed views of `memory`; valid only while the tag matches and storage
+  // has been allocated.
+  int& AsInt() { return *static_cast<int*>(memory); }
+  const int& AsInt() const { return *static_cast<const int*>(memory); }
+  std::string& AsString() { return *static_cast<std::string*>(memory); }
+  const std::string& AsString() const { return *static_cast<const std::string*>(memory); }
+
+  // Releases the current storage, switches the tag to `id` and allocates
+  // fresh storage for it.
+  void Retype(TypeVariable::ID id) {
+    Clear();
+    type_variable.id = id;
+    Allocate();
+  }
+
+  // Frees the storage according to the current type tag.
+  void Clear() {
+    if (type_variable.id == TypeVariable::ID::type_int) {
+      delete static_cast<int*>(memory);
+    } else if (type_variable.id == TypeVariable::ID::type_string) {
+      delete static_cast<std::string*>(memory);
+    }
+    memory = nullptr;
+  }
+
+  // Allocates zero-initialized storage for the current type tag. Existing
+  // storage is not released here; call Clear() first when re-allocating.
+  void Allocate() {
+    if (type_variable.id == TypeVariable::ID::type_int) {
+      memory = new int(0);
+    } else if (type_variable.id == TypeVariable::ID::type_string) {
+      memory = new std::string();
+    }
+  }
+
+  // Deep-copies the value of `other`; expects the type tags to match already.
+  void CopyValueFrom(const VariableInStack& other) {
+    memory = nullptr;
+    if (other.memory == nullptr) {
+      return;
+    }
+    if (type_variable.id == TypeVariable::ID::type_int) {
+      memory = new int(other.AsInt());
+    } else if (type_variable.id == TypeVariable::ID::type_string) {
+      memory = new std::string(other.AsString());
+    }
+  }
+
+  TypeVariable type_variable;
+  void* memory = nullptr;
+};
+
+// Declaration of a variable: a type name, a name and an optional
+// initializer expression.
+struct Variable : public Node {
+  Variable(std::string type, std::string name) : type(std::move(type)), name(std::move(name)) {}
+  void Run(std::vector<std::shared_ptr<VariableInStack>>& stack) override;
+  std::string type;
+  std::string name;
+  Expression* default_value = nullptr;
+  // TYPE
+  // STORAGE
+};
+
+// A node that holds an ordered list of child nodes and runs them in order.
+struct Container : public Node {
+  void AddChildren(Node*);
+  void Run(std::vector<std::shared_ptr<VariableInStack>>& stack) override {
+    for (Node* cur = children_begin; cur != nullptr; cur = cur->next) {
+      cur->Run(stack);
+    }
+  }
+
+  Node* children_begin = nullptr;
+  Node* children_end = nullptr;
+};
+
+struct CodeBlock : public Container {};
+
+struct Function : public Node {
+  Function(const std::string& name) : name(name), code(new CodeBlock()) {}
+  // Both overloads create the body block; the rvalue one moves the name in.
+  Function(std::string&& name) : name(std::move(name)), code(new CodeBlock()) {}
+  void Run(std::vector<std::shared_ptr<VariableInStack>>& stack) override {
+    code->Run(stack);
+    // TODO
+  }
+  std::string name;
+  CodeBlock* code;
+  // RETURN VALUE
+};
+
+struct CreateVariables : Node {
+  CreateVariables(size_t count) : count(count) {}
+  void Run(std::vector<std::shared_ptr<VariableInStack>>& stack) override {}
+  size_t count;
+};
+
+struct CallFunction : public Node {
+  CallFunction(std::string name_function) : name_function(std::move(name_function)) {}
+  void Run(std::vector<std::shared_ptr<VariableInStack>>& stack) override;
+  std::string name_function;
+  // NOTE(review): the element type was lost in extraction; Expression* is
+  // presumed from PushParametersFunction -- confirm against syntax_tree.cpp.
+  std::vector<Expression*> parameters;
+};
+
+using stack_pointer = size_t;
+
+struct Expression : public Node {
+  // An operand is a sub-expression, literal text, or a stack offset of a
+  // variable.
+  // NOTE(review): the alternatives were lost in extraction and are restored
+  // from their use in Run(); the original order is unknown -- confirm.
+  using Types = std::variant<Expression*, std::string, stack_pointer>;
+  Expression() {}
+  Expression(Types type1, Types type2, Operation op)
+      : type1(std::move(type1)), type2(std::move(type2)), op(op) {
+    count = 1;
+    if (op != Operation::Value) {
+      // Read the members here: the constructor parameters of the same
+      // names have already been moved from.
+      Expression* lhs = std::get<Expression*>(this->type1);
+      Expression* rhs = std::get<Expression*>(this->type2);
+      count += lhs->count;
+      count += rhs->count;
+
+      position_result = count;
+      rhs->AddStackPointer(lhs->count);
+    }
+  }
+  void AddStackPointer(stack_pointer d) {
+    position_result += d;
+  }
+  // Evaluates the expression and pushes its value on top of `stack`.
+  void Run(std::vector<std::shared_ptr<VariableInStack>>& stack) override {
+    if (op == Operation::Value) {
+      if (std::holds_alternative<stack_pointer>(type1)) {
+        // Variable reference: share the object stored that many slots below
+        // the end of the stack. Copy the handle before growing the vector.
+        std::shared_ptr<VariableInStack> variable =
+            *(stack.end() - std::get<stack_pointer>(type1));
+        stack.push_back(std::move(variable));
+      } else {
+        const std::string& literal = std::get<std::string>(type1);
+        stack.push_back(std::make_shared<VariableInStack>(TypeVariable()));
+        if (literal[0] == '\"') {
+          // String literal: strip the surrounding quotes.
+          stack.back()->type_variable = TypeVariable(TypeVariable::ID::type_string);
+          stack.back()->Allocate();
+          *static_cast<std::string*>(stack.back()->memory) =
+              literal.substr(1, literal.size() - 2);
+        } else {
+          stack.back()->type_variable = TypeVariable(TypeVariable::ID::type_int);
+          stack.back()->Allocate();
+          *static_cast<int*>(stack.back()->memory) = std::atoi(literal.c_str());
+        }
+      }
+    } else {
+      // Evaluate both operands, then replace them with the operator result.
+      std::get<Expression*>(type1)->Run(stack);
+      std::shared_ptr<VariableInStack> x = std::move(stack.back());
+      stack.pop_back();
+      std::get<Expression*>(type2)->Run(stack);
+      std::shared_ptr<VariableInStack> y = std::move(stack.back());
+      stack.pop_back();
+      stack.push_back(std::make_shared<VariableInStack>(TypeVariable()));
+      x->CallOperator(y, stack.back(), op);
+    }
+  }
+  static size_t GetPriority(Operation);
+  static PriorityType GetPriorityType(Operation);
+  static Operation Convert(Lexer::LexerToken::Type);
+  Types type1, type2;
+  Operation op = Operation::None;
+  stack_pointer position_result = 0;
+  size_t count = 0;
+};
+
+struct FOR : public Node {
+  // Runs `code` while `check` evaluates to non-zero, running `tick` after
+  // every iteration. Does nothing without a condition, like IF and WHILE.
+  void Run(std::vector<std::shared_ptr<VariableInStack>>& stack) override {
+    if (!check) {
+      return;
+    }
+    while (true) {
+      check->Run(stack);
+      const int res = *static_cast<int*>(stack.back()->memory);
+      stack.pop_back();
+      if (!res) {
+        break;
+      }
+      code->Run(stack);
+      if (tick) {
+        // Drop whatever the step expression left on the stack.
+        const size_t sz = stack.size();
+        tick->Run(stack);
+        while (stack.size() > sz) {
+          stack.pop_back();
+        }
+      }
+    }
+  }
+  Variable* var = nullptr;
+  Expression* check = nullptr;
+  Expression* tick = nullptr;
+  CodeBlock* code = nullptr;
+};
+
+struct IF : public Node {
+  // Runs `code` once when `check` evaluates to non-zero.
+  void Run(std::vector<std::shared_ptr<VariableInStack>>& stack) override {
+    if (!check) {
+      return;
+    }
+    check->Run(stack);
+    const int res = *static_cast<int*>(stack.back()->memory);
+    stack.pop_back();
+    if (res) {
+      code->Run(stack);
+    }
+  }
+  Expression* check = nullptr;
+  CodeBlock* code = nullptr;
+};
+
+struct WHILE : public Node {
+  // Runs `code` for as long as `check` evaluates to non-zero.
+  void Run(std::vector<std::shared_ptr<VariableInStack>>& stack) override {
+    if (!check) {
+      return;
+    }
+    while (true) {
+      check->Run(stack);
+      const int res = *static_cast<int*>(stack.back()->memory);
+      stack.pop_back();
+      if (!res) {
+        break;
+      }
+      code->Run(stack);
+    }
+  }
+  Expression* check = nullptr;
+  CodeBlock* code = nullptr;
+};
+
+// Builds the syntax tree from lexer tokens and executes it.
+class SyntaxTree {
+ public:
+  SyntaxTree() {}
+  void PushLexerTokenList(const LexerTokenList&);
+  void Compile();
+  void Run();
+ private:
+  bool IsTypeName(Node*, const std::string&);
+  bool IsVariableName(Node*, const std::string&);
+  bool IsFunctionName(Node*, const std::string&);
+  CodeBlock* ParseCurlyBrackets(Container*, const LexerTokenList&, LexerTokenList::const_iterator&);
+  void PushCurlyBrackets(Container*, const LexerTokenList&, LexerTokenList::const_iterator&);
+  void PushRoundBrackets(Container*, const LexerTokenList&, LexerTokenList::const_iterator&);
+  void PushFunction(Container*, const LexerTokenList&, LexerTokenList::const_iterator&);
+  void PushLine(Container*, const LexerTokenList&, LexerTokenList::const_iterator&);
+  Variable* ParseNewVariable(const LexerTokenList&, LexerTokenList::const_iterator&);
+  void PushNewVariable(Container*, const LexerTokenList&, LexerTokenList::const_iterator&);
+  void PushExpression(Container*, const LexerTokenList&, LexerTokenList::const_iterator&);
+  void PushParametersFunction(CallFunction*, const LexerTokenList&, LexerTokenList::const_iterator&);
+  void PushCallFunction(Container*, const LexerTokenList&, LexerTokenList::const_iterator&);
+  void PushFOR(Container*, const LexerTokenList&, LexerTokenList::const_iterator&);
+  void PushSignatureFOR(FOR*, const LexerTokenList&, LexerTokenList::const_iterator&);
+  void PushIF(Container*, const LexerTokenList&, LexerTokenList::const_iterator&);
+  void PushSignatureIF(IF*, const LexerTokenList&, LexerTokenList::const_iterator&);
+  void PushWHILE(Container*, const LexerTokenList&, LexerTokenList::const_iterator&);
+  void PushSignatureWHILE(WHILE*, const LexerTokenList&, LexerTokenList::const_iterator&);
+  Expression* ParseExpression(LexerTokenList::const_iterator, LexerTokenList::const_iterator);
+  Expression* ParseExpression(const LexerTokenList&, LexerTokenList::const_iterator&);
+  void PushDeallocateStack(Node*, size_t);
+  stack_pointer GetCountStackOffsetForVariable(Node* node, std::string name);
+  void LinkVariables(Node*);
+  void LinkVariablesInExpression(Expression*, Node*);
+  CodeBlock* tree_ = nullptr;
+};
+
diff --git a/src/lexer.cpp b/src/lexer.cpp
new file mode 100644
index 0000000..9f24113
--- /dev/null
+++ b/src/lexer.cpp
@@ -0,0 +1,253 @@
+#include <cctype>
+#include <stdexcept>
+#include <unordered_set>
+#include "lexer.hpp"
+
+bool IsNumber(char x) {
+  return x >= '0' && x <= '9';
+}
+
+std::unordered_set<char> special_symbols = {';', '{', '}', '(', ')', '.', ','};
+
+bool IsSpecialSymbol(char x) {
+  return special_symbols.find(x) != special_symbols.end();
+}
+
+std::unordered_set<char> break_symbols = {'+', '-', '*', '/', '=', '!', '<', '>'};
+
+bool IsBreakSymbol(char x) {
+  // The result of find() must be compared with end() of the same container.
+  return break_symbols.find(x) != break_symbols.end();
+}
+
+namespace {
+
+using TokenType = Lexer::LexerToken::Type;
+
+// Returns the two-character operator formed by the pending operator
+// `previous` followed by `x`, or None when the two do not combine.
+TokenType CombineOperator(TokenType previous, char x) {
+  switch (x) {
+    case '=':
+      switch (previous) {
+        case TokenType::Plus: return TokenType::PlusEqual;
+        case TokenType::Minus: return TokenType::MinusEqual;
+        case TokenType::Star: return TokenType::StarEqual;
+        case TokenType::Slash: return TokenType::SlashEqual;
+        case TokenType::Equal: return TokenType::EqualEqual;
+        case TokenType::ExclamationMark: return TokenType::ExclamationMarkEqual;
+        case TokenType::LAngle: return TokenType::LAngleEqual;
+        case TokenType::RAngle: return TokenType::RAngleEqual;
+        default: return TokenType::None;
+      }
+    case '+':
+      return previous == TokenType::Plus ? TokenType::PlusPlus : TokenType::None;
+    case '-':
+      if (previous == TokenType::Minus) {
+        return TokenType::MinusMinus;
+      }
+      return previous == TokenType::LAngle ? TokenType::LArrow : TokenType::None;
+    case '>':
+      return previous == TokenType::Minus ? TokenType::RArrow : TokenType::None;
+    default:
+      return TokenType::None;
+  }
+}
+
+// Token type of a single operator character.
+TokenType SingleOperator(char x) {
+  switch (x) {
+    case '+': return TokenType::Plus;
+    case '-': return TokenType::Minus;
+    case '*': return TokenType::Star;
+    case '/': return TokenType::Slash;
+    case '=': return TokenType::Equal;
+    case '!': return TokenType::ExclamationMark;
+    case '<': return TokenType::LAngle;
+    case '>': return TokenType::RAngle;
+    default: throw std::logic_error("incorrect operator");
+  }
+}
+
+// Token type of a punctuation character accepted by IsSpecialSymbol().
+TokenType SpecialSymbolType(char x) {
+  switch (x) {
+    case ';': return TokenType::Semicolon;
+    case '{': return TokenType::CurlyOpenBracket;
+    case '}': return TokenType::CurlyCloseBracket;
+    case '(': return TokenType::RoundOpenBracket;
+    case ')': return TokenType::RoundCloseBracket;
+    case '.': return TokenType::Period;
+    default: return TokenType::Commo;  // ','
+  }
+}
+
+}  // namespace
+
+// Tokenizes `text` character by character. String literals keep their
+// quotes in `info`; numbers and identifiers are both emitted as Word.
+void Lexer::ParseText(std::string&& text) {
+  using Type = LexerToken::Type;
+  for (char x: text) {
+    if (x == '\"') {
+      if (current_state != Type::StringLiteral) {
+        // Flush the pending token first so the opening quote becomes part
+        // of the literal rather than of the token before it.
+        PushToken();
+        current_state = Type::StringLiteral;
+        current_info += x;
+      } else {
+        current_info += x;
+        PushToken();
+      }
+    } else if (current_state == Type::StringLiteral) {
+      current_info += x;
+    } else if (std::isspace(static_cast<unsigned char>(x))) {
+      PushToken();
+    } else if (IsBreakSymbol(x)) {
+      const Type combined = CombineOperator(current_state, x);
+      if (combined != Type::None) {
+        current_state = combined;
+        PushToken();
+      } else {
+        PushToken();
+        current_state = SingleOperator(x);
+      }
+    } else if (IsSpecialSymbol(x)) {
+      PushToken();
+      current_state = SpecialSymbolType(x);
+      PushToken();
+    } else {
+      // Ordinary character: extend the current word, or start a new one
+      // after flushing a pending operator (so `a+b` keeps its `b`).
+      if (current_state != Type::Word) {
+        PushToken();
+        current_state = Type::Word;
+      }
+      current_info += x;
+    }
+  }
+
+  PushToken();
+}
+
+void Lexer::ParseText(const std::string& text) {
+  std::string copy(text);
+  ParseText(std::move(copy));
+}
+
+// Appends the pending token to `tokens`. Words that spell a keyword are
+// converted to the keyword token and lose their text.
+void Lexer::PushToken() {
+  using Type = LexerToken::Type;
+  if (current_state == Type::None) {
+    return;
+  }
+  if (current_state == Type::Word) {
+    Type keyword = Type::None;
+    if (current_info == "for") {
+      keyword = Type::FOR;
+    } else if (current_info == "while") {
+      keyword = Type::WHILE;
+    } else if (current_info == "if") {
+      keyword = Type::IF;
+    } else if (current_info == "else") {
+      keyword = Type::ELSE;
+    }
+    if (keyword != Type::None) {
+      current_state = keyword;
+      current_info.clear();
+    }
+  }
+  tokens.emplace_back(current_state, std::move(current_info));
+  current_state = Type::None;
+  current_info.clear();
+}
+
+LexerTokenList Lexer::GetTokens() const {
+  return tokens;
+}
+
+namespace std {
+std::string to_string(Lexer::LexerToken::Type x) {
+  using Type = Lexer::LexerToken::Type;
+  switch (x) {
+    case Type::None: return "None";
+    case Type::Word: return "Word";
+    case Type::Semicolon: return "Semicolon";
+    case Type::CurlyOpenBracket: return "CurlyOpenBrackets";
+    case Type::CurlyCloseBracket: return "CurlyCloseBrackets";
+    case Type::RoundOpenBracket: return "RoundOpenBrackets";
+    case Type::RoundCloseBracket: return "RoundCloseBrackets";
+    case Type::Period: return "Period";
+    case Type::Commo: return "Commo";
+    case Type::Plus: return "Plus";
+    case Type::Minus: return "Minus";
+    case Type::Star: return "Star";
+    case Type::Slash: return "Slash";
+    case Type::Equal: return "Equal";
+    case Type::ExclamationMark: return "ExclamationMark";
+    case Type::PlusEqual: return "PlusEqual";
+    case Type::MinusEqual: return "MinusEqual";
+    case Type::StarEqual: return "StarEqual";
+    case Type::SlashEqual: return "SlashEqual";
+    case Type::EqualEqual: return "EqualEqual";
+    case Type::ExclamationMarkEqual: return "ExclamationMarkEqual";
+    case Type::PlusPlus: return "PlusPlus";
+    case Type::MinusMinus: return "MinusMinus";
+    case Type::LAngle: return "LAngle";
+    case Type::RAngle: return "RAngle";
+    case Type::LAngleEqual: return "LAngleEqual";
+    case Type::RAngleEqual: return "RAngleEqual";
+    case Type::LArrow: return "LArrow";
+    case Type::RArrow: return "RArrow";
+    case Type::FOR: return "for";
+    case Type::WHILE: return "while";
+    case Type::IF: return "if";
+    case Type::ELSE: return "else";
+    case Type::StringLiteral: return "StringLiteral";
+  }
+  return "ERROR";
+}
+}
diff --git a/src/main.cpp b/src/main.cpp
new file mode 100644
index 0000000..0a5b4db
--- /dev/null
+++ b/src/main.cpp
@@ -0,0 +1,45 @@
+#include <exception>
+#include <fstream>
+#include <iostream>
+#include <string>
+#include <utility>
+#include "preprocessor.hpp"
+#include "lexer.hpp"
+#include "syntax_tree.hpp"
+
+std::string program;
+
+// Usage: OMGL <source-file>. Reads the program, strips comments, lexes,
+// builds the syntax tree and runs it.
+int main(int argc, char* argv[]) {
+  if (argc != 2) {
+    std::cout << "Invalid file" << std::endl;
+    return 1;
+  }
+  std::ifstream fin(argv[1]);
+  if (!fin) {
+    // An unreadable file must not be treated as an empty program.
+    std::cout << "Invalid file" << std::endl;
+    return 1;
+  }
+
+  {
+    std::string str;
+    while (getline(fin, str)) {
+      str.push_back('\n');
+      program += str;
+    }
+  }
+
+  // std::cout << program << std::endl;
+
+  try {
+    program = Preprocessor(std::move(program));
+
+    // std::cout << program << std::endl;
+
+    Lexer lexer;
+    lexer.ParseText(program);
+    auto tokens = lexer.GetTokens();
+    /*
+    for (auto i: tokens) {
+      std::cout << std::to_string(i.type) << " " << i.info << "; ";
+    }
+    std::cout << std::endl;
+    */
+    SyntaxTree tree;
+    tree.PushLexerTokenList(tokens);
+    tree.Compile();
+    tree.Run();
+  } catch (const std::exception& error) {
+    // Lexer, parser and runtime errors are reported through exceptions.
+    std::cerr << error.what() << std::endl;
+    return 1;
+  }
+  std::cout << "END";
+}
+
diff --git a/src/preprocessor.cpp b/src/preprocessor.cpp
new file mode 100644
index 0000000..536c96b
--- /dev/null
+++ b/src/preprocessor.cpp
@@ -0,0 +1,75 @@
+#include "preprocessor.hpp"
+#include <cstddef>
+#include <string>
+
+// Removes `//` comments. The terminating newline is kept. Comment markers
+// inside string literals are not recognized and are stripped as well.
+std::string DeleteComments(std::string&& text) {
+  const size_t n = text.size();
+  std::string result;
+  result.reserve(n);
+
+  for (size_t i = 0; i < n; ++i) {
+    if (text[i] == '/' && i + 1 < n && text[i + 1] == '/') {
+      // Skip up to, but not including, the end of the line.
+      while (i + 1 < n && text[i + 1] != '\n') {
+        ++i;
+      }
+    } else {
+      result += text[i];
+    }
+  }
+  return result;
+}
+
+// Removes `/* ... */` comments. An unterminated comment swallows the rest
+// of the text.
+std::string DeleteMultiLineComments(std::string&& text) {
+  const size_t n = text.size();
+  std::string result;
+  result.reserve(n);
+
+  for (size_t i = 0; i < n; ++i) {
+    if (text[i] == '/' && i + 1 < n && text[i + 1] == '*') {
+      // Search after the opening marker so that "/*/" is not taken for a
+      // complete comment.
+      const size_t close = text.find("*/", i + 2);
+      if (close == std::string::npos) {
+        break;
+      }
+      i = close + 1;  // the loop increment steps past the closing '/'
+    } else {
+      result += text[i];
+    }
+  }
+  return result;
+}
+
+// Removes single-line comments first, then multi-line comments.
+std::string Preprocessor(std::string&& text) {
+  return DeleteMultiLineComments(DeleteComments(std::move(text)));
+}
+
+std::string Preprocessor(const std::string& text) {
+  return Preprocessor(std::string(text));
+}
diff --git a/src/syntax_tree.cpp b/src/syntax_tree.cpp
new file mode 100644
index 0000000..f622338
--- /dev/null
+++ b/src/syntax_tree.cpp
@@ -0,0 +1,680 @@
+#include <iostream>
+#include <memory>
+#include <stdexcept>
+#include <string>
+#include <unordered_set>
+#include "syntax_tree.hpp"
+
+using Type = Lexer::LexerToken::Type;
+
+std::unordered_set<std::string> special_words = {"def"};
+
+// True for token types that denote a binary operator supported in
+// expressions (++ and -- are not supported yet).
+bool IsOperation(Type type) {
+  switch (type) {
+    case Type::Equal:
+    case Type::Plus:
+    case Type::Minus:
+    case Type::Star:
+    case Type::Slash:
+    case Type::ExclamationMark:
+    case Type::PlusEqual:
+    case Type::MinusEqual:
+    case Type::StarEqual:
+    case Type::SlashEqual:
+    case Type::EqualEqual:
+    case Type::ExclamationMarkEqual:
+      // case Type::PlusPlus:
+      // case Type::MinusMinus:
+    case Type::LAngle:
+    case Type::RAngle:
+    case Type::LAngleEqual:
+    case Type::RAngleEqual:return true;
+    default:return false;
+  }
+}
+
+// Links `node` right after this node. When this node was the last child,
+// the parent container's end pointer is moved to `node`.
+void Node::Insert(Node* node) {
+  node->previous = this;
+  node->parent = parent;
+  if (next) {
+    next->previous = node;
+    node->next = next;
+  } else if (auto* owner = dynamic_cast<Container*>(parent)) {
+    // The parent may be absent or may not be a Container (e.g. FOR), in
+    // which case there is no end pointer to maintain.
+    owner->children_end = node;
+  }
+  next = node;
+}
+
+// Links `node` right before this node. When this node was the first child,
+// the parent container's begin pointer is moved to `node`.
+void Node::InsertBefore(Node* node) {
+  node->next = this;
+  node->parent = parent;
+  if (previous) {
+    previous->next = node;
+    node->previous = previous;
+  } else if (auto* owner = dynamic_cast<Container*>(parent)) {
+    owner->children_begin = node;
+  }
+  previous = node;
+}
+
+// Appends `node` as the last child of this container.
+void Container::AddChildren(Node* node) {
+  node->parent = this;
+  if (children_end) {
+    children_end->next = node;
+    node->previous = children_end;
+    children_end = node;
+  } else {
+    children_begin = children_end = node;
+  }
+}
+
+// Binding strength of a binary operator; a larger value binds tighter.
+// Throws std::logic_error for operations that are not binary operators.
+size_t Expression::GetPriority(Operation op) {
+  switch (op) {
+    case Operation::Equal:
+    case Operation::StarEqual:
+    case Operation::SlashEqual:
+    case Operation::PlusEqual:
+    case Operation::MinusEqual:
+      return 0;
+    case Operation::EqualEqual:
+    case Operation::ExclamationMarkEqual:
+      return 6;
+    case Operation::LAngle:
+    case Operation::LAngleEqual:
+    case Operation::RAngle:
+    case Operation::RAngleEqual:
+      return 7;
+    case Operation::Plus:
+    case Operation::Minus:
+      return 9;
+    case Operation::Star:
+    case Operation::Slash:
+      return 10;
+    default:
+      break;
+  }
+  throw std::logic_error("unsupported operator");
+}
+
+// Maps an operator token to the corresponding Operation. Prints the token
+// name to stderr and throws std::logic_error for any other token.
+Operation Expression::Convert(Lexer::LexerToken::Type type) {
+  switch (type) {
+    case Type::Equal:return Operation::Equal;
+    case Type::LAngle:return Operation::LAngle;
+    case Type::Plus:return Operation::Plus;
+    case Type::Minus:return Operation::Minus;
+    case Type::Star:return Operation::Star;
+    case Type::Slash:return Operation::Slash;
+    case Type::ExclamationMark:return Operation::ExclamationMark;
+    case Type::PlusEqual:return Operation::PlusEqual;
+    case Type::MinusEqual:return Operation::MinusEqual;
+    case Type::StarEqual:return Operation::StarEqual;
+    case Type::SlashEqual:return Operation::SlashEqual;
+    case Type::EqualEqual:return Operation::EqualEqual;
+    case Type::ExclamationMarkEqual:return Operation::ExclamationMarkEqual;
+      // case Type::PlusPlus:
+      // case Type::MinusMinus:
+    case Type::RAngle:return Operation::RAngle;
+    case Type::LAngleEqual:return Operation::LAngleEqual;
+    case Type::RAngleEqual:return Operation::RAngleEqual;
+    default:
+      break;
+  }
+  std::cerr << std::to_string(type) << std::endl;
+  throw std::logic_error("unsupported operator");
+}
+
+// Associativity of a binary operator: assignments are right-associative,
+// everything else is left-associative.
+PriorityType Expression::GetPriorityType(Operation op) {
+  switch (op) {
+    case Operation::Equal:
+    case Operation::StarEqual:
+    case Operation::SlashEqual:
+    case Operation::PlusEqual:
+    case Operation::MinusEqual:
+      return PriorityType::R;
+    case Operation::EqualEqual:
+    case Operation::ExclamationMarkEqual:
+    case Operation::LAngle:
+    case Operation::LAngleEqual:
+    case Operation::RAngle:
+    case Operation::RAngleEqual:
+    case Operation::Plus:
+    case Operation::Minus:
+    case Operation::Star:
+    case Operation::Slash:
+      return PriorityType::L;
+    default:
+      break;
+  }
+
+  throw std::logic_error("unsupported operator");
+}
+
+bool SyntaxTree::IsTypeName(Node* node, const std::string& str) {
+  return str == "int" || str == "string";
+}
+
+// Looks for a Variable called `name` among the preceding siblings of `node`
+// and then, level by level, among those of its ancestors.
+bool SyntaxTree::IsVariableName(Node* node, const std::string& name) {
+  while (node) {
+    if (auto var = dynamic_cast<Variable*>(node)) {
+      if (var->name == name) {
+        return true;
+      }
+    }
+    if (node->previous) {
+      node = node->previous;
+    } else {
+      node = node->parent;
+    }
+  }
+  return false;
+}
+
+bool SyntaxTree::IsFunctionName(Node*, const std::string& str) {
+  return str == "print";
+}
+
+// Parses the whole token list into the root code block, creating the block
+// on first use.
+void SyntaxTree::PushLexerTokenList(const LexerTokenList& list) {
+  if (!tree_) {
+    tree_ = new CodeBlock();
+  }
+  auto it = list.begin();
+  while (it != list.end()) {
+    PushLine(tree_, list, it);
+    // PushLine may already have consumed the final token; never step past
+    // the end of the list.
+    if (it != list.end()) {
+      ++it;
+    }
+  }
+}
+
+// Parses statements up to the matching '}' into a new code block. On return
+// `it` points at the closing bracket. The block is not linked into
+// `container`; only its parent/previous pointers are set.
+// Throws std::logic_error when the token list ends before the '}'.
+CodeBlock* SyntaxTree::ParseCurlyBrackets(Container* container,
+                                          const LexerTokenList& list,
+                                          LexerTokenList::const_iterator& it) {
+  // Owned by a unique_ptr until returned so a parse error does not leak it.
+  auto current = std::make_unique<CodeBlock>();
+  current->previous = container->children_end;
+  current->parent = container;
+
+  while (true) {
+    if (it == list.end()) {
+      throw std::logic_error("missing closing curly bracket");
+    }
+    if (it->type == Type::CurlyCloseBracket) {
+      break;
+    }
+    PushLine(current.get(), list, it);
+    if (it != list.end()) {
+      ++it;
+    }
+  }
+  return current.release();
+}
+
+void SyntaxTree::PushCurlyBrackets(Container* container, const LexerTokenList& list,
+                                   LexerTokenList::const_iterator& it) {
+  container->AddChildren(ParseCurlyBrackets(container, list, it));
+}
+
+/*
+void SyntaxTree::PushRoundBrackets(Container* container, const LexerTokenList& list,
+                                   LexerTokenList::const_iterator& it) {
+  RoundBrackets* current = new RoundBrackets();
+  container->AddChildren(current);
+
+  for (; it->type != Type::RoundCloseBracket; ++it) {
+    if (it->type == Type::Word) {
+      if (IsTypeName(container->children_end, it->info)) {
+        std::string type = it->info;
+        ++it;
+        if (it->type == Type::Word) {
+          container->AddChildren(new Variable(std::move(type), it->info));
+        }
+      } else if (IsVariableName(container->children_end, it->info)) {
+        ++it;
+        // TODO
+      }
+    } else {
+      throw std::logic_error("I expected type or name");
+    }
+  }
+}
+ */
+
+/*
+void SyntaxTree::PushFunction(Container* container, const LexerTokenList& list,
+                              LexerTokenList::const_iterator& it) {
+  if (it->type == Type::Word && !IsSpecialWord(it->info)) {
+    auto func = new Function(it->info);
+    ++it;
+    if (it->type == Type::RoundOpenBracket) {
+      PushRoundBrackets(func->parameters, list, ++it);
+      ++it;
+      if (it->type == Type::RArrow) {
+        ++it;
+        if (it->type == Type::Word && IsTypeName(container->children_end, it->info)) {
+          ++it;
+          if (it->type == Type::CurlyOpenBracket) {
+            PushCurlyBrackets(func->code, list, ++it);
+            container->AddChildren(func);
+          } else {
+            throw std::logic_error("Invalid function body");
+          }
+        } else {
+          throw std::logic_error("Invalit return type");
+        }
+      } else {
+        throw std::logic_error("Missing -> in function");
+      }
+    } else {
+      throw std::logic_error("Invalid function parameters");
+    }
+  } else {
+    throw std::logic_error("Invalid function name");
+  }
+}
+ */
+
+void SyntaxTree::PushLine(Container* container, const LexerTokenList& list,
+                          LexerTokenList::const_iterator& it) {
+  if (it->type == Type::CurlyOpenBracket) {
+    PushCurlyBrackets(container, list, ++it);
+  } else if (it->type == Type::RoundOpenBracket) {
+    PushExpression(container, list, it);
+    // PushRoundBrackets(container, list, ++it);
+  } else if (it->type == Type::IF) {
+    PushIF(container, list, it);
+  } else if (it->type == Type::FOR) {
+    PushFOR(container, list, it);
+  } else if (it->type == Type::WHILE) {
+    PushWHILE(container, list, it);
+  } else if (it->type == Type::Word) {
+    auto place = container->children_end;
+    if (!place)
+      place = container;
+    if (it->info == "def") {
+      // PushFunction(container, list, ++it);
+    } else if (IsTypeName(place, it->info)) {
+      PushNewVariable(container, list, it);
+    } else if (IsVariableName(place, it->info)) {
+      PushExpression(container,
list, it); + } else if (IsFunctionName(place, it->info)) { + PushCallFunction(container, list, it); + } + } +} + +Variable* SyntaxTree::ParseNewVariable(const LexerTokenList& list, LexerTokenList::const_iterator& it) { + std::string type = it->info; + ++it; + if (it->type == Type::Word) { + auto var = new Variable(std::move(type), it->info); + ++it; + if (it->type == Type::Semicolon) + return var; + if (it->type == Type::Equal) { + --it; + var->default_value = ParseExpression(list, it); + if (it->type == Type::Semicolon) { + return var; + } else { + throw std::logic_error("Expected ;"); + } + } else { + throw std::logic_error("Expected ="); + } + } else { + throw std::logic_error("Expected variable name"); + } +} + +void SyntaxTree::PushNewVariable(Container* container, const LexerTokenList& list, LexerTokenList::const_iterator& it) { + container->AddChildren(ParseNewVariable(list, it)); +} + +void SyntaxTree::PushExpression(Container* container, const LexerTokenList& list, LexerTokenList::const_iterator& it) { + container->AddChildren(ParseExpression(list, it)); +} + +void SyntaxTree::PushParametersFunction(CallFunction* call_function, + const LexerTokenList& list, + LexerTokenList::const_iterator& it) { + if (it->type != Type::RoundOpenBracket) { + throw std::logic_error("expected ( after function name"); + } + while (it->type != Type::RoundCloseBracket) { + auto expr = ParseExpression(list, ++it); + call_function->parameters.push_back(new Expression(new Expression((size_t) 0, nullptr, Operation::Value), + expr, + Operation::Equal)); + auto& t = call_function->parameters.back(); + t->parent = call_function; + std::get(t->type1)->parent = t; + std::get(t->type2)->parent = t; + } + for (size_t i = 0; i < call_function->parameters.size(); ++i) { + std::get(call_function->parameters[i]->type1)->type1 = call_function->parameters.size() - i; + } + ++it; +} + +void SyntaxTree::PushCallFunction(Container* container, + const LexerTokenList& list, + 
LexerTokenList::const_iterator& it) { + auto function = new CallFunction(it->info); + PushParametersFunction(function, list, ++it); + container->AddChildren(new CreateVariables(function->parameters.size())); + container->AddChildren(function); + container->AddChildren(new HiddenDeallocateStack(function->parameters.size())); +} + +void SyntaxTree::PushFOR(Container* container, + const LexerTokenList& list, + std::list::const_iterator& it) { + auto c = new Container; + auto f = new FOR(); + container->AddChildren(c); + PushSignatureFOR(f, list, ++it); + c->AddChildren(f->var); + c->AddChildren(f); + if (it->type != Type::CurlyOpenBracket) { + throw std::logic_error("expected { after for (...)"); + } + f->code = ParseCurlyBrackets(c, list, ++it); + f->code->parent = f; + f->code->previous = nullptr; +} + +void SyntaxTree::PushSignatureFOR(FOR* node_for, + const LexerTokenList& list, + std::list::const_iterator& it) { + if (it->type != Type::RoundOpenBracket) + throw std::logic_error("expected ( after for"); + ++it; + if (it->type == Type::Semicolon) { + ++it; + } else { + if (it->type != Type::Word || !IsTypeName(node_for, it->info)) { + throw std::logic_error("expected type after for ("); + } + node_for->var = ParseNewVariable(list, it); + node_for->var->parent = node_for; + + if (it->type != Type::Semicolon) { + throw std::logic_error("expected ; after for ( type name"); + } + ++it; + } + if (it->type == Type::Semicolon) { + ++it; + } else { + node_for->check = ParseExpression(list, it); + node_for->check->parent = node_for; + if (it->type != Type::Semicolon) { + throw std::logic_error("expected ; after for ( type name; expr"); + } + ++it; + } + if (it->type == Type::RoundCloseBracket) { + } else { + node_for->tick = ParseExpression(list, it); + node_for->tick->parent = node_for; + if (it->type != Type::RoundCloseBracket) { + throw std::logic_error("expected ) after for ( type name; expr; expr"); + } + ++it; + } +} + +void SyntaxTree::PushIF(Container* container, + 
const LexerTokenList& list, + std::list::const_iterator& it) { + auto f = new IF(); + container->AddChildren(f); + PushSignatureIF(f, list, ++it); + f->check->parent = f; + if (it->type != Type::CurlyOpenBracket) { + throw std::logic_error("expected { after if (...)"); + } + f->code = ParseCurlyBrackets(container, list, ++it); + f->code->parent = f; +} + +void SyntaxTree::PushSignatureIF(IF* node_if, + const LexerTokenList& list, + std::list::const_iterator& it) { + if (it->type != Type::RoundOpenBracket) + throw std::logic_error("expected ( after if"); + ++it; + node_if->check = ParseExpression(list, it); + node_if->check->parent = node_if; + if (it->type != Type::RoundCloseBracket) { + throw std::logic_error("expected ) after if(..."); + } + ++it; +} + +void SyntaxTree::PushWHILE(Container* container, + const LexerTokenList& list, + std::list::const_iterator& it) { + auto f = new WHILE(); + container->AddChildren(f); + PushSignatureWHILE(f, list, ++it); + if (it->type != Type::CurlyOpenBracket) { + throw std::logic_error("expected { after while (...)"); + } + f->code = ParseCurlyBrackets(container, list, ++it); +} + +void SyntaxTree::PushSignatureWHILE(WHILE* node_if, + const LexerTokenList& list, + std::list::const_iterator& it) { + if (it->type != Type::RoundOpenBracket) + throw std::logic_error("expected ( after while"); + ++it; + node_if->check = ParseExpression(list, it); + node_if->check->parent = node_if; + if (it->type != Type::RoundCloseBracket) { + throw std::logic_error("expected ) after while(..."); + } + ++it; +} + +Expression* SyntaxTree::ParseExpression(LexerTokenList::const_iterator l, LexerTokenList::const_iterator r) { + if (std::next(l) == r) { + return new Expression(l->info, nullptr, Operation::Value); + } + { + auto it = l; + if (it->type == Type::RoundOpenBracket) { + int balance = 1; + do { + ++it; + if (it->type == Type::RoundOpenBracket) { + ++balance; + } else if (it->type == Type::RoundCloseBracket) { + --balance; + } + } while 
(balance != 0); + } + ++it; + if (it == r) { + return ParseExpression(std::next(l), std::prev(r)); + } + } + auto mid = l; + size_t current_priority = std::numeric_limits::max(); + for (auto it = l; std::next(it) != r; ++it) { + if (it->type == Type::RoundOpenBracket) { + int balance = 1; + do { + ++it; + if (it->type == Type::RoundOpenBracket) { + ++balance; + } else if (it->type == Type::RoundCloseBracket) { + --balance; + } + } while (balance != 0); + } + ++it; + + size_t tmp = Expression::GetPriority(Expression::Convert(it->type)); + if (tmp < current_priority) { + mid = it; + current_priority = tmp; + } else if (tmp == current_priority) { + if (Expression::GetPriorityType(Expression::Convert(it->type)) == PriorityType::R) { + mid = it; + } + } + } + return new Expression(ParseExpression(l, mid), ParseExpression(std::next(mid), r), Expression::Convert(mid->type)); +} + +Expression* SyntaxTree::ParseExpression(const LexerTokenList& list, + std::list::const_iterator& it) { + auto l = it; + int balance = 0; + while (it->type == Type::Word || IsOperation(it->type) || it->type == Type::StringLiteral + || it->type == Type::RoundOpenBracket || it->type == Type::RoundCloseBracket) { + if (it->type == Type::RoundOpenBracket) { + ++balance; + } else if (it->type == Type::RoundCloseBracket) { + if (balance == 0) { + break; + } else { + --balance; + } + } + ++it; + } + return ParseExpression(l, it); +} + +void SyntaxTree::PushDeallocateStack(Node* node, size_t count_variables = 0) { + if (auto code_block = dynamic_cast(node)) { + if (code_block->children_begin) { + PushDeallocateStack(code_block->children_begin); + } + } + if (auto if_ = dynamic_cast(node)) { + PushDeallocateStack(if_->code); + } + if (auto for_ = dynamic_cast(node)) { + PushDeallocateStack(for_->code); + } + if (dynamic_cast(node)) { + ++count_variables; + } + if (auto expr = dynamic_cast(node)) { + node->Insert(new DeallocateStack(1)); + } + if (node->next) { + PushDeallocateStack(node->next, 
count_variables); + } else { + if (count_variables) { + node->Insert(new DeallocateStack(count_variables)); + } + } +} + +void SyntaxTree::Compile() { + PushDeallocateStack(tree_); + LinkVariables(tree_); +} + +void SyntaxTree::Run() { + std::vector> stack; + tree_->Run(stack); +} + +void SyntaxTree::LinkVariables(Node* node) { + while (node) { + if (auto expr = dynamic_cast(node)) { + LinkVariablesInExpression(expr, node); + } + if (auto var = dynamic_cast(node)) { + if (var->default_value) { + LinkVariablesInExpression(var->default_value, node); + } + } + if (auto if_ = dynamic_cast(node)) { + LinkVariablesInExpression(if_->check, node); + LinkVariables(if_->code->children_begin); + } + if (auto while_ = dynamic_cast(node)) { + LinkVariablesInExpression(while_->check, node); + LinkVariables(while_->code->children_begin); + } + if (auto for_ = dynamic_cast(node)) { + LinkVariablesInExpression(for_->check, node); + LinkVariablesInExpression(for_->tick, node); + LinkVariables(for_->code->children_begin); + } + if (auto call_function = dynamic_cast(node)) { + for (auto i: call_function->parameters) { + LinkVariablesInExpression(i, node); + } + } + if (auto container = dynamic_cast(node)) { + LinkVariables(container->children_begin); + } + node = node->next; + } +} + +stack_pointer SyntaxTree::GetCountStackOffsetForVariable(Node* node, std::string name) { + stack_pointer result = 0; + while (node) { + if (auto var = dynamic_cast(node)) { + result += 1; + if (var->name == name) { + return result; + } + } + if (auto vars = dynamic_cast(node)) { + result += vars->count; + } + if (auto vars = dynamic_cast(node)) { + result -= vars->count; + } + if (node->previous) { + node = node->previous; + } else { + node = node->parent; + } + } + return std::numeric_limits::max(); +} + +void SyntaxTree::LinkVariablesInExpression(Expression* expression, Node* node) { + if (expression->op == Operation::Value) { + if (std::holds_alternative(expression->type1)) { + auto& var = 
std::get(expression->type1); + stack_pointer offset = GetCountStackOffsetForVariable(node, var); + if (offset != std::numeric_limits::max()) { + expression->type1 = offset; + } + } + } else { + LinkVariablesInExpression(std::get(expression->type1), node); + LinkVariablesInExpression(std::get(expression->type2), node); + } +} + +void CallFunction::Run(std::vector>& stack) { + for (size_t i = 0; i < parameters.size(); ++i) { + stack.push_back(std::make_shared(TypeVariable())); + // stack.back().Allocate(); + } + for (auto i: parameters) { + i->Run(stack); + stack.pop_back(); + } + if (name_function == "print") { + for (size_t i = 0; i < parameters.size(); ++i) { + std::shared_ptr& t = *(stack.end() - parameters.size() + i); + if (t->type_variable.id == TypeVariable::ID::type_int) { + std::cout << *((int*) t->memory) << " "; + } else if (t->type_variable.id == TypeVariable::ID::type_string) { + std::cout << *((std::string*) t->memory) << " "; + } + } + std::cout << std::endl; + } +} + +void Variable::Run(std::vector>& stack) { + if (type == "int") { + stack.push_back(std::make_shared(TypeVariable(TypeVariable::ID::type_int))); + } else if (type == "string") { + stack.push_back(std::make_shared(TypeVariable(TypeVariable::ID::type_string))); + } + stack.back()->Allocate(); + if (default_value) { + default_value->Run(stack); + stack.pop_back(); + } +} diff --git a/tests/MainTest.cpp b/tests/MainTest.cpp new file mode 100644 index 0000000..83bd23c --- /dev/null +++ b/tests/MainTest.cpp @@ -0,0 +1,7 @@ +#include + +int main() { + testing::InitGoogleTest(); + return RUN_ALL_TESTS(); +} +