From f3848d88b3c9fc46498a805083990b0236d2c6a4 Mon Sep 17 00:00:00 2001 From: MaxanRus Date: Mon, 27 Sep 2021 02:59:08 +0300 Subject: [PATCH] Add parsing regular expressions --- CMakeLists.txt | 4 +- include/regular/RegularTree.hpp | 22 ++-- src/main.cpp | 5 +- src/regular/RegularTree.cpp | 194 ++++++++++++++++++++++++++++++-- tests/regular/parse_regular.cpp | 49 +++++++ 5 files changed, 260 insertions(+), 14 deletions(-) create mode 100644 tests/regular/parse_regular.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 5be6ab5..527b984 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -19,7 +19,9 @@ set(SOURCE_FILES src/regular/RegularTree.cpp ) -set(TEST_FILES) +set(TEST_FILES + tests/regular/parse_regular.cpp + ) add_executable(Formalang src/main.cpp ${SOURCE_FILES}) diff --git a/include/regular/RegularTree.hpp b/include/regular/RegularTree.hpp index 02dd856..7cbae74 100644 --- a/include/regular/RegularTree.hpp +++ b/include/regular/RegularTree.hpp @@ -10,20 +10,30 @@ class RegularTree { public: class Node { public: + enum class Type { /* Kind of node: children split on '|', children in sequence, or literal text kept in `word`. */ + Addition, Concatenation, Word + }; + enum class Modifier { /* Suffix recorded from a '+' or '*' that follows the node; None when there is none. */ + Plus, Star, None + }; + Node(); + Node(Type); void Parse(const std::string&); - void Parse(const std::string_view); - - enum class Type { - Addition, Concatenation - }; - std::variant>, std::string> value; + void Print() const; + std::vector> children; + std::string word; Type type; + Modifier modifier = Modifier::None; private: + void ParseCurrentType(const std::string_view); + void Print(int nesting_level) const; }; RegularTree(const std::string&); + const Node& GetNode() const; + void Print() const; private: Node node_; diff --git a/src/main.cpp b/src/main.cpp index 14eb3b0..080b2cb 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -3,5 +3,8 @@ int main() { using namespace regular; - RegularTree reg_tree("(ab)*"); + std::string str; + std::cin >> str; /* reads one whitespace-delimited token as the expression */ + RegularTree reg_tree(str); + reg_tree.Print(); } diff --git a/src/regular/RegularTree.cpp 
b/src/regular/RegularTree.cpp index 96128e8..24e840c 100644 --- a/src/regular/RegularTree.cpp +++ b/src/regular/RegularTree.cpp @@ -1,22 +1,204 @@ #include "regular/RegularTree.hpp" #include +#include /* NOTE(review): angle-bracket text (include names, the template argument of std::make_unique below) appears to be stripped from this copy of the patch; restore it from the repository before applying */ namespace regular { RegularTree::Node::Node() {} +RegularTree::Node::Node(Type type) : type(type) {} void RegularTree::Node::Parse(const std::string& regular) { - Parse(std::string_view(regular.c_str(), regular.size())); + type = Type::Addition; /* parsing always starts in the Addition phase; ParseCurrentType changes the type when nothing can be split */ + ParseCurrentType(std::string_view(regular.c_str(), regular.size())); } -void RegularTree::Node::Parse(const std::string_view regular) { - const int n = regular.size(); - for (int i = 0; i < n; ++i) { - std::cout << regular[i]; +void RegularTree::Node::Print() const { /* Prints this subtree to std::cout starting at nesting level 0. */ + Print(0); +} + +void RegularTree::Node::ParseCurrentType(const std::string_view regular) { /* Parses `regular` into this node, starting from the node's current type: Addition splits on top-level '|', Concatenation splits into top-level groups, the text between them and modifiers, Word stores plain text. A phase that finds nothing to split hands over to the next one. Throws std::logic_error("invalid regular") on malformed input. */ + const size_t n = regular.size(); + children.clear(); + word.clear(); + + auto AddChild = [this](const std::string_view regular) { /* appends a fresh Addition-typed child and parses the given slice into it */ + this->children.push_back(std::make_unique(Type::Addition)); + this->children.back()->ParseCurrentType(regular); + }; + + if (n == 0) { /* an empty (sub)expression becomes an empty Word, so the code below never reads regular[0] or regular.back() on an empty view */ + type = Type::Word; + return; + } + + if (n == 1) { + type = Type::Word; + } + + if (type == Type::Addition) { /* Phase 1: split on '|' found outside parentheses. */ + int balance = 0; + int begin_child = 0; + bool wrapped_brackets = (regular[0] == '('); /* true while the current alternative still looks like one parenthesised group; its outer parentheses are then dropped before recursing */ + + for (size_t i = 0; i < n; ++i) { + if (regular[i] == '(') { + ++balance; + } else if (regular[i] == ')') { + --balance; + } + + if (regular[i] == '|') { + if (balance == 0) { + AddChild(regular.substr(begin_child + wrapped_brackets, i - + begin_child - 2 * + wrapped_brackets)); + begin_child = i + 1; + + if (i + 1 < n) + wrapped_brackets = (regular[i + 1] == '('); + } + } else { + if (i + 1 == n) { + if (children.size() == 0) { + type = Type::Concatenation; /* no top-level '|' was seen: retry the same text as a concatenation */ + break; + } else { + AddChild(regular.substr(begin_child + wrapped_brackets, i - + begin_child + 1 - 2 * + wrapped_brackets)); + begin_child = i + 1; + if (i + 1 < n) + wrapped_brackets = (regular[i + 1] == '('); + } + } else if (balance == 0) { + wrapped_brackets = false; + } + } + } + if (type == Type::Addition && balance != 0) { /* unbalanced parentheses must not pass silently */ + throw std::logic_error("invalid regular"); + } + } + + if (type == 
Type::Concatenation) { /* Phase 2: split into top-level groups, the text between them, and '+' or '*' modifiers. */ + int balance = 0; + int begin_child = 0; + + for (size_t i = 0; i < n; ++i) { + if (regular[i] == '(') { + ++balance; + if (balance == 1) { + if (begin_child < i) { + AddChild(regular.substr(begin_child, i - begin_child)); + begin_child = i; /* keep pointing at the '(' itself: the matching ')' slices from begin_child + 1, so i + 1 here dropped the first character of the group */ + } + } + } else if (regular[i] == ')') { + --balance; + if (balance == 0) { + AddChild(regular.substr(begin_child + 1, i - begin_child - 1)); + begin_child = i + 1; + } + } else if (i + 1 == n) { + if (balance != 0) { + throw std::logic_error("invalid regular"); + } + if (children.size() == 0) { + type = Type::Word; + break; + } else if (begin_child == i && (regular[i] == '+' || regular[i] == '*')) { /* a final modifier with no pending text belongs to the previous child, e.g. "(ab)*" */ + children.back()->modifier = (regular[i] == '+') ? Modifier::Plus : Modifier::Star; + } else { + AddChild(regular.substr(begin_child, i - begin_child + 1)); + begin_child = i + 1; + } + } else if (balance == 0) { + if ((regular[i] == '+' || regular[i] == '*') && begin_child == i && children.empty()) { /* modifier with nothing before it: children.back() would be undefined behaviour */ + throw std::logic_error("invalid regular"); + } + if (regular[i] == '+') { + if (begin_child < i) { + AddChild(regular.substr(begin_child, i - begin_child)); + } + children.back()->modifier = Modifier::Plus; + begin_child = i + 1; + } else if (regular[i] == '*') { + if (begin_child < i) { + AddChild(regular.substr(begin_child, i - begin_child)); + } + children.back()->modifier = Modifier::Star; + begin_child = i + 1; + } + } + } + if (balance != 0) { /* catches unbalanced input that ends with a parenthesis, which the in-loop check never sees */ + throw std::logic_error("invalid regular"); + } + } + + if (type == Type::Word) { /* Phase 3: plain text with an optional trailing modifier. */ + bool exist_modifier = regular.back() == '+' || regular.back() == '*'; + if (regular.back() == '+') { + modifier = Modifier::Plus; + } else if (regular.back() == '*') { + modifier = Modifier::Star; + } + for (size_t i = 0; i < n - exist_modifier; ++i) { + if (regular[i] == '|' || regular[i] == '(' || regular[i] == ')') { + throw std::logic_error("invalid regular"); + } + word += regular[i]; + } + } +} + +void RegularTree::Node::Print(int nesting_level) const { /* Prints this node indented by nesting_level, then every child one level deeper. */ + auto PrintNestingLevel = [](int nesting_level) { + for (int i = 0; i < nesting_level; ++i) { + std::cout << " "; + } + }; + + PrintNestingLevel(nesting_level); + if (type == Type::Addition) { + std::cout << "Addition"; + if (modifier == Modifier::Plus) { + std::cout << "+"; + } else if (modifier == Modifier::Star) { + std::cout << "*"; + } + std::cout << " " << 
std::to_string(children.size()) << ":" << std::endl; + } else if (type == Type::Concatenation) { + std::cout << "Concatenation"; + if (modifier == Modifier::Plus) { + std::cout << "+"; + } else if (modifier == Modifier::Star) { + std::cout << "*"; + } + std::cout << " " << children.size() << ":" << std::endl; + } else if (type == Type::Word) { + std::cout << "Word"; + if (modifier == Modifier::Plus) { + std::cout << "+"; + } else if (modifier == Modifier::Star) { + std::cout << "*"; + } + std::cout << ": " << word << std::endl; + } + + for (const auto& i: children) { + i->Print(nesting_level + 1); } - std::cout << std::endl; } RegularTree::RegularTree(const std::string& regular) { node_.Parse(regular); } + +const RegularTree::Node& RegularTree::GetNode() const { + return node_; +} + +void RegularTree::Print() const { + node_.Print(); +} } diff --git a/tests/regular/parse_regular.cpp b/tests/regular/parse_regular.cpp new file mode 100644 index 0000000..25bf34a --- /dev/null +++ b/tests/regular/parse_regular.cpp @@ -0,0 +1,49 @@ +#include +#include "regular/RegularTree.hpp" + +using namespace regular; + +TEST(parse_regular, only_addition) { + RegularTree("a"); + RegularTree("aa"); + RegularTree("aaa"); + RegularTree("aaaa"); + RegularTree("aaaaa"); + RegularTree("a|aaaa"); + RegularTree("a|a|a|a|a"); + RegularTree("hello|world"); + RegularTree("qe|wr|lkj|alk"); +} + +TEST(parse_regular, only_folding) { + RegularTree("(kajfkasf(aksdjf)jka(((aksjdf)K)))jakd"); + RegularTree("(kajsdfk(aksdjf)kajsdf)kjasdkfja(skdjf(((aksjdkadf)ksjf(kdja))))"); + RegularTree("((((kdjf))))"); + RegularTree("kasjf(akjsfkjasdg)kajsdg"); + RegularTree("akjsdf(akjdf(kdjfak(jkasdf)))"); + RegularTree("123k4j1k351kk21jkj21k6j2k36j1(((((ajkfajsdfkafdalkdjflk)))))"); +} + +TEST(parse_regular, only_modifiers) { + RegularTree("jlakjdf*aksdjflaf8*laksfj*lakjsf*alksjdf"); + RegularTree("jlakjdf*aksdjflaf8+laksfj*lakjsf*alksjdf*"); + RegularTree("jlakjdf*aksdjflaf8*laksfj*lakjsf+alksjdf"); + 
RegularTree("jlakjd+aksdjflaf8*laksfj*lakjsf*alksjdf+"); +} + +TEST(parse_regular, all_operations) { + RegularTree("(alkjdfaksdf*|lkasdj*|(kasjdf|kajdf*|kjd)*|laksjf*)+"); + RegularTree("(alkjdfaksdf|lkasdj*|(kas+jdf|kajdf*|kjd)*|laksjf*)+"); + RegularTree("a|(a|(a|(a|)*))*"); + RegularTree("kj*|kjadf*|(kajsdf|(kajsd|kjadf|(kasjdf)|kajs)*)*"); +} + +TEST(parse_regular, group_after_prefix) { /* regression: a group that follows plain text keeps its first character, and a trailing modifier marks the group */ + RegularTree tree("ab(cd)*"); + const auto& root = tree.GetNode(); + ASSERT_EQ(root.children.size(), 2u); + EXPECT_EQ(root.children[0]->word, "ab"); + EXPECT_EQ(root.children[1]->word, "cd"); + EXPECT_TRUE(root.children[1]->modifier == RegularTree::Node::Modifier::Star); +} +