Add parsing regular expressions

This commit is contained in:
MaxanRus 2021-09-27 02:59:08 +03:00
parent a1843f4898
commit f3848d88b3
5 changed files with 235 additions and 14 deletions

View file

@ -19,7 +19,9 @@ set(SOURCE_FILES
src/regular/RegularTree.cpp
)
set(TEST_FILES)
set(TEST_FILES
tests/regular/parse_regular.cpp
)
add_executable(Formalang src/main.cpp ${SOURCE_FILES})

View file

@ -10,20 +10,30 @@ class RegularTree {
public:
// One node of the parsed regular-expression tree. A node is either an inner
// node owning `children` or a leaf carrying literal text in `word`.
class Node {
public:
// Kind of node, assigned while parsing:
//   Addition      - children are the alternatives separated by '|'.
//   Concatenation - children are consecutive pieces (bracketed groups and
//                   runs of text).
//   Word          - leaf; the literal text is stored in `word`.
enum class Type {
Addition, Concatenation, Word
};
// Postfix modifier attached to a node: Plus for '+', Star for '*',
// None when the node carries no modifier.
enum class Modifier {
Plus, Star, None
};
// NOTE(review): the default constructor's definition has an empty body, so
// `type` appears to stay unset until Parse() runs - confirm.
Node();
// Creates a node of the given kind.
Node(Type);
// Parses a whole expression into this node, treating it as an Addition at the
// top level.
void Parse(const std::string&);
// NOTE(review): this overload looks like a leftover from the previous
// revision (superseded by ParseCurrentType) - confirm whether it should stay.
void Parse(const std::string_view);
// NOTE(review): second definition of Type in the same class scope; it lacks
// Word and looks like the previous revision's enum - confirm which one stays.
enum class Type {
Addition, Concatenation
};
// NOTE(review): presumably the previous storage for children/word, now
// replaced by the separate `children` and `word` members below - confirm.
std::variant<std::vector<std::unique_ptr<Node>>, std::string> value;
// Prints this node and its descendants to std::cout, starting at nesting
// level 0.
void Print() const;
// Owned sub-expressions; filled for Addition and Concatenation nodes.
std::vector<std::unique_ptr<Node>> children;
// Literal text of a Word node (without its trailing modifier character).
std::string word;
// Kind of this node; see Type.
Type type;
// Modifier applied to this node; None unless the parser sets one.
Modifier modifier = Modifier::None;
private:
// Parses the given text into this node, starting from the node's current
// `type` and changing it when the text turns out to be of another kind.
void ParseCurrentType(const std::string_view);
// Prints this node after `nesting_level` indentation steps, then its children
// one level deeper.
void Print(int nesting_level) const;
};
RegularTree(const std::string&);
const Node& GetNode() const;
void Print() const;
private:
Node node_;

View file

@ -3,5 +3,8 @@
// Reads one whitespace-delimited regular expression from stdin, parses it and
// prints the resulting tree to stdout.
// Returns 1 when no expression could be read.
int main() {
  using namespace regular;
  std::string str;
  // Do not hand an empty string to the parser on EOF or a failed read.
  if (!(std::cin >> str)) {
    std::cerr << "failed to read a regular expression from stdin" << std::endl;
    return 1;
  }
  // Single tree built from user input; the hard-coded "(ab)*" instance that
  // declared the same variable a second time is gone.
  RegularTree reg_tree(str);
  reg_tree.Print();
}

View file

@ -1,22 +1,188 @@
#include "regular/RegularTree.hpp"
#include <iostream>
#include <exception>
namespace regular {
// Creates an empty node. `type` starts as Addition (the kind Parse() and the
// child factory both begin with) so that printing or inspecting a node that
// was never parsed does not read an uninitialised value.
RegularTree::Node::Node() : type(Type::Addition) {}
RegularTree::Node::Node(Type type) : type(type) {}
// Parses `regular` as a complete expression rooted at this node.
// The root always starts out as an Addition; ParseCurrentType() changes the
// kind to Concatenation or Word when the text turns out to be one of those.
// Exceptions thrown by ParseCurrentType() propagate to the caller.
void RegularTree::Node::Parse(const std::string& regular) {
  // The old call into the superseded Parse(std::string_view) overload is
  // removed: parsing happens exactly once, through ParseCurrentType().
  type = Type::Addition;
  ParseCurrentType(std::string_view(regular));
}
void RegularTree::Node::Parse(const std::string_view regular) {
const int n = regular.size();
for (int i = 0; i < n; ++i) {
std::cout << regular[i];
// Prints this node and all of its descendants to std::cout, with this node at
// the outermost (unindented) nesting level.
void RegularTree::Node::Print() const {
  constexpr int kRootNestingLevel = 0;
  Print(kRootNestingLevel);
}
void RegularTree::Node::ParseCurrentType(const std::string_view regular) {
const size_t n = regular.size();
children.clear();
word.clear();
auto AddChild = [this](const std::string_view regular) {
this->children.push_back(std::make_unique<Node>(Type::Addition));
this->children.back()->ParseCurrentType(regular);
};
if (n == 1) {
type = Type::Word;
}
if (type == Type::Addition) {
int balance = 0;
int begin_child = 0;
bool wrapped_brackets = (regular[0] == '(');
for (size_t i = 0; i < n; ++i) {
if (regular[i] == '(') {
++balance;
} else if (regular[i] == ')') {
--balance;
}
if (regular[i] == '|') {
if (balance == 0) {
AddChild(regular.substr(begin_child + wrapped_brackets, i -
begin_child - 2 *
wrapped_brackets));
begin_child = i + 1;
if (i + 1 < n)
wrapped_brackets = (regular[i + 1] == '(');
}
} else {
if (i + 1 == n) {
if (children.size() == 0) {
type = Type::Concatenation;
break;
} else {
AddChild(regular.substr(begin_child + wrapped_brackets, i -
begin_child + 1 - 2 *
wrapped_brackets));
begin_child = i + 1;
if (i + 1 < n)
wrapped_brackets = (regular[i + 1] == '(');
}
} else if (balance == 0) {
wrapped_brackets = false;
}
}
}
}
if (type == Type::Concatenation) {
int balance = 0;
int begin_child = 0;
for (size_t i = 0; i < n; ++i) {
if (regular[i] == '(') {
++balance;
if (balance == 1) {
if (begin_child < i) {
AddChild(regular.substr(begin_child, i - begin_child));
begin_child = i + 1;
}
}
} else if (regular[i] == ')') {
--balance;
if (balance == 0) {
AddChild(regular.substr(begin_child + 1, i - begin_child - 1));
begin_child = i + 1;
}
} else if (i + 1 == n) {
if (balance != 0) {
throw std::logic_error("invalid regular");
}
if (children.size() == 0) {
type = Type::Word;
break;
} else {
AddChild(regular.substr(begin_child, i - begin_child + 1));
begin_child = i + 1;
}
} else if (balance == 0) {
if (regular[i] == '+') {
if (begin_child < i) {
AddChild(regular.substr(begin_child, i - begin_child));
}
children.back()->modifier = Modifier::Plus;
begin_child = i + 1;
} else if (regular[i] == '*') {
if (begin_child < i) {
AddChild(regular.substr(begin_child, i - begin_child));
}
children.back()->modifier = Modifier::Star;
begin_child = i + 1;
}
}
}
}
if (type == Type::Word) {
bool exist_modifire = regular.back() == '+' || regular.back() == '*';
if (regular.back() == '+') {
modifier = Modifier::Plus;
} else if (regular.back() == '*') {
modifier = Modifier::Star;
}
for (size_t i = 0; i < n - exist_modifire; ++i) {
if (regular[i] == '|' || regular[i] == '(' || regular[i] == ')') {
throw std::logic_error("invalid regular");
}
word += regular[i];
}
}
}
void RegularTree::Node::Print(int nesting_level) const {
auto PrintNesingLevel = [](int nesting_level) {
for (int i = 0; i < nesting_level; ++i) {
std::cout << " ";
}
};
PrintNesingLevel(nesting_level);
if (type == Type::Addition) {
std::cout << "Addition";
if (modifier == Modifier::Plus) {
std::cout << "+";
} else if (modifier == Modifier::Star) {
std::cout << "*";
}
std::cout << " " << std::to_string(children.size()) << ":" << std::endl;
} else if (type == Type::Concatenation) {
std::cout << "Concatenation";
if (modifier == Modifier::Plus) {
std::cout << "+";
} else if (modifier == Modifier::Star) {
std::cout << "*";
}
std::cout << " " << children.size() << ":" << std::endl;
} else if (type == Type::Word) {
std::cout << "Word";
if (modifier == Modifier::Plus) {
std::cout << "+";
} else if (modifier == Modifier::Star) {
std::cout << "*";
}
std::cout << ": " << word << std::endl;
}
for (const auto& i: children) {
i->Print(nesting_level + 1);
}
std::cout << std::endl;
}
// Builds the tree by parsing `regular` into the root node.
// Exceptions thrown while parsing propagate out of the constructor.
RegularTree::RegularTree(const std::string& regular) {
  Node& root = node_;
  root.Parse(regular);
}
// Gives read-only access to the root node of the tree.
const RegularTree::Node& RegularTree::GetNode() const {
  const Node& root = node_;
  return root;
}
void RegularTree::Print() const {
node_.Print();
}
}

View file

@ -0,0 +1,40 @@
#include <gtest/gtest.h>
#include "regular/RegularTree.hpp"
using namespace regular;
// Constructs a RegularTree from plain words and from '|'-separated words.
// There are no explicit assertions; every construction just has to complete.
TEST(parse_regular, only_addition) {
  const char* const kPatterns[] = {
      "a",
      "aa",
      "aaa",
      "aaaa",
      "aaaaa",
      "a|aaaa",
      "a|a|a|a|a",
      "hello|world",
      "qe|wr|lkj|alk",
  };
  for (const char* pattern : kPatterns) {
    const RegularTree tree(pattern);
  }
}
// Constructs a RegularTree from patterns that only use text and nested
// brackets. There are no explicit assertions; every construction just has to
// complete.
TEST(parse_regular, only_folding) {
  const char* const kPatterns[] = {
      "(kajfkasf(aksdjf)jka(((aksjdf)K)))jakd",
      "(kajsdfk(aksdjf)kajsdf)kjasdkfja(skdjf(((aksjdkadf)ksjf(kdja))))",
      "((((kdjf))))",
      "kasjf(akjsfkjasdg)kajsdg",
      "akjsdf(akjdf(kdjfak(jkasdf)))",
      "123k4j1k351kk21jkj21k6j2k36j1(((((ajkfajsdfkafdalkdjflk)))))",
  };
  for (const char* pattern : kPatterns) {
    const RegularTree tree(pattern);
  }
}
// Constructs a RegularTree from patterns that only use text with '*' and '+'
// modifiers. There are no explicit assertions; every construction just has to
// complete.
TEST(parse_regular, only_modifiers) {
  const char* const kPatterns[] = {
      "jlakjdf*aksdjflaf8*laksfj*lakjsf*alksjdf",
      "jlakjdf*aksdjflaf8+laksfj*lakjsf*alksjdf*",
      "jlakjdf*aksdjflaf8*laksfj*lakjsf+alksjdf",
      "jlakjd+aksdjflaf8*laksfj*lakjsf*alksjdf+",
  };
  for (const char* pattern : kPatterns) {
    const RegularTree tree(pattern);
  }
}
// Constructs a RegularTree from patterns that mix alternation, brackets and
// modifiers. There are no explicit assertions; every construction just has to
// complete.
TEST(parse_regular, all_operations) {
  const char* const kPatterns[] = {
      "(alkjdfaksdf*|lkasdj*|(kasjdf|kajdf*|kjd)*|laksjf*)+",
      "(alkjdfaksdf|lkasdj*|(kas+jdf|kajdf*|kjd)*|laksjf*)+",
      "a|(a|(a|(a|)*))*",
      "kj*|kjadf*|(kajsdf|(kajsd|kjadf|(kasjdf)|kajs)*)*",
  };
  for (const char* pattern : kPatterns) {
    const RegularTree tree(pattern);
  }
}