Add parsing of regular expressions
This commit is contained in:
parent
a1843f4898
commit
f3848d88b3
|
@ -19,7 +19,9 @@ set(SOURCE_FILES
|
|||
src/regular/RegularTree.cpp
|
||||
)
|
||||
|
||||
set(TEST_FILES)
|
||||
# Test sources: the regular-expression parsing tests.
set(TEST_FILES
|
||||
tests/regular/parse_regular.cpp
|
||||
)
|
||||
|
||||
add_executable(Formalang src/main.cpp ${SOURCE_FILES})
|
||||
|
||||
|
|
|
@ -10,20 +10,30 @@ class RegularTree {
|
|||
public:
|
||||
/// A node of the parsed regular-expression tree.
///
/// The parser classifies every node as an alternation of its children
/// (Addition), a sequence of its children (Concatenation) or a leaf holding
/// literal text (Word).
class Node {
public:
    /// Kind of node, assigned while parsing.
    enum class Type {
        Addition, Concatenation, Word
    };
    /// Repetition marker attached to a node: '+' (Plus), '*' (Star) or nothing.
    enum class Modifier {
        Plus, Star, None
    };

    Node();
    Node(Type);

    /// Parses the given expression into this node, replacing its contents.
    /// Throws std::logic_error("invalid regular") on malformed input.
    void Parse(const std::string&);
    /// Prints this node and its subtree to std::cout.
    void Print() const;

    /// Sub-expressions of an Addition or Concatenation node; empty for a Word.
    std::vector<std::unique_ptr<Node>> children;
    /// Literal text of a Word node; empty for the other node types.
    std::string word;
    // Initialised so that a default-constructed node never carries an
    // indeterminate type (Print reads it).
    Type type = Type::Addition;
    Modifier modifier = Modifier::None;

private:
    /// Recursive worker behind Parse; refines `type` while it runs.
    void ParseCurrentType(const std::string_view);
    /// Prints the subtree indented by `nesting_level` spaces.
    void Print(int nesting_level) const;
};
|
||||
|
||||
RegularTree(const std::string&);
|
||||
const Node& GetNode() const;
|
||||
void Print() const;
|
||||
|
||||
private:
|
||||
Node node_;
|
||||
|
|
|
@ -3,5 +3,8 @@
|
|||
|
||||
int main() {
|
||||
using namespace regular;
|
||||
RegularTree reg_tree("(ab)*");
|
||||
std::string str;
|
||||
std::cin >> str;
|
||||
RegularTree reg_tree(str);
|
||||
reg_tree.Print();
|
||||
}
|
||||
|
|
|
@ -1,22 +1,188 @@
|
|||
#include "regular/RegularTree.hpp"

#include <exception>
#include <iostream>
#include <memory>
#include <stdexcept>
#include <string>
#include <string_view>
|
||||
|
||||
namespace regular {
|
||||
/// Creates an empty node.
///
/// `type` is given a definite value (Addition, the same starting point Parse
/// uses) so that printing a node that was never parsed does not read an
/// indeterminate enum.
RegularTree::Node::Node() : type(Type::Addition) {}
|
||||
RegularTree::Node::Node(Type type) : type(type) {}
|
||||
|
||||
/// Parses `regular` into this node, replacing any previous contents.
///
/// The node is first treated as an alternation; ParseCurrentType narrows it
/// to a Concatenation or a Word when the expression has no top-level '|'.
///
/// @throws std::logic_error ("invalid regular") when ParseCurrentType rejects
///         the expression.
void RegularTree::Node::Parse(const std::string& regular) {
    type = Type::Addition;
    ParseCurrentType(std::string_view(regular));
}
|
||||
|
||||
void RegularTree::Node::Parse(const std::string_view regular) {
|
||||
const int n = regular.size();
|
||||
for (int i = 0; i < n; ++i) {
|
||||
std::cout << regular[i];
|
||||
/// Prints this node and its whole subtree to std::cout, starting with no
/// indentation.
void RegularTree::Node::Print() const {
    constexpr int kRootNestingLevel = 0;
    Print(kRootNestingLevel);
}
|
||||
|
||||
void RegularTree::Node::ParseCurrentType(const std::string_view regular) {
|
||||
const size_t n = regular.size();
|
||||
children.clear();
|
||||
word.clear();
|
||||
|
||||
auto AddChild = [this](const std::string_view regular) {
|
||||
this->children.push_back(std::make_unique<Node>(Type::Addition));
|
||||
this->children.back()->ParseCurrentType(regular);
|
||||
};
|
||||
|
||||
if (n == 1) {
|
||||
type = Type::Word;
|
||||
}
|
||||
|
||||
if (type == Type::Addition) {
|
||||
int balance = 0;
|
||||
int begin_child = 0;
|
||||
bool wrapped_brackets = (regular[0] == '(');
|
||||
|
||||
for (size_t i = 0; i < n; ++i) {
|
||||
if (regular[i] == '(') {
|
||||
++balance;
|
||||
} else if (regular[i] == ')') {
|
||||
--balance;
|
||||
}
|
||||
|
||||
if (regular[i] == '|') {
|
||||
if (balance == 0) {
|
||||
AddChild(regular.substr(begin_child + wrapped_brackets, i -
|
||||
begin_child - 2 *
|
||||
wrapped_brackets));
|
||||
begin_child = i + 1;
|
||||
|
||||
if (i + 1 < n)
|
||||
wrapped_brackets = (regular[i + 1] == '(');
|
||||
}
|
||||
} else {
|
||||
if (i + 1 == n) {
|
||||
if (children.size() == 0) {
|
||||
type = Type::Concatenation;
|
||||
break;
|
||||
} else {
|
||||
AddChild(regular.substr(begin_child + wrapped_brackets, i -
|
||||
begin_child + 1 - 2 *
|
||||
wrapped_brackets));
|
||||
begin_child = i + 1;
|
||||
if (i + 1 < n)
|
||||
wrapped_brackets = (regular[i + 1] == '(');
|
||||
}
|
||||
} else if (balance == 0) {
|
||||
wrapped_brackets = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (type == Type::Concatenation) {
|
||||
int balance = 0;
|
||||
int begin_child = 0;
|
||||
|
||||
for (size_t i = 0; i < n; ++i) {
|
||||
if (regular[i] == '(') {
|
||||
++balance;
|
||||
if (balance == 1) {
|
||||
if (begin_child < i) {
|
||||
AddChild(regular.substr(begin_child, i - begin_child));
|
||||
begin_child = i + 1;
|
||||
}
|
||||
}
|
||||
} else if (regular[i] == ')') {
|
||||
--balance;
|
||||
if (balance == 0) {
|
||||
AddChild(regular.substr(begin_child + 1, i - begin_child - 1));
|
||||
begin_child = i + 1;
|
||||
}
|
||||
} else if (i + 1 == n) {
|
||||
if (balance != 0) {
|
||||
throw std::logic_error("invalid regular");
|
||||
}
|
||||
if (children.size() == 0) {
|
||||
type = Type::Word;
|
||||
break;
|
||||
} else {
|
||||
AddChild(regular.substr(begin_child, i - begin_child + 1));
|
||||
begin_child = i + 1;
|
||||
}
|
||||
} else if (balance == 0) {
|
||||
if (regular[i] == '+') {
|
||||
if (begin_child < i) {
|
||||
AddChild(regular.substr(begin_child, i - begin_child));
|
||||
}
|
||||
children.back()->modifier = Modifier::Plus;
|
||||
begin_child = i + 1;
|
||||
} else if (regular[i] == '*') {
|
||||
if (begin_child < i) {
|
||||
AddChild(regular.substr(begin_child, i - begin_child));
|
||||
}
|
||||
children.back()->modifier = Modifier::Star;
|
||||
begin_child = i + 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (type == Type::Word) {
|
||||
bool exist_modifire = regular.back() == '+' || regular.back() == '*';
|
||||
if (regular.back() == '+') {
|
||||
modifier = Modifier::Plus;
|
||||
} else if (regular.back() == '*') {
|
||||
modifier = Modifier::Star;
|
||||
}
|
||||
for (size_t i = 0; i < n - exist_modifire; ++i) {
|
||||
if (regular[i] == '|' || regular[i] == '(' || regular[i] == ')') {
|
||||
throw std::logic_error("invalid regular");
|
||||
}
|
||||
word += regular[i];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void RegularTree::Node::Print(int nesting_level) const {
|
||||
auto PrintNesingLevel = [](int nesting_level) {
|
||||
for (int i = 0; i < nesting_level; ++i) {
|
||||
std::cout << " ";
|
||||
}
|
||||
};
|
||||
|
||||
PrintNesingLevel(nesting_level);
|
||||
if (type == Type::Addition) {
|
||||
std::cout << "Addition";
|
||||
if (modifier == Modifier::Plus) {
|
||||
std::cout << "+";
|
||||
} else if (modifier == Modifier::Star) {
|
||||
std::cout << "*";
|
||||
}
|
||||
std::cout << " " << std::to_string(children.size()) << ":" << std::endl;
|
||||
} else if (type == Type::Concatenation) {
|
||||
std::cout << "Concatenation";
|
||||
if (modifier == Modifier::Plus) {
|
||||
std::cout << "+";
|
||||
} else if (modifier == Modifier::Star) {
|
||||
std::cout << "*";
|
||||
}
|
||||
std::cout << " " << children.size() << ":" << std::endl;
|
||||
} else if (type == Type::Word) {
|
||||
std::cout << "Word";
|
||||
if (modifier == Modifier::Plus) {
|
||||
std::cout << "+";
|
||||
} else if (modifier == Modifier::Star) {
|
||||
std::cout << "*";
|
||||
}
|
||||
std::cout << ": " << word << std::endl;
|
||||
}
|
||||
|
||||
for (const auto& i: children) {
|
||||
i->Print(nesting_level + 1);
|
||||
}
|
||||
std::cout << std::endl;
|
||||
}
|
||||
|
||||
/// Builds the tree by parsing `regular` into the root node.
/// Any exception raised by Node::Parse propagates to the caller.
RegularTree::RegularTree(const std::string& regular) {
    this->node_.Parse(regular);
}
|
||||
|
||||
/// Returns a read-only reference to the root node of the tree.
const RegularTree::Node& RegularTree::GetNode() const {
    return this->node_;
}
|
||||
|
||||
void RegularTree::Print() const {
|
||||
node_.Print();
|
||||
}
|
||||
}
|
||||
|
|
40
tests/regular/parse_regular.cpp
Normal file
40
tests/regular/parse_regular.cpp
Normal file
|
@ -0,0 +1,40 @@
|
|||
#include <gtest/gtest.h>
|
||||
#include "regular/RegularTree.hpp"
|
||||
|
||||
using namespace regular;
|
||||
|
||||
// Plain words and '|' alternatives. The test only checks that construction
// completes; the shape of the resulting tree is not inspected.
TEST(parse_regular, only_addition) {
    const char* const kExpressions[] = {
        "a",
        "aa",
        "aaa",
        "aaaa",
        "aaaaa",
        "a|aaaa",
        "a|a|a|a|a",
        "hello|world",
        "qe|wr|lkj|alk",
    };

    for (const char* const expression : kExpressions) {
        const RegularTree tree{expression};
        static_cast<void>(tree);
    }
}
|
||||
|
||||
// Nested parentheses without '|' or modifiers. The test only checks that
// construction completes; the shape of the resulting tree is not inspected.
TEST(parse_regular, only_folding) {
    const char* const kExpressions[] = {
        "(kajfkasf(aksdjf)jka(((aksjdf)K)))jakd",
        "(kajsdfk(aksdjf)kajsdf)kjasdkfja(skdjf(((aksjdkadf)ksjf(kdja))))",
        "((((kdjf))))",
        "kasjf(akjsfkjasdg)kajsdg",
        "akjsdf(akjdf(kdjfak(jkasdf)))",
        "123k4j1k351kk21jkj21k6j2k36j1(((((ajkfajsdfkafdalkdjflk)))))",
    };

    for (const char* const expression : kExpressions) {
        const RegularTree tree{expression};
        static_cast<void>(tree);
    }
}
|
||||
|
||||
// Words separated by '*' and '+' modifiers. The test only checks that
// construction completes; the shape of the resulting tree is not inspected.
TEST(parse_regular, only_modifiers) {
    const char* const kExpressions[] = {
        "jlakjdf*aksdjflaf8*laksfj*lakjsf*alksjdf",
        "jlakjdf*aksdjflaf8+laksfj*lakjsf*alksjdf*",
        "jlakjdf*aksdjflaf8*laksfj*lakjsf+alksjdf",
        "jlakjd+aksdjflaf8*laksfj*lakjsf*alksjdf+",
    };

    for (const char* const expression : kExpressions) {
        const RegularTree tree{expression};
        static_cast<void>(tree);
    }
}
|
||||
|
||||
// Alternation, grouping and modifiers combined. The test only checks that
// construction completes; the shape of the resulting tree is not inspected.
TEST(parse_regular, all_operations) {
    const char* const kExpressions[] = {
        "(alkjdfaksdf*|lkasdj*|(kasjdf|kajdf*|kjd)*|laksjf*)+",
        "(alkjdfaksdf|lkasdj*|(kas+jdf|kajdf*|kjd)*|laksjf*)+",
        "a|(a|(a|(a|)*))*",
        "kj*|kjadf*|(kajsdf|(kajsd|kjadf|(kasjdf)|kajs)*)*",
    };

    for (const char* const expression : kExpressions) {
        const RegularTree tree{expression};
        static_cast<void>(tree);
    }
}
|
||||
|
Loading…
Reference in a new issue