// libsemigroups - C++ library for semigroups and monoids
// Copyright (C) 2021-2023 James D. Mitchell + Maria Tsalakou
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/ >.
//
#include <algorithm> // for count_if, all_of
#include <cstddef> // for size_t
#include <tuple> // for tie
#include <utility> // for pair
#include <vector> // for vector
#include "catch.hpp" // for REQUIRE, REQUIRE_THROWS_AS
#include "test-main.hpp" // for LIBSEMIGROUPS_TEST_CASE
#include "libsemigroups/constants.hpp" // for operator==, POSITIVE_INFINITY
#include "libsemigroups/exception.hpp" // for LIBSEMIGROUPS_EXCEPTION
#include "libsemigroups/int-range.hpp" // for IntegralRange
#include "libsemigroups/types.hpp" // for word_type
#include "libsemigroups/ukkonen.hpp" // for Ukkonen, Ukkonen::State
#include "libsemigroups/wislo.hpp" // for const_wislo_iterator, cbegi...
#include "libsemigroups/word.hpp" // for literals
namespace libsemigroups {
using literals::operator "" _w;
namespace {
// TODO Should really be just a use of presentation::longest_common_subword
auto best_subword(Ukkonen& u) {
ukkonen::detail::GreedyReduceHelper helper(u);
return ukkonen::dfs(u, helper);
}
} // namespace
LIBSEMIGROUPS_TEST_CASE("Ukkonen" , "000" , "basic tests" , "[quick][ukkonen]" ) {
Ukkonen t;
// aaeaaa$
// abcd$'
// a -> 0
// b -> 1
// c -> 2
// d -> 3
// e -> 4
t.add_word(004000_w);
REQUIRE(t.nodes().size() == 10);
REQUIRE(ukkonen::is_subword(t, 004000_w));
REQUIRE(ukkonen::is_subword(t, 04_w));
REQUIRE(!ukkonen::is_subword(t, 44_w));
REQUIRE(ukkonen::is_subword(t, "" _w));
REQUIRE(ukkonen::is_subword(t, 0_w));
REQUIRE(ukkonen::is_subword(t, 00_w));
REQUIRE(ukkonen::is_subword(t, 000_w));
REQUIRE(ukkonen::is_subword(t, 000_w));
REQUIRE(!ukkonen::is_subword(t, 0000_w));
REQUIRE(!ukkonen::is_subword(t, 1_w));
REQUIRE(ukkonen::number_of_distinct_subwords(t) == 16);
REQUIRE(cbegin_wislo(5, {}, 0000000_w)->empty());
REQUIRE(std::count_if(
cbegin_wislo(6, {}, 00000000_w),
cend_wislo(6, {}, 00000000_w),
[&t](word_type const & w) { return ukkonen::is_subword(t, w); })
== 16);
REQUIRE(ukkonen::is_subword(t, "" _w)); // 1
REQUIRE(ukkonen::is_subword(t, 004000_w));
REQUIRE(ukkonen::is_subword(t, 00400_w));
REQUIRE(ukkonen::is_subword(t, 0040_w));
REQUIRE(ukkonen::is_subword(t, 004_w)); // 5
REQUIRE(ukkonen::is_subword(t, 00_w));
REQUIRE(ukkonen::is_subword(t, 0_w));
REQUIRE(ukkonen::is_subword(t, 04000_w));
REQUIRE(ukkonen::is_subword(t, 0400_w));
REQUIRE(ukkonen::is_subword(t, 040_w)); // 10
REQUIRE(ukkonen::is_subword(t, 04_w));
REQUIRE(ukkonen::is_subword(t, 4000_w));
REQUIRE(ukkonen::is_subword(t, 400_w));
REQUIRE(ukkonen::is_subword(t, 40_w));
REQUIRE(ukkonen::is_subword(t, 4_w)); // 15
REQUIRE(ukkonen::is_subword(t, 000_w)); // 16
t.add_word(0123_w);
REQUIRE(t.nodes().size() == 15);
REQUIRE(ukkonen::is_subword(t, "" _w)); // 1
REQUIRE(ukkonen::is_subword(t, 004000_w));
REQUIRE(ukkonen::is_subword(t, 00400_w));
REQUIRE(ukkonen::is_subword(t, 0040_w));
REQUIRE(ukkonen::is_subword(t, 004_w)); // 5
REQUIRE(ukkonen::is_subword(t, 00_w));
REQUIRE(ukkonen::is_subword(t, 0_w));
REQUIRE(ukkonen::is_subword(t, 04000_w));
REQUIRE(ukkonen::is_subword(t, 0400_w));
REQUIRE(ukkonen::is_subword(t, 040_w)); // 10
REQUIRE(ukkonen::is_subword(t, 04_w));
REQUIRE(ukkonen::is_subword(t, 4000_w));
REQUIRE(ukkonen::is_subword(t, 400_w));
REQUIRE(ukkonen::is_subword(t, 40_w));
REQUIRE(ukkonen::is_subword(t, 4_w)); // 15
REQUIRE(ukkonen::is_subword(t, 000_w)); // 16
REQUIRE(ukkonen::is_subword(t, 01_w));
REQUIRE(ukkonen::is_subword(t, 012_w));
REQUIRE(ukkonen::is_subword(t, 0123_w));
REQUIRE(ukkonen::is_subword(t, 1_w));
REQUIRE(ukkonen::is_subword(t, 12_w));
REQUIRE(ukkonen::is_subword(t, 123_w));
REQUIRE(ukkonen::is_subword(t, 2_w));
REQUIRE(ukkonen::is_subword(t, 23_w));
REQUIRE(ukkonen::is_subword(t, 3_w));
REQUIRE(!ukkonen::is_subword(t, 33_w));
REQUIRE(!ukkonen::is_subword(t, "ab" ));
REQUIRE(!ukkonen::is_subword(t, std::string("ab" )));
REQUIRE(!ukkonen::is_subword_no_checks(t, 33_w));
REQUIRE(!ukkonen::is_subword_no_checks(t, "ab" ));
REQUIRE(!ukkonen::is_subword_no_checks(t, std::string("ab" )));
REQUIRE_THROWS_AS(ukkonen::is_subword(t, word_type({UNDEFINED})),
LibsemigroupsException);
REQUIRE(ukkonen::number_of_distinct_subwords(t) == 25);
REQUIRE(!ukkonen::is_suffix(t, 1235_w));
REQUIRE(ukkonen::is_suffix(t, 123_w));
REQUIRE(ukkonen::is_suffix(t, "" _w));
REQUIRE(ukkonen::is_suffix(t, 004000_w));
REQUIRE(ukkonen::is_suffix(t, 04000_w));
REQUIRE(ukkonen::is_suffix(t, 4000_w));
REQUIRE(ukkonen::is_suffix(t, 000_w));
REQUIRE(ukkonen::is_suffix(t, 00_w));
REQUIRE(ukkonen::is_suffix(t, 0_w));
REQUIRE(ukkonen::is_suffix(t, 0123_w));
REQUIRE(ukkonen::is_suffix(t, 123_w));
REQUIRE(ukkonen::is_suffix(t, 23_w));
REQUIRE(ukkonen::is_suffix(t, 3_w));
REQUIRE(!ukkonen::is_suffix(t, 33_w));
REQUIRE(!ukkonen::is_suffix(t, "ab" ));
REQUIRE_THROWS_AS(ukkonen::is_suffix(t, word_type({UNDEFINED})),
LibsemigroupsException);
REQUIRE(!ukkonen::is_suffix(t, std::string("ab" )));
REQUIRE(!ukkonen::is_suffix_no_checks(t, 33_w));
REQUIRE(!ukkonen::is_suffix_no_checks(t, "ab" ));
REQUIRE(!ukkonen::is_suffix_no_checks(t, std::string("ab" )));
REQUIRE(std::count_if(
cbegin_wislo(5, {}, 0000000_w),
cend_wislo(5, {}, 0000000_w),
[&t](word_type const & w) { return ukkonen::is_suffix(t, w); })
== 11);
REQUIRE(ukkonen::length_maximal_piece_prefix(t, 004000_w) == 2);
REQUIRE(ukkonen::length_maximal_piece_prefix(t, 0123_w) == 1);
REQUIRE_THROWS_AS(
ukkonen::length_maximal_piece_prefix(t, word_type({UNDEFINED})),
LibsemigroupsException);
REQUIRE(ukkonen::length_maximal_piece_prefix(t, "ab" ) == 0);
REQUIRE(ukkonen::length_maximal_piece_prefix(t, std::string("ab" )) == 0);
REQUIRE(ukkonen::length_maximal_piece_prefix(t, 00043456_w) == 2);
REQUIRE(ukkonen::length_maximal_piece_prefix(t, 0123456_w) == 1);
REQUIRE(ukkonen::length_maximal_piece_prefix_no_checks(t, "ab" ) == 0);
REQUIRE(ukkonen::length_maximal_piece_prefix_no_checks(t, std::string("ab" ))
== 0);
REQUIRE(ukkonen::length_maximal_piece_prefix_no_checks(t, 00043456_w) == 2);
REQUIRE(ukkonen::length_maximal_piece_prefix_no_checks(t, 0123456_w) == 1);
REQUIRE(*ukkonen::maximal_piece_prefix(t, "ab" ) == 'a' );
REQUIRE(*ukkonen::maximal_piece_prefix(t, std::string("ab" )) == 'a' );
REQUIRE(*ukkonen::maximal_piece_prefix(t, 00043456_w) == 0);
REQUIRE(*ukkonen::maximal_piece_prefix(t, 0123456_w) == 1);
REQUIRE(*ukkonen::maximal_piece_prefix_no_checks(t, "ab" ) == 'a' );
REQUIRE(*ukkonen::maximal_piece_prefix_no_checks(t, std::string("ab" ))
== 'a' );
REQUIRE(*ukkonen::maximal_piece_prefix_no_checks(t, 00043456_w) == 0);
REQUIRE(*ukkonen::maximal_piece_prefix_no_checks(t, 0123456_w) == 1);
}
LIBSEMIGROUPS_TEST_CASE("Ukkonen" ,
"001" ,
"maximal_piece_prefix 1" ,
"[quick][ukkonen]" ) {
Ukkonen t;
t.add_word({0, 5, 7});
t.add_word({1, 6, 7});
t.add_word({7, 2});
t.add_word({3, 4});
t.add_word({4, 8});
t.add_word({9});
t.add_word({5, 7, 10});
t.add_word({6, 7, 11});
REQUIRE(t.nodes().size() == 32);
REQUIRE(ukkonen::length_maximal_piece_prefix(t, {0, 5, 7}) == 0);
REQUIRE(ukkonen::length_maximal_piece_prefix(t, {1, 6, 7}) == 0);
REQUIRE(ukkonen::length_maximal_piece_prefix(t, {7, 2}) == 1);
REQUIRE(ukkonen::length_maximal_piece_prefix(t, {3, 4}) == 0);
REQUIRE(ukkonen::length_maximal_piece_prefix(t, {4, 8}) == 1);
REQUIRE(ukkonen::length_maximal_piece_prefix(t, {9}) == 0);
REQUIRE(ukkonen::length_maximal_piece_prefix(t, {5, 7, 10}) == 2);
REQUIRE(ukkonen::length_maximal_piece_prefix(t, {6, 7, 11}) == 2);
}
LIBSEMIGROUPS_TEST_CASE("Ukkonen" ,
"002" ,
"maximal_piece_prefix 2" ,
"[quick][ukkonen]" ) {
Ukkonen t;
t.add_word(004000_w);
t.add_word(45_w);
REQUIRE(ukkonen::number_of_distinct_subwords(t) == 18);
REQUIRE(ukkonen::length_maximal_piece_prefix(t, 004000_w) == 2);
REQUIRE(ukkonen::length_maximal_piece_prefix(t, 45_w) == 1);
t.add_word(0123_w);
REQUIRE(ukkonen::number_of_distinct_subwords(t) == 27);
REQUIRE(ukkonen::length_maximal_piece_prefix(t, 004000_w) == 2);
REQUIRE(ukkonen::length_maximal_piece_prefix(t, 45_w) == 1);
REQUIRE(ukkonen::length_maximal_piece_prefix(t, 0123_w) == 1);
t.add_word(004_w);
REQUIRE(ukkonen::number_of_distinct_subwords(t) == 27);
REQUIRE(ukkonen::length_maximal_piece_prefix(t, 004000_w) == 3);
REQUIRE(ukkonen::length_maximal_piece_prefix(t, "00456789" _w) == 3);
REQUIRE(ukkonen::length_maximal_piece_prefix(t, 45_w) == 1);
REQUIRE(ukkonen::length_maximal_piece_prefix(t, 0123_w) == 1);
REQUIRE(ukkonen::length_maximal_piece_prefix(t, 004_w) == 3);
}
LIBSEMIGROUPS_TEST_CASE("Ukkonen" ,
"003" ,
"maximal_piece_prefix 3" ,
"[quick][ukkonen]" ) {
Ukkonen t;
REQUIRE(t.number_of_distinct_words() == 0);
t.add_word(012_w);
REQUIRE(t.number_of_distinct_words() == 1);
t.add_word(124_w);
REQUIRE(t.number_of_distinct_words() == 2);
REQUIRE(t.nodes().size() == 11);
REQUIRE(ukkonen::number_of_distinct_subwords(t) == 10);
REQUIRE(ukkonen::is_subword(t, "" _w));
REQUIRE(ukkonen::is_subword(t, 0_w));
REQUIRE(ukkonen::is_subword(t, 1_w));
REQUIRE(ukkonen::is_subword(t, 2_w));
REQUIRE(ukkonen::is_subword(t, 4_w));
REQUIRE(ukkonen::is_subword(t, 01_w));
REQUIRE(ukkonen::is_subword(t, 12_w));
REQUIRE(ukkonen::is_subword(t, 24_w));
REQUIRE(ukkonen::is_subword(t, 012_w));
REQUIRE(ukkonen::is_subword(t, 124_w));
REQUIRE_THROWS_AS(
ukkonen::is_subword(
t, std::vector<size_t>({static_cast <size_t>(-1), 124})),
LibsemigroupsException);
REQUIRE(!ukkonen::is_subword(t, 123_w));
REQUIRE(!ukkonen::is_subword(t, 1234_w));
REQUIRE(!ukkonen::is_subword(t, 3_w));
REQUIRE(!ukkonen::is_subword(t, 13_w));
REQUIRE(std::count_if(
cbegin_wislo(5, {}, 00000_w),
cend_wislo(5, {}, 00000_w),
[&t](word_type const & w) { return ukkonen::is_subword(t, w); })
== 10);
REQUIRE(ukkonen::is_suffix(t, "" _w));
REQUIRE(!ukkonen::is_suffix(t, 0_w));
REQUIRE(!ukkonen::is_suffix(t, 1_w));
REQUIRE(ukkonen::is_suffix(t, 2_w));
REQUIRE(ukkonen::is_suffix(t, 4_w));
REQUIRE(!ukkonen::is_suffix(t, 01_w));
REQUIRE(ukkonen::is_suffix(t, 12_w));
REQUIRE(ukkonen::is_suffix(t, 24_w));
REQUIRE(ukkonen::is_suffix(t, 012_w));
REQUIRE(ukkonen::is_suffix(t, 124_w));
REQUIRE_THROWS_AS(ukkonen::is_suffix(t, {static_cast <size_t>(-1), 124}),
LibsemigroupsException);
REQUIRE(ukkonen::length_maximal_piece_prefix(t, 012_w) == 0);
REQUIRE(ukkonen::length_maximal_piece_suffix(t, 012_w) == 2);
REQUIRE(ukkonen::length_maximal_piece_suffix_no_checks(t, 012_w) == 2);
REQUIRE(ukkonen::length_maximal_piece_suffix(t, 124_w) == 0);
REQUIRE(ukkonen::length_maximal_piece_suffix_no_checks(t, 124_w) == 0);
REQUIRE(*ukkonen::maximal_piece_suffix(t, 012_w) == 1);
auto w = 124_w;
REQUIRE(ukkonen::maximal_piece_suffix(t, w) == w.cend());
REQUIRE(*ukkonen::maximal_piece_suffix_no_checks(t, 012_w) == 1);
REQUIRE(ukkonen::maximal_piece_suffix_no_checks(t, w) == w.cend());
REQUIRE(ukkonen::number_of_pieces(t, 012_w) == POSITIVE_INFINITY);
REQUIRE(ukkonen::number_of_pieces_no_checks(t, 012_w) == POSITIVE_INFINITY);
REQUIRE(ukkonen::number_of_pieces_no_checks(t, "abc" ) == POSITIVE_INFINITY);
REQUIRE(ukkonen::number_of_pieces_no_checks(t, std::string("abc" ))
== POSITIVE_INFINITY);
REQUIRE(ukkonen::number_of_pieces(t, 012_w) == POSITIVE_INFINITY);
REQUIRE(ukkonen::number_of_pieces(t, "abc" ) == POSITIVE_INFINITY);
REQUIRE(ukkonen::number_of_pieces(t, std::string("abc" ))
== POSITIVE_INFINITY);
REQUIRE(ukkonen::pieces(t, 012_w) == std::vector<word_type>());
REQUIRE(ukkonen::length_maximal_piece_prefix(t, 124_w) == 2);
REQUIRE(ukkonen::number_of_pieces(t, 124_w) == POSITIVE_INFINITY);
REQUIRE(ukkonen::pieces(t, 124_w) == std::vector<word_type>());
}
LIBSEMIGROUPS_TEST_CASE("Ukkonen" ,
"004" ,
"number_of_pieces" ,
"[quick][ukkonen]" ) {
Ukkonen t;
t.add_word(012_w);
t.add_word(0_w);
t.add_word(1_w);
t.add_word(2_w);
REQUIRE(ukkonen::number_of_pieces(t, 012_w) == 3);
REQUIRE(ukkonen::pieces(t, 012_w)
== std::vector<word_type>({0_w, 1_w, 2_w}));
REQUIRE(ukkonen::number_of_pieces(t, 0_w) == 1);
REQUIRE(ukkonen::pieces(t, 0_w) == std::vector<word_type>({0_w}));
REQUIRE(ukkonen::number_of_pieces(t, 1_w) == 1);
REQUIRE(ukkonen::pieces(t, 1_w) == std::vector<word_type>({1_w}));
REQUIRE(ukkonen::number_of_pieces(t, 2_w) == 1);
REQUIRE(ukkonen::pieces(t, 2_w) == std::vector<word_type>({2_w}));
t.add_word("01284567" _w);
t.add_word(012_w); // does nothing
t.add_word(845_w);
t.add_word(56_w);
t.add_word(567_w);
REQUIRE(t.number_of_distinct_words() == 8);
REQUIRE(t.number_of_words() == 9);
REQUIRE(ukkonen::number_of_pieces(t, 012_w) == 1);
REQUIRE(ukkonen::pieces(t, 012_w) == std::vector<word_type>({012_w}));
REQUIRE(ukkonen::number_of_pieces(t, 0_w) == 1);
REQUIRE(ukkonen::number_of_pieces(t, 1_w) == 1);
REQUIRE(ukkonen::number_of_pieces(t, 2_w) == 1);
REQUIRE(ukkonen::number_of_pieces(t, "01284567" _w) == 3);
REQUIRE(ukkonen::pieces(t, "01284567" _w)
== std::vector<word_type>({012_w, 845_w, 67_w}));
REQUIRE(ukkonen::is_piece(t, 012_w));
REQUIRE(ukkonen::is_piece(t, 845_w));
REQUIRE(ukkonen::is_piece(t, 67_w));
REQUIRE(ukkonen::is_piece_no_checks(t, 012_w));
REQUIRE(ukkonen::is_piece_no_checks(t, 845_w));
REQUIRE(ukkonen::is_piece_no_checks(t, 67_w));
REQUIRE(ukkonen::number_of_pieces(t, 845_w) == 1);
REQUIRE(ukkonen::pieces(t, 845_w) == std::vector<word_type>({845_w}));
REQUIRE(ukkonen::number_of_pieces(t, 56_w) == 1);
REQUIRE(ukkonen::pieces(t, 56_w) == std::vector<word_type>({56_w}));
REQUIRE(ukkonen::number_of_pieces(t, 567_w) == 1);
REQUIRE(ukkonen::pieces(t, 567_w) == std::vector<word_type>({567_w}));
REQUIRE(ukkonen::number_of_pieces(t, 12845_w) == 2);
REQUIRE(ukkonen::pieces(t, 12845_w)
== std::vector<word_type>({12_w, 845_w}));
auto w = "0128456701284567" _w;
REQUIRE(ukkonen::pieces(t, w.cbegin(), w.cend()).size() == 7);
REQUIRE(ukkonen::pieces(t, w)
== std::vector<word_type>(
{{0, 1, 2}, {8, 4, 5}, {6, 7}, {0, 1, 2}, {8, 4, 5}, {6, 7}}));
}
LIBSEMIGROUPS_TEST_CASE("Ukkonen" , "005" , "traverse" , "[quick][ukkonen]" ) {
using State = Ukkonen::State;
Ukkonen t;
t.add_word(004000_w);
auto s = ukkonen::traverse(t, {});
REQUIRE(s.first.v == 0);
REQUIRE(s.first.pos == 0);
s = ukkonen::traverse(t, 4_w);
REQUIRE(s.first.v == 4);
REQUIRE(s.first.pos == 1);
s = ukkonen::traverse(t, 40_w);
REQUIRE(s.first.v == 4);
REQUIRE(s.first.pos == 2);
s = ukkonen::traverse(t, 4000_w);
REQUIRE(s.first.v == 4);
REQUIRE(s.first.pos == 4);
s = ukkonen::traverse(t, 0_w);
REQUIRE(s.first.v == 2);
REQUIRE(s.first.pos == 1);
s = ukkonen::traverse(t, 04_w);
REQUIRE(s.first.v == 3);
REQUIRE(s.first.pos == 1);
s = ukkonen::traverse(t, 04000_w);
REQUIRE(s.first.v == 3);
REQUIRE(s.first.pos == 4);
REQUIRE(s.first == State(3, 4));
s = ukkonen::traverse(t, 002_w);
REQUIRE(t.distance_from_root(t.nodes()[s.first.v]));
}
LIBSEMIGROUPS_TEST_CASE("Ukkonen" , "006" , "dot" , "[quick][ukkonen]" ) {
{
Ukkonen t;
t.add_word(00_w);
t.add_word(00_w);
t.add_word(010_w);
t.add_word(00_w);
t.add_word(0101_w);
t.add_word(010_w);
REQUIRE_NOTHROW(ukkonen::dot(t));
}
{
Ukkonen u;
// No words
REQUIRE_THROWS_AS(ukkonen::dot(u), LibsemigroupsException);
ukkonen::add_words(
u, cbegin_wislo(2, "" _w, "00000" _w), cend_wislo(2, "" _w, "00000" _w));
REQUIRE(u.number_of_distinct_words() == 30);
// Too many words
REQUIRE_THROWS_AS(ukkonen::dot(u), LibsemigroupsException);
}
}
LIBSEMIGROUPS_TEST_CASE("Ukkonen" , "007" , "strings" , "[quick][ukkonen]" ) {
Ukkonen t;
t.add_word("aaaeaa" );
t.add_word("abcd" );
REQUIRE(t.number_of_distinct_words() == 2);
t.add_word("" );
REQUIRE(t.number_of_distinct_words() == 2);
REQUIRE(t.nodes().size() == 15);
REQUIRE(ukkonen::number_of_pieces(t, "aaaeaa" ) == POSITIVE_INFINITY);
REQUIRE(ukkonen::length_maximal_piece_suffix(t, "aaaeaa" ) == 2);
auto w = "aaaeaa" ;
REQUIRE(std::string(ukkonen::maximal_piece_suffix(t, w), w + 6) == "aa" );
REQUIRE(std::string(ukkonen::maximal_piece_suffix_no_checks(t, w), w + 6)
== "aa" );
REQUIRE(ukkonen::length_maximal_piece_suffix(t, w) == 2);
REQUIRE(ukkonen::length_maximal_piece_suffix_no_checks(t, w) == 2);
REQUIRE(std::string(w, ukkonen::maximal_piece_prefix_no_checks(t, w))
== "aa" );
std::string ww = "aaaeaa" ;
REQUIRE(std::string(ukkonen::maximal_piece_suffix(t, ww), ww.cend())
== "aa" );
REQUIRE(
std::string(ukkonen::maximal_piece_suffix_no_checks(t, ww), ww.cend())
== "aa" );
REQUIRE(ukkonen::pieces(t, ww) == std::vector<std::string>());
REQUIRE(ukkonen::pieces(t, "aaaaaa" )
== std::vector<std::string>({"aa" , "aa" , "aa" }));
REQUIRE_THROWS_AS(ukkonen::pieces(t, word_type({UNDEFINED})),
LibsemigroupsException);
REQUIRE(
std::string(ww.cbegin(), ukkonen::maximal_piece_prefix_no_checks(t, ww))
== "aa" );
REQUIRE(ukkonen::length_maximal_piece_suffix(t, ww) == 2);
REQUIRE(ukkonen::length_maximal_piece_suffix_no_checks(t, ww) == 2);
REQUIRE(ukkonen::length_maximal_piece_suffix(t, "abcd" ) == 0);
REQUIRE(ukkonen::length_maximal_piece_prefix(t, "aaaeaa" ) == 2);
REQUIRE(!ukkonen::is_suffix(t, "aaaeaaaaaaaaaaaaaaaa" ));
REQUIRE(ukkonen::is_suffix(t, "" ));
REQUIRE(ukkonen::length_maximal_piece_prefix(t, "" ) == 0);
REQUIRE(ukkonen::length_maximal_piece_suffix(t, "" ) == 0);
REQUIRE(ukkonen::number_of_pieces(t, "" ) == 0);
REQUIRE(ukkonen::length_maximal_piece_prefix(t, "xxx" ) == 0);
REQUIRE(ukkonen::length_maximal_piece_suffix(t, "xxx" ) == 0);
REQUIRE(ukkonen::number_of_pieces(t, "xxx" ) == POSITIVE_INFINITY);
}
LIBSEMIGROUPS_TEST_CASE("Ukkonen" , "008" , "dfs #01" , "[quick][ukkonen]" ) {
using const_iterator = typename Ukkonen::const_iterator;
Ukkonen t;
t.add_word(1212_w);
t.add_word(0_w);
t.add_word(1213121312131213_w);
t.add_word(0_w);
const_iterator first, last;
std::tie(first, last) = best_subword(t);
REQUIRE(word_type(first, last) == word_type(1213_w));
}
LIBSEMIGROUPS_TEST_CASE("Ukkonen" , "009" , "dfs #02" , "[quick][ukkonen]" ) {
using const_iterator = typename Ukkonen::const_iterator;
Ukkonen t;
t.add_word("aaaaa" );
t.add_word("bbb" );
t.add_word("ababa" );
t.add_word("aaabaabaaabaa" );
const_iterator first, last;
std::tie(first, last) = best_subword(t);
REQUIRE(word_type(first, last) == word_type({97, 98, 97}));
}
LIBSEMIGROUPS_TEST_CASE("Ukkonen" , "010" , "dfs #03" , "[quick][ukkonen]" ) {
using const_iterator = typename Ukkonen::const_iterator;
Ukkonen t;
t.add_word("aaaaa" );
t.add_word("bbb" );
t.add_word("cba" );
t.add_word("aaccaca" );
t.add_word("aba" );
const_iterator first, last;
std::tie(first, last) = best_subword(t);
REQUIRE(word_type(first, last) == word_type());
}
LIBSEMIGROUPS_TEST_CASE("Ukkonen" , "011" , "dfs #04" , "[quick][ukkonen]" ) {
using const_iterator = typename Ukkonen::const_iterator;
Ukkonen t;
t.add_word(00_w);
t.add_word(10_w);
t.add_word(01_w);
t.add_word(20_w);
t.add_word(02_w);
t.add_word(30_w);
t.add_word(03_w);
t.add_word(11_w);
t.add_word(23_w);
t.add_word(222_w);
t.add_word(12121212121212_w);
t.add_word(12131213121312131213121312131213_w);
const_iterator first, last;
std::tie(first, last) = best_subword(t);
REQUIRE(word_type(first, last) == 12131213_w);
}
LIBSEMIGROUPS_TEST_CASE("Ukkonen" , "012" , "dfs #05" , "[quick][ukkonen]" ) {
using const_iterator = typename Ukkonen::const_iterator;
Ukkonen t;
t.add_word(00_w);
t.add_word(10_w);
t.add_word(01_w);
t.add_word(20_w);
t.add_word(02_w);
t.add_word(30_w);
t.add_word(03_w);
t.add_word(11_w);
t.add_word(23_w);
t.add_word(222_w);
t.add_word(12121212121212_w);
t.add_word(44444444_w);
t.add_word(1213_w);
const_iterator first, last;
std::tie(first, last) = best_subword(t);
REQUIRE(word_type(first, last) == 12_w);
}
LIBSEMIGROUPS_TEST_CASE("Ukkonen" , "013" , "dfs #06" , "[quick][ukkonen]" ) {
using const_iterator = typename Ukkonen::const_iterator;
Ukkonen t;
t.add_word(00_w);
t.add_word(10_w);
t.add_word(01_w);
t.add_word(20_w);
t.add_word(02_w);
t.add_word(30_w);
t.add_word(03_w);
t.add_word(11_w);
t.add_word(23_w);
t.add_word(222_w);
t.add_word(5555555_w);
t.add_word(44444444_w);
t.add_word(513_w);
t.add_word(12_w);
const_iterator first, last;
std::tie(first, last) = best_subword(t);
REQUIRE(word_type(first, last) == 4444_w);
}
LIBSEMIGROUPS_TEST_CASE("Ukkonen" , "014" , "dfs #07" , "[quick][ukkonen]" ) {
using const_iterator = typename Ukkonen::const_iterator;
Ukkonen t;
t.add_word("aaaaaaaaaaaaaa" );
t.add_word("bbbbbbbbbbbbbb" );
t.add_word("cccccccccccccc" );
t.add_word("aaaaba" );
t.add_word("bbb" );
t.add_word("bbbbab" );
t.add_word("aaa" );
t.add_word("aaaaca" );
t.add_word("ccc" );
t.add_word("ccccac" );
t.add_word("aaa" );
t.add_word("bbbbcb" );
t.add_word("ccc" );
t.add_word("ccccbc" );
t.add_word("bbb" );
const_iterator first, last;
std::tie(first, last) = best_subword(t);
REQUIRE(word_type(first, last) == word_type({99, 99, 99}));
}
LIBSEMIGROUPS_TEST_CASE("Ukkonen" , "015" , "dfs #08" , "[quick][ukkonen]" ) {
using const_iterator = typename Ukkonen::const_iterator;
Ukkonen t;
t.add_word("aaaaaaaaaaaaaa" );
t.add_word("bbbbbbbbbbbbbb" );
t.add_word("ddddcc" );
t.add_word("aaaaba" );
t.add_word("bbb" );
t.add_word("bbbbab" );
t.add_word("aaa" );
t.add_word("aaaaca" );
t.add_word("dcac" );
t.add_word("aaa" );
t.add_word("bbbbcb" );
t.add_word("dcbc" );
t.add_word("bbb" );
t.add_word("ccc" );
const_iterator first, last;
std::tie(first, last) = best_subword(t);
REQUIRE(word_type(first, last) == word_type({98, 98, 98}));
}
LIBSEMIGROUPS_TEST_CASE("Ukkonen" , "016" , "dfs #09" , "[quick][ukkonen]" ) {
using const_iterator = typename Ukkonen::const_iterator;
Ukkonen t;
t.add_word("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" );
t.add_word("bbb" );
t.add_word("ababa" );
t.add_word("aaaaaaaaaaaaaaaabaaaabaaaaaaaaaaaaaaaabaaaa" );
const_iterator first, last;
std::tie(first, last) = best_subword(t);
REQUIRE(word_type(first, last)
== word_type({
97,
97,
97,
97,
}));
}
LIBSEMIGROUPS_TEST_CASE("Ukkonen" , "017" , "dfs #10" , "[quick][ukkonen]" ) {
using const_iterator = typename Ukkonen::const_iterator;
Ukkonen t;
t.add_word("aBCbac" );
t.add_word("bACbaacA" );
t.add_word("accAABab" );
const_iterator first, last;
std::tie(first, last) = best_subword(t);
REQUIRE(word_type(first, last) == word_type());
}
LIBSEMIGROUPS_TEST_CASE("Ukkonen" , "018" , "pieces" , "[quick][ukkonen]" ) {
Ukkonen t;
detail::StringToWord string_to_word("ab" );
t.add_word(string_to_word("baabbaaaa" ));
t.add_word(string_to_word("abababbbaa" ));
REQUIRE(ukkonen::number_of_pieces(t, string_to_word("baabbaaaa" )) == 3);
REQUIRE(ukkonen::pieces(t, string_to_word("baabbaaaa" ))
== std::vector<word_type>({100_w, 1100_w, 00_w}));
REQUIRE(ukkonen::is_piece(t, string_to_word("baa" )));
REQUIRE(ukkonen::is_piece(t, string_to_word("bbaa" )));
REQUIRE(ukkonen::is_piece(t, string_to_word("aa" )));
REQUIRE(!ukkonen::is_piece(t, "aa" ));
REQUIRE(!ukkonen::is_piece(t, std::string("aa" )));
REQUIRE(!ukkonen::is_piece_no_checks(t, "aa" ));
REQUIRE(!ukkonen::is_piece_no_checks(t, std::string("aa" )));
REQUIRE(ukkonen::is_piece_no_checks(t, string_to_word("baa" )));
REQUIRE(ukkonen::is_piece_no_checks(t, string_to_word("bbaa" )));
REQUIRE(ukkonen::is_piece_no_checks(t, string_to_word("aa" )));
REQUIRE(ukkonen::number_of_pieces(t, string_to_word("abababbbaa" )) == 3);
REQUIRE(ukkonen::pieces(t, string_to_word("abababbbaa" ))
== std::vector<word_type>({0101_w, 011_w, 100_w}));
}
LIBSEMIGROUPS_TEST_CASE("Ukkonen" ,
"019" ,
"code coverage" ,
"[quick][ukkonen]" ) {
Ukkonen u;
u.add_word_no_checks(0001000_w);
auto w = "abcdefabababab" ;
u.add_word_no_checks(w, w + std::strlen(w));
u.add_word_no_checks(w);
u.add_word_no_checks({1, 2, 3, 4, 0, 0, 1, 1, 0, 0, 1});
std::string ww("abdbadbabdbabdabdj" );
u.add_word_no_checks(ww);
u.add_word(ww);
auto www = 0001000_w;
u.add_word_no_checks(www.cbegin(), www.cend());
REQUIRE(u.nodes().size() == 78);
REQUIRE(u.length_of_distinct_words() == 50);
REQUIRE(u.length_of_words() == 89);
REQUIRE(std::equal(u.cbegin(), u.cend(), u.begin(), u.end()));
REQUIRE(word_type(u.begin(), u.end())
== word_type({0,
0,
0,
1,
0,
0,
0,
u.unique_letter(0),
97,
98,
99,
100,
101,
102,
97,
98,
97,
98,
97,
98,
97,
98,
u.unique_letter(1),
1,
2,
3,
4,
0,
0,
1,
1,
0,
0,
1,
u.unique_letter(2),
97,
98,
100,
98,
97,
100,
98,
97,
98,
100,
98,
97,
98,
100,
97,
98,
100,
106,
u.unique_letter(3)}));
std::vector<size_t> distances(u.nodes().size(), 0);
std::transform(u.nodes().cbegin(),
u.nodes().cend(),
distances.begin(),
[&u](auto const & n) { return u.distance_from_root(n); });
REQUIRE(distances
== std::vector<size_t>(
{0, 8, 2, 7, 1, 6, 5, 3, 4, 3, 2, 1, 15, 14, 13, 12,
11, 10, 2, 9, 1, 8, 6, 7, 5, 6, 4, 5, 3, 4, 3, 2,
1, 1, 12, 11, 10, 9, 3, 8, 2, 7, 6, 3, 5, 4, 3, 2,
1, 19, 18, 1, 17, 2, 16, 1, 15, 3, 14, 13, 5, 12, 4, 11,
5, 10, 4, 9, 3, 8, 2, 7, 6, 5, 4, 3, 2, 1}));
distances.resize(u.number_of_distinct_words(), 0);
auto range = IntegralRange<size_t>(0, u.number_of_distinct_words());
std::transform(range.cbegin(),
range.cend(),
distances.begin(),
[&u](auto i) { return u.multiplicity(i); });
REQUIRE(distances == std::vector<size_t>({2, 2, 1, 2}));
std::vector<word_type> v = {www, www};
ukkonen::add_words_no_checks(u, v);
ukkonen::add_words(u, v);
ukkonen::add_words_no_checks(u, v.cbegin(), v.cend());
ukkonen::add_words(u, v.cbegin(), v.cend());
REQUIRE(u.nodes().size() == 78);
}
LIBSEMIGROUPS_TEST_CASE("Ukkonen" ,
"020" ,
"code coverage" ,
"[quick][ukkonen]" ) {
Ukkonen u;
REQUIRE(u.is_suffix(Ukkonen::State()) == UNDEFINED);
REQUIRE(ukkonen::is_suffix(u, "" _w) == true );
}
} // namespace libsemigroups
quality 90%
¤ Dauer der Verarbeitung: 0.21 Sekunden
(vorverarbeitet)
¤
*© Formatika GbR, Deutschland