17 #include <sdsl/suffix_trees.hpp> 53 template <
typename index_t>
63 using size_type =
typename index_type::size_type;
138 template <detail::SdslIndex csa_t>
143 assert((l_fwd <= r_fwd) && (r_fwd < csa.size()));
144 assert(r_fwd + 1 >= l_fwd);
145 assert(r_bwd + 1 - l_bwd == r_fwd + 1 - l_fwd);
147 size_type _l_fwd, _r_fwd, _l_bwd, _r_bwd;
152 cc = csa.char2comp[c];
153 if (cc == 0 && c > 0)
158 if (r_fwd + 1 - l_fwd == csa.size())
162 _r_fwd = csa.C[cc + 1] - 1;
168 auto const r_s_b = csa.wavelet_tree.lex_count(l_fwd, r_fwd + 1, c);
169 size_type const rank_l = std::get<0>(r_s_b);
170 size_type const s = std::get<1>(r_s_b), b = std::get<2>(r_s_b);
171 size_type const rank_r = r_fwd - l_fwd - s - b + rank_l;
172 _l_fwd = c_begin + rank_l;
173 _r_fwd = c_begin + rank_r;
178 if (_r_fwd >= _l_fwd)
184 assert(r_fwd + 1 >= l_fwd);
185 assert(r_bwd + 1 - l_bwd == r_fwd + 1 - l_fwd);
192 template <detail::SdslIndex csa_t>
198 assert((l_parent <= r_parent) && (r_parent < csa.size()));
204 c_begin = csa.C[csa.char2comp[c]];
206 auto const r_s_b = csa.wavelet_tree.lex_count(l_parent, r_parent + 1, c);
208 b = std::get<2>(r_s_b),
209 rank_l = std::get<0>(r_s_b),
210 rank_r = r_parent - l_parent - s - b + rank_l;
212 size_type const _l_fwd = c_begin + rank_l;
213 size_type const _r_fwd = c_begin + rank_r;
215 size_type const _r_bwd = r_bwd + 1 + rank_r - rank_l;
217 if (_r_fwd >= _l_fwd)
223 assert(r_fwd + 1 >= l_fwd);
224 assert(r_bwd + 1 - l_bwd == r_fwd + 1 - l_fwd);
249 sigma(_index.fwd_fm.index.sigma - index_t::is_collection_),
268 assert(index !=
nullptr);
270 assert(!(fwd_lb == rhs.fwd_lb && fwd_rb == rhs.fwd_rb && depth == rhs.depth) ||
272 (parent_lb == rhs.parent_lb && parent_rb == rhs.parent_rb && _last_char == rhs._last_char));
274 return std::tie(fwd_lb, fwd_rb, depth) ==
std::tie(rhs.fwd_lb, rhs.fwd_rb, rhs.depth);
291 assert(index !=
nullptr);
293 return !(*
this == rhs);
316 fwd_cursor_last_used =
true;
319 assert(index !=
nullptr);
326 fwd_lb, fwd_rb, rev_lb, rev_rb))
333 parent_lb = new_parent_lb;
334 parent_rb = new_parent_rb;
364 fwd_cursor_last_used =
false;
367 assert(index !=
nullptr);
374 rev_lb, rev_rb, fwd_lb, fwd_rb))
381 parent_lb = new_parent_lb;
382 parent_rb = new_parent_rb;
406 template <Alphabet
char_t>
410 fwd_cursor_last_used =
true;
413 assert(index !=
nullptr);
414 assert(index->fwd_fm.sigma == alphabet_size<char_t>);
421 parent_lb = new_parent_lb;
422 parent_rb = new_parent_rb;
446 template <Alphabet
char_t>
450 fwd_cursor_last_used =
false;
453 assert(index !=
nullptr);
454 assert(index->fwd_fm.sigma == alphabet_size<char_t>);
461 parent_lb = new_parent_lb;
462 parent_rb = new_parent_rb;
488 template <std::ranges::Range seq_t>
492 assert(index !=
nullptr);
499 fwd_cursor_last_used = (first != last);
507 for (
auto it = first; it != last; ++len, ++it)
511 new_parent_lb = _fwd_lb;
512 new_parent_rb = _fwd_rb;
522 parent_lb = new_parent_lb;
523 parent_rb = new_parent_rb;
551 template <std::ranges::Range seq_t>
555 assert(index !=
nullptr);
564 fwd_cursor_last_used =
false;
573 for (
auto it = first; it != last; ++len, ++it)
577 new_parent_lb = _rev_lb;
578 new_parent_rb = _rev_rb;
588 parent_lb = new_parent_lb;
589 parent_rb = new_parent_rb;
626 assert(fwd_cursor_last_used);
635 parent_lb, parent_rb, fwd_lb, fwd_rb, rev_lb, rev_rb))
679 assert(!fwd_cursor_last_used);
687 parent_lb, parent_rb, rev_lb, rev_rb, fwd_lb, fwd_rb))
722 return index->fwd_fm.index.comp2char[
_last_char] - 1;
739 assert(index !=
nullptr);
745 fwd_rb == index->size() - 1));
771 assert(index !=
nullptr);
779 if (!fwd_cursor_last_used)
816 assert(index !=
nullptr);
824 if (fwd_cursor_last_used)
851 template <std::ranges::Range text_t>
854 requires !index_t::is_collection_
858 static_assert(dimension_v<text_t> == 1,
"The input cannot be a text collection.");
859 assert(index !=
nullptr);
867 template <std::ranges::Range text_t>
870 requires index_t::is_collection_
874 static_assert(dimension_v<text_t> == 2,
"The input must be a text collection.");
875 assert(index !=
nullptr);
879 size_type const query_begin = loc - index->fwd_fm.text_begin_rs.rank(loc + 1) + 1;
896 assert(index !=
nullptr && (1 + fwd_rb - fwd_lb == 1 + rev_rb - rev_lb));
898 return 1 + fwd_rb -
fwd_lb;
914 requires !index_t::is_collection_
917 assert(index !=
nullptr);
922 occ[i] =
offset() - index->fwd_fm.index[fwd_lb + i];
930 requires index_t::is_collection_
933 assert(index !=
nullptr);
940 size_type sequence_rank = index->fwd_fm.text_begin_rs.rank(loc + 1);
941 size_type sequence_position = loc - index->fwd_fm.text_begin_ss.select(sequence_rank);
961 requires !index_t::is_collection_
964 assert(index !=
nullptr);
969 return _offset - index->fwd_fm.index[sa_pos];
976 requires index_t::is_collection_
979 assert(index !=
nullptr);
984 return _offset - index->fwd_fm.index[sa_pos];
988 size_type sequence_rank = index->fwd_fm.text_begin_rs.rank(loc + 1);
989 size_type sequence_position = loc - index->fwd_fm.text_begin_ss.select(sequence_rank);
bool extend_left(char_t const c) noexcept
Tries to extend the query by the character c to the left.
Definition: bi_fm_index_cursor.hpp:447
typename index_type::sdsl_char_type sdsl_char_type
Type of the representation of characters in the underlying SDSL index.
Definition: bi_fm_index_cursor.hpp:78
bool extend_left() noexcept
Tries to extend the query by the smallest possible character to the left such that the query is found...
Definition: bi_fm_index_cursor.hpp:361
bool operator!=(bi_fm_index_cursor const &rhs) const noexcept
Compares two cursors.
Definition: bi_fm_index_cursor.hpp:289
typename value_type< t >::type value_type_t
Shortcut for seqan3::value_type (TransformationTrait shortcut).
Definition: pre.hpp:48
constexpr sequenced_policy seq
Global execution policy object for sequenced execution policy.
Definition: execution.hpp:54
index_t index_type
Type of the index.
Definition: bi_fm_index_cursor.hpp:58
bool cycle_front() noexcept
Tries to replace the leftmost character of the query by the next lexicographically larger character s...
Definition: bi_fm_index_cursor.hpp:675
bi_fm_index_cursor & operator=(bi_fm_index_cursor const &) noexcept=default
Copy assignment.
bool extend_right(char_t const c) noexcept
Tries to extend the query by the character c to the right.
Definition: bi_fm_index_cursor.hpp:407
constexpr auto to_rank
Return the rank representation of a (semi-)alphabet object.
Definition: concept.hpp:103
constexpr auto reverse
A range adaptor that presents the underlying range in reverse order.
Definition: ranges:721
rev_cursor to_rev_cursor() const noexcept
Returns a unidirectional seqan3::fm_index_cursor on the reversed text. path_label() on the returned u...
Definition: bi_fm_index_cursor.hpp:814
~bi_fm_index_cursor()=default
Destructor.
std::vector< size_type > locate() const
Locates the occurrences of the searched query in the text.
Definition: bi_fm_index_cursor.hpp:912
sdsl_char_type _last_char
Label of the last edge moved down. Needed for cycle_back() or cycle_front().
Definition: bi_fm_index_cursor.hpp:116
The main SeqAn3 namespace.
size_type count() const noexcept
Counts the number of occurrences of the searched query in the text.
Definition: bi_fm_index_cursor.hpp:894
auto lazy_locate() const
Locates the occurrences of the searched query in the text on demand, i.e. a ranges::view is returned ...
Definition: bi_fm_index_cursor.hpp:959
constexpr auto join
Flattens a View of ranges into a View.
Definition: ranges:683
constexpr auto slice
A view adaptor that returns a half-open interval on the underlying range.
Definition: slice.hpp:144
bool operator==(bi_fm_index_cursor const &rhs) const noexcept
Compares two cursors.
Definition: bi_fm_index_cursor.hpp:266
std::vector< std::pair< size_type, size_type > > locate() const
Definition: bi_fm_index_cursor.hpp:928
Meta-header for the alphabet module.
bool fwd_cursor_last_used
Stores the information which cursor has been used last for extend_*([...]) to allow for assert() in...
Definition: bi_fm_index_cursor.hpp:127
typename index_type::sdsl_sigma_type sdsl_sigma_type
Type of the alphabet size in the underlying SDSL index.
Definition: bi_fm_index_cursor.hpp:80
Adaptations of concepts from the Ranges TS.
size_type fwd_rb
Right suffix array interval of the forward cursor (for extend_right).
Definition: bi_fm_index_cursor.hpp:91
Specifies requirements of a Range type for which begin returns a type that models std::BidirectionalI...
typename innermost_value_type< t >::type innermost_value_type_t
Shortcut for seqan3::innermost_value_type (TransformationTrait shortcut).
Definition: range.hpp:191
::ranges::begin begin
Alias for ranges::begin. Returns an iterator to the beginning of a range.
Definition: ranges:174
size_type offset() const noexcept
Helper function to recompute text positions since the indexed text is reversed.
Definition: bi_fm_index_cursor.hpp:131
size_type depth
Depth of the node in the suffix tree, i.e. length of the searched query.
Definition: bi_fm_index_cursor.hpp:120
fwd_cursor to_fwd_cursor() const noexcept
Returns a unidirectional seqan3::fm_index_cursor on the original text. path_label() on the returned u...
Definition: bi_fm_index_cursor.hpp:769
constexpr auto iota
Generates a sequence of elements by repeatedly incrementing an initial value.
Definition: ranges:647
The concept std::Same<T, U> is satisfied if and only if T and U denote the same type.
Specifies requirements of a Range type for which begin returns a type that models std::ForwardIterato...
bool bidirectional_search(csa_t const &csa, sdsl_char_type const c, size_type &l_fwd, size_type &r_fwd, size_type &l_bwd, size_type &r_bwd) const noexcept
Optimized bidirectional search without alphabet mapping.
Definition: bi_fm_index_cursor.hpp:139
index_type const * index
Type of the underlying FM index.
Definition: bi_fm_index_cursor.hpp:83
Provides the bidirectional seqan3::bi_fm_index.
size_type fwd_lb
Left suffix array interval of the forward cursor (for extend_right).
Definition: bi_fm_index_cursor.hpp:89
typename index_type::size_type size_type
Type for representing positions in the indexed text.
Definition: bi_fm_index_cursor.hpp:64
size_type parent_lb
Left suffix array interval of the parent node.
Definition: bi_fm_index_cursor.hpp:112
The SeqAn Bidirectional FM Index Cursor.
Definition: bi_fm_index_cursor.hpp:54
size_type last_rank() noexcept
Outputs the rightmost respectively leftmost rank depending on whether extend_right() or extend_left()...
Definition: bi_fm_index_cursor.hpp:718
bool bidirectional_search_cycle(csa_t const &csa, sdsl_char_type const c, size_type const l_parent, size_type const r_parent, size_type &l_fwd, size_type &r_fwd, size_type &l_bwd, size_type &r_bwd) const noexcept
Optimized bidirectional search for cycle_back() and cycle_front() without alphabet mapping...
Definition: bi_fm_index_cursor.hpp:193
bool cycle_back() noexcept
Tries to replace the rightmost character of the query by the next lexicographically larger character ...
Definition: bi_fm_index_cursor.hpp:622
Provides seqan3::view::slice.
bi_fm_index_cursor() noexcept=default
Default constructor. Accessing member functions on a default constructed object is undefined behavior...
bi_fm_index_cursor(index_t const &_index) noexcept
Construct from given index.
Definition: bi_fm_index_cursor.hpp:246
Provides various transformation traits used by the range module.
auto path_label(text_t &&text) const noexcept
Returns the searched query.
Definition: bi_fm_index_cursor.hpp:852
size_type rev_rb
Right suffix array interval of the reverse cursor (for extend_left).
Definition: bi_fm_index_cursor.hpp:95
bool extend_right() noexcept
Tries to extend the query by the smallest possible character to the right such that the query is foun...
Definition: bi_fm_index_cursor.hpp:313
constexpr auto alphabet_size
A type trait that holds the size of a (semi-)alphabet.
Definition: concept.hpp:678
sdsl_sigma_type sigma
Alphabet size of the index without delimiters.
Definition: bi_fm_index_cursor.hpp:99
bool extend_right(seq_t &&seq) noexcept
Tries to extend the query by seq to the right.
Definition: bi_fm_index_cursor.hpp:489
bool extend_left(seq_t &&seq) noexcept
Tries to extend the query by seq to the left.
Definition: bi_fm_index_cursor.hpp:552
::ranges::end end
Alias for ranges::end. Returns an iterator to the end of a range.
Definition: ranges:179
constexpr auto transform
A range adaptor that takes a invocable and returns a view of the elements with the invocable applied...
Definition: ranges:911
size_type query_length() const noexcept
Returns the depth of the cursor node in the implicit suffix tree, i.e. the length of the sequence sea...
Definition: bi_fm_index_cursor.hpp:737
The SeqAn FM Index Cursor.
Definition: fm_index_cursor.hpp:64
size_type rev_lb
Left suffix array interval of the reverse cursor (for extend_left).
Definition: bi_fm_index_cursor.hpp:93
T emplace_back(T... args)
size_type parent_rb
Left suffix array interval of the parent node.
Definition: bi_fm_index_cursor.hpp:114