Viewing file: regex_nfa.h (10.65 KB) -rw-r--r-- Select action/file-type: (+) | (+) | (+) | Code (+) | Session (+) | (+) | SDB (+) | (+) | (+) | (+) | (+) | (+) |
// class template regex -*- C++ -*-
// Copyright (C) 2010-2013 Free Software Foundation, Inc. // // This file is part of the GNU ISO C++ Library. This library is free // software; you can redistribute it and/or modify it under the // terms of the GNU General Public License as published by the // Free Software Foundation; either version 3, or (at your option) // any later version.
// This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details.
// Under Section 7 of GPL version 3, you are granted additional // permissions described in the GCC Runtime Library Exception, version // 3.1, as published by the Free Software Foundation.
// You should have received a copy of the GNU General Public License and // a copy of the GCC Runtime Library Exception along with this program; // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see // <http://www.gnu.org/licenses/>.
/** * @file bits/regex_nfa.h * This is an internal header file, included by other library headers. * Do not attempt to use it directly. @headername{regex} */
namespace std _GLIBCXX_VISIBILITY(default) { namespace __detail { _GLIBCXX_BEGIN_NAMESPACE_VERSION
/** * @addtogroup regex-detail * @{ */
/// Abstract base class for automata; per the original note, a concrete
/// automaton may be an NFA or a DFA.
class _Automaton
{
public:
  /// Unsigned type used for the counts an automaton reports.
  typedef unsigned int _SizeT;

  /// Virtual destructor: the class is used as a polymorphic base.
  virtual
  ~_Automaton()
  { }

  /// Returns the sub-expression count; every derived automaton must
  /// provide this.
  virtual _SizeT
  _M_sub_count() const = 0;

#ifdef _GLIBCXX_DEBUG
  /// Debug builds only: writes a description of the automaton to
  /// @p __ostr and returns the stream (presumably graphviz dot output,
  /// going by the name -- confirm against the implementation).
  virtual std::ostream&
  _M_dot(std::ostream& __ostr) const = 0;
#endif
};
/// Generic shared pointer to an automaton. typedef std::shared_ptr<_Automaton> _AutomatonPtr;
/// Operation codes that define the type of transitions within the base NFA
/// that represents the regular expression.
enum _Opcode
{
  _S_opcode_unknown       =   0, ///< No opcode assigned.
  _S_opcode_alternative   =   1, ///< State with a second (alt) transition.
  _S_opcode_subexpr_begin =   4, ///< Tagged state opening a sub-expression.
  _S_opcode_subexpr_end   =   5, ///< Tagged state closing a sub-expression.
  _S_opcode_match         = 100, ///< State guarded by a _Matcher.
  _S_opcode_accept        = 255  ///< Accepting state.
};
/// Provides a generic facade for a templated match_results. struct _Results { virtual void _M_set_pos(int __i, int __j, const _PatternCursor& __p) = 0; virtual void _M_set_matched(int __i, bool __is_matched) = 0; };
/// Tags current state (for subexpr begin/end). typedef std::function<void (const _PatternCursor&, _Results&)> _Tagger;
/// Start state tag. template<typename _FwdIterT, typename _TraitsT> struct _StartTagger { explicit _StartTagger(int __i) : _M_index(__i) { }
void operator()(const _PatternCursor& __pc, _Results& __r) { __r._M_set_pos(_M_index, 0, __pc); }
int _M_index; };
/// End state tag. template<typename _FwdIterT, typename _TraitsT> struct _EndTagger { explicit _EndTagger(int __i) : _M_index(__i) { }
void operator()(const _PatternCursor& __pc, _Results& __r) { __r._M_set_pos(_M_index, 1, __pc); }
int _M_index; _FwdIterT _M_pos; };
/// Indicates if current state matches cursor current. typedef std::function<bool (const _PatternCursor&)> _Matcher;
/// Matches any character inline bool _AnyMatcher(const _PatternCursor&) { return true; }
/// Matches a single character template<typename _InIterT, typename _TraitsT> struct _CharMatcher { typedef typename _TraitsT::char_type char_type;
explicit _CharMatcher(char_type __c, const _TraitsT& __t = _TraitsT()) : _M_traits(__t), _M_c(_M_traits.translate(__c)) { }
bool operator()(const _PatternCursor& __pc) const { typedef const _SpecializedCursor<_InIterT>& _CursorT; _CursorT __c = static_cast<_CursorT>(__pc); return _M_traits.translate(__c._M_current()) == _M_c; }
const _TraitsT& _M_traits; char_type _M_c; };
/// Matches a character range (bracket expression).
///
/// NOTE: this is a placeholder.  Every _M_add_* function and
/// _M_make_range are empty, and operator() accepts every character
/// regardless of _M_is_non_matching.
template<typename _InIterT, typename _TraitsT>
  struct _RangeMatcher
  {
    typedef typename _TraitsT::char_type _CharT;
    typedef std::basic_string<_CharT>    _StringT;

    /// @param __is_non_matching  Stored in _M_is_non_matching; not yet
    ///                           consulted by operator().
    /// @param __t                Traits object; copied into the matcher.
    explicit
    _RangeMatcher(bool __is_non_matching, const _TraitsT& __t = _TraitsT())
    : _M_traits(__t), _M_is_non_matching(__is_non_matching)
    { }

    /// Placeholder: always returns true without inspecting the cursor.
    bool
    operator()(const _PatternCursor&) const
    { return true; }

    /// Placeholder: does nothing.
    void
    _M_add_char(_CharT __c)
    { }

    /// Placeholder: does nothing.
    void
    _M_add_collating_element(const _StringT& __s)
    { }

    /// Placeholder: does nothing.
    void
    _M_add_equivalence_class(const _StringT& __s)
    { }

    /// Placeholder: does nothing.
    void
    _M_add_character_class(const _StringT& __s)
    { }

    /// Placeholder: does nothing.
    void
    _M_make_range()
    { }

    // Held by value rather than by reference: with the default argument
    // the traits object is a temporary that dies when the constructor
    // returns, so a reference member would be left dangling.
    _TraitsT _M_traits;
    bool     _M_is_non_matching;
  };
/// Integer handle that names one state of the NFA.
using _StateIdT = int;

/// Reserved identifier meaning "no state": the one value of _StateIdT
/// that is not an index.
static const _StateIdT _S_invalid_state_id = -1;
/** * @brief struct _State * * An individual state in an NFA * * In this case a "state" is an entry in the NFA definition coupled * with its outgoing transition(s). All states have a single outgoing * transition, except for accepting states (which have no outgoing * transitions) and alt states, which have two outgoing transitions. */ struct _State { typedef int _OpcodeT;
_OpcodeT _M_opcode; // type of outgoing transition _StateIdT _M_next; // outgoing transition _StateIdT _M_alt; // for _S_opcode_alternative unsigned int _M_subexpr; // for _S_opcode_subexpr_* _Tagger _M_tagger; // for _S_opcode_subexpr_* _Matcher _M_matches; // for _S_opcode_match
explicit _State(_OpcodeT __opcode) : _M_opcode(__opcode), _M_next(_S_invalid_state_id) { }
_State(const _Matcher& __m) : _M_opcode(_S_opcode_match), _M_next(_S_invalid_state_id), _M_matches(__m) { }
_State(_OpcodeT __opcode, unsigned int __s, const _Tagger& __t) : _M_opcode(__opcode), _M_next(_S_invalid_state_id), _M_subexpr(__s), _M_tagger(__t) { }
_State(_StateIdT __next, _StateIdT __alt) : _M_opcode(_S_opcode_alternative), _M_next(__next), _M_alt(__alt) { }
#ifdef _GLIBCXX_DEBUG std::ostream& _M_print(std::ostream& ostr) const;
// Prints graphviz dot commands for state. std::ostream& _M_dot(std::ostream& __ostr, _StateIdT __id) const; #endif };
/// The Grep Matcher works on sets of states. Here are sets of states. typedef std::set<_StateIdT> _StateSet;
/**
 * @brief class _Nfa
 *
 * A collection of all states making up an NFA.
 *
 * An NFA is a 4-tuple M = (K, S, s, F), where
 *    K is a finite set of states,
 *    S is the alphabet of the NFA,
 *    s is the initial state,
 *    F is a set of final (accepting) states.
 *
 * The states (K) are the elements of the std::vector<_State> base class,
 * and a state's identifier is its index in that vector.  The start state
 * and the accepting states are available through accessor member
 * functions.  The class itself is not a template; anything specific to
 * the alphabet lives in the _Matcher stored in each match state.
 */
class _Nfa
: public _Automaton, public std::vector<_State>
{
public:
  typedef _State                              _StateT;
  typedef unsigned int                        _SizeT;
  typedef regex_constants::syntax_option_type _FlagT;

  /// Creates an empty NFA that remembers syntax flags @p __f.  The start
  /// state id and the sub-expression count both begin at 0.
  _Nfa(_FlagT __f)
  : _M_flags(__f), _M_start_state(0), _M_subexpr_count(0)
  { }

  ~_Nfa()
  { }

  /// Syntax flags given at construction.
  _FlagT
  _M_options() const
  {
    return _M_flags;
  }

  /// Identifier of the start state.
  _StateIdT
  _M_start() const
  {
    return _M_start_state;
  }

  /// Identifiers of all states added through _M_insert_accept.
  const _StateSet&
  _M_final_states() const
  {
    return _M_accepting_states;
  }

  /// Number of sub-expressions begun so far.
  _SizeT
  _M_sub_count() const
  {
    return _M_subexpr_count;
  }

  /// Appends an accepting state, registers it as final and returns its
  /// identifier.
  _StateIdT
  _M_insert_accept()
  {
    this->push_back(_StateT(_S_opcode_accept));
    const _StateIdT __id = this->size() - 1;
    _M_accepting_states.insert(__id);
    return __id;
  }

  /// Appends an alternative state branching to @p __next and @p __alt
  /// and returns its identifier.
  _StateIdT
  _M_insert_alt(_StateIdT __next, _StateIdT __alt)
  {
    this->push_back(_StateT(__next, __alt));
    const _StateIdT __id = this->size() - 1;
    return __id;
  }

  /// Appends a match state guarded by @p __m and returns its identifier.
  _StateIdT
  _M_insert_matcher(_Matcher __m)
  {
    this->push_back(_StateT(__m));
    const _StateIdT __id = this->size() - 1;
    return __id;
  }

  /// Appends a sub-expression-begin state tagged by @p __t and returns
  /// its identifier.  The state gets the current sub-expression count as
  /// its sub-expression number, and the count is then incremented.
  _StateIdT
  _M_insert_subexpr_begin(const _Tagger& __t)
  {
    this->push_back(_StateT(_S_opcode_subexpr_begin,
                            _M_subexpr_count++, __t));
    const _StateIdT __id = this->size() - 1;
    return __id;
  }

  /// Appends a sub-expression-end state for sub-expression @p __i,
  /// tagged by @p __t, and returns its identifier.
  _StateIdT
  _M_insert_subexpr_end(unsigned int __i, const _Tagger& __t)
  {
    this->push_back(_StateT(_S_opcode_subexpr_end, __i, __t));
    const _StateIdT __id = this->size() - 1;
    return __id;
  }

#ifdef _GLIBCXX_DEBUG
  /// Debug builds only; defined outside this class.
  std::ostream&
  _M_dot(std::ostream& __ostr) const;
#endif

private:
  _FlagT     _M_flags;
  _StateIdT  _M_start_state;
  _StateSet  _M_accepting_states;
  _SizeT     _M_subexpr_count;
};
/// Describes a sequence of one or more %_State, its current start /// and end(s). This structure contains fragments of an NFA during /// construction. class _StateSeq { public: // Constructs a single-node sequence _StateSeq(_Nfa& __ss, _StateIdT __s, _StateIdT __e = _S_invalid_state_id) : _M_nfa(__ss), _M_start(__s), _M_end1(__s), _M_end2(__e) { } // Constructs a split sequence from two other sequencces _StateSeq(const _StateSeq& __e1, const _StateSeq& __e2) : _M_nfa(__e1._M_nfa), _M_start(_M_nfa._M_insert_alt(__e1._M_start, __e2._M_start)), _M_end1(__e1._M_end1), _M_end2(__e2._M_end1) { }
// Constructs a split sequence from a single sequence _StateSeq(const _StateSeq& __e, _StateIdT __id) : _M_nfa(__e._M_nfa), _M_start(_M_nfa._M_insert_alt(__id, __e._M_start)), _M_end1(__id), _M_end2(__e._M_end1) { }
// Constructs a copy of a %_StateSeq _StateSeq(const _StateSeq& __rhs) : _M_nfa(__rhs._M_nfa), _M_start(__rhs._M_start), _M_end1(__rhs._M_end1), _M_end2(__rhs._M_end2) { }
_StateSeq& operator=(const _StateSeq& __rhs);
_StateIdT _M_front() const { return _M_start; }
// Extends a sequence by one. void _M_push_back(_StateIdT __id);
// Extends and maybe joins a sequence. void _M_append(_StateIdT __id);
void _M_append(_StateSeq& __rhs);
// Clones an entire sequence. _StateIdT _M_clone();
private: _Nfa& _M_nfa; _StateIdT _M_start; _StateIdT _M_end1; _StateIdT _M_end2;
};
//@} regex-detail _GLIBCXX_END_NAMESPACE_VERSION } // namespace __detail } // namespace std
#include <bits/regex_nfa.tcc>
|