Interface for building the automaton for detecting lexems used as basic entities by pattern matching in text.
More...
#include <patternLexerInstanceInterface.hpp>
|
virtual | ~PatternLexerInstanceInterface () |
| Destructor. More...
|
|
virtual void | defineOption (const std::string &name, double value)=0 |
| Define an option value for the compilation. More...
|
|
virtual void | defineLexemName (unsigned int id, const std::string &name)=0 |
| Assign a name to a lexem or symbol identifier. More...
|
|
virtual void | defineLexem (unsigned int id, const std::string &expression, unsigned int resultIndex, unsigned int level, analyzer::PositionBind posbind)=0 |
| Define a pattern for detecting a basic lexem of this pattern matching lexer. More...
|
|
virtual void | defineSymbol (unsigned int id, unsigned int lexemid, const std::string &name)=0 |
| Define a symbol, an instance of a basic lexem, that gets a different id than the basic lexem. More...
|
|
virtual unsigned int | getSymbol (unsigned int lexemid, const std::string &name) const =0 |
| Get the value of a defined symbol. More...
|
|
virtual const char * | getLexemName (unsigned int id) const =0 |
| Retrieve the name given to a lexem or symbol. More...
|
|
virtual bool | compile ()=0 |
| Compile all patterns and symbols defined. More...
|
|
virtual
PatternLexerContextInterface * | createContext () const =0 |
| Create the context to process a chunk of text with this text matcher. More...
|
|
virtual analyzer::FunctionView | view () const =0 |
| Get the definition of the function as structure for introspection. More...
|
|
Interface for building the automaton for detecting lexems used as basic entities by pattern matching in text.
virtual strus::PatternLexerInstanceInterface::~PatternLexerInstanceInterface |
( |
| ) |
|
|
inlinevirtual |
virtual bool strus::PatternLexerInstanceInterface::compile |
( |
| ) |
|
|
pure virtual |
Compile all patterns and symbols defined.
- Returns
- true on success, false on error (error reported in error buffer)
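virtual PatternLexerContextInterface* strus::PatternLexerInstanceInterface::createContext ( ) const | pure virtual
Create the context to process a chunk of text with this text matcher.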
- Returns
- the lexer context
virtual void strus::PatternLexerInstanceInterface::defineLexem |
( |
unsigned int |
id, |
|
|
const std::string & |
expression, |
|
|
unsigned int |
resultIndex, |
|
|
unsigned int |
level, |
|
|
analyzer::PositionBind |
posbind |
|
) |
| |
|
pure virtual |
Define a pattern for detecting a basic lexem of this pattern matching lexer.
- Parameters
-
[in] | id | identifier given to the lexem, 0 if the lexem is not part of the output (only used for assigning ordinal positions). |
[in] | expression | expression string defining the lexem (usually a regular expression; approximate matching with an edit distance is declared, for convenience, by appending a '~' followed by the edit distance, e.g. "[Hh]ello [Ww]orld ~2") |
[in] | resultIndex | index of subexpression that defines the result lexem, 0 for the whole match |
[in] | level | weight of this lexical pattern. A match of this lexical pattern suppresses all lexems of lower level that are completely covered by one lexem of this pattern |
[in] | posbind | defines how the ordinal position is assigned to the result lexem |
virtual void strus::PatternLexerInstanceInterface::defineLexemName |
( |
unsigned int |
id, |
|
|
const std::string & |
name |
|
) |
| |
|
pure virtual |
Assign a name to a lexem or symbol identifier.
- Parameters
-
[in] | id | identifier given to the lexem or symbol |
[in] | name | name assigned to the lexem or symbol |
- Note
- Has no meaning other than for retrieval with getLexemName(unsigned int) const
virtual void strus::PatternLexerInstanceInterface::defineOption |
( |
const std::string & |
name, |
|
|
double |
value |
|
) |
| |
|
pure virtual |
Define an option value for the compilation.
- Parameters
-
[in] | name | option name |
[in] | value | option value |
virtual void strus::PatternLexerInstanceInterface::defineSymbol |
( |
unsigned int |
id, |
|
|
unsigned int |
lexemid, |
|
|
const std::string & |
name |
|
) |
| |
|
pure virtual |
Define a symbol, an instance of a basic lexem, that gets a different id than the basic lexem.
- Parameters
-
[in] | id | identifier given to this symbol |
[in] | lexemid | identifier of the basic lexem (defined with defineLexem) this symbol belongs to |
[in] | name | name (value string) of the symbol |
- Note
- The idea of symbols is to keep the automaton for lexical pattern detection small and to detect symbols as a combination of a lexical pattern match plus an ordinary dictionary lookup
virtual const char* strus::PatternLexerInstanceInterface::getLexemName |
( |
unsigned int |
id | ) |
const |
|
pure virtual |
Retrieve the name given to a lexem or symbol.
- Parameters
-
[in] | id | identifier of the lexem or symbol |
- Returns
- the name of the lexem or symbol or 0, if not defined
virtual unsigned int strus::PatternLexerInstanceInterface::getSymbol |
( |
unsigned int |
lexemid, |
|
|
const std::string & |
name |
|
) |
| const |
|
pure virtual |
Get the value of a defined symbol.
- Parameters
-
[in] | lexemid | identifier of the basic lexem this symbol belongs to |
[in] | name | name (value string) of the symbol |
- Returns
- the symbol identifier or 0, if not defined
virtual analyzer::FunctionView strus::PatternLexerInstanceInterface::view ( ) const | pure virtual
Get the definition of the function as structure for introspection.
- Returns
- structure for introspection
The documentation for this class was generated from the following file: