10 #ifndef _STRUS_ANALYZER_DOCUMENT_ANALYZER_MAP_INTERFACE_HPP_INCLUDED
11 #define _STRUS_ANALYZER_DOCUMENT_ANALYZER_MAP_INTERFACE_HPP_INCLUDED
23 class DocumentAnalyzerContextInterface;
25 class DocumentAnalyzerInstanceInterface;
39 const std::string& mimeType,
40 const std::string& scheme)
const=0;
47 const std::string& mimeType,
48 const std::string& scheme,
55 const std::string& mimeType,
56 const std::string& scheme)
const=0;
64 const std::string& content,
Structure describing the internal representation of a document analyzer map for introspection.
Definition: documentAnalyzerMapView.hpp:44
Defines a description of the properties of an original document processed by the segmenter.
Definition: documentClass.hpp:21
Defines the context for analyzing multi part documents, iterating on the sub documents defined...
Definition: documentAnalyzerContextInterface.hpp:21
virtual DocumentAnalyzerContextInterface * createContext(const analyzer::DocumentClass &dclass) const =0
Create the context used for analyzing multipart or very big documents.
virtual analyzer::DocumentAnalyzerMapView view() const =0
Return a structure with all definitions for introspection.
Defines a program for analyzing a document, splitting it into normalized terms that can be fed to the...
Definition: documentAnalyzerMapInterface.hpp:28
Structure describing the MIME type plus some attributes that could be relevant for analysis of a docu...
virtual DocumentAnalyzerInstanceInterface * createAnalyzer(const std::string &mimeType, const std::string &scheme) const =0
Declare an analyzer interface to instrument and add with addAnalyzer.
virtual const DocumentAnalyzerInstanceInterface * getAnalyzer(const std::string &mimeType, const std::string &scheme) const =0
Get the analyzer interface assigned to a document class.
virtual void addAnalyzer(const std::string &mimeType, const std::string &scheme, DocumentAnalyzerInstanceInterface *analyzer)=0
Declare an analyzer to be used for the analysis of a specific document class.
Defines a program for analyzing a document, splitting it into normalized terms that can be fed to the...
Definition: documentAnalyzerInstanceInterface.hpp:43
Structure of a document as result of document analysis.
virtual analyzer::Document analyze(const std::string &content, const analyzer::DocumentClass &dclass) const =0
Segment and tokenize a document, assign types to tokens and metadata and normalize their values...
Structure of a document created as result of a document analysis.
Definition: document.hpp:25
virtual ~DocumentAnalyzerMapInterface()
Destructor.
Definition: documentAnalyzerMapInterface.hpp:32