52 #ifndef _CACIFFILESYSTEM
53 #define _CACIFFILESYSTEM
54 #include "libGIFTAcInvertedFile/include/uses-declarations.h"
56 #include "libMRML/include/TID.h"
57 #include "libMRML/include/CSelfDestroyPointer.h"
58 #include "libMRML/include/CArraySelfDestroyPointer.h"
59 #include "libGIFTAcInvertedFile/include/CDocumentFrequencyList.h"
60 #include "libMRML/include/CMutex.h"
62 #include "libGIFTAcInvertedFile/include/CADIHash.h"
63 #include "libGIFTAcURL2FTS/include/CAcURL2FTS.h"
64 #include "libGIFTAcInvertedFile/include/CAcInvertedFile.h"
71 #define HASH_MAP hash_map
78 #include "libMRML/include/CMagic.h"
81 typedef TID TFeatureID ;
156 streampos inPosition,
157 ostream& inOpenOffsetFile);
268 double inDocumentFrequency)
const;
/**
 * Translate a URL to its document ID.
 *
 * @param inURL the URL to look up.
 * @return a (bool, TID) pair. NOTE(review): presumably the bool reports
 *         whether the URL was found and the TID is only meaningful when
 *         it is true -- confirm against the implementation.
 */
277 virtual pair<bool,TID>
URLToID(
const string& inURL)
const;
289 list<TID>::size_type)
const;
299 list<CAccessorElement>::size_type inSize)
const;
/**
 * Conversion to bool.
 *
 * NOTE(review): the generated member index describes this operator as a
 * test of whether the inverted file is correctly constructed -- confirm
 * against the implementation.
 */
320 operator bool()
const;
An accessor to an inverted file.
Definition: CAcIFFileSystem.h:93
CDocumentFrequencyList * DIDToFeatureList(TID inDID) const
List of features contained by a document with ID inDID.
list< TID > * getAllFeatureIDs() const
Getting a list of all features contained in this.
string IDToURL(TID inID) const
Translate a DocumentID to a URL (for output)
CSelfDestroyPointer< istream > mInvertedFile
The inverted file.
Definition: CAcIFFileSystem.h:117
bool operator()() const
For testing whether the inverted file is correctly constructed.
void getAllAccessorElements(list< CAccessorElement > &) const
List of triplets (ID,imageURL,thumbnailURL) of all the documents present in the inverted file...
void getRandomIDs(list< TID > &, list< TID >::size_type) const
Get a given number of random document IDs.
This class captures the structure of an XML element.
Definition: CXMLElement.h:51
A list of Document Frequency Elements (the main part of an inverted file)
Definition: CDocumentFrequencyList.h:58
double FeatureToCollectionFrequency(TFeatureID) const
Collection frequency for a given feature.
CAcIFFileSystem(const CXMLElement &inCollectionElement)
This opens an existing inverted file, and then initializes this structure.
string mOffsetFileName
Name of the Offset file.
Definition: CAcIFFileSystem.h:129
double DIDToDFSquareSum(TID) const
Returns the document-frequency square sum for a given document ID.
HASH_MAP< TID, unsigned int > mFeatureDescription
map from the feature ID to the feature description
Definition: CAcIFFileSystem.h:145
CDocumentFrequencyList * FeatureToList(TFeatureID) const
List of documents containing the feature.
void getRandomAccessorElements(list< CAccessorElement > &outResult, list< CAccessorElement >::size_type inSize) const
For drawing random sets.
string mInvertedFileBuffer
A buffer, if the inverted file is to be held in ram.
Definition: CAcIFFileSystem.h:109
CADIHash.
Definition: CADIHash.h:53
TID getMaximumFeatureID() const
This is interesting for browsing.
An accessor to an inverted file.
Definition: CAcInvertedFile.h:83
virtual pair< bool, CAccessorElement > IDToAccessorElement(TID inID) const
Translate a DocumentID to an accessor Element.
bool checkConsistency()
Check the consistency of the inverted file system accessed by this accessor.
~CAcIFFileSystem()
Destructor.
CDocumentFrequencyList * getFeatureFile(string inFileName) const
loads a *.fts file.
CIDToOffset mIDToOffset
map from feature id to the offset for this feature
Definition: CAcIFFileSystem.h:137
string mInvertedFileName
Name of the inverted file.
Definition: CAcIFFileSystem.h:126
CSelfDestroyPointer< CAcURL2FTS > mURL2FTS
In order to have just one parent, I have to limit myself to single inheritance.
Definition: CAcIFFileSystem.h:103
double DIDToMaxDocumentFrequency(TID) const
returns the maximum document frequency for one document ID
int size() const
The number of images in this accessor.
unsigned int getFeatureDescription(TID inFeatureID) const
What kind of feature is the feature with ID inFeatureID?
double DIDToSquareDFLogICFSum(TID) const
Returns this function for a given document ID.
This class offers an abstraction from the locking method used.
Definition: CMutex.h:40
CMutex mMutex
the mutex for multi threading
Definition: CAcIFFileSystem.h:97
bool init(bool)
called by constructors
void writeOffsetFileElement(TID inFeatureID, streampos inPosition, ostream &inOpenOffsetFile)
Add a pair of (FeatureID, Offset) to the open offset file (helper function for inverted file construction).
bool generateInvertedFile()
Generating an inverted File, if there is none.
HASH_MAP< TID, streampos > CIDToOffset
map from feature id to the offset for this feature
Definition: CAcIFFileSystem.h:135
bool findWithinStream(TID inFeatureID, TID inDocumentID, double inDocumentFrequency) const
Is the Document with inDocumentID contained in the document frequency list of the feature inFeatureID...
HASH_MAP< TID, double > mFeatureToCollectionFrequency
map from feature to the collection frequency
Definition: CAcIFFileSystem.h:140
void getAllIDs(list< TID > &) const
List of the IDs of all documents present in the inverted file.
string mFeatureDescriptionFileName
Name for the file with the feature description.
Definition: CAcIFFileSystem.h:132
CADIHash mDocumentInformation
additional information about the document like, e.g.
Definition: CAcIFFileSystem.h:150
virtual pair< bool, TID > URLToID(const string &inURL) const
Translate a URL to its document ID.
bool newGenerateInvertedFile()
Generating an inverted File, if there is none.
ifstream mOffsetFile
Feature -> Offset in inverted file.
Definition: CAcIFFileSystem.h:120
CDocumentFrequencyList * URLToFeatureList(string inURL) const
List of features contained by a document.
ifstream mFeatureDescriptionFile
File of feature descriptions.
Definition: CAcIFFileSystem.h:123
TID mMaximumFeatureID
the maximum feature ID arising in this file
Definition: CAcIFFileSystem.h:105
string mTemporaryIndexingFileBase
Some place for putting temporary indexing data.
Definition: CAcIFFileSystem.h:115