10#include <boost/filesystem.hpp>
11#include <boost/filesystem/fstream.hpp>
12#include <boost/tokenizer.hpp>
13#include <boost/iostreams/device/file.hpp>
14#include <boost/iostreams/filtering_stream.hpp>
15#include <boost/iostreams/filter/gzip.hpp>
16#include <boost/iostreams/filter/bzip2.hpp>
41 Xapian::WritableDatabase& ioDatabase) {
48 Xapian::TermGenerator lTermGenerator;
49 lTermGenerator.set_database (ioDatabase);
50 lTermGenerator.set_document (ioDocument);
56 for (Place::TermSetMap_T::const_iterator itStringSet = lTermSetMap.begin();
57 itStringSet != lTermSetMap.end(); ++itStringSet) {
59 const Weight_T& lWeight = itStringSet->first;
60 const Xapian::termcount lWDFInc =
61 static_cast<const Xapian::termcount
> (lWeight);
65 for (Place::StringSet_T::const_iterator itString = lTermSet.begin();
66 itString != lTermSet.end(); ++itString) {
67 const std::string& lString = *itString;
68 lTermGenerator.index_text (lString, lWDFInc);
77 for (Place::StringSet_T::const_iterator itTerm = lSpellingSet.begin();
78 itTerm != lSpellingSet.end(); ++itTerm) {
79 const std::string& lTerm = *itTerm;
80 ioDatabase.add_spelling (lTerm);
86 <<
" into " << ioDocument.get_description());
90 void IndexBuilder::addDocumentToIndex(Xapian::WritableDatabase& ioDatabase,
92 const OTransliterator& iTransliterator) {
95 Xapian::Document lDocument;
99 const RawDataString_T& lRawDataString = ioPlace.getRawDataString();
104 lDocument.set_data (lRawDataString);
108 ioPlace.buildIndexSets (iTransliterator);
114 const Xapian::docid& lDocID = ioDatabase.add_document (lDocument);
118 ioPlace.setDocID (lDocID);
123 buildSearchIndex (Xapian::WritableDatabase* ioXapianDB_ptr,
124 const DBType& iSQLDBType, soci::session* ioSociSessionPtr,
125 std::istream& iPORFileStream,
133 std::string itReadLine;
134 while (std::getline (iPORFileStream, itReadLine)) {
143 if (!iIncludeNonIATAPOR) {
144 const unsigned short lFirstSeparatorPos = itReadLine.find_first_of (
"^");
145 if (lFirstSeparatorPos != 3) {
154 ++oNbOfEntriesInPORFile;
162 PORStringParser lStringParser (itReadLine);
165 const Location& lLocation = lStringParser.generateLocation();
177 const std::string& lCommonName = lLocation.getCommonName();
178 if (lCommonName ==
"NotAvailable") {
183 lPlace.setLocation (lLocation);
187 if (ioXapianDB_ptr != NULL) {
188 IndexBuilder::addDocumentToIndex (*ioXapianDB_ptr, lPlace,
193 if (ioSociSessionPtr != NULL) {
204 ++oNbOfEntries; ++oNbOfEntriesInPORFile;
207 if (oNbOfEntries % 1000 == 0) {
208 std::cout.imbue( std::locale (std::locale::classic(),
new NumSep));
209 std::cout <<
"Number of actually parsed records: " << oNbOfEntries
210 <<
", out of " << oNbOfEntriesInPORFile
211 <<
" records in the POR data file so far" << std::endl;
218 lPlace.resetMatrix();
219 lPlace.resetIndexSets();
236 soci::session* lSociSession_ptr = NULL;
237 Xapian::WritableDatabase* lXapianDatabase_ptr = NULL;
247 if (iShouldIndexPORInXapian) {
253 lXapianDatabase_ptr =
255 assert (lXapianDatabase_ptr != NULL);
259 << iTravelIndexFilePath
260 <<
"') has been re-created, checked and opened");
271 lXapianDatabase_ptr->begin_transaction();
275 << iTravelIndexFilePath <<
"')");
283 if (iShouldAddPORInSQLDB) {
304 if (lSociSession_ptr == NULL) {
305 std::ostringstream errorStr;
306 errorStr <<
"Error when trying to connect to the SQL database ('"
307 << iSQLDBConnStr <<
"')";
309 throw SQLDatabaseImpossibleConnectionException (errorStr.str());
311 assert (lSociSession_ptr != NULL);
325 const PORFileHelper lPORFileHelper (iPORFilePath);
326 std::istream& lPORFileStream = lPORFileHelper.getFileStreamRef();
331 oNbOfEntries = buildSearchIndex (lXapianDatabase_ptr, iSQLDBType,
332 lSociSession_ptr, lPORFileStream,
333 iIncludeNonIATAPOR, iTransliterator);
339 if (iShouldIndexPORInXapian) {
340 assert (lXapianDatabase_ptr != NULL);
341 lXapianDatabase_ptr->commit_transaction();
354 if (iShouldIndexPORInXapian) {
355 assert (lXapianDatabase_ptr != NULL);
356 lXapianDatabase_ptr->close();
360 if (iShouldAddPORInSQLDB) {
365 assert (lSociSession_ptr != NULL);
373 assert (lSociSession_ptr != NULL);
#define OPENTREP_LOG_ERROR(iToBeLogged)
#define OPENTREP_LOG_DEBUG(iToBeLogged)
static void terminateSQLDBSession(const DBType &, const SQLDBConnectionString_T &, soci::session &)
static void createSQLDBTables(soci::session &)
static soci::session * initSQLDBSession(const DBType &, const SQLDBConnectionString_T &)
static void createSQLDBIndexes(soci::session &)
static void insertPlaceInDB(soci::session &, const Place &)
static FacPlace & instance()
static FacXapianDB & instance()
Xapian::WritableDatabase * create(const TravelDBFilePath_T &, const int &iXapianActionFlag)
static void recreateXapianDirectory(const std::string &iTravelDBFilePath)
Class modelling a place/POR (point of reference).
std::map< const Weight_T, StringSet_T > TermSetMap_T
std::string describeSets() const
const StringSet_T & getSpellingSet() const
std::set< std::string > StringSet_T
std::string describeKey() const
const TermSetMap_T & getTermSetMap() const
void addToXapian(const Place &iPlace, Xapian::Document &ioDocument, Xapian::WritableDatabase &ioDatabase)
bool shouldAddPORInSQLDB_T
unsigned int NbOfDBEntries_T
bool shouldIndexPORInXapian_T
bool shouldIndexNonIATAPOR_T
Enumeration of database types.