#include "CLucene/StdHeader.h"
#ifndef CLUCENE_LITE
#include "IndexWriter.h"
#ifdef USE_INFO_STREAM
# include <ostream>
#endif
#include "CLucene/document/Document.h"
#include "CLucene/store/Directory.h"
#include "CLucene/store/Lock.h"
#include "CLucene/store/TransactionalRAMDirectory.h"
#include "CLucene/util/VoidList.h"
#include "DocumentWriter.h"
#include "SegmentInfos.h"
#include "SegmentMerger.h"
using
namespace
std;
using
namespace
lucene::store;
using
namespace
lucene::util;
namespace
lucene{
namespace
index {
/**
 * Builds a writer for the index located at the file-system path `path`.
 *
 * The directory is obtained through FSDirectory::getDirectory(path, create)
 * and the writer marks itself as the owner of that directory (ownDir), which
 * close() consults when deciding whether to close the directory.
 *
 * @param path   location of the index; must not be NULL
 * @param a      analyzer passed on to the DocumentWriter in addDocument()
 * @param create forwarded to FSDirectory::getDirectory() and _IndexWriter()
 */
IndexWriter::IndexWriter(const char_t* path,
                         lucene::analysis::Analyzer& a,
                         const bool create)
    : directory( FSDirectory::getDirectory(path, create) ),
      analyzer(a),
      segmentInfos (* new SegmentInfos)
{
    // NOTE(review): this check runs after `path` has already been handed to
    // FSDirectory::getDirectory() in the initializer list above.
    CND_PRECONDITION(path != NULL, "path is NULL")

    // The directory was obtained by this constructor rather than supplied
    // by the caller.
    ownDir = true;

    // Remaining set-up is shared with the Directory-based constructor.
    _IndexWriter ( create );
}
/**
 * Builds a writer on top of a caller-supplied Directory.
 *
 * The writer does not mark itself as owner of `d` (ownDir is false), so
 * close() only closes the directory when explicitly asked to.
 *
 * @param d      directory holding the index
 * @param a      analyzer passed on to the DocumentWriter in addDocument()
 * @param create forwarded to _IndexWriter()
 */
IndexWriter::IndexWriter(lucene::store::Directory& d,
                         lucene::analysis::Analyzer& a,
                         const bool create)
    : directory(d),
      analyzer(a),
      segmentInfos (* new SegmentInfos)
{
    // The directory belongs to the caller.
    ownDir = false;

    // Remaining set-up is shared with the path-based constructor.
    _IndexWriter ( create );
}
void
IndexWriter::_IndexWriter(
const
bool
create){
maxFieldLength = IndexWriter::FIELD_TRUNC_POLICY__WARN;
ramDirectory =
new
lucene::store::TransactionalRAMDirectory;
CND_CONDITION(ramDirectory != NULL,
"ramDirectory is NULL"
)
infoStream = NULL;
writeLock = NULL;
mergeFactor = 10;
maxMergeDocs = INT_MAX;
LuceneLock* newLock = directory.makeLock(_T(
"write.lock"
));
CND_CONDITION(newLock != NULL,
"No memory could be allocated for LuceneLock newLock"
)
if
(!newLock->obtain()){
_DELETE(newLock);
_finalize();
_THROWC(
"Index locked for write or no write access."
);
}
writeLock = newLock;
LuceneLock* lock = directory.makeLock(_T(
"commit.lock"
));
CND_CONDITION(lock != NULL,
"No memory could be allocated for LuceneLock lock"
)
IndexWriterLockWith with ( lock,
this
,create );
LOCK_MUTEX(DIRECTORIES_MUTEX);
with.run();
UNLOCK_MUTEX(DIRECTORIES_MUTEX);
_DELETE(lock);
}
void
IndexWriter::_finalize(){
if
(writeLock != NULL){
writeLock->release();
_DELETE( writeLock );
}
_DELETE(ramDirectory)
if
(&segmentInfos != NULL){
delete
&segmentInfos;
}
}
/**
 * Destructor; all clean-up is delegated to _finalize().
 */
IndexWriter::~IndexWriter()
{
    _finalize();
}
/**
 * Body executed by IndexWriterLockWith: writes the writer's segment infos
 * to its directory when `create` is set, otherwise reads them from it.
 *
 * @return always NULL
 */
void* IndexWriterLockWith::doBody() {
    CND_PRECONDITION(writer != NULL, "writer is NULL")

    if (!create) {
        // Existing index: load the current segment infos.
        writer->segmentInfos.read(writer->directory);
    } else {
        // New index: write out the (initial) segment infos.
        writer->segmentInfos.write(writer->directory);
    }

    return NULL;
}
/**
 * Body executed by IndexWriterLockWith2: writes the writer's segment infos
 * to its directory and then has the writer delete the segments listed in
 * segmentsToDelete.
 *
 * @return always NULL
 */
void* IndexWriterLockWith2::doBody()
{
    CND_PRECONDITION(writer != NULL, "writer is NULL")

    // Write the new segment infos first, then remove the old segments.
    writer->segmentInfos.write(writer->directory);
    writer->deleteSegments(*segmentsToDelete);

    return NULL;
}
/**
 * Flushes the RAM segments, closes the in-memory directory and releases the
 * write lock. The index directory itself is closed when the caller asks
 * for it or when the writer owns the directory (ownDir).
 *
 * The whole operation is bracketed by LOCK_MUTEX/UNLOCK_MUTEX on close_LOCK.
 *
 * @param closeDir request that the index directory is closed as well
 */
void IndexWriter::close(const bool closeDir) {
    LOCK_MUTEX(close_LOCK);

    flushRamSegments();
    ramDirectory->close();

    const bool shouldCloseDirectory = closeDir || ownDir;
    if (shouldCloseDirectory) {
        directory.close();
    }

    if (writeLock != NULL) {
        writeLock->release();
        _DELETE( writeLock );
    }

    UNLOCK_MUTEX(close_LOCK);
}
/**
 * Sums the docCount of every entry in segmentInfos.
 *
 * The summation is bracketed by LOCK_MUTEX/UNLOCK_MUTEX on docCount_LOCK.
 *
 * @return the summed document count
 */
int_t IndexWriter::docCount() {
    int_t total = 0;

    LOCK_MUTEX(docCount_LOCK);
    for (uint_t idx = 0; idx < segmentInfos.size(); ++idx) {
        total += segmentInfos.info(idx).docCount;
    }
    UNLOCK_MUTEX(docCount_LOCK);

    return total;
}
/**
 * Adds one document to the index.
 *
 * The document is written as a new single-document segment into the
 * in-memory directory inside a transStart()/transCommit() pair; afterwards
 * maybeMergeSegments() is given the chance to merge. If anything throws,
 * the RAM directory transaction is aborted, THIS_LOCK is unlocked and the
 * exception is rethrown.
 *
 * @param doc the document to add
 * @throws rethrows whatever the document writer or the merge throws
 */
void IndexWriter::addDocument(lucene::document::Document& doc) {
    LOCK_MUTEX(THIS_LOCK);
    CND_PRECONDITION(ramDirectory != NULL, "ramDirectory is NULL")

    ramDirectory->transStart();
    try {
        char_t* segmentName = newSegmentName();
        CND_CONDITION(segmentName != NULL, "segmentName is NULL")
        _TRY {
            DocumentWriter* dw = new DocumentWriter(
                *ramDirectory, analyzer, maxFieldLength
            );
            CND_CONDITION(dw != NULL, "dw is NULL")
            _TRY {
                dw->addDocument(segmentName, doc);
            } _FINALLY(
                _DELETE(dw);
            );

            // Register the freshly written one-document segment.
            SegmentInfo *Si = new SegmentInfo(segmentName, 1, *ramDirectory);
            CND_CONDITION(Si != NULL, "Si is NULL")
            segmentInfos.push_back(Si);
        } _FINALLY(
            _DELETE_ARRAY(segmentName);
        );

        maybeMergeSegments();
    }
    catch (...) {
        ramDirectory->transAbort();
        // Unlock before propagating; otherwise THIS_LOCK would stay locked
        // after a failed add.
        UNLOCK_MUTEX(THIS_LOCK);
        throw;
    }

    ramDirectory->transCommit();
    UNLOCK_MUTEX(THIS_LOCK);
}
void
IndexWriter::optimize() {
LOCK_MUTEX(optimize_LOCK);
flushRamSegments();
while
(segmentInfos.size() > 1 ||
(segmentInfos.size() == 1 &&
(SegmentReader::hasDeletions(segmentInfos.info(0)) ||&segmentInfos.info(0).dir != &directory ))){
int_t minSegment = segmentInfos.size() - mergeFactor;
mergeSegments(minSegment < 0 ? 0 : minSegment);
}
UNLOCK_MUTEX(optimize_LOCK);
}
/**
 * Merges the indexes found in the given directories into this index.
 *
 * The index is optimized, the segment infos of every directory are read and
 * appended to this writer's segmentInfos, and the index is optimized again.
 * Bracketed by LOCK_MUTEX/UNLOCK_MUTEX on addIndexes_LOCK.
 *
 * @param dirs       array of directories to read segment infos from
 * @param dirsLength number of entries in dirs
 */
void IndexWriter::addIndexes(Directory** dirs, const int_t dirsLength) {
    CND_PRECONDITION(dirs != NULL, "dirs is NULL")
    // NOTE(review): the message says "negative" but the check also rejects
    // 0 -- confirm which of the two is intended.
    CND_PRECONDITION(dirsLength > 0, "dirsLength is a negative number")

    LOCK_MUTEX(addIndexes_LOCK);

    optimize();

    for (int_t dirIdx = 0; dirIdx < dirsLength; ++dirIdx) {
        // NOTE(review): the `false` argument presumably stops this temporary
        // list from deleting its entries on destruction, since their
        // addresses are handed to segmentInfos below -- confirm against
        // SegmentInfos.
        SegmentInfos incoming( false );
        incoming.read( *dirs[dirIdx] );

        for (uint_t segIdx = 0; segIdx < incoming.size(); ++segIdx) {
            segmentInfos.push_back(&incoming.info(segIdx));
        }
    }

    optimize();

    UNLOCK_MUTEX(addIndexes_LOCK);
}
void
IndexWriter::flushRamSegments(){
CND_PRECONDITION(ramDirectory != NULL,
"ramDirectory is NULL"
)
int_t minSegment = segmentInfos.size()-1;
CND_CONDITION(minSegment >= -1,
"minSegment must be >= -1"
)
int_t docCount = 0;
while
(minSegment >= 0 &&
&segmentInfos.info(minSegment).dir == ramDirectory) {
docCount += segmentInfos.info(minSegment).docCount;
minSegment--;
}
if
(minSegment < 0 ||
(docCount + segmentInfos.info(minSegment).docCount) > mergeFactor ||
!(&segmentInfos.info(segmentInfos.size()-1).dir == ramDirectory))
minSegment++;
CND_CONDITION(minSegment >= 0,
"minSegment must be >= 0"
)
if
(
static_cast
<uint_t>(minSegment) >= segmentInfos.size())
return
;
mergeSegments(minSegment);
}
void
IndexWriter::maybeMergeSegments() {
long_t targetMergeDocs = mergeFactor;
while
(targetMergeDocs <= maxMergeDocs) {
int_t minSegment = segmentInfos.size();
int_t mergeDocs = 0;
while
(--minSegment >= 0) {
SegmentInfo& si = segmentInfos.info(minSegment);
if
(si.docCount >= targetMergeDocs)
break
;
mergeDocs += si.docCount;
}
if
(mergeDocs >= targetMergeDocs){
mergeSegments(minSegment+1);
}
else
break
;
targetMergeDocs *= mergeFactor;
}
}
void
IndexWriter::mergeSegments(
const
uint_t minSegment) {
const
char_t* mergedName = newSegmentName();
int_t mergedDocCount = 0;
#ifdef USE_INFO_STREAM
if
(infoStream != NULL)
*infoStream<<
"merging segments"
<<
"\n"
;
#endif
SegmentMerger merger(directory, mergedName);
lucene::util::VoidList<SegmentReader*> segmentsToDelete;
for
(uint_t i = minSegment; i < segmentInfos.size(); i++) {
SegmentInfo& si = segmentInfos.info(i);
#ifdef USE_INFO_STREAM
if
( infoStream != NULL)
*infoStream <<
" "
<< si.name <<
" ("
<< si.docCount <<
" docs)"
;
#endif
SegmentReader* reader =
new
SegmentReader(si,
false
);
merger.add(*reader);
if
((&reader->directory == &
this
->directory) ||
(&reader->directory ==
this
->ramDirectory))
segmentsToDelete.push_back(reader);
mergedDocCount += si.docCount;
}
#ifdef USE_INFO_STREAM
if
(infoStream != NULL) {
*infoStream<<
"\n into "
<<mergedName<<
" ("
<<mergedDocCount<<
" docs)"
;
}
#endif
merger.merge();
while
( segmentInfos.size() > minSegment )
segmentInfos.pop_back();
segmentInfos.push_back(
new
SegmentInfo(mergedName, mergedDocCount, directory));
LuceneLock* lock = directory.makeLock(_T(
"commit.lock"
));
IndexWriterLockWith2 with ( lock,
this
,&segmentsToDelete );
LOCK_MUTEX(DIRECTORIES_MUTEX);
with.run();
UNLOCK_MUTEX(DIRECTORIES_MUTEX);
delete
lock;
delete
[] mergedName;
}
/**
 * Deletes the files of the given segment readers.
 *
 * First retries the files returned by readDeleteableFiles(). Then, for each
 * reader, files in this writer's directory are deleted through the
 * overload that collects failures into `deletable`, while files in any
 * other directory are deleted directly. Finally the collected list is
 * passed to writeDeleteableFiles().
 *
 * @param segments readers whose files are to be removed
 */
void IndexWriter::deleteSegments(lucene::util::VoidList<SegmentReader*> &segments) {
    StringArrayConst deletable;
    deletable.setDoDelete(DELETE_TYPE_DELETE_ARRAY);

    // Retry the files recorded earlier.
    StringArrayConst &previouslyDeletable = readDeleteableFiles();
    deleteFiles(previouslyDeletable, deletable);
    delete &previouslyDeletable;

    for (uint_t idx = 0; idx < segments.size(); ++idx) {
        SegmentReader* reader = segments.at(idx);
        StringArrayConst& segmentFiles = reader->files();

        const bool inIndexDirectory = (&reader->directory == &this->directory);
        if (inIndexDirectory) {
            deleteFiles(segmentFiles, deletable);
        } else {
            deleteFiles(segmentFiles, reader->directory);
        }

        delete &segmentFiles;
    }

    writeDeleteableFiles(deletable);
}
void
IndexWriter::deleteFiles(
const
StringArrayConst& files, lucene::store::Directory& directory) {
for
(uint_t i = 0; i < files.size(); i++)
directory.deleteFile( files[i] );
}
void
IndexWriter::deleteFiles(
const
StringArrayConst& files, StringArrayConst& deletable) {
for
(uint_t i = 0; i < files.size(); i++) {
const
char_t* file = files[i];
try
{
directory.deleteFile(file);
}
catch
(THROW_TYPE e) {
if
(directory.fileExists(file)) {
#ifdef USE_INFO_STREAM
if
(infoStream != NULL)
*infoStream << e.errstr <<
"; Will re-try later.\n"
;
#endif
deletable.push_back(stringDuplicate(file));
}
}
}
}
/**
 * Reads the list of file names stored in the "deletable" file of the index
 * directory.
 *
 * The file holds an int count followed by that many strings. When the file
 * does not exist an empty list is returned.
 *
 * @return reference to a heap-allocated list; the caller deletes it
 *         (deleteSegments() does so with `delete &list`)
 * @throws rethrows anything thrown while opening or reading the file; the
 *         partially filled list is freed first so that it does not leak
 */
StringArrayConst& IndexWriter::readDeleteableFiles() {
    StringArrayConst* result = new StringArrayConst(true, DELETE_TYPE_DELETE_ARRAY);

    try {
        if (directory.fileExists(_T("deletable"))) {
            InputStream& input = directory.openFile(_T("deletable"));
            _TRY {
                for (int_t i = input.readInt(); i > 0; i--)
                    result->push_back(input.readString());
            } _FINALLY(
                input.close();
                delete &input;
            );
        }
    }
    catch (...) {
        // The caller never receives the list on failure, so it has to be
        // released here.
        delete result;
        throw;
    }

    return *result;
}
/**
 * Stores the given file names in the index directory.
 *
 * The list is written as an int count followed by each name into
 * "deleteable.new", which is then renamed to "deletable".
 *
 * @param files names to record
 */
void IndexWriter::writeDeleteableFiles(StringArrayConst& files) {
    OutputStream& output = directory.createFile(_T("deleteable.new"));
    _TRY {
        output.writeInt(files.size());
        for (uint_t idx = 0; idx < files.size(); ++idx) {
            output.writeString( files.at(idx) );
        }
    } _FINALLY(
        output.close();
        delete &output;
    );

    // Replace the previous list only once the new one is fully written.
    directory.renameFile(_T("deleteable.new"), _T("deletable"));
}
/**
 * Produces the name for the next segment: "_" joined with the string form
 * of segmentInfos.counter, which is incremented in the process.
 *
 * The counter update is bracketed by LOCK_MUTEX/UNLOCK_MUTEX on
 * newSegmentName_LOCK.
 *
 * @return the string produced by Misc::join(); callers in this file release
 *         it with delete[] / _DELETE_ARRAY
 */
char_t* IndexWriter::newSegmentName() {
    char_t counterText[9];

    LOCK_MUTEX(newSegmentName_LOCK);
    // NOTE(review): the meaning of the third argument (9) depends on
    // integerToString's signature, which is not visible here -- confirm.
    integerToString(segmentInfos.counter++, counterText, 9);
    UNLOCK_MUTEX(newSegmentName_LOCK);

    return lucene::util::Misc::join( _T("_"), counterText );
}
}}
#endif