FAQ
Hi There

I am noticing file handle leaks appearing on Index files. I think the
leaks occur during the Lucene merge operation.
Lsof reports the following:

java 28604 root 213r REG 8,33 1098681
57409621 /var/index/vol201009/_a4w.cfs (deleted)
java 28604 root 214r REG 8,33 35164
57409699 /var/index/vol201009/_a4x.cfs (deleted)
java 28604 root 215r REG 8,33 46139
57409691 /var/index/vol201009/_a4y.cfs (deleted)
java 28604 root 216r REG 8,33 40342
57409673 /var/index/vol201009/_a4z.cfs (deleted)
java 28604 root 217r REG 8,33 44204
57409675 /var/index/vol201009/_a50.cfs (deleted)

We are using Lucene's realtime search feature, so we have handles open on
the index. Could it be that we are not
handling the merge situation correctly? Your ideas are most
appreciated.

The source code of our index class is as follows:

package com.stimulus.archiva.index;
import com.stimulus.util.*;

import java.io.File;
import java.io.IOException;
import java.io.PrintStream;
import org.apache.commons.logging.*;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.*;
import org.apache.lucene.store.FSDirectory;
import com.stimulus.archiva.domain.Config;
import com.stimulus.archiva.exception.*;
import com.stimulus.archiva.language.AnalyzerFactory;
import com.stimulus.archiva.search.*;
import java.util.*;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.store.AlreadyClosedException;
import java.util.concurrent.locks.ReentrantLock;
import java.util.concurrent.*;

public class LuceneIndex extends Thread {

// Documents waiting to be indexed; filled by indexDocument() and drained by
// the IndexProcessor thread.
protected ArrayBlockingQueue<LuceneDocument> queue;
protected static final Log logger =
LogFactory.getLog(LuceneIndex.class.getName());
// Separate log whose debug level gates IndexWriter info-stream output
// (see openIndex()).
protected static final Log indexLog =
LogFactory.getLog("indexlog");
// The single writer for this index; null while the index is closed.
IndexWriter writer = null;
// NOTE(review): both scheduler fields are static but are assigned in the
// instance method startup(), so every LuceneIndex instance overwrites them —
// confirm only one instance is ever started per JVM.
protected static ScheduledExecutorService scheduler;
protected static ScheduledFuture<?> scheduledTask;
// Sentinel document; the IndexProcessor exits when it dequeues this instance.
protected LuceneDocument EXIT_REQ = null;
// Guards writer open/close and all write batches.
ReentrantLock indexLock = new ReentrantLock();
ArchivaAnalyzer analyzer = new ArchivaAnalyzer();
// Optional debug log target, set up by setLog().
File indexLogFile;
PrintStream indexLogOut;
IndexProcessor indexProcessor;
// Human-readable index name used only in log and error messages.
String friendlyName;
// File-system directory that holds the index.
String indexPath;
// Upper bound on documents written per batch before a commit.
int maxSimultaneousDocs;
// Size of the per-batch writer thread pool.
int indexThreads;
// Cached reader handed out by getReader(); null until first requested.
IndexReader reader = null;
// Set to true once a second by the scheduled task started in startup();
// getReader() obtains a fresh reader when it sees the flag.
volatile boolean reopen = false;
FSDirectory fsDirectory;
// Guards the cached reader and the close/reopen of the writer after a
// failed commit.
ReentrantLock readerLock = new ReentrantLock();
enum Status { READY, SHUTDOWN };
// NOTE(review): written by startup()/shutdown() and read by other threads
// without being volatile — confirm visibility is not a concern.
Status status = Status.SHUTDOWN;

/**
 * Creates an index wrapper in the SHUTDOWN state. Nothing is opened here;
 * call {@link #startup()} before indexing or searching.
 *
 * @param queueSize           capacity of the pending-document queue
 * @param exitReq             sentinel document that tells the index processor to exit
 * @param friendlyName        index name used in log and error messages
 * @param indexPath           directory holding the index
 * @param maxSimultaneousDocs maximum number of documents written per batch
 * @param indexThreads        number of threads used to write each batch
 */
public LuceneIndex(int queueSize, LuceneDocument exitReq, String friendlyName,
        String indexPath, int maxSimultaneousDocs, int indexThreads) {
    this.status = Status.SHUTDOWN;
    this.friendlyName = friendlyName;
    this.indexPath = indexPath;
    this.indexThreads = indexThreads;
    this.maxSimultaneousDocs = maxSimultaneousDocs;
    this.EXIT_REQ = exitReq;
    this.queue = new ArrayBlockingQueue<LuceneDocument>(queueSize);
    // Debug info-stream logging is currently disabled:
    // if (indexLog.isDebugEnabled()) {
    //     setLog(friendlyName);
    // }
}


/**
 * @return the maximum number of documents written in one batch
 */
public int getMaxSimultaneousDocs() {
    return this.maxSimultaneousDocs;
}

/**
 * Sets the maximum number of documents written in one batch.
 *
 * @param maxSimultaneousDocs new batch size limit
 */
public void setMaxSimultaneousDocs(int maxSimultaneousDocs) {
    this.maxSimultaneousDocs = maxSimultaneousDocs;
}


/**
 * @return the lock that guards writer open/close and write batches
 */
public ReentrantLock getIndexLock() {
    return this.indexLock;
}

protected void setLog(String logName) {

try {
indexLogFile = getIndexLogFile(logName);
if (indexLogFile!=null) {
if (indexLogFile.length()>10485760)
indexLogFile.delete();
indexLogOut = new PrintStream(indexLogFile);
}
logger.debug("set index log file path
{path='"+indexLogFile.getCanonicalPath()+"'}");
} catch (Exception e) {
logger.error("failed to open index log
file:"+e.getMessage(),e);
}

}

protected File getIndexLogFile(String logName) {
try {
String logfilepath =
Config.getFileSystem().getLogPath()+File.separator+"indexdebug_"+logName+".log";
return new File(logfilepath);
} catch (Exception e) {
logger.error("failed to open index log
file:"+e.getMessage(),e);
return null;
}
}



protected void openIndex() throws MessageSearchException {
Exception lastError = null;

if (writer==null) {
logger.debug("openIndex() index "+friendlyName+" will
be opened. it is currently closed.");
} else {
logger.debug("openIndex() did not bother opening index
"+friendlyName+". it is already open.");
return;
}
logger.debug("opening index "+friendlyName+" for write");
logger.debug("opening search index "+friendlyName+" for
write {indexpath='"+indexPath+"'}");
boolean writelock;
int attempt = 0;
int maxattempt = 10;

if
(Config.getConfig().getIndex().getMultipleIndexProcesses()) {
maxattempt = 10000;
} else {
maxattempt = 10;
}

do {
writelock = false;
try {
fsDirectory = FSDirectory.open(new
File(indexPath));
int maxIndexChars =
Config.getConfig().getIndex().getMaxIndexPerFieldChars();
writer = new
IndexWriter(fsDirectory,analyzer,new
IndexWriter.MaxFieldLength(maxIndexChars));
if (indexLog.isDebugEnabled() &&
indexLogOut!=null) {
writer.setInfoStream(indexLogOut);
}
} catch (LockObtainFailedException lobfe) {
logger.debug("write lock on index
"+friendlyName+". will reopen in 50ms.");
try { Thread.sleep(50); } catch (Exception e) {}
attempt++;
writelock = true;
} catch (CorruptIndexException cie) {
throw new MessageSearchException("index
"+friendlyName+" appears to be corrupt. please reindex the active
volume."+cie.getMessage(),logger);
} catch (Throwable io) {
throw new MessageSearchException("failed to write
document to index "+friendlyName+":"+io.getMessage(),logger);
}
} while (writelock && attempt<maxattempt);
if (attempt>=10000)
throw new MessageSearchException("failed to open index
"+friendlyName+" writer {indexPath='"+indexPath+"'}",lastError,logger);
}

public void indexDocument(LuceneDocument luceneDocument) throws
MessageSearchException {
logger.debug("index document {"+luceneDocument+"}");
if (status==Status.SHUTDOWN) {
throw new MessageSearchException("index is
shutdown.",logger);
}
long s = (new Date()).getTime();
if (luceneDocument == null)
throw new MessageSearchException("assertion failure:
null document",logger);
try {
queue.put(luceneDocument);
} catch (InterruptedException ie) {
throw new MessageSearchException("failed to add
document to queue:"+ie.getMessage(),ie,logger);
}
logger.debug("document indexed successfully
{"+luceneDocument+"}");

logger.debug("indexing message end {"+luceneDocument+"}");
long e = (new Date()).getTime();
logger.debug("indexing time {time='"+(e-s)+"'}");
}

public class DocWriter implements Runnable {

LuceneDocument doc;
String language;
LinkedList<LuceneDocument> pushbacks;
ReentrantLock pushbackLock;

public DocWriter(LuceneDocument doc,String
language,LinkedList<LuceneDocument> pushbacks, ReentrantLock pushbackLock) {
this.doc = doc;
this.language = language;
this.pushbacks = pushbacks;
}

public void run() {
try {

writer.addDocument(doc.getDocument(),AnalyzerFactory.getAnalyzer(language,AnalyzerFactory.Operation.INDEX));
} catch (IOException io) {
logger.error("failed to add document to
index:"+io.getMessage(),io);
} catch (AlreadyClosedException e) {
try {
pushbackLock.lock();
pushbacks.add(doc);
} finally {
pushbackLock.unlock();
}
}
}

}



public class IndexProcessor extends Thread {

public IndexProcessor() {
setName("index processor");
}

public void run() {
boolean exit = false;
LuceneDocument luceneDocument = null;
LinkedList<LuceneDocument> pushbacks = new
LinkedList<LuceneDocument>();
ReentrantLock pushbackLock = new ReentrantLock();


while (!exit) {


//documentPool =
Executors.newFixedThreadPool(Config.getConfig().getArchiver().getArchiveThreads());
luceneDocument = null;
try {
luceneDocument = (LuceneDocument) queue.take();
} catch (InterruptedException e) {
logger.debug("index exit req received. exiting");
exit = true;
continue;
}
if (luceneDocument==EXIT_REQ) {
logger.debug("index exit req received. exiting");
exit = true;
continue;
}
try {

indexLock.lock();

if (luceneDocument==null) {
logger.debug("index info is null");
}
int i = 0;
ExecutorService threadPool =
Executors.newFixedThreadPool(indexThreads,ThreadUtil.getFlexibleThreadFactory("indexwritepool",Thread.NORM_PRIORITY,true));

while(luceneDocument!=null &&
i<maxSimultaneousDocs) {
Document doc =
luceneDocument.getDocument();
String language = doc.get("lang");
if (language==null) {
language =
Config.getConfig().getIndex().getIndexLanguage();
}
DocWriter docWriter = new
DocWriter(luceneDocument,language,pushbacks,pushbackLock);
threadPool.submit(docWriter);

i++;
if (i<maxSimultaneousDocs) {
luceneDocument = (LuceneDocument)
queue.poll();

if (luceneDocument==null) {
logger.debug("index info is
null");
}

if (luceneDocument==EXIT_REQ) {
logger.debug("index exit
req received. exiting (2)");
exit = true;
break;
}
}

}
threadPool.shutdown();
threadPool.awaitTermination(30,TimeUnit.MINUTES);
try {
pushbackLock.lock();
if (pushbacks.size()>0) {
for (LuceneDocument pushback :
pushbacks) {
try {

writer.addDocument(pushback.getDocument());
} catch (IOException io) {
logger.error("failed to add
document to index:"+io.getMessage(),io);
} catch (AlreadyClosedException
e) {
pushbacks.add(pushback);
}
i++;
}
}
} finally {
pushbackLock.unlock();
}
logger.debug("index commit");
try {
if (writer!=null) {
writer.commit();
}
} catch (Exception e) {
logger.error("failed to commit
index:"+e.getMessage(),e);
try {
readerLock.lock();
closeIndex();
openIndex();
} finally {
readerLock.unlock();
}
}

} catch (Throwable ie) {
logger.error("index write
interrupted:"+ie.getMessage(),ie);
} finally {

indexLock.unlock();
}
}
logger.debug("exit indexer");
}

public class IndexDocument extends Thread {

LuceneDocument luceneDocument = null;
List<LuceneDocument> pushbacks = null;

public IndexDocument(LuceneDocument
luceneDocument,List<LuceneDocument> pushbacks) {
this.luceneDocument = luceneDocument;
this.pushbacks = pushbacks;
setName("index document");
}

public void run() {
try {

writer.addDocument(luceneDocument.getDocument());
} catch (IOException io) {
logger.error("failed to add document to
index:"+io.getMessage(),io);
} catch (AlreadyClosedException e) {
pushbacks.add(luceneDocument);
} catch (Throwable t) {
logger.error("failed to add document to
index:"+t.getMessage(),t);
}
}};
}

protected void closeIndex() {
try {
indexLock.lock();


if (writer!=null) {
writer.close();
}

if (fsDirectory!=null) {
fsDirectory.close();
}
} catch (Throwable io) {
logger.error("failed to close index
writer:"+io.getMessage(),io);
} finally {
writer = null;
indexLock.unlock();
}
}



public void optimize() throws MessageSearchException {
logger.debug("optimize volume");
try {
indexLock.lock();
try {
writer.optimize(false);
} catch (Exception io) {
throw new MessageSearchException("failed to
optimize the index:"+io.getMessage(),io,logger);
}
} catch (Throwable t) { // diskspace problems could arise
logger.error("failed to optimize
index:"+t.getMessage(),t);
} finally {
indexLock.unlock();
}

}
/**
 * Deletes all documents matching the given terms, then commits and
 * expunges the deletes, all under the index lock.
 *
 * @param terms terms identifying the documents to delete
 * @throws MessageSearchException if the index is shut down; failures inside
 *         the locked section are logged by the outer catch and not rethrown
 */
public void deleteDocs(Term[] terms) throws
MessageSearchException {
logger.debug("delete docs");
if (status==Status.SHUTDOWN) {
throw new MessageSearchException("index is
shutdown.",logger);
}
try {
indexLock.lock();
// Opens the writer first if it is currently closed.
openIndex();
try {
writer.deleteDocuments(terms);
} catch (Exception e) {
throw new MessageSearchException("failed to
delete doc from index:"+e.getMessage(),e,logger);
} finally {
// Runs even when deleteDocuments failed; an exception thrown from this
// finally block replaces the one thrown by the catch above.
try {
writer.commit();
// false = do not wait for the expunge to complete.
writer.expungeDeletes(false);
} catch (Exception io) {
throw new MessageSearchException("failed to
expunge docs from index:"+io.getMessage(),io,logger);
}
}
} catch (Throwable t) {
// Catches the MessageSearchExceptions thrown above as well, so they are
// only logged. NOTE(review): confirm swallowing is intended given the
// throws clause.
logger.error("failed to delete docs from
index."+t.getMessage(),t);
} finally {
indexLock.unlock();
}
}


public void deleteIndex() throws MessageSearchException {
logger.debug("delete index
{indexpath='"+indexPath+"'}");
try {
indexLock.lock();
closeIndex();
File indexFile = new File(indexPath);
//deleteDirContents(indexFile);
try {
int maxIndexChars =
Config.getConfig().getIndex().getMaxIndexPerFieldChars();
writer = new
IndexWriter(FSDirectory.open(indexFile),analyzer,true,new
IndexWriter.MaxFieldLength(maxIndexChars));
} catch (Throwable cie) {
logger.error("failed to delete index
{index='"+indexPath+"'}",cie);
return;
} finally {
try { writer.close(); } catch (Exception e) {
logger.debug("failed to close writer:"+e.getMessage()); }
writer = null;
}
} finally {
openIndex();
indexLock.unlock();
}
}

public void startup() throws MessageSearchException {
logger.debug("luceneindex is starting up");


File lockFile = new File(indexPath+File.separatorChar +
"write.lock");
if (lockFile.exists()) {
if
(Config.getConfig().getIndex().getMultipleIndexProcesses()) {
logger.debug("index lock file detected on
volumeindex startup.");
} else {
logger.warn("index lock file detected. the server
was shutdown incorrectly. automatically deleting lock file.
{lockFile='"+lockFile.getPath()+"'}");
lockFile.delete();
}
}
openIndex();
scheduler =
Executors.newScheduledThreadPool(1,ThreadUtil.getFlexibleThreadFactory("index
reopen",Thread.NORM_PRIORITY-1,true));
scheduledTask = scheduler.scheduleWithFixedDelay(new
Runnable() { public void run() { reopen=true; }},1,1,TimeUnit.SECONDS);

indexProcessor = new IndexProcessor();
indexProcessor.start();


Runtime.getRuntime().addShutdownHook(this);
status = Status.READY;
}

public IndexReader getReader() throws MessageSearchException {
if (status==Status.SHUTDOWN) {
throw new MessageSearchException("index is
shutdown.",logger);
}
readerLock.lock();
try {
if (writer==null) {
throw new MessageSearchException("cannot retrieve
reader. writer is closed (or null)",logger);
}
if (reader == null) {
reader = new VolumeIndexReader(writer.getReader(5));
} else {
try {
if (reopen) {
reader = new
VolumeIndexReader(writer.getReader(5));
reopen = false;
}
} catch (AlreadyClosedException ace) {
logger.debug("reader was found closed.
reopening");
reader = new
VolumeIndexReader(writer.getReader(5));
}
}
} catch (IOException io) {
throw new MessageSearchException("failed to retrieve
reader from writer:"+io.getMessage(),io,logger);
} finally {
readerLock.unlock();
}
return reader;
}


public void shutdown() {
status = Status.SHUTDOWN;
try { queue.put(EXIT_REQ); } catch (InterruptedException
e) {}

if (reader!=null) {
try {
reader.close();
} catch (Exception e) {
logger.error("failed to close index
reader:"+e.getMessage());
}
}
reader = null;
if (scheduler!=null) {
scheduler.shutdown();
}
closeIndex();

indexProcessor.interrupt();

if (scheduler!=null) {
scheduler.shutdownNow();
}

}

/**
 * Thread body used when this object runs as the JVM shutdown hook
 * registered in startup(): simply shuts the index down.
 */
@Override
public void run() {
    this.shutdown();
}


/**
 * A unit of work for the indexer: something that can produce the Lucene
 * {@link Document} to be written.
 */
public interface LuceneDocument {

    /** @return a description of the document, used in log messages */
    String toString();

    /** @return the Lucene document to add to the index */
    Document getDocument();

    /** Cleanup hook that implementations must expose publicly. */
    void finalize();

}

/**
 * Deletes every regular file directly inside the given directory.
 * Sub-directories and their contents are left alone. Does nothing when the
 * path is null, does not exist, or is not a directory.
 *
 * @param path directory whose files should be deleted
 */
public static void deleteDirContents(File path) {
    if (path == null) {
        return;
    }
    // listFiles() returns null for a missing path, a non-directory or an I/O
    // error; the original dereferenced it unconditionally.
    File[] files = path.listFiles();
    if (files == null) {
        return;
    }
    for (File file : files) {
        if (file.isFile()) {
            file.delete();
        }
    }
}



}





---------------------------------------------------------------------
To unsubscribe, e-mail: java-user-unsubscribe@lucene.apache.org
For additional commands, e-mail: java-user-help@lucene.apache.org

Search Discussions

  • Uwe Schindler at Sep 29, 2010 at 6:22 pm
    The "deleted" files are only freed by OS kernel if no longer an IndexReader
    accesses them. Did you get a new realtime reader after merging and *closed*
    the old one?

    -----
    Uwe Schindler
    H.-H.-Meier-Allee 63, D-28213 Bremen
    http://www.thetaphi.de
    eMail: uwe@thetaphi.de

    -----Original Message-----
    From: Jamie
    Sent: Wednesday, September 29, 2010 10:48 AM
    To: java-user@lucene.apache.org
    Subject: File Handle Leaks During Lucene 3.0.2 Merge

    Hi There

    I am noticing file handle leaks appearing on Index files. I think the
    leaks occur
    during the Lucene merge operation.
    Lsof reports the following:

    java 28604 root 213r REG 8,33 1098681
    57409621 /var/index/vol201009/_a4w.cfs (deleted)
    java 28604 root 214r REG 8,33 35164
    57409699 /var/index/vol201009/_a4x.cfs (deleted)
    java 28604 root 215r REG 8,33 46139
    57409691 /var/index/vol201009/_a4y.cfs (deleted)
    java 28604 root 216r REG 8,33 40342
    57409673 /var/index/vol201009/_a4z.cfs (deleted)
    java 28604 root 217r REG 8,33 44204
    57409675 /var/index/vol201009/_a50.cfs (deleted)

    We are using Lucene's realtime search feature so have handles open on the
    index. Could it be that we are not handling the merge situation correctly or
    something? Your ideas are most appreciated.

    The source code to our index file as follows:

    package com.stimulus.archiva.index;
    import com.stimulus.util.*;

    import java.io.File;
    import java.io.IOException;
    import java.io.PrintStream;
    import org.apache.commons.logging.*;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.index.*;
    import org.apache.lucene.store.FSDirectory;
    import com.stimulus.archiva.domain.Config;
    import com.stimulus.archiva.exception.*; import
    com.stimulus.archiva.language.AnalyzerFactory;
    import com.stimulus.archiva.search.*;
    import java.util.*;
    import org.apache.lucene.store.LockObtainFailedException;
    import org.apache.lucene.store.AlreadyClosedException;
    import java.util.concurrent.locks.ReentrantLock;
    import java.util.concurrent.*;

    public class LuceneIndex extends Thread {

    protected ArrayBlockingQueue<LuceneDocument> queue;
    protected static final Log logger =
    LogFactory.getLog(LuceneIndex.class.getName());
    protected static final Log indexLog =
    LogFactory.getLog("indexlog");
    IndexWriter writer = null;
    protected static ScheduledExecutorService scheduler;
    protected static ScheduledFuture<?> scheduledTask;
    protected LuceneDocument EXIT_REQ = null;
    ReentrantLock indexLock = new ReentrantLock();
    ArchivaAnalyzer analyzer = new ArchivaAnalyzer();
    File indexLogFile;
    PrintStream indexLogOut;
    IndexProcessor indexProcessor;
    String friendlyName;
    String indexPath;
    int maxSimultaneousDocs;
    int indexThreads;
    IndexReader reader = null;
    volatile boolean reopen = false;
    FSDirectory fsDirectory;
    ReentrantLock readerLock = new ReentrantLock();
    enum Status { READY, SHUTDOWN };
    Status status = Status.SHUTDOWN;

    public LuceneIndex(int queueSize, LuceneDocument exitReq, String
    friendlyName, String indexPath, int maxSimultaneousDocs, int
    indexThreads) {
    this.queue = new
    ArrayBlockingQueue<LuceneDocument>(queueSize);
    this.EXIT_REQ = exitReq;
    this.friendlyName = friendlyName;
    this.indexPath = indexPath;
    this.maxSimultaneousDocs = maxSimultaneousDocs;
    this.indexThreads = indexThreads;
    this.status = Status.SHUTDOWN;
    // if (indexLog.isDebugEnabled()) {
    //setLog(friendlyName);
    //}
    }


    public int getMaxSimultaneousDocs() {
    return maxSimultaneousDocs;
    }

    public void setMaxSimultaneousDocs(int maxSimultaneousDocs) {
    this.maxSimultaneousDocs = maxSimultaneousDocs;
    }


    public ReentrantLock getIndexLock() {
    return indexLock;
    }

    protected void setLog(String logName) {

    try {
    indexLogFile = getIndexLogFile(logName);
    if (indexLogFile!=null) {
    if (indexLogFile.length()>10485760)
    indexLogFile.delete();
    indexLogOut = new PrintStream(indexLogFile);
    }
    logger.debug("set index log file path
    {path='"+indexLogFile.getCanonicalPath()+"'}");
    } catch (Exception e) {
    logger.error("failed to open index log
    file:"+e.getMessage(),e);
    }

    }

    protected File getIndexLogFile(String logName) {
    try {
    String logfilepath =
    Config.getFileSystem().getLogPath()+File.separator+"indexdebug_"+logName+".
    log";
    return new File(logfilepath);
    } catch (Exception e) {
    logger.error("failed to open index log
    file:"+e.getMessage(),e);
    return null;
    }
    }



    protected void openIndex() throws MessageSearchException {
    Exception lastError = null;

    if (writer==null) {
    logger.debug("openIndex() index "+friendlyName+" will be
    opened. it
    is currently closed.");
    } else {
    logger.debug("openIndex() did not bother opening index
    "+friendlyName+". it is already open.");
    return;
    }
    logger.debug("opening index "+friendlyName+" for write");
    logger.debug("opening search index "+friendlyName+" for write
    {indexpath='"+indexPath+"'}");
    boolean writelock;
    int attempt = 0;
    int maxattempt = 10;

    if
    (Config.getConfig().getIndex().getMultipleIndexProcesses()) {
    maxattempt = 10000;
    } else {
    maxattempt = 10;
    }

    do {
    writelock = false;
    try {
    fsDirectory = FSDirectory.open(new
    File(indexPath));
    int maxIndexChars =
    Config.getConfig().getIndex().getMaxIndexPerFieldChars();
    writer = new
    IndexWriter(fsDirectory,analyzer,new
    IndexWriter.MaxFieldLength(maxIndexChars));
    if (indexLog.isDebugEnabled() &&
    indexLogOut!=null) {
    writer.setInfoStream(indexLogOut);
    }
    } catch (LockObtainFailedException lobfe) {
    logger.debug("write lock on index
    "+friendlyName+". will
    reopen in 50ms.");
    try { Thread.sleep(50); } catch (Exception e) {}
    attempt++;
    writelock = true;
    } catch (CorruptIndexException cie) {
    throw new MessageSearchException("index
    "+friendlyName+"
    appears to be corrupt. please reindex the active
    volume."+cie.getMessage(),logger);
    } catch (Throwable io) {
    throw new MessageSearchException("failed to write
    document to
    index "+friendlyName+":"+io.getMessage(),logger);
    }
    } while (writelock && attempt<maxattempt);
    if (attempt>=10000)
    throw new MessageSearchException("failed to open index
    "+friendlyName+" writer {indexPath='"+indexPath+"'}",lastError,logger);
    }

    public void indexDocument(LuceneDocument luceneDocument) throws
    MessageSearchException {
    logger.debug("index document {"+luceneDocument+"}");
    if (status==Status.SHUTDOWN) {
    throw new MessageSearchException("index is
    shutdown.",logger);
    }
    long s = (new Date()).getTime();
    if (luceneDocument == null)
    throw new MessageSearchException("assertion failure:
    null document",logger);
    try {
    queue.put(luceneDocument);
    } catch (InterruptedException ie) {
    throw new MessageSearchException("failed to add document to
    queue:"+ie.getMessage(),ie,logger);
    }
    logger.debug("document indexed successfully
    {"+luceneDocument+"}");
    logger.debug("indexing message end {"+luceneDocument+"}");
    long e = (new Date()).getTime();
    logger.debug("indexing time {time='"+(e-s)+"'}");
    }

    public class DocWriter implements Runnable {

    LuceneDocument doc;
    String language;
    LinkedList<LuceneDocument> pushbacks;
    ReentrantLock pushbackLock;

    public DocWriter(LuceneDocument doc,String
    language,LinkedList<LuceneDocument> pushbacks, ReentrantLock
    pushbackLock) {
    this.doc = doc;
    this.language = language;
    this.pushbacks = pushbacks;
    }

    public void run() {
    try {

    writer.addDocument(doc.getDocument(),AnalyzerFactory.getAnalyzer(language
    ,AnalyzerFactory.Operation.INDEX));
    } catch (IOException io) {
    logger.error("failed to add document to
    index:"+io.getMessage(),io);
    } catch (AlreadyClosedException e) {
    try {
    pushbackLock.lock();
    pushbacks.add(doc);
    } finally {
    pushbackLock.unlock();
    }
    }
    }

    }



    public class IndexProcessor extends Thread {

    public IndexProcessor() {
    setName("index processor");
    }

    public void run() {
    boolean exit = false;
    LuceneDocument luceneDocument = null;
    LinkedList<LuceneDocument> pushbacks = new
    LinkedList<LuceneDocument>();
    ReentrantLock pushbackLock = new ReentrantLock();


    while (!exit) {


    //documentPool =
    Executors.newFixedThreadPool(Config.getConfig().getArchiver().getArchiveThr
    eads());
    luceneDocument = null;
    try {
    luceneDocument = (LuceneDocument) queue.take();
    } catch (InterruptedException e) {
    logger.debug("index exit req received. exiting");
    exit = true;
    continue;
    }
    if (luceneDocument==EXIT_REQ) {
    logger.debug("index exit req received. exiting");
    exit = true;
    continue;
    }
    try {

    indexLock.lock();

    if (luceneDocument==null) {
    logger.debug("index info is null");
    }
    int i = 0;
    ExecutorService threadPool =
    Executors.newFixedThreadPool(indexThreads,ThreadUtil.getFlexibleThreadFact
    ory("indexwritepool",Thread.NORM_PRIORITY,true));

    while(luceneDocument!=null &&
    i<maxSimultaneousDocs) {
    Document doc =
    luceneDocument.getDocument();
    String language = doc.get("lang");
    if (language==null) {
    language =
    Config.getConfig().getIndex().getIndexLanguage();
    }
    DocWriter docWriter = new
    DocWriter(luceneDocument,language,pushbacks,pushbackLock);
    threadPool.submit(docWriter);

    i++;
    if (i<maxSimultaneousDocs) {
    luceneDocument = (LuceneDocument)
    queue.poll();

    if (luceneDocument==null) {
    logger.debug("index info is
    null");
    }

    if (luceneDocument==EXIT_REQ) {
    logger.debug("index exit
    req received. exiting (2)");
    exit = true;
    break;
    }
    }

    }
    threadPool.shutdown();
    threadPool.awaitTermination(30,TimeUnit.MINUTES);
    try {
    pushbackLock.lock();
    if (pushbacks.size()>0) {
    for (LuceneDocument pushback :
    pushbacks) {
    try {

    writer.addDocument(pushback.getDocument());
    } catch (IOException io) {
    logger.error("failed to add
    document to index:"+io.getMessage(),io);
    } catch (AlreadyClosedException
    e) {
    pushbacks.add(pushback);
    }
    i++;
    }
    }
    } finally {
    pushbackLock.unlock();
    }
    logger.debug("index commit");
    try {
    if (writer!=null) {
    writer.commit();
    }
    } catch (Exception e) {
    logger.error("failed to commit
    index:"+e.getMessage(),e);
    try {
    readerLock.lock();
    closeIndex();
    openIndex();
    } finally {
    readerLock.unlock();
    }
    }

    } catch (Throwable ie) {
    logger.error("index write
    interrupted:"+ie.getMessage(),ie);
    } finally {

    indexLock.unlock();
    }
    }
    logger.debug("exit indexer");
    }

    public class IndexDocument extends Thread {

    LuceneDocument luceneDocument = null;
    List<LuceneDocument> pushbacks = null;

    public IndexDocument(LuceneDocument
    luceneDocument,List<LuceneDocument> pushbacks) {
    this.luceneDocument = luceneDocument;
    this.pushbacks = pushbacks;
    setName("index document");
    }

    public void run() {
    try {

    writer.addDocument(luceneDocument.getDocument());
    } catch (IOException io) {
    logger.error("failed to add document to
    index:"+io.getMessage(),io);
    } catch (AlreadyClosedException e) {
    pushbacks.add(luceneDocument);
    } catch (Throwable t) {
    logger.error("failed to add document to
    index:"+t.getMessage(),t);
    }
    }};
    }

    protected void closeIndex() {
    try {
    indexLock.lock();


    if (writer!=null) {
    writer.close();
    }

    if (fsDirectory!=null) {
    fsDirectory.close();
    }
    } catch (Throwable io) {
    logger.error("failed to close index
    writer:"+io.getMessage(),io);
    } finally {
    writer = null;
    indexLock.unlock();
    }
    }



    public void optimize() throws MessageSearchException {
    logger.debug("optimize volume");
    try {
    indexLock.lock();
    try {
    writer.optimize(false);
    } catch (Exception io) {
    throw new MessageSearchException("failed to
    optimize the index:"+io.getMessage(),io,logger);
    }
    } catch (Throwable t) { // diskspace problems could arise
    logger.error("failed to optimize
    index:"+t.getMessage(),t);
    } finally {
    indexLock.unlock();
    }

    }
    public void deleteDocs(Term[] terms) throws
    MessageSearchException {
    logger.debug("delete docs");
    if (status==Status.SHUTDOWN) {
    throw new MessageSearchException("index is
    shutdown.",logger);
    }
    try {
    indexLock.lock();
    openIndex();
    try {
    writer.deleteDocuments(terms);
    } catch (Exception e) {
    throw new MessageSearchException("failed to
    delete doc from index:"+e.getMessage(),e,logger);
    } finally {
    try {
    writer.commit();
    writer.expungeDeletes(false);
    } catch (Exception io) {
    throw new MessageSearchException("failed to
    expunge docs from index:"+io.getMessage(),io,logger);
    }
    }
    } catch (Throwable t) {
    logger.error("failed to delete docs from
    index."+t.getMessage(),t);
    } finally {
    indexLock.unlock();
    }
    }


    public void deleteIndex() throws MessageSearchException {
    logger.debug("delete index
    {indexpath='"+indexPath+"'}");
    try {
    indexLock.lock();
    closeIndex();
    File indexFile = new File(indexPath);
    //deleteDirContents(indexFile);
    try {
    int maxIndexChars =
    Config.getConfig().getIndex().getMaxIndexPerFieldChars();
    writer = new
    IndexWriter(FSDirectory.open(indexFile),analyzer,true,new
    IndexWriter.MaxFieldLength(maxIndexChars));
    } catch (Throwable cie) {
    logger.error("failed to delete index
    {index='"+indexPath+"'}",cie);
    return;
    } finally {
    try { writer.close(); } catch (Exception e) {
    logger.debug("failed to close writer:"+e.getMessage()); }
    writer = null;
    }
    } finally {
    openIndex();
    indexLock.unlock();
    }
    }

    public void startup() throws MessageSearchException {
    logger.debug("luceneindex is starting up");


    File lockFile = new File(indexPath+File.separatorChar +
    "write.lock");
    if (lockFile.exists()) {
    if
    (Config.getConfig().getIndex().getMultipleIndexProcesses()) {
    logger.debug("index lock file detected on
    volumeindex startup.");
    } else {
    logger.warn("index lock file detected. the server
    was shutdown incorrectly. automatically deleting lock file.
    {lockFile='"+lockFile.getPath()+"'}");
    lockFile.delete();
    }
    }
    openIndex();
    scheduler =
    Executors.newScheduledThreadPool(1,ThreadUtil.getFlexibleThreadFactory("in
    dex
    reopen",Thread.NORM_PRIORITY-1,true));
    scheduledTask = scheduler.scheduleWithFixedDelay(new
    Runnable() { public void run() { reopen=true; }},1,1,TimeUnit.SECONDS);

    indexProcessor = new IndexProcessor();
    indexProcessor.start();


    Runtime.getRuntime().addShutdownHook(this);
    status = Status.READY;
    }

    public IndexReader getReader() throws MessageSearchException {
    if (status==Status.SHUTDOWN) {
    throw new MessageSearchException("index is
    shutdown.",logger);
    }
    readerLock.lock();
    try {
    if (writer==null) {
    throw new MessageSearchException("cannot retrieve
    reader. writer is closed (or null)",logger);
    }
    if (reader == null) {
    reader = new
    VolumeIndexReader(writer.getReader(5));
    } else {
    try {
    if (reopen) {
    reader = new
    VolumeIndexReader(writer.getReader(5));
    reopen = false;
    }
    } catch (AlreadyClosedException ace) {
    logger.debug("reader was found closed.
    reopening");
    reader = new
    VolumeIndexReader(writer.getReader(5));
    }
    }
    } catch (IOException io) {
    throw new MessageSearchException("failed to retrieve
    reader from writer:"+io.getMessage(),io,logger);
    } finally {
    readerLock.unlock();
    }
    return reader;
    }


    public void shutdown() {
    status = Status.SHUTDOWN;
    try { queue.put(EXIT_REQ); } catch (InterruptedException
    e) {}

    if (reader!=null) {
    try {
    reader.close();
    } catch (Exception e) {
    logger.error("failed to close index
    reader:"+e.getMessage());
    }
    }
    reader = null;
    if (scheduler!=null) {
    scheduler.shutdown();
    }
    closeIndex();

    indexProcessor.interrupt();

    if (scheduler!=null) {
    scheduler.shutdownNow();
    }

    }

    @Override
    public void run() {
    shutdown();
    }


    public interface LuceneDocument {

    public String toString();
    public Document getDocument();
    public void finalize();

    }

    /**
     * Deletes every regular file located directly inside {@code path}.
     * Subdirectories and their contents are left untouched. Does nothing when
     * {@code path} does not exist, is not a directory, or cannot be listed.
     *
     * @param path the directory whose files are to be removed
     */
    public static void deleteDirContents(File path) {
        if (!path.exists()) {
            return;
        }
        // listFiles() returns null (not an empty array) when path is not a
        // directory or an I/O error occurs; iterating it would throw an NPE.
        File[] entries = path.listFiles();
        if (entries == null) {
            return;
        }
        for (File entry : entries) {
            if (entry.isFile()) {
                entry.delete();
            }
        }
    }



    }





    ---------------------------------------------------------------------
    To unsubscribe, e-mail: java-user-unsubscribe@lucene.apache.org
    For additional commands, e-mail: java-user-help@lucene.apache.org


    ---------------------------------------------------------------------
    To unsubscribe, e-mail: java-user-unsubscribe@lucene.apache.org
    For additional commands, e-mail: java-user-help@lucene.apache.org
  • Jamie at Sep 29, 2010 at 6:51 pm
    Hi Uwe

    Thanks in advance for your help. Well, I just tried searching again and
    it made no difference. My LuceneIndex getReader() function
    will call writer.getReader() on occasion or return a cached copy. To
    make sure that IndexReader's are closed when they are no longer needed,
    I wrap the IndexReader as follows:

    /**
     * FilterIndexReader wrapper whose finalizer closes the wrapped reader.
     * NOTE(review): release of the wrapped reader therefore depends on when
     * (and whether) the garbage collector finalizes this wrapper.
     */
    public class VolumeIndexReader extends FilterIndexReader {

    /** Wraps the given reader; it is kept by the superclass as {@code in}. */
    public VolumeIndexReader(IndexReader in) {
    super(in);
    }

    /** Closes the wrapped reader on finalization; any exception from close() is ignored. */
    public void finalize() {
    try { in.close(); } catch (Exception e) {}
    }

    /** Delegates unchanged to the superclass implementation. */
    public IndexReader reopen(boolean readonly) throws IOException {
    return super.reopen(readonly);
    }
    }

    You'll notice the finalizer calls IndexReader.close(). After users conduct
    multiple searches, the index reader should be closed in time. Therefore,
    it's confusing to me to see that open handles are still present. Clearly,
    I am doing something wrong, but what?

    Jamie


    On 2010/09/29 8:21 PM, Uwe Schindler wrote:
    The "deleted" files are only freed by the OS kernel once no IndexReader
    accesses them any longer. Did you get a new realtime reader after merging
    and *closed* the old one?

    -----
    Uwe Schindler
    H.-H.-Meier-Allee 63, D-28213 Bremen
    http://www.thetaphi.de
    eMail:uwe@thetaphi.de
  • Uwe Schindler at Sep 30, 2010 at 8:56 am
    The finalize() thing does not work correctly, as the reader still holds
    references to other stuff when not explicitly closed. As it references
    them, the finalizer() is never called, as it is not to be gc'd.

    You must close the reader explicitly, that's all. So just close it after using.
    With Near Realtime Search, you normally get an IR, then wrap it with
    IndexSearcher, do your search, and close it after that. You can even call
    writer.getReader() from different threads, refcounting will close the
    readers correctly. So for each request, take a new one and close after
    usage.

    Uwe

    -----
    Uwe Schindler
    H.-H.-Meier-Allee 63, D-28213 Bremen
    http://www.thetaphi.de
    eMail: uwe@thetaphi.de

    -----Original Message-----
    From: Jamie
    Sent: Wednesday, September 29, 2010 11:50 AM
    To: java-user@lucene.apache.org
    Subject: Re: File Handle Leaks During Lucene 3.0.2 Merge

    Hi Uwe

    Thanks in advance for your help. Well, I just tried searching again and it made
    no difference. My LuceneIndex getReader() function will call
    writer.getReader() on occasion or return a cached copy. To make sure that
    IndexReader's are closed when they are no longer needed, I wrap the
    IndexReader as follows:

    /**
     * FilterIndexReader wrapper whose finalizer closes the wrapped reader.
     * NOTE(review): release of the wrapped reader therefore depends on when
     * (and whether) the garbage collector finalizes this wrapper.
     */
    public class VolumeIndexReader extends FilterIndexReader {

    /** Wraps the given reader; it is kept by the superclass as {@code in}. */
    public VolumeIndexReader(IndexReader in) {
    super(in);
    }

    /** Closes the wrapped reader on finalization; any exception from close() is ignored. */
    public void finalize() {
    try { in.close(); } catch (Exception e) {}
    }

    /** Delegates unchanged to the superclass implementation. */
    public IndexReader reopen(boolean readonly) throws IOException {
    return super.reopen(readonly);
    }
    }

    You'll notice finalizer calls IndexReader.close(). After users conduct multiple
    searches, the index reader should be closed in time. Therefore, its
    confusing to
    me to see that open handles are still present. Clearly, I am doing something
    wrong, but what?

    Jamie


    On 2010/09/29 8:21 PM, Uwe Schindler wrote:
    The "deleted" files are only freed by OS kernel if no longer an
    IndexReader accesses them. Did you get a new realtime reader after
    merging and*closed* the old one?

    -----
    Uwe Schindler
    H.-H.-Meier-Allee 63, D-28213 Bremen
    http://www.thetaphi.de
    eMail:uwe@thetaphi.de


    ---------------------------------------------------------------------
    To unsubscribe, e-mail: java-user-unsubscribe@lucene.apache.org
    For additional commands, e-mail: java-user-help@lucene.apache.org
  • Jamie at Sep 30, 2010 at 9:27 am
    Uwe

    If I recall correctly, when you call writer.getReader(), the returned
    IndexReader can consume a lot of memory with large indexes. To ensure
    that the same index reader is reused across multiple search threads, I
    keep a cached copy of the reader and return it. If a search thread
    closes the reader, then it will be closed for the other search threads
    and the search will fail. From my test, the finalize method in the
    VolumeIndexReader example I gave you is called. The file handle leaks
    are coming from the core index loop, where I call .commit() as opposed
    to closing the index. Since the writer stays open, handles left by merge
    operations are never deleted. A solution is to close the index
    periodically to force the handles to be swept up by the OS.

    Jamie
    On 2010/09/30 10:55 AM, Uwe Schindler wrote:
    The finalize() thing does not work correctly, as the reader holds still
    references to other stuff when not explicitely closed. As it references
    them, the finalizer() is never called, as it is not to be gc'd.

    You must close the reader explicit, that's all. So just close it afterusing.
    With Near Realtime Search, you normally get an IR, then wrap it with
    IndexSearcher, do your search, and close it after that. You can even call
    writer.getReader() from different threads, refcounting will close the
    readers correctly. So for each request, take a new one and close after
    usage.

    Uwe

    ---------------------------------------------------------------------
    To unsubscribe, e-mail: java-user-unsubscribe@lucene.apache.org
    For additional commands, e-mail: java-user-help@lucene.apache.org
  • Michael McCandless at Sep 30, 2010 at 9:34 am
    Comments inline...
    On Thu, Sep 30, 2010 at 5:26 AM, Jamie wrote:
    Uwe

    If I recall correctly when you call writer.getReader(), the returned
    IndexReader can consume alot of memory with large indexes
    The reopened reader shares sub-readers with the previous one, so, if
    all that's changed since a last reopen was flushing a small segment,
    then the additional resources consumed will be small.
    To ensure that
    the same index reader is reused across multiple search threads, I keep a
    cached copy of the reader and return it. If a search thread closes the
    reader, then it will be closed for the other search threads and the search
    will fail.
    It's good to cache the reader, but, finalize would worry me too since
    you have no control over when GC gets around to calling it... you risk
    tying up resources for longer than necessary.
    From my test, the finalize method in VolumeIndexReader example I
    gave you is called. The file handle leaks are coming from the core index
    loop, where I call .commit() as opposed to closing the index. Since the
    writer stays open, handles left by merge operations are never deleted. A
    solution is too close the index periodically to force the handles to be
    swept up by the OS.
    IndexWriter has a reader pool, internally, where it holds open
    SegmentReaders for the still-live segments in the index. This is used
    by IndexReader.reopen to share open SegmentReaders.

    But the open files should correspond only to segments still "live" in
    the index. After segments are merged away, these readers are dropped.
    Is this what you are seeing?

    Mike

    ---------------------------------------------------------------------
    To unsubscribe, e-mail: java-user-unsubscribe@lucene.apache.org
    For additional commands, e-mail: java-user-help@lucene.apache.org
  • Jamie at Sep 30, 2010 at 10:00 am
    Hi Michael / Uwe
    It's good to cache the reader, but, finalize would worry me too since
    you have no control over when GC gets around to calling it... you risk
    tying up resources for longer than necessary.
    I did it this way, as I didn't want to overcomplicate the code by
    introducing mechanisms to track the number of search threads using a
    shared indexreader. Admittedly, it's not a very clean solution but in my
    case it does work. Is there a particular technique for knowing when to
    close a reader when there are multiple search threads using that reader?
    Should I keep some kind of counter and override the close method of the
    reader such that the underlying reader is only closed when everyone's
    done with it?
    IndexWriter has a reader pool, internally, where it holds open
    SegmentReaders for the still-live segments in the index. This is used
    by IndexReader.reopen to share open SegmentReaders.

    But the open files should correspond only to segments still "live" in
    the index. After segments are merged away, these readers are dropped.
    Is this what you are seeing?
    I don't fully understand your explanation/question. When I run lsof, I am
    seeing the following:

    /usr/local/mailarchiva/server/webapps/ROOT/WEB-INF/logs/index/_jyr.cfs
    (deleted)
    /usr/local/mailarchiva/server/webapps/ROOT/WEB-INF/logs/index/_jyp.cfs
    (deleted)

    I assume these are left by the OS after the merge operation tried to
    delete old segments. The OS is unable to delete the files. I think it's
    because our new code never closes the indexwriter, but rather uses the
    indexwriter.commit() method to apply the changes. Is this correct?

    Jamie


    ---------------------------------------------------------------------
    To unsubscribe, e-mail: java-user-unsubscribe@lucene.apache.org
    For additional commands, e-mail: java-user-help@lucene.apache.org
  • Uwe Schindler at Sep 30, 2010 at 10:12 am
    Hi Jamie,
    It's good to cache the reader, but, finalize would worry me too since
    you
    have no control over when GC gets around to calling it... you risk >tying up
    resources for longer than necessary.

    I did it this way, as I didn't want to over complicate the code by
    introducing
    mechanisms to track the number of search threads using a shared
    indexreader.
    Admittedly, its not a very clean solution but in my case it does work. Is there a
    particular technique for knowing when to a close a reader when there are
    multiple search threads using that reader?
    Should I keep some kind of counter and override the close method of the
    reader such that the underlying reader is only closed when everyone's done
    with it?
    The easiest would be an AtomicInteger for each cached reader that gets
    incremented before you start a search and decremented on finishing search.
    You can safely close the reader, when the integer is 0.

    Uwe


    ---------------------------------------------------------------------
    To unsubscribe, e-mail: java-user-unsubscribe@lucene.apache.org
    For additional commands, e-mail: java-user-help@lucene.apache.org
  • Michael McCandless at Sep 30, 2010 at 9:53 pm
    You can also use the IndexReader's incRef/decRef methods.

    Mike
    On Thu, Sep 30, 2010 at 6:12 AM, Uwe Schindler wrote:
    Hi Jamie,
    It's good to cache the reader, but, finalize would worry me too since
    you
    have no control over when GC gets around to calling it... you risk  >tying up
    resources for longer than necessary.

    I did it this way, as I didn't want to over complicate the code by
    introducing
    mechanisms to track the number of search threads using a shared
    indexreader.
    Admittedly, its not a very clean solution but in my case it does work. Is there a
    particular technique for knowing when to a close a reader when there are
    multiple search threads using that reader?
    Should I keep some kind of counter and override the close method of the
    reader such that the underlying reader is only closed when everyone's done
    with it?
    The easiest would be an AtomicInteger for each cached reader that gets
    incremented before you start a search and decremented on finishing search.
    You can safely close the reader, when the integer is 0.

    Uwe


    ---------------------------------------------------------------------
    To unsubscribe, e-mail: java-user-unsubscribe@lucene.apache.org
    For additional commands, e-mail: java-user-help@lucene.apache.org
    ---------------------------------------------------------------------
    To unsubscribe, e-mail: java-user-unsubscribe@lucene.apache.org
    For additional commands, e-mail: java-user-help@lucene.apache.org
  • Michael McCandless at Sep 30, 2010 at 9:58 pm

    On Thu, Sep 30, 2010 at 5:59 AM, Jamie wrote:
    Hi Michael / Uwe
    It's good to cache the reader, but, finalize would worry me too since
    you have no control over when GC gets around to calling it... you risk
    tying up resources for longer than necessary.
    I did it this way, as I didn't want to over complicate the code by
    introducing mechanisms to track the number of search threads using a shared
    indexreader. Admittedly, its not a very clean solution but in my case it
    does work. Is there a particular technique for knowing when to a close a
    reader when there are multiple search threads using that reader? Should I
    keep some kind of counter and override the close method of the reader such
    that the underlying reader is only closed when everyone's done with it?
    See Uwe's response (or SearcherManager).
    IndexWriter has a reader pool, internally, where it holds open
    SegmentReaders for the still-live segments in the index.  This is used
    by IndexReader.reopen to share open SegmentReaders.

    But the open files should correspond only to segments still "live" in
    the index.  After segments are merged away, these readers are dropped.
    Is this what you are seeing?
    I dont fully understand your explanation/question. When I run lsof, I am
    seeing the following:

    /usr/local/mailarchiva/server/webapps/ROOT/WEB-INF/logs/index/_jyr.cfs
    (deleted)
    /usr/local/mailarchiva/server/webapps/ROOT/WEB-INF/logs/index/_jyp.cfs
    (deleted)

    I assume these are left by the OS after the merge operation tried to delete
    old segments. The OS is unable to delete the files. I think its because our
    new code never closes the indexwriter, but rather uses the
    indexwriter.commit() method to apply the changes. Is this correct?
    Ahh I see they are deleted but held open... hmmm.

    Though this is also what you'd see if there were still a reader open.
    Are you certain all readers were closed (finalized) when you ran lsof?

    Mike

    ---------------------------------------------------------------------
    To unsubscribe, e-mail: java-user-unsubscribe@lucene.apache.org
    For additional commands, e-mail: java-user-help@lucene.apache.org
  • Jamie at Oct 1, 2010 at 5:42 am
    Hi Mike

    I managed to get hold of a copy of your book through Safari Books.
    Quite an impressive online reading system they have there! I integrated
    your SearcherManager class into our code, but I am still seeing file
    handles marked deleted in the index directory. I am running the
    following command on Linux:

    sudo watch -n 0 "lsof | grep /var/index | grep deleted | wc -l"

    Every 0.1s: lsof | grep /var/index | grep deleted |... Fri Oct 1
    09:37:36 2010

    54

    The deleted file handles fluctuate up and down. 54 -> 102 -> 64 -> 32,
    etc. They seem stable though. Is this to be expected when using NRT search?

    I am pretty certain that all Searchers are released at the end of
    every search. I double checked it at least twenty times.

    Jamie


    On 2010/09/30 11:56 PM, Michael McCandless wrote:
    On Thu, Sep 30, 2010 at 5:59 AM, Jamiewrote:
    Hi Michael / Uwe
    It's good to cache the reader, but, finalize would worry me too since
    you have no control over when GC gets around to calling it... you risk
    tying up resources for longer than necessary.
    I did it this way, as I didn't want to over complicate the code by
    introducing mechanisms to track the number of search threads using a shared
    indexreader. Admittedly, its not a very clean solution but in my case it
    does work. Is there a particular technique for knowing when to a close a
    reader when there are multiple search threads using that reader? Should I
    keep some kind of counter and override the close method of the reader such
    that the underlying reader is only closed when everyone's done with it?
    See Uwe's response (or SearcherManager).
    IndexWriter has a reader pool, internally, where it holds open
    SegmentReaders for the still-live segments in the index. This is used
    by IndexReader.reopen to share open SegmentReaders.

    But the open files should correspond only to segments still "live" in
    the index. After segments are merged away, these readers are dropped.
    Is this what you are seeing?
    I dont fully understand your explanation/question. When I run lsof, I am
    seeing the following:

    /usr/local/mailarchiva/server/webapps/ROOT/WEB-INF/logs/index/_jyr.cfs
    (deleted)
    /usr/local/mailarchiva/server/webapps/ROOT/WEB-INF/logs/index/_jyp.cfs
    (deleted)

    I assume these are left by the OS after the merge operation tried to delete
    old segments. The OS is unable to delete the files. I think its because our
    new code never closes the indexwriter, but rather uses the
    indexwriter.commit() method to apply the changes. Is this correct?
    Ahh I see they are deleted but held open... hmmm.

    Though this is also what you'd see if there were still a reader open.
    Are you certain all readers were closed (finalized) when you ran lsof?

    Mike

    ---------------------------------------------------------------------
    To unsubscribe, e-mail: java-user-unsubscribe@lucene.apache.org
    For additional commands, e-mail: java-user-help@lucene.apache.org

    ---------------------------------------------------------------------
    To unsubscribe, e-mail: java-user-unsubscribe@lucene.apache.org
    For additional commands, e-mail: java-user-help@lucene.apache.org
  • Uwe Schindler at Oct 1, 2010 at 6:00 am
    Hi Jamie,

    YES, it's expected for the reasons described above (segments are still
    referenced by the open IndexReaders, but files were already deleted by
    IndexWriter). The number of open but already deleted files should be
    approximately stable.

    Uwe

    -----
    Uwe Schindler
    H.-H.-Meier-Allee 63, D-28213 Bremen
    http://www.thetaphi.de
    eMail: uwe@thetaphi.de

    -----Original Message-----
    From: Jamie
    Sent: Friday, October 01, 2010 7:41 AM
    To: java-user@lucene.apache.org
    Subject: Re: File Handle Leaks During Lucene 3.0.2 Merge

    Hi Mike

    I managed to get hold of a copy of your book through Safari Books.
    Quite an impressive online reading system they have there! I integrated your
    SearchManager class into our code, but I am still seeing file handles marked
    deleted in the index directory. I am running the following command on Linux:
    sudo watch -n 0 "lsof | grep /var/index | grep deleted | wc -l"

    Every 0.1s: lsof | grep /var/index | grep deleted |... Fri Oct 1
    09:37:36 2010

    54

    The deleted file handles fluctuate up and down. 54 -> 102 -> 64 -> 32, etc. They
    seem stable though. Is this to be expected when using NRT search?

    I am pretty certain that all Searchers are released at the end of every search.
    I double checked it at least twenty times.

    Jamie


    On 2010/09/30 11:56 PM, Michael McCandless wrote:
    On Thu, Sep 30, 2010 at 5:59 AM, Jamiewrote:
    Hi Michael / Uwe
    It's good to cache the reader, but, finalize would worry me too
    since you have no control over when GC gets around to calling it...
    you risk tying up resources for longer than necessary.
    I did it this way, as I didn't want to over complicate the code by
    introducing mechanisms to track the number of search threads using a
    shared indexreader. Admittedly, its not a very clean solution but in
    my case it does work. Is there a particular technique for knowing
    when to a close a reader when there are multiple search threads using
    that reader? Should I keep some kind of counter and override the
    close method of the reader such that the underlying reader is only
    closed
    when everyone's done with it?
    See Uwe's response (or SearcherManager).
    IndexWriter has a reader pool, internally, where it holds open
    SegmentReaders for the still-live segments in the index. This is
    used by IndexReader.reopen to share open SegmentReaders.

    But the open files should correspond only to segments still "live"
    in the index. After segments are merged away, these readers are
    dropped.
    Is this what you are seeing?
    I dont fully understand your explanation/question. When I run lsof, I
    am seeing the following:

    /usr/local/mailarchiva/server/webapps/ROOT/WEB-INF/logs/index/_jyr.cf
    s
    (deleted)
    /usr/local/mailarchiva/server/webapps/ROOT/WEB-INF/logs/index/_jyp.cf
    s
    (deleted)

    I assume these are left by the OS after the merge operation tried to
    delete old segments. The OS is unable to delete the files. I think
    its because our new code never closes the indexwriter, but rather
    uses the
    indexwriter.commit() method to apply the changes. Is this correct?
    Ahh I see they are deleted but held open... hmmm.

    Though this is also what you'd see if there were still a reader open.
    Are you certain all readers were closed (finalized) when you ran lsof?

    Mike

    ---------------------------------------------------------------------
    To unsubscribe, e-mail: java-user-unsubscribe@lucene.apache.org
    For additional commands, e-mail: java-user-help@lucene.apache.org

    ---------------------------------------------------------------------
    To unsubscribe, e-mail: java-user-unsubscribe@lucene.apache.org
    For additional commands, e-mail: java-user-help@lucene.apache.org


    ---------------------------------------------------------------------
    To unsubscribe, e-mail: java-user-unsubscribe@lucene.apache.org
    For additional commands, e-mail: java-user-help@lucene.apache.org
  • Michael McCandless at Oct 1, 2010 at 9:48 am

    On Fri, Oct 1, 2010 at 1:41 AM, Jamie wrote:

    I managed to get hold of a copy of your book through Safari Books. Quite an
    impressive online reading system they have there!
    Neat! I had never heard of Safari Books until now, but it looks cool.
    I integrated your
    SearchManager class into our code, but I am still seeing file handles marked
    deleted in the index directory. I am running the following command on Linux:

    sudo watch -n 0 "lsof | grep /var/index | grep deleted | wc -l"

    Every 0.1s: lsof | grep /var/index | grep deleted |...  Fri Oct  1 09:37:36
    2010

    54

    The deleted file handles fluctuate up and down. 54 -> 102 -> 64 -> 32, etc.
    They seem stable though. Is this to be expected when using NRT search?

    I am pretty certain that all Searchers are released at the end of every
    search. I double checked it at least twenty times.
    SearcherManager always keeps one IndexReader open (the current "live"
    one), so it's expected that this reader is holding open references to
    deleted files if in fact IndexWriter has completed merges since the
    reader was opened. Though.. I would expect you to sometimes see 0
    deleted files in your lsof...

    Maybe try this: 1) pause your indexing, 2) reopen the reader (call
    SearcherManager.reopen), and then 3) run lsof again, at which point
    you should see no deleted files still held open.

    But note that to truly pause your indexing, you should switch
    (temporarily, for this test) your IndexWriter to the
    SerialMergeScheduler, and then ensure you stop calling IndexWriter ops
    while you run steps 2 and 3.

    Mike

    ---------------------------------------------------------------------
    To unsubscribe, e-mail: java-user-unsubscribe@lucene.apache.org
    For additional commands, e-mail: java-user-help@lucene.apache.org
  • Thomas Rewig at Nov 10, 2010 at 11:18 am
    Hello,
    please excuse me for hijacking this old thread, but I have the same problem
    with the deleted file handles, so I think this is the right place for it.

    I also integrated the searchManager in our Code and see the file handles
    fluctuate up and down. At first glance the situation seems stable but
    the count of the deleted file handles never drops to zero and I think
    the deleted file handles are constantly rising. About +50 handles stay
    per day by refreshing the searchers every 2 minutes and committing after
    10 minutes. I think some deleted handles remain open after the
    committing, but I could not figure out why. The max value of deleted
    handles until now was about 250, because the Index will be changed after
    a few days. Jamie, did you manage to get all deleted handles
    removed? Do you have any information on how to repair this, or is this
    expected behavior and the number of deleted file handles will become
    stable - but only at around 500 or so?

    Thanks in advance

    Thomas
    I integrated your
    SearchManager class into our code, but I am still seeing file handles marked
    deleted in the index directory. I am running the following command on Linux:

    sudo watch -n 0 "lsof | grep /var/index | grep deleted | wc -l"

    Every 0.1s: lsof | grep /var/index | grep deleted |... Fri Oct 1 09:37:36
    2010

    54

    The deleted file handles fluctuate up and down. 54 -> 102 -> 64 -> 32, etc.
    They seem stable though. Is this to be expected when using NRT search?

    I am pretty certain that all Searchers are released at the end of every
    search. I double checked it at least twenty times.
    SearcherManager always keeps one IndexReader open (the current "live"
    one), so it's expected that this reader is holding open references to
    deleted files if in fact IndexWriter has completed merges since the
    reader was opened. Though.. I would expect you to sometimes see 0
    deleted files in your lsof...

    Maybe try this: 1) pause your indexing, 2) reopen the reader (call
    SearcherManager.reopen), and then 3) run lsof again, at which point
    you should see no deleted files still held open.

    But note that to truly pause your indexing, you should switch
    (temporarily, for this test) your IndexWriter to the
    SerialMergeScheduler, and then ensure you stop calling IndexWriter ops
    while you run steps 2 and 3.

    Mike
  • Michael McCandless at Sep 30, 2010 at 9:28 am
    Opening an NRT reader per-search can be too costly if you have a high
    search rate.

    It's better to rate-limit for that case, e.g. to at most 10 reopens per
    second (one every 100 msec). There's a useful class in the Lucene in
    Action 2 source code (NOTE: I am a co-author), SearcherManager, which
    simplifies this for you. You can download the source code from
    http://manning.com/lucene, but we are also in the process of donating
    this source code to Lucene....

    Also note that you need not worry about when Lucene does merges
    under-the-hood. Ie, Lucene takes care of this, and there's nothing
    the app needs to "do" to handle merges & NRT readers, unless you want
    to install a segment warmer that pre-warms newly merged segments
    before making them visible to the next NRT reader (the SearcherManager
    also makes this easy -- subclass it and override the warm method).

    Mike
    On Thu, Sep 30, 2010 at 4:55 AM, Uwe Schindler wrote:
    The finalize() thing does not work correctly, as the reader holds still
    references to other stuff when not explicitely closed. As it references
    them, the finalizer() is never called, as it is not to be gc'd.

    You must close the reader explicit, that's all. So just close it afterusing.
    With Near Realtime Search, you normally get an IR, then wrap it with
    IndexSearcher, do your search, and close it after that. You can even call
    writer.getReader() from different threads, refcounting will close the
    readers correctly. So for each request, take a new one and close after
    usage.

    Uwe

    -----
    Uwe Schindler
    H.-H.-Meier-Allee 63, D-28213 Bremen
    http://www.thetaphi.de
    eMail: uwe@thetaphi.de

    -----Original Message-----
    From: Jamie
    Sent: Wednesday, September 29, 2010 11:50 AM
    To: java-user@lucene.apache.org
    Subject: Re: File Handle Leaks During Lucene 3.0.2 Merge

    Hi Uwe

    Thanks in advance for your help. Well, I just tried searching again and it made
    no difference. My LuceneIndex getReader() function will call
    writer.getReader() on occasion or return a cached copy. To make sure that
    IndexReader's are closed when they are no longer needed, I wrap the
    IndexReader as follows:

    /**
     * FilterIndexReader wrapper whose finalizer closes the wrapped reader.
     * NOTE(review): release of the wrapped reader therefore depends on when
     * (and whether) the garbage collector finalizes this wrapper.
     */
    public class VolumeIndexReader extends FilterIndexReader {

    /** Wraps the given reader; it is kept by the superclass as {@code in}. */
    public VolumeIndexReader(IndexReader in) {
    super(in);
    }

    /** Closes the wrapped reader on finalization; any exception from close() is ignored. */
    public void finalize() {
    try { in.close(); } catch (Exception e) {}
    }

    /** Delegates unchanged to the superclass implementation. */
    public IndexReader reopen(boolean readonly) throws IOException {
    return super.reopen(readonly);
    }
    }

    You'll notice finalizer calls IndexReader.close(). After users conduct multiple
    searches, the index reader should be closed in time. Therefore, its
    confusing to
    me to see that open handles are still present. Clearly, I am doing something
    wrong, but what?

    Jamie


    On 2010/09/29 8:21 PM, Uwe Schindler wrote:
    The "deleted" files are only freed by OS kernel if no longer an
    IndexReader accesses them. Did you get a new realtime reader after
    merging and*closed* the old one?

    -----
    Uwe Schindler
    H.-H.-Meier-Allee 63, D-28213 Bremen
    http://www.thetaphi.de
    eMail:uwe@thetaphi.de


    ---------------------------------------------------------------------
    To unsubscribe, e-mail: java-user-unsubscribe@lucene.apache.org
    For additional commands, e-mail: java-user-help@lucene.apache.org
    ---------------------------------------------------------------------
    To unsubscribe, e-mail: java-user-unsubscribe@lucene.apache.org
    For additional commands, e-mail: java-user-help@lucene.apache.org

Related Discussions

Discussion Navigation
viewthread | post
Discussion Overview
groupjava-user @
categorieslucene
postedSep 29, '10 at 5:49p
activeNov 10, '10 at 11:18a
posts15
users4
websitelucene.apache.org

People

Translate

site design / logo © 2022 Grokbase