FAQ

svn commit: r884884 - in /lucene/lucy/trunk: core/Lucy/Store/ core/Lucy/Test/Store/ perl/lib/Lucy/ perl/t/core/

Marvin
Nov 27, 2009 at 2:40 pm
Author: marvin
Date: Fri Nov 27 14:40:16 2009
New Revision: 884884

URL: http://svn.apache.org/viewvc?rev=884884&view=rev
Log:
Commit LUCY-75, adding CompoundFileWriter.

Added:
lucene/lucy/trunk/core/Lucy/Store/CompoundFileWriter.bp (with props)
lucene/lucy/trunk/core/Lucy/Store/CompoundFileWriter.c (with props)
lucene/lucy/trunk/core/Lucy/Test/Store/TestCompoundFileWriter.bp (with props)
lucene/lucy/trunk/core/Lucy/Test/Store/TestCompoundFileWriter.c (with props)
lucene/lucy/trunk/perl/t/core/112-cf_writer.t (with props)
Modified:
lucene/lucy/trunk/perl/lib/Lucy/Test.pm

Added: lucene/lucy/trunk/core/Lucy/Store/CompoundFileWriter.bp
URL: http://svn.apache.org/viewvc/lucene/lucy/trunk/core/Lucy/Store/CompoundFileWriter.bp?rev=884884&view=auto
==============================================================================
--- lucene/lucy/trunk/core/Lucy/Store/CompoundFileWriter.bp (added)
+++ lucene/lucy/trunk/core/Lucy/Store/CompoundFileWriter.bp Fri Nov 27 14:40:16 2009
@@ -0,0 +1,56 @@
+parcel Lucy;
+
+/**
+ * Consolidate segment files.
+ *
+ * CompoundFileWriter combines all the data files in a directory into a single
+ * "compound" file named "cf.dat". Metadata describing filename and
+ * filepointer information is stored in a "cfmeta.json" file.
+ *
+ * Nested subdirectories and files ending in ".json" are excluded from
+ * consolidation.
+ *
+ * Any given directory may only be consolidated once.
+ */
+
+class Lucy::Store::CompoundFileWriter cnick CFWriter
+ extends Lucy::Object::Obj {
+
+ /* The directory whose files get consolidated. */
+ Folder *folder;
+
+ /* Value recorded under the "format" key of cfmeta.json. */
+ inert i32_t current_file_format;
+
+ /** Allocate and initialize a new CompoundFileWriter.
+ *
+ * @param folder The directory to consolidate.
+ */
+ inert incremented CompoundFileWriter*
+ new(Folder *folder);
+
+ /** Initialize a CompoundFileWriter, storing a new reference to
+ * <code>folder</code>.
+ *
+ * @param folder The directory to consolidate.
+ */
+ inert CompoundFileWriter*
+ init(CompoundFileWriter *self, Folder *folder);
+
+ /** Perform the consolidation operation, building the cf.dat and
+ * cfmeta.json files.
+ *
+ * The commit point is a rename op, where a temp file gets renamed to the
+ * cfmeta file. After the commit completes, the source files are deleted.
+ *
+ * Throws an error if a cfmeta.json file already exists in the folder.
+ */
+ void
+ Consolidate(CompoundFileWriter *self);
+
+ /** Release the reference to the folder and destroy the object.
+ */
+ public void
+ Destroy(CompoundFileWriter *self);
+}
+
+/* Copyright 2009 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+

Propchange: lucene/lucy/trunk/core/Lucy/Store/CompoundFileWriter.bp
------------------------------------------------------------------------------
svn:eol-style = native

Added: lucene/lucy/trunk/core/Lucy/Store/CompoundFileWriter.c
URL: http://svn.apache.org/viewvc/lucene/lucy/trunk/core/Lucy/Store/CompoundFileWriter.c?rev=884884&view=auto
==============================================================================
--- lucene/lucy/trunk/core/Lucy/Store/CompoundFileWriter.c (added)
+++ lucene/lucy/trunk/core/Lucy/Store/CompoundFileWriter.c Fri Nov 27 14:40:16 2009
@@ -0,0 +1,170 @@
+#define C_LUCY_COMPOUNDFILEWRITER
+#include "Lucy/Util/ToolSet.h"
+
+#include "Lucy/Store/CompoundFileWriter.h"
+#include "Lucy/Store/Folder.h"
+#include "Lucy/Store/InStream.h"
+#include "Lucy/Store/OutStream.h"
+#include "Lucy/Util/Json.h"
+
+/* Format version stored under the "format" key of the cfmeta metadata. */
+i32_t CFWriter_current_file_format = 2;
+
+/* Names of the files this module creates inside the folder: the committed
+ * metadata file, the temp file which gets renamed to it, and the compound
+ * data file. */
+static ZombieCharBuf cfmeta_file = ZCB_LITERAL("cfmeta.json");
+static ZombieCharBuf cfmeta_temp = ZCB_LITERAL("cfmeta.json.temp");
+static ZombieCharBuf cf_file = ZCB_LITERAL("cf.dat");
+
+/* Helper which does the heavy lifting for CFWriter_consolidate. */
+static void
+S_do_consolidate(CompoundFileWriter *self);
+
+/* Clean up files which may be left over from previous merge attempts. */
+static void
+S_clean_up_old_temp_files(CompoundFileWriter *self);
+
+/* Allocate a blank CompoundFileWriter and hand it to CFWriter_init. */
+CompoundFileWriter*
+CFWriter_new(Folder *folder)
+{
+    return CFWriter_init(
+        (CompoundFileWriter*)VTable_Make_Obj(COMPOUNDFILEWRITER), folder);
+}
+
+/* Store a new reference to `folder` in the writer, then return the writer. */
+CompoundFileWriter*
+CFWriter_init(CompoundFileWriter *self, Folder *folder)
+{
+    Folder *retained = (Folder*)INCREF(folder);
+    self->folder = retained;
+    return self;
+}
+
+/* Drop the writer's reference to its folder, then run the parent class's
+ * destructor. */
+void
+CFWriter_destroy(CompoundFileWriter *self)
+{
+    Folder *held = self->folder;
+    DECREF(held);
+    SUPER_DESTROY(self, COMPOUNDFILEWRITER);
+}
+
+/* Consolidate the folder, unless a cfmeta.json file shows that it has been
+ * consolidated already -- in which case an error is thrown. */
+void
+CFWriter_consolidate(CompoundFileWriter *self)
+{
+    if (!Folder_Exists(self->folder, (CharBuf*)&cfmeta_file)) {
+        /* Sweep away detritus first, then build the compound file. */
+        S_clean_up_old_temp_files(self);
+        S_do_consolidate(self);
+    }
+    else {
+        THROW(ERR, "Merge already performed for %o",
+            Folder_Get_Path(self->folder));
+    }
+}
+
+/* Delete cf.dat and cfmeta.json.temp, in that order, when they exist.
+ * Throws if an existing file cannot be deleted. */
+static void
+S_clean_up_old_temp_files(CompoundFileWriter *self)
+{
+    Folder        *folder = self->folder;
+    ZombieCharBuf *leftovers[2];
+    u32_t          tick;
+
+    leftovers[0] = &cf_file;
+    leftovers[1] = &cfmeta_temp;
+
+    for (tick = 0; tick < 2; tick++) {
+        CharBuf *name = (CharBuf*)leftovers[tick];
+        if (Folder_Exists(folder, name) && !Folder_Delete(folder, name)) {
+            THROW(ERR, "Can't delete '%o'", leftovers[tick]);
+        }
+    }
+}
+
+/* Build cf.dat and cfmeta.json from the current contents of the folder.
+ *
+ * Every listed file whose name does not end in ".json" is appended to
+ * cf.dat, with its offset and length recorded in the metadata.  The
+ * metadata is written to a temp file which then gets renamed to cfmeta.json
+ * -- the rename is the commit point.  Once committed, the absorbed source
+ * files are deleted.
+ *
+ * Errors are thrown if a stream cannot be opened, if the rename fails, or if
+ * a source file cannot be deleted.
+ */
+static void
+S_do_consolidate(CompoundFileWriter *self)
+{
+    Folder    *folder    = self->folder;
+    Hash      *metadata  = Hash_new(0);
+    Hash      *sub_files = Hash_new(0);
+    VArray    *files     = Folder_List(folder, NULL);
+    OutStream *outstream = Folder_Open_Out(folder, (CharBuf*)&cf_file);
+    u32_t      i, max;
+    bool_t     rename_success;
+
+    if (!outstream) { RETHROW(INCREF(Err_get_error())); }
+
+    /* Start metadata.  The metadata hash holds its own reference to
+     * sub_files; the local reference is released at the very end. */
+    Hash_Store_Str(metadata, "files", 5, INCREF(sub_files));
+    Hash_Store_Str(metadata, "format", 6,
+        (Obj*)CB_newf("%i32", CFWriter_current_file_format) );
+
+    VA_Sort(files, NULL, NULL);
+    for (i = 0, max = VA_Get_Size(files); i < max; i++) {
+        CharBuf *infilename = (CharBuf*)VA_Fetch(files, i);
+
+        if (!CB_Ends_With_Str(infilename, ".json", 5)) {
+            InStream *instream = Folder_Open_In(folder, infilename);
+            Hash     *file_data;
+            i64_t     offset, len;
+
+            if (!instream) { RETHROW(INCREF(Err_get_error())); }
+
+            /* Allocate only once the open has succeeded. */
+            file_data = Hash_new(2);
+
+            /* Absorb the file. */
+            offset = OutStream_Tell(outstream);
+            OutStream_Absorb(outstream, instream);
+            len = OutStream_Tell(outstream) - offset;
+
+            /* Record offset and length. */
+            Hash_Store_Str(file_data, "offset", 6,
+                (Obj*)CB_newf("%i64", offset) );
+            Hash_Store_Str(file_data, "length", 6,
+                (Obj*)CB_newf("%i64", len) );
+            Hash_Store(sub_files, (Obj*)infilename, (Obj*)file_data);
+
+            /* Add filler NULL bytes so that every sub-file begins on a file
+             * position multiple of 8. */
+            {
+                i64_t filler_bytes = (8 - (len % 8)) % 8;
+                while (filler_bytes--) { OutStream_Write_U8(outstream, 0); }
+            }
+
+            InStream_Close(instream);
+            DECREF(instream);
+        }
+    }
+
+    /* Finish cf.dat BEFORE the commit point, so that cfmeta.json can never
+     * be committed while the data stream is still open. */
+    OutStream_Close(outstream);
+    DECREF(outstream);
+
+    /* Write metadata to the temp file, then commit by renaming it to the
+     * cfmeta file. */
+    Json_spew_json((Obj*)metadata, (Folder*)self->folder,
+        (CharBuf*)&cfmeta_temp);
+    rename_success = Folder_Rename(self->folder, (CharBuf*)&cfmeta_temp,
+        (CharBuf*)&cfmeta_file);
+    if (!rename_success) { RETHROW(INCREF(Err_get_error())); }
+
+    /* Clean up. */
+    DECREF(files);
+    DECREF(metadata);
+    {
+        /* Delete the source files which were absorbed into cf.dat. */
+        CharBuf *merged_file;
+        Obj     *ignore;
+        Hash_Iter_Init(sub_files);
+        while (Hash_Iter_Next(sub_files, (Obj**)&merged_file, &ignore)) {
+            if (!Folder_Delete(folder, merged_file)) {
+                /* Build the message before releasing sub_files. */
+                CharBuf *mess = MAKE_MESS("Can't delete '%o'", merged_file);
+                DECREF(sub_files);
+                Err_throw_mess(ERR, mess);
+            }
+        }
+    }
+    DECREF(sub_files);
+}
+
+/* Copyright 2009 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+

Propchange: lucene/lucy/trunk/core/Lucy/Store/CompoundFileWriter.c
------------------------------------------------------------------------------
svn:eol-style = native

Added: lucene/lucy/trunk/core/Lucy/Test/Store/TestCompoundFileWriter.bp
URL: http://svn.apache.org/viewvc/lucene/lucy/trunk/core/Lucy/Test/Store/TestCompoundFileWriter.bp?rev=884884&view=auto
==============================================================================
--- lucene/lucy/trunk/core/Lucy/Test/Store/TestCompoundFileWriter.bp (added)
+++ lucene/lucy/trunk/core/Lucy/Test/Store/TestCompoundFileWriter.bp Fri Nov 27 14:40:16 2009
@@ -0,0 +1,23 @@
+parcel Lucy;
+
+/* Test harness entry point for Lucy::Store::CompoundFileWriter. */
+inert class Lucy::Test::Store::TestCompoundFileWriter
+ cnick TestCFWriter {
+ /* Run the whole CompoundFileWriter test batch. */
+ inert void
+ run_tests();
+}
+
+/* Copyright 2009 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+

Propchange: lucene/lucy/trunk/core/Lucy/Test/Store/TestCompoundFileWriter.bp
------------------------------------------------------------------------------
svn:eol-style = native

Added: lucene/lucy/trunk/core/Lucy/Test/Store/TestCompoundFileWriter.c
URL: http://svn.apache.org/viewvc/lucene/lucy/trunk/core/Lucy/Test/Store/TestCompoundFileWriter.c?rev=884884&view=auto
==============================================================================
--- lucene/lucy/trunk/core/Lucy/Test/Store/TestCompoundFileWriter.c (added)
+++ lucene/lucy/trunk/core/Lucy/Test/Store/TestCompoundFileWriter.c Fri Nov 27 14:40:16 2009
@@ -0,0 +1,135 @@
+#define C_LUCY_CHARBUF
+#include "Lucy/Util/ToolSet.h"
+
+#include "Lucy/Test.h"
+#include "Lucy/Test/Store/TestCompoundFileWriter.h"
+#include "Lucy/Store/CompoundFileWriter.h"
+#include "Lucy/Store/FileHandle.h"
+#include "Lucy/Store/OutStream.h"
+#include "Lucy/Store/RAMFolder.h"
+#include "Lucy/Util/Json.h"
+
+/* File and folder names used by the tests below.  The first three mirror
+ * the names CompoundFileWriter itself uses; "foo" and "bar" are the sample
+ * files and "seg_1" is the path given to the sample RAMFolder. */
+static CharBuf cfmeta_file = ZCB_LITERAL("cfmeta.json");
+static CharBuf cfmeta_temp = ZCB_LITERAL("cfmeta.json.temp");
+static CharBuf cf_file = ZCB_LITERAL("cf.dat");
+static CharBuf foo = ZCB_LITERAL("foo");
+static CharBuf bar = ZCB_LITERAL("bar");
+static CharBuf seg_1 = ZCB_LITERAL("seg_1");
+
+/* Create a RAMFolder with the path "seg_1" containing two closed files:
+ * "foo" (3 bytes, "foo") and "bar" (3 bytes, "bar").
+ *
+ * The caller owns the returned folder and must DECREF it.
+ */
+static Folder*
+S_folder_with_contents(void)
+{
+    RAMFolder *folder  = RAMFolder_new(&seg_1);
+    OutStream *foo_out = RAMFolder_Open_Out(folder, &foo);
+    OutStream *bar_out = RAMFolder_Open_Out(folder, &bar);
+
+    OutStream_Write_Bytes(foo_out, "foo", 3);
+    OutStream_Write_Bytes(bar_out, "bar", 3);
+    OutStream_Close(foo_out);
+    OutStream_Close(bar_out);
+    DECREF(foo_out);
+    DECREF(bar_out);
+
+    return (Folder*)folder;
+}
+
+/* Verify that Consolidate succeeds when leftover cf.dat and
+ * cfmeta.json.temp files are present, that it writes cf.dat and
+ * cfmeta.json, and that it removes both the source file and the temp file.
+ * Emits five test results. */
+static void
+test_Consolidate(TestBatch *batch)
+{
+    Folder             *folder = S_folder_with_contents();
+    CompoundFileWriter *cf_writer;
+    CharBuf            *detritus[2];
+    u32_t               tick;
+
+    /* Fake up detritus from failed consolidation. */
+    detritus[0] = &cf_file;
+    detritus[1] = &cfmeta_temp;
+    for (tick = 0; tick < 2; tick++) {
+        FileHandle *stale = Folder_Open_FileHandle(folder, detritus[tick],
+            FH_CREATE | FH_WRITE_ONLY | FH_EXCLUSIVE );
+        DECREF(stale);
+    }
+
+    cf_writer = CFWriter_new(folder);
+    CFWriter_Consolidate(cf_writer);
+    PASS(batch, "Consolidate completes despite leftover files");
+    DECREF(cf_writer);
+
+    ASSERT_TRUE(batch, Folder_Exists(folder, &cf_file),
+        "cf.dat file written");
+    ASSERT_TRUE(batch, Folder_Exists(folder, &cfmeta_file),
+        "cfmeta.json file written");
+    ASSERT_FALSE(batch, Folder_Exists(folder, &foo),
+        "original file zapped");
+    ASSERT_FALSE(batch, Folder_Exists(folder, &cfmeta_temp),
+        "detritus from failed consolidation zapped");
+
+    DECREF(folder);
+}
+
+/* Consolidate a sample folder, read back cfmeta.json, and verify that the
+ * "offset" recorded for every sub-file is a multiple of 8.  Emits two test
+ * results: one for the file count, one for the offsets. */
+static void
+test_offsets(TestBatch *batch)
+{
+    Folder *folder = S_folder_with_contents();
+    CompoundFileWriter *cf_writer = CFWriter_new(folder);
+    Hash *cf_metadata;
+    Hash *files;
+
+    CFWriter_Consolidate(cf_writer);
+
+    cf_metadata = (Hash*)CERTIFY(
+        Json_slurp_json(folder, &cfmeta_file), HASH);
+    /* `files` is fetched from cf_metadata and is not released separately. */
+    files = (Hash*)CERTIFY(
+        Hash_Fetch_Str(cf_metadata, "files", 5), HASH);
+    {
+        CharBuf *file;
+        Obj     *filestats;
+        bool_t   offsets_ok = true;
+
+        ASSERT_TRUE(batch, Hash_Get_Size(files) > 0, "Multiple files");
+
+        Hash_Iter_Init(files);
+        while (Hash_Iter_Next(files, (Obj**)&file, &filestats)) {
+            Hash *stats = (Hash*)CERTIFY(filestats, HASH);
+            Obj *offset = CERTIFY(Hash_Fetch_Str(stats, "offset", 6), OBJ);
+            if (Obj_To_I64(offset) % 8 != 0) {
+                offsets_ok = false;
+                /* Exactly one %o directive per argument supplied. */
+                FAIL(batch, "Offset %o for %o not a multiple of 8",
+                    offset, file);
+                /* Stop at the first failure so that only one result is
+                 * reported for the offsets check. */
+                break;
+            }
+        }
+        if (offsets_ok) {
+            PASS(batch, "All offsets are multiples of 8");
+        }
+    }
+
+    DECREF(cf_metadata);
+    DECREF(cf_writer);
+    DECREF(folder);
+}
+
+/* Run every CompoundFileWriter test in a single batch.
+ *
+ * The plan of 7 is the sum of the results each test emits: 5 from
+ * test_Consolidate (one PASS plus four assertions) and 2 from test_offsets
+ * (one assertion plus a single PASS or FAIL).
+ */
+void
+TestCFWriter_run_tests()
+{
+ TestBatch *batch = Test_new_batch("TestCompoundFileWriter", 7, NULL);
+
+ PLAN(batch);
+ test_Consolidate(batch);
+ test_offsets(batch);
+
+ batch->destroy(batch);
+}
+
+/* Copyright 2009 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+

Propchange: lucene/lucy/trunk/core/Lucy/Test/Store/TestCompoundFileWriter.c
------------------------------------------------------------------------------
svn:eol-style = native

Modified: lucene/lucy/trunk/perl/lib/Lucy/Test.pm
URL: http://svn.apache.org/viewvc/lucene/lucy/trunk/perl/lib/Lucy/Test.pm?rev=884884&r1=884883&r2=884884&view=diff
==============================================================================
--- lucene/lucy/trunk/perl/lib/Lucy/Test.pm (original)
+++ lucene/lucy/trunk/perl/lib/Lucy/Test.pm Fri Nov 27 14:40:16 2009
@@ -37,6 +37,9 @@
lucy_TestVArray_run_tests();
}
/* Lucy::Store */
+ else if (strEQ(package, "TestCompoundFileWriter")) {
+ lucy_TestCFWriter_run_tests();
+ }
else if (strEQ(package, "TestFileHandle")) {
lucy_TestFH_run_tests();
}

Added: lucene/lucy/trunk/perl/t/core/112-cf_writer.t
URL: http://svn.apache.org/viewvc/lucene/lucy/trunk/perl/t/core/112-cf_writer.t?rev=884884&view=auto
==============================================================================
--- lucene/lucy/trunk/perl/t/core/112-cf_writer.t (added)
+++ lucene/lucy/trunk/perl/t/core/112-cf_writer.t Fri Nov 27 14:40:16 2009
@@ -0,0 +1,6 @@
+# Run the "TestCompoundFileWriter" test batch via Lucy::Test::run_tests.
+use strict;
+use warnings;
+
+use Lucy::Test;
+Lucy::Test::run_tests("TestCompoundFileWriter");
+

Propchange: lucene/lucy/trunk/perl/t/core/112-cf_writer.t
------------------------------------------------------------------------------
svn:eol-style = native
reply

Search Discussions

Related Discussions

Discussion Navigation
viewthread | post

1 user in discussion

Marvin: 1 post