FAQ
Added: hive/trunk/ql/src/test/results/clientpositive/index_auto_self_join.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/index_auto_self_join.q.out?rev=1150962&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/index_auto_self_join.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/index_auto_self_join.q.out Tue Jul 26 00:12:38 2011
@@ -0,0 +1,478 @@
+PREHOOK: query: -- try the query without indexing, with manual indexing, and with automatic indexing
+
+-- without indexing
+EXPLAIN SELECT a.key, b.key FROM src a JOIN src b ON (a.value = b.value) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key
+PREHOOK: type: QUERY
+POSTHOOK: query: -- try the query without indexing, with manual indexing, and with automatic indexing
+
+-- without indexing
+EXPLAIN SELECT a.key, b.key FROM src a JOIN src b ON (a.value = b.value) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src) a) (TOK_TABREF (TOK_TABNAME src) b) (= (. (TOK_TABLE_OR_COL a) value) (. (TOK_TABLE_OR_COL b) value)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) key))) (TOK_WHERE (AND (AND (AND (> (. (TOK_TABLE_OR_COL a) key) 80) (< (. (TOK_TABLE_OR_COL a) key) 100)) (> (. (TOK_TABLE_OR_COL b) key) 70)) (< (. (TOK_TABLE_OR_COL b) key) 90))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL a) key)))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ a
+ TableScan
+ alias: a
+ Filter Operator
+ predicate:
+ expr: ((key > 80) and (key < 100))
+ type: boolean
+ Reduce Output Operator
+ key expressions:
+ expr: value
+ type: string
+ sort order: +
+ Map-reduce partition columns:
+ expr: value
+ type: string
+ tag: 0
+ value expressions:
+ expr: key
+ type: string
+ b
+ TableScan
+ alias: b
+ Filter Operator
+ predicate:
+ expr: ((key > 70) and (key < 90))
+ type: boolean
+ Reduce Output Operator
+ key expressions:
+ expr: value
+ type: string
+ sort order: +
+ Map-reduce partition columns:
+ expr: value
+ type: string
+ tag: 1
+ value expressions:
+ expr: key
+ type: string
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {VALUE._col0}
+ 1 {VALUE._col0}
+ handleSkewJoin: false
+ outputColumnNames: _col0, _col4
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col4
+ type: string
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+
+ Stage: Stage-2
+ Map Reduce
+ Alias -> Map Operator Tree:
+ file:/var/folders/5V/5V4Zq77qGD4aSK9m8V3frVsFdRU/-Tmp-/salbiz/hive_2011-07-25_11-52-09_745_6885930417140409942/-mr-10002
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: string
+ sort order: +
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ Reduce Operator Tree:
+ Extract
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+
+
+PREHOOK: query: SELECT a.key, b.key FROM src a JOIN src b ON (a.value = b.value) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: file:/var/folders/5V/5V4Zq77qGD4aSK9m8V3frVsFdRU/-Tmp-/salbiz/hive_2011-07-25_11-52-09_850_4835801559936339690/-mr-10000
+POSTHOOK: query: SELECT a.key, b.key FROM src a JOIN src b ON (a.value = b.value) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: file:/var/folders/5V/5V4Zq77qGD4aSK9m8V3frVsFdRU/-Tmp-/salbiz/hive_2011-07-25_11-52-09_850_4835801559936339690/-mr-10000
+82 82
+83 83
+83 83
+83 83
+83 83
+84 84
+84 84
+84 84
+84 84
+85 85
+86 86
+87 87
+PREHOOK: query: CREATE INDEX src_index ON TABLE src(key) as 'BITMAP' WITH DEFERRED REBUILD
+PREHOOK: type: CREATEINDEX
+POSTHOOK: query: CREATE INDEX src_index ON TABLE src(key) as 'BITMAP' WITH DEFERRED REBUILD
+POSTHOOK: type: CREATEINDEX
+PREHOOK: query: ALTER INDEX src_index ON src REBUILD
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@default__src_src_index__
+POSTHOOK: query: ALTER INDEX src_index ON src REBUILD
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@default__src_src_index__
+POSTHOOK: Lineage: default__src_src_index__._bitmaps EXPRESSION [(src)src.FieldSchema(name:ROW__OFFSET__INSIDE__BLOCK, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__._offset SIMPLE [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+PREHOOK: query: -- automatic indexing
+EXPLAIN SELECT a.key, b.key FROM src a JOIN src b ON (a.value = b.value) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key
+PREHOOK: type: QUERY
+POSTHOOK: query: -- automatic indexing
+EXPLAIN SELECT a.key, b.key FROM src a JOIN src b ON (a.value = b.value) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: default__src_src_index__._bitmaps EXPRESSION [(src)src.FieldSchema(name:ROW__OFFSET__INSIDE__BLOCK, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__._offset SIMPLE [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src) a) (TOK_TABREF (TOK_TABNAME src) b) (= (. (TOK_TABLE_OR_COL a) value) (. (TOK_TABLE_OR_COL b) value)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) key))) (TOK_WHERE (AND (AND (AND (> (. (TOK_TABLE_OR_COL a) key) 80) (< (. (TOK_TABLE_OR_COL a) key) 100)) (> (. (TOK_TABLE_OR_COL b) key) 70)) (< (. (TOK_TABLE_OR_COL b) key) 90))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL a) key)))))
+
+STAGE DEPENDENCIES:
+ Stage-5 is a root stage
+ Stage-4 depends on stages: Stage-5
+ Stage-1 depends on stages: Stage-4, Stage-6
+ Stage-2 depends on stages: Stage-1
+ Stage-7 is a root stage
+ Stage-6 depends on stages: Stage-7
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-5
+ Map Reduce
+ Alias -> Map Operator Tree:
+ tmp_index:ind0:default__src_src_index__
+ TableScan
+ alias: default__src_src_index__
+ filterExpr:
+ expr: (((key > 70) and (key < 90)) and (not EWAH_BITMAP_EMPTY(_bitmaps)))
+ type: boolean
+ Filter Operator
+ predicate:
+ expr: (((key > 70) and (key < 90)) and (not EWAH_BITMAP_EMPTY(_bitmaps)))
+ type: boolean
+ Select Operator
+ expressions:
+ expr: _bucketname
+ type: string
+ expr: _offset
+ type: bigint
+ expr: _bitmaps
+ type: array<bigint>
+ outputColumnNames: _col1, _col2, _col3
+ Select Operator
+ expressions:
+ expr: _col1
+ type: string
+ expr: _col2
+ type: bigint
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: bigint
+ outputColumnNames: _col0, _col1
+ Group By Operator
+ aggregations:
+ expr: collect_set(_col1)
+ bucketGroup: false
+ keys:
+ expr: _col0
+ type: string
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: string
+ sort order: +
+ Map-reduce partition columns:
+ expr: _col0
+ type: string
+ tag: -1
+ value expressions:
+ expr: _col1
+ type: array<bigint>
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations:
+ expr: collect_set(VALUE._col0)
+ bucketGroup: false
+ keys:
+ expr: KEY._col0
+ type: string
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: array<bigint>
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+ Stage: Stage-4
+ Move Operator
+ files:
+ hdfs directory: true
+ destination: file:/var/folders/5V/5V4Zq77qGD4aSK9m8V3frVsFdRU/-Tmp-/salbiz/hive_2011-07-25_11-52-30_115_3399336884670906022/-mr-10003
+
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ a
+ TableScan
+ alias: a
+ filterExpr:
+ expr: ((key > 80) and (key < 100))
+ type: boolean
+ Filter Operator
+ predicate:
+ expr: ((key > 80) and (key < 100))
+ type: boolean
+ Reduce Output Operator
+ key expressions:
+ expr: value
+ type: string
+ sort order: +
+ Map-reduce partition columns:
+ expr: value
+ type: string
+ tag: 0
+ value expressions:
+ expr: key
+ type: string
+ b
+ TableScan
+ alias: b
+ filterExpr:
+ expr: ((key > 70) and (key < 90))
+ type: boolean
+ Filter Operator
+ predicate:
+ expr: ((key > 70) and (key < 90))
+ type: boolean
+ Reduce Output Operator
+ key expressions:
+ expr: value
+ type: string
+ sort order: +
+ Map-reduce partition columns:
+ expr: value
+ type: string
+ tag: 1
+ value expressions:
+ expr: key
+ type: string
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {VALUE._col0}
+ 1 {VALUE._col0}
+ handleSkewJoin: false
+ outputColumnNames: _col0, _col4
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col4
+ type: string
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+
+ Stage: Stage-2
+ Map Reduce
+ Alias -> Map Operator Tree:
+ file:/var/folders/5V/5V4Zq77qGD4aSK9m8V3frVsFdRU/-Tmp-/salbiz/hive_2011-07-25_11-52-30_115_3399336884670906022/-mr-10002
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: string
+ sort order: +
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ Reduce Operator Tree:
+ Extract
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+ Stage: Stage-7
+ Map Reduce
+ Alias -> Map Operator Tree:
+ tmp_index:ind0:default__src_src_index__
+ TableScan
+ alias: default__src_src_index__
+ filterExpr:
+ expr: (((key > 80) and (key < 100)) and (not EWAH_BITMAP_EMPTY(_bitmaps)))
+ type: boolean
+ Filter Operator
+ predicate:
+ expr: (((key > 80) and (key < 100)) and (not EWAH_BITMAP_EMPTY(_bitmaps)))
+ type: boolean
+ Select Operator
+ expressions:
+ expr: _bucketname
+ type: string
+ expr: _offset
+ type: bigint
+ expr: _bitmaps
+ type: array<bigint>
+ outputColumnNames: _col1, _col2, _col3
+ Select Operator
+ expressions:
+ expr: _col1
+ type: string
+ expr: _col2
+ type: bigint
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: bigint
+ outputColumnNames: _col0, _col1
+ Group By Operator
+ aggregations:
+ expr: collect_set(_col1)
+ bucketGroup: false
+ keys:
+ expr: _col0
+ type: string
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: string
+ sort order: +
+ Map-reduce partition columns:
+ expr: _col0
+ type: string
+ tag: -1
+ value expressions:
+ expr: _col1
+ type: array<bigint>
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations:
+ expr: collect_set(VALUE._col0)
+ bucketGroup: false
+ keys:
+ expr: KEY._col0
+ type: string
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: array<bigint>
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+ Stage: Stage-6
+ Move Operator
+ files:
+ hdfs directory: true
+ destination: file:/var/folders/5V/5V4Zq77qGD4aSK9m8V3frVsFdRU/-Tmp-/salbiz/hive_2011-07-25_11-52-30_115_3399336884670906022/-mr-10004
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+
+
+PREHOOK: query: SELECT a.key, b.key FROM src a JOIN src b ON (a.value = b.value) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@default__src_src_index__
+PREHOOK: Input: default@src
+PREHOOK: Output: file:/var/folders/5V/5V4Zq77qGD4aSK9m8V3frVsFdRU/-Tmp-/salbiz/hive_2011-07-25_11-52-30_457_5636524164788077312/-mr-10000
+POSTHOOK: query: SELECT a.key, b.key FROM src a JOIN src b ON (a.value = b.value) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@default__src_src_index__
+POSTHOOK: Input: default@src
+POSTHOOK: Output: file:/var/folders/5V/5V4Zq77qGD4aSK9m8V3frVsFdRU/-Tmp-/salbiz/hive_2011-07-25_11-52-30_457_5636524164788077312/-mr-10000
+POSTHOOK: Lineage: default__src_src_index__._bitmaps EXPRESSION [(src)src.FieldSchema(name:ROW__OFFSET__INSIDE__BLOCK, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__._offset SIMPLE [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+82 82
+83 83
+83 83
+83 83
+83 83
+84 84
+84 84
+84 84
+84 84
+85 85
+86 86
+87 87
+PREHOOK: query: DROP INDEX src_index on src
+PREHOOK: type: DROPINDEX
+POSTHOOK: query: DROP INDEX src_index on src
+POSTHOOK: type: DROPINDEX
+POSTHOOK: Lineage: default__src_src_index__._bitmaps EXPRESSION [(src)src.FieldSchema(name:ROW__OFFSET__INSIDE__BLOCK, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__._offset SIMPLE [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]

Modified: hive/trunk/ql/src/test/results/clientpositive/index_bitmap_auto_partitioned.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/index_bitmap_auto_partitioned.q.out?rev=1150962&r1=1150961&r2=1150962&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/index_bitmap_auto_partitioned.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/index_bitmap_auto_partitioned.q.out Tue Jul 26 00:12:38 2011
@@ -131,7 +131,7 @@ STAGE PLANS:
Move Operator
files:
hdfs directory: true
- destination: file:/var/folders/5V/5V4Zq77qGD4aSK9m8V3frVsFdRU/-Tmp-/salbiz/hive_2011-06-15_16-26-39_938_2166512808072912128/-mr-10002
+ destination: file:/var/folders/5V/5V4Zq77qGD4aSK9m8V3frVsFdRU/-Tmp-/salbiz/hive_2011-07-23_12-44-48_432_5538477370809694831/-mr-10002

Stage: Stage-1
Map Reduce
@@ -184,14 +184,14 @@ PREHOOK: Input: default@default__srcpart
PREHOOK: Input: default@default__srcpart_src_part_index__@ds=2008-04-09/hr=12
PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
-PREHOOK: Output: file:/var/folders/5V/5V4Zq77qGD4aSK9m8V3frVsFdRU/-Tmp-/salbiz/hive_2011-06-15_16-26-40_311_5688266146614431290/-mr-10000
+PREHOOK: Output: file:/var/folders/5V/5V4Zq77qGD4aSK9m8V3frVsFdRU/-Tmp-/salbiz/hive_2011-07-23_12-44-48_843_98274460719858219/-mr-10000
POSTHOOK: query: SELECT key, value FROM srcpart WHERE key=86 AND ds='2008-04-09' ORDER BY key
POSTHOOK: type: QUERY
POSTHOOK: Input: default@default__srcpart_src_part_index__@ds=2008-04-09/hr=11
POSTHOOK: Input: default@default__srcpart_src_part_index__@ds=2008-04-09/hr=12
POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
-POSTHOOK: Output: file:/var/folders/5V/5V4Zq77qGD4aSK9m8V3frVsFdRU/-Tmp-/salbiz/hive_2011-06-15_16-26-40_311_5688266146614431290/-mr-10000
+POSTHOOK: Output: file:/var/folders/5V/5V4Zq77qGD4aSK9m8V3frVsFdRU/-Tmp-/salbiz/hive_2011-07-23_12-44-48_843_98274460719858219/-mr-10000
POSTHOOK: Lineage: default__srcpart_src_part_index__ PARTITION(ds=2008-04-09,hr=12)._bitmaps EXPRESSION [(srcpart)srcpart.FieldSchema(name:ROW__OFFSET__INSIDE__BLOCK, type:bigint, comment:), ]
POSTHOOK: Lineage: default__srcpart_src_part_index__ PARTITION(ds=2008-04-09,hr=12)._bucketname SIMPLE [(srcpart)srcpart.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
POSTHOOK: Lineage: default__srcpart_src_part_index__ PARTITION(ds=2008-04-09,hr=12)._offset SIMPLE [(srcpart)srcpart.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]

Search Discussions

  • Jvs at Jul 26, 2011 at 12:13 am
    Author: jvs
    Date: Tue Jul 26 00:12:38 2011
    New Revision: 1150962

    URL: http://svn.apache.org/viewvc?rev=1150962&view=rev
    Log:
    HIVE-2128. Automatic Indexing with multiple tables.
    (Syed Albiz via jvs)


    Added:
    hive/trunk/ql/src/test/queries/clientpositive/index_auto_mult_tables.q
    hive/trunk/ql/src/test/queries/clientpositive/index_auto_mult_tables_compact.q
    hive/trunk/ql/src/test/queries/clientpositive/index_auto_self_join.q
    hive/trunk/ql/src/test/results/clientpositive/index_auto_mult_tables.q.out
    hive/trunk/ql/src/test/results/clientpositive/index_auto_mult_tables_compact.q.out
    hive/trunk/ql/src/test/results/clientpositive/index_auto_self_join.q.out
    Modified:
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexResult.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexedInputFormat.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/index/bitmap/BitmapIndexHandler.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/index/compact/CompactIndexHandler.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/index/IndexWhereProcessor.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/index/IndexWhereTaskDispatcher.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/MapredWork.java
    hive/trunk/ql/src/test/results/clientpositive/index_bitmap_auto_partitioned.q.out

    Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexResult.java
    URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexResult.java?rev=1150962&r1=1150961&r2=1150962&view=diff
    ==============================================================================
    --- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexResult.java (original)
    +++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexResult.java Tue Jul 26 00:12:38 2011
    @@ -82,7 +82,7 @@ public class HiveIndexResult {
    BytesRefWritable[] bytesRef = new BytesRefWritable[2];
    boolean ignoreHdfsLoc = false;

    - public HiveIndexResult(String indexFile, JobConf conf) throws IOException,
    + public HiveIndexResult(List<String> indexFiles, JobConf conf) throws IOException,
    HiveException {
    job = conf;

    @@ -90,18 +90,20 @@ public class HiveIndexResult {
    bytesRef[1] = new BytesRefWritable();
    ignoreHdfsLoc = HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_INDEX_IGNORE_HDFS_LOC);

    - if (indexFile != null) {
    - Path indexFilePath = new Path(indexFile);
    + if (indexFiles != null && indexFiles.size() > 0) {
    FileSystem fs = FileSystem.get(conf);
    - FileStatus indexStat = fs.getFileStatus(indexFilePath);
    List<Path> paths = new ArrayList<Path>();
    - if (indexStat.isDir()) {
    - FileStatus[] fss = fs.listStatus(indexFilePath);
    - for (FileStatus f : fss) {
    - paths.add(f.getPath());
    + for (String indexFile : indexFiles) {
    + Path indexFilePath = new Path(indexFile);
    + FileStatus indexStat = fs.getFileStatus(indexFilePath);
    + if (indexStat.isDir()) {
    + FileStatus[] fss = fs.listStatus(indexFilePath);
    + for (FileStatus f : fss) {
    + paths.add(f.getPath());
    + }
    + } else {
    + paths.add(indexFilePath);
    }
    - } else {
    - paths.add(indexFilePath);
    }

    long maxEntriesToLoad = HiveConf.getLongVar(conf, HiveConf.ConfVars.HIVE_INDEX_COMPACT_QUERY_MAX_ENTRIES);

    Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexedInputFormat.java
    URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexedInputFormat.java?rev=1150962&r1=1150961&r2=1150962&view=diff
    ==============================================================================
    --- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexedInputFormat.java (original)
    +++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexedInputFormat.java Tue Jul 26 00:12:38 2011
    @@ -20,12 +20,18 @@ package org.apache.hadoop.hive.ql.index;

    import java.io.IOException;
    import java.util.ArrayList;
    +import java.util.List;
    import java.util.Iterator;
    import java.util.Set;
    +import java.util.Map;
    +import java.util.Arrays;
    +import java.util.HashMap;

    import org.apache.commons.logging.Log;
    import org.apache.commons.logging.LogFactory;
    import org.apache.hadoop.fs.Path;
    +import org.apache.hadoop.fs.FileStatus;
    +import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.hive.conf.HiveConf;
    import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
    import org.apache.hadoop.hive.ql.exec.Utilities;
    @@ -91,15 +97,27 @@ public class HiveIndexedInputFormat exte
    return result.toArray(new HiveInputSplit[result.size()]);
    }

    + public static List<String> getIndexFiles(String indexFileStr) {
    + // tokenize and store string of form (path,)+
    + if (indexFileStr == null) {
    + return null;
    + }
    + String[] chunks = indexFileStr.split(",");
    + return Arrays.asList(chunks);
    + }
    +
    @Override
    public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
    String indexFileStr = job.get(indexFile);
    l4j.info("index_file is " + indexFileStr);
    + List<String> indexFiles = getIndexFiles(indexFileStr);

    HiveIndexResult hiveIndexResult = null;
    - if (indexFileStr != null) {
    + if (indexFiles != null) {
    + boolean first = true;
    + StringBuilder newInputPaths = new StringBuilder();
    try {
    - hiveIndexResult = new HiveIndexResult(indexFileStr, job);
    + hiveIndexResult = new HiveIndexResult(indexFiles, job);
    } catch (HiveException e) {
    l4j.error("Unable to read index..");
    throw new IOException(e);
    @@ -107,8 +125,6 @@ public class HiveIndexedInputFormat exte

    Set<String> inputFiles = hiveIndexResult.buckets.keySet();
    Iterator<String> iter = inputFiles.iterator();
    - boolean first = true;
    - StringBuilder newInputPaths = new StringBuilder();
    while(iter.hasNext()) {
    String path = iter.next();
    if (path.trim().equalsIgnoreCase("")) {
    @@ -121,7 +137,6 @@ public class HiveIndexedInputFormat exte
    }
    newInputPaths.append(path);
    }
    -
    FileInputFormat.setInputPaths(job, newInputPaths.toString());
    } else {
    return super.getSplits(job, numSplits);

    Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/index/bitmap/BitmapIndexHandler.java
    URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/index/bitmap/BitmapIndexHandler.java?rev=1150962&r1=1150961&r2=1150962&view=diff
    ==============================================================================
    --- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/index/bitmap/BitmapIndexHandler.java (original)
    +++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/index/bitmap/BitmapIndexHandler.java Tue Jul 26 00:12:38 2011
    @@ -81,14 +81,6 @@ public class BitmapIndexHandler extends
    return; // abort if we couldn't pull out anything from the predicate
    }

    - // Build reentrant QL for index query
    - StringBuilder qlCommand = new StringBuilder("INSERT OVERWRITE DIRECTORY ");
    -
    - String tmpFile = pctx.getContext().getMRTmpFileURI();
    - qlCommand.append( "\"" + tmpFile + "\" "); // QL includes " around file name
    - qlCommand.append("SELECT bucketname AS `_bucketname` , COLLECT_SET(offset) AS `_offsets` FROM ");
    - qlCommand.append("(SELECT `_bucketname` AS bucketname , `_offset` AS offset FROM ");
    -
    List<BitmapInnerQuery> iqs = new ArrayList<BitmapInnerQuery>(indexes.size());
    int i = 0;
    for (Index index : indexes) {
    @@ -100,6 +92,17 @@ public class BitmapIndexHandler extends
    "ind" + i++));
    }
    }
    + // setup TableScanOperator to change input format for original query
    + queryContext.setIndexInputFormat(HiveIndexedInputFormat.class.getName());
    +
    + // Build reentrant QL for index query
    + StringBuilder qlCommand = new StringBuilder("INSERT OVERWRITE DIRECTORY ");
    +
    + String tmpFile = pctx.getContext().getMRTmpFileURI();
    + qlCommand.append( "\"" + tmpFile + "\" "); // QL includes " around file name
    + qlCommand.append("SELECT bucketname AS `_bucketname` , COLLECT_SET(offset) AS `_offsets` FROM ");
    + qlCommand.append("(SELECT `_bucketname` AS bucketname , `_offset` AS offset FROM ");
    +

    BitmapQuery head = iqs.get(0);
    for ( i = 1; i < iqs.size(); i++) {
    @@ -113,10 +116,7 @@ public class BitmapIndexHandler extends
    Driver driver = new Driver(pctx.getConf());
    driver.compile(qlCommand.toString(), false);

    - // setup TableScanOperator to change input format for original query
    - queryContext.setIndexInputFormat(HiveIndexedInputFormat.class.getName());
    queryContext.setIndexIntermediateFile(tmpFile);
    -
    queryContext.addAdditionalSemanticInputs(driver.getPlan().getInputs());
    queryContext.setQueryTasks(driver.getPlan().getRootTasks());
    }

    Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/index/compact/CompactIndexHandler.java
    URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/index/compact/CompactIndexHandler.java?rev=1150962&r1=1150961&r2=1150962&view=diff
    ==============================================================================
    --- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/index/compact/CompactIndexHandler.java (original)
    +++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/index/compact/CompactIndexHandler.java Tue Jul 26 00:12:38 2011
    @@ -161,11 +161,14 @@ public class CompactIndexHandler extends

    // pass residual predicate back out for further processing
    queryContext.setResidualPredicate(decomposedPredicate.residualPredicate);
    + // setup TableScanOperator to change input format for original query
    + queryContext.setIndexInputFormat(HiveCompactIndexInputFormat.class.getName());

    // Build reentrant QL for index query
    StringBuilder qlCommand = new StringBuilder("INSERT OVERWRITE DIRECTORY ");

    String tmpFile = pctx.getContext().getMRTmpFileURI();
    + queryContext.setIndexIntermediateFile(tmpFile);
    qlCommand.append( "\"" + tmpFile + "\" "); // QL includes " around file name
    qlCommand.append("SELECT `_bucketname` , `_offsets` FROM ");
    qlCommand.append(HiveUtils.unparseIdentifier(index.getIndexTableName()));
    @@ -179,9 +182,6 @@ public class CompactIndexHandler extends
    Driver driver = new Driver(pctx.getConf());
    driver.compile(qlCommand.toString(), false);

    - // setup TableScanOperator to change input format for original query
    - queryContext.setIndexInputFormat(HiveCompactIndexInputFormat.class.getName());
    - queryContext.setIndexIntermediateFile(tmpFile);

    queryContext.addAdditionalSemanticInputs(driver.getPlan().getInputs());
    queryContext.setQueryTasks(driver.getPlan().getRootTasks());

    Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/index/IndexWhereProcessor.java
    URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/index/IndexWhereProcessor.java?rev=1150962&r1=1150961&r2=1150962&view=diff
    ==============================================================================
    --- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/index/IndexWhereProcessor.java (original)
    +++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/index/IndexWhereProcessor.java Tue Jul 26 00:12:38 2011
    @@ -53,6 +53,7 @@ import org.apache.hadoop.hive.ql.parse.P
    import org.apache.hadoop.hive.ql.parse.SemanticException;
    import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
    import org.apache.hadoop.hive.ql.plan.FilterDesc;
    +import org.apache.hadoop.hive.ql.plan.TableDesc;
    import org.apache.hadoop.hive.ql.plan.TableScanDesc;
    import org.apache.hadoop.hive.ql.plan.MapredWork;

    @@ -83,11 +84,15 @@ public class IndexWhereProcessor impleme
    TableScanOperator operator = (TableScanOperator) nd;
    List<Node> opChildren = operator.getChildren();
    TableScanDesc operatorDesc = operator.getConf();
    + if (operatorDesc == null) {
    + return null;
    + }
    ExprNodeDesc predicate = operatorDesc.getFilterExpr();

    IndexWhereProcCtx context = (IndexWhereProcCtx) procCtx;
    ParseContext pctx = context.getParseContext();
    LOG.info("Processing predicate for index optimization");
    +
    if (predicate == null) {
    LOG.info("null predicate pushed down");
    return null;
    @@ -114,47 +119,42 @@ public class IndexWhereProcessor impleme

    // get potential reentrant index queries from each index
    Map<Index, HiveIndexQueryContext> queryContexts = new HashMap<Index, HiveIndexQueryContext>();
    - Collection<List<Index>> tableIndexes = indexes.values();
    - for (List<Index> indexesOnTable : tableIndexes) {
    - List<List<Index>> indexesByType = new ArrayList<List<Index>>();
    - for (Index index : indexesOnTable) {
    - boolean added = false;
    - for (List<Index> indexType : indexesByType) {
    - if (indexType.isEmpty()) {
    - indexType.add(index);
    - added = true;
    - } else if (indexType.get(0).getIndexHandlerClass().equals(
    - index.getIndexHandlerClass())) {
    - indexType.add(index);
    - added = true;
    - break;
    - }
    - }
    - if (!added) {
    - List<Index> newType = new ArrayList<Index>();
    - newType.add(index);
    - indexesByType.add(newType);
    - }
    + // make sure we have an index on the table being scanned
    + TableDesc tblDesc = operator.getTableDesc();
    + Table srcTable = pctx.getTopToTable().get(operator);
    + if (indexes == null || indexes.get(srcTable) == null) {
    + return null;
    + }
    +
    + List<Index> tableIndexes = indexes.get(srcTable);
    + Map<String, List<Index>> indexesByType = new HashMap<String, List<Index>>();
    + for (Index indexOnTable : tableIndexes) {
    + if (indexesByType.get(indexOnTable.getIndexHandlerClass()) == null) {
    + List<Index> newType = new ArrayList<Index>();
    + newType.add(indexOnTable);
    + indexesByType.put(indexOnTable.getIndexHandlerClass(), newType);
    + } else {
    + indexesByType.get(indexOnTable.getIndexHandlerClass()).add(indexOnTable);
    }
    + }

    - // choose index type with most indexes of the same type on the table
    - // TODO HIVE-2130 This would be a good place for some sort of cost based choice?
    - List<Index> bestIndexes = indexesByType.get(0);
    - for (List<Index> indexTypes : indexesByType) {
    - if (bestIndexes.size() < indexTypes.size()) {
    - bestIndexes = indexTypes;
    - }
    + // choose index type with most indexes of the same type on the table
    + // TODO HIVE-2130 This would be a good place for some sort of cost based choice?
    + List<Index> bestIndexes = indexesByType.values().iterator().next();
    + for (List<Index> indexTypes : indexesByType.values()) {
    + if (bestIndexes.size() < indexTypes.size()) {
    + bestIndexes = indexTypes;
    }
    + }

    - // rewrite index queries for the chosen index type
    - HiveIndexQueryContext queryContext = new HiveIndexQueryContext();
    - queryContext.setQueryPartitions(queryPartitions);
    - rewriteForIndexes(predicate, bestIndexes, pctx, currentTask, queryContext);
    - List<Task<?>> indexTasks = queryContext.getQueryTasks();
    + // rewrite index queries for the chosen index type
    + HiveIndexQueryContext tmpQueryContext = new HiveIndexQueryContext();
    + tmpQueryContext.setQueryPartitions(queryPartitions);
    + rewriteForIndexes(predicate, bestIndexes, pctx, currentTask, tmpQueryContext);
    + List<Task<?>> indexTasks = tmpQueryContext.getQueryTasks();

    - if (indexTasks != null && indexTasks.size() > 0) {
    - queryContexts.put(bestIndexes.get(0), queryContext);
    - }
    + if (indexTasks != null && indexTasks.size() > 0) {
    + queryContexts.put(bestIndexes.get(0), tmpQueryContext);
    }
    // choose an index rewrite to use
    if (queryContexts.size() > 0) {
    @@ -168,8 +168,7 @@ public class IndexWhereProcessor impleme
    // prepare the map reduce job to use indexing
    MapredWork work = currentTask.getWork();
    work.setInputformat(queryContext.getIndexInputFormat());
    - work.setIndexIntermediateFile(queryContext.getIndexIntermediateFile());
    -
    + work.addIndexIntermediateFile(queryContext.getIndexIntermediateFile());
    // modify inputs based on index query
    Set<ReadEntity> inputs = pctx.getSemanticInputs();
    inputs.addAll(queryContext.getAdditionalSemanticInputs());
    @@ -226,8 +225,6 @@ public class IndexWhereProcessor impleme
    return;
    }

    -
    -
    /**
    * Check the partitions used by the table scan to make sure they also exist in the
    * index table
    @@ -239,6 +236,7 @@ public class IndexWhereProcessor impleme
    throws HiveException {
    Hive hive = Hive.get(pctx.getConf());

    +
    // make sure each partition exists on the index table
    PrunedPartitionList queryPartitionList = pctx.getOpToPartList().get(tableScan);
    Set<Partition> queryPartitions = queryPartitionList.getConfirmedPartns();
    @@ -259,6 +257,9 @@ public class IndexWhereProcessor impleme
    private List<Table> getIndexTables(Hive hive, Partition part) throws HiveException {
    List<Table> indexTables = new ArrayList<Table>();
    Table partitionedTable = part.getTable();
    + if (indexes == null || indexes.get(partitionedTable) == null) {
    + return indexTables;
    + }
    for (Index index : indexes.get(partitionedTable)) {
    indexTables.add(hive.getTable(index.getIndexTableName()));
    }
    @@ -276,6 +277,10 @@ public class IndexWhereProcessor impleme
    return true; // empty specs come from non-partitioned tables
    }

    + if (indexTables == null || indexTables.size() == 0) {
    + return false;
    + }
    +
    for (Table indexTable : indexTables) {
    // get partitions that match the spec
    List<Partition> matchingPartitions = hive.getPartitions(indexTable, partSpec);

    Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/index/IndexWhereTaskDispatcher.java
    URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/index/IndexWhereTaskDispatcher.java?rev=1150962&r1=1150961&r2=1150962&view=diff
    ==============================================================================
    --- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/index/IndexWhereTaskDispatcher.java (original)
    +++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/index/IndexWhereTaskDispatcher.java Tue Jul 26 00:12:38 2011
    @@ -45,6 +45,7 @@ import org.apache.hadoop.hive.ql.metadat
    import org.apache.hadoop.hive.ql.optimizer.physical.PhysicalContext;
    import org.apache.hadoop.hive.ql.parse.ParseContext;
    import org.apache.hadoop.hive.ql.parse.SemanticException;
    +import org.apache.hadoop.hive.ql.plan.MapredWork;

    /**
    *
    @@ -87,10 +88,14 @@ public class IndexWhereTaskDispatcher im
    operatorRules,
    indexWhereOptimizeCtx);

    - // walk the mapper operator(not task) tree
    + // walk the mapper operator(not task) tree for each specific task
    GraphWalker ogw = new DefaultGraphWalker(dispatcher);
    ArrayList<Node> topNodes = new ArrayList<Node>();
    - topNodes.addAll(pctx.getTopOps().values());
    + if (task.getWork() instanceof MapredWork) {
    + topNodes.addAll(((MapredWork)task.getWork()).getAliasToWork().values());
    + } else {
    + return null;
    + }
    ogw.startWalking(topNodes, null);

    return null;

    Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/MapredWork.java
    URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/MapredWork.java?rev=1150962&r1=1150961&r2=1150962&view=diff
    ==============================================================================
    --- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/MapredWork.java (original)
    +++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/MapredWork.java Tue Jul 26 00:12:38 2011
    @@ -387,8 +387,12 @@ public class MapredWork implements Seria
    return indexIntermediateFile;
    }

    - public void setIndexIntermediateFile(String fileName) {
    - this.indexIntermediateFile = fileName;
    + public void addIndexIntermediateFile(String fileName) {
    + if (this.indexIntermediateFile == null) {
    + this.indexIntermediateFile = fileName;
    + } else {
    + this.indexIntermediateFile += "," + fileName;
    + }
    }

    public void setGatheringStats(boolean gatherStats) {

    Added: hive/trunk/ql/src/test/queries/clientpositive/index_auto_mult_tables.q
    URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/index_auto_mult_tables.q?rev=1150962&view=auto
    ==============================================================================
    --- hive/trunk/ql/src/test/queries/clientpositive/index_auto_mult_tables.q (added)
    +++ hive/trunk/ql/src/test/queries/clientpositive/index_auto_mult_tables.q Tue Jul 26 00:12:38 2011
    @@ -0,0 +1,23 @@
    +-- try the query without indexing, with manual indexing, and with automatic indexing
    +
    +-- without indexing
    +EXPLAIN SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key;
    +SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key;
    +
    +
    +CREATE INDEX src_index ON TABLE src(key) as 'BITMAP' WITH DEFERRED REBUILD;
    +ALTER INDEX src_index ON src REBUILD;
    +
    +CREATE INDEX srcpart_index ON TABLE srcpart(key) as 'BITMAP' WITH DEFERRED REBUILD;
    +ALTER INDEX srcpart_index ON srcpart REBUILD;
    +
    +SET hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
    +SET hive.optimize.index.filter=true;
    +SET hive.optimize.index.filter.compact.minsize=0;
    +
    +-- automatic indexing
    +EXPLAIN SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key;
    +SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key;
    +
    +DROP INDEX src_index on src;
    +DROP INDEX srcpart_index on src;

    Added: hive/trunk/ql/src/test/queries/clientpositive/index_auto_mult_tables_compact.q
    URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/index_auto_mult_tables_compact.q?rev=1150962&view=auto
    ==============================================================================
    --- hive/trunk/ql/src/test/queries/clientpositive/index_auto_mult_tables_compact.q (added)
    +++ hive/trunk/ql/src/test/queries/clientpositive/index_auto_mult_tables_compact.q Tue Jul 26 00:12:38 2011
    @@ -0,0 +1,23 @@
    +-- try the query without indexing, with manual indexing, and with automatic indexing
    +
    +-- without indexing
    +EXPLAIN SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key;
    +SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key;
    +
    +
    +CREATE INDEX src_index ON TABLE src(key) as 'COMPACT' WITH DEFERRED REBUILD;
    +ALTER INDEX src_index ON src REBUILD;
    +
    +CREATE INDEX srcpart_index ON TABLE srcpart(key) as 'COMPACT' WITH DEFERRED REBUILD;
    +ALTER INDEX srcpart_index ON srcpart REBUILD;
    +
    +SET hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
    +SET hive.optimize.index.filter=true;
    +SET hive.optimize.index.filter.compact.minsize=0;
    +
    +-- automatic indexing
    +EXPLAIN SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key;
    +SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key;
    +
    +DROP INDEX src_index on src;
    +DROP INDEX srcpart_index on src;

    Added: hive/trunk/ql/src/test/queries/clientpositive/index_auto_self_join.q
    URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/index_auto_self_join.q?rev=1150962&view=auto
    ==============================================================================
    --- hive/trunk/ql/src/test/queries/clientpositive/index_auto_self_join.q (added)
    +++ hive/trunk/ql/src/test/queries/clientpositive/index_auto_self_join.q Tue Jul 26 00:12:38 2011
    @@ -0,0 +1,18 @@
    +-- try the query without indexing, with manual indexing, and with automatic indexing
    +
    +-- without indexing
    +EXPLAIN SELECT a.key, b.key FROM src a JOIN src b ON (a.value = b.value) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key;
    +SELECT a.key, b.key FROM src a JOIN src b ON (a.value = b.value) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key;
    +
    +CREATE INDEX src_index ON TABLE src(key) as 'BITMAP' WITH DEFERRED REBUILD;
    +ALTER INDEX src_index ON src REBUILD;
    +
    +SET hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
    +SET hive.optimize.index.filter=true;
    +SET hive.optimize.index.filter.compact.minsize=0;
    +
    +-- automatic indexing
    +EXPLAIN SELECT a.key, b.key FROM src a JOIN src b ON (a.value = b.value) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key;
    +SELECT a.key, b.key FROM src a JOIN src b ON (a.value = b.value) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key;
    +
    +DROP INDEX src_index on src;

    Added: hive/trunk/ql/src/test/results/clientpositive/index_auto_mult_tables.q.out
    URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/index_auto_mult_tables.q.out?rev=1150962&view=auto
    ==============================================================================
    --- hive/trunk/ql/src/test/results/clientpositive/index_auto_mult_tables.q.out (added)
    +++ hive/trunk/ql/src/test/results/clientpositive/index_auto_mult_tables.q.out Tue Jul 26 00:12:38 2011
    @@ -0,0 +1,638 @@
    +PREHOOK: query: -- try the query without indexing, with manual indexing, and with automatic indexing
    +
    +-- without indexing
    +EXPLAIN SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key
    +PREHOOK: type: QUERY
    +POSTHOOK: query: -- try the query without indexing, with manual indexing, and with automatic indexing
    +
    +-- without indexing
    +EXPLAIN SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key
    +POSTHOOK: type: QUERY
    +ABSTRACT SYNTAX TREE:
    + (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src) a) (TOK_TABREF (TOK_TABNAME srcpart) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value))) (TOK_WHERE (AND (AND (AND (> (. (TOK_TABLE_OR_COL a) key) 80) (< (. (TOK_TABLE_OR_COL a) key) 100)) (> (. (TOK_TABLE_OR_COL b) key) 70)) (< (. (TOK_TABLE_OR_COL b) key) 90))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL a) key)))))
    +
    +STAGE DEPENDENCIES:
    + Stage-1 is a root stage
    + Stage-2 depends on stages: Stage-1
    + Stage-0 is a root stage
    +
    +STAGE PLANS:
    + Stage: Stage-1
    + Map Reduce
    + Alias -> Map Operator Tree:
    + a
    + TableScan
    + alias: a
    + Filter Operator
    + predicate:
    + expr: ((key > 80) and (key < 100))
    + type: boolean
    + Reduce Output Operator
    + key expressions:
    + expr: key
    + type: string
    + sort order: +
    + Map-reduce partition columns:
    + expr: key
    + type: string
    + tag: 0
    + value expressions:
    + expr: key
    + type: string
    + expr: value
    + type: string
    + b
    + TableScan
    + alias: b
    + Filter Operator
    + predicate:
    + expr: ((key > 70) and (key < 90))
    + type: boolean
    + Reduce Output Operator
    + key expressions:
    + expr: key
    + type: string
    + sort order: +
    + Map-reduce partition columns:
    + expr: key
    + type: string
    + tag: 1
    + value expressions:
    + expr: key
    + type: string
    + Reduce Operator Tree:
    + Join Operator
    + condition map:
    + Inner Join 0 to 1
    + condition expressions:
    + 0 {VALUE._col0} {VALUE._col1}
    + 1 {VALUE._col0}
    + handleSkewJoin: false
    + outputColumnNames: _col0, _col1, _col4
    + Select Operator
    + expressions:
    + expr: _col0
    + type: string
    + expr: _col1
    + type: string
    + outputColumnNames: _col0, _col1
    + File Output Operator
    + compressed: false
    + GlobalTableId: 0
    + table:
    + input format: org.apache.hadoop.mapred.SequenceFileInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
    +
    + Stage: Stage-2
    + Map Reduce
    + Alias -> Map Operator Tree:
    + file:/var/folders/5V/5V4Zq77qGD4aSK9m8V3frVsFdRU/-Tmp-/salbiz/hive_2011-07-25_11-49-01_232_8620953468013110737/-mr-10002
    + Reduce Output Operator
    + key expressions:
    + expr: _col0
    + type: string
    + sort order: +
    + tag: -1
    + value expressions:
    + expr: _col0
    + type: string
    + expr: _col1
    + type: string
    + Reduce Operator Tree:
    + Extract
    + File Output Operator
    + compressed: false
    + GlobalTableId: 0
    + table:
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    +
    + Stage: Stage-0
    + Fetch Operator
    + limit: -1
    +
    +
    +PREHOOK: query: SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@src
    +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
    +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
    +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
    +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
    +PREHOOK: Output: file:/var/folders/5V/5V4Zq77qGD4aSK9m8V3frVsFdRU/-Tmp-/salbiz/hive_2011-07-25_11-49-01_720_2821092727071549075/-mr-10000
    +POSTHOOK: query: SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@src
    +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
    +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
    +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
    +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
    +POSTHOOK: Output: file:/var/folders/5V/5V4Zq77qGD4aSK9m8V3frVsFdRU/-Tmp-/salbiz/hive_2011-07-25_11-49-01_720_2821092727071549075/-mr-10000
    +82 val_82
    +82 val_82
    +82 val_82
    +82 val_82
    +83 val_83
    +83 val_83
    +83 val_83
    +83 val_83
    +83 val_83
    +83 val_83
    +83 val_83
    +83 val_83
    +83 val_83
    +83 val_83
    +83 val_83
    +83 val_83
    +83 val_83
    +83 val_83
    +83 val_83
    +83 val_83
    +84 val_84
    +84 val_84
    +84 val_84
    +84 val_84
    +84 val_84
    +84 val_84
    +84 val_84
    +84 val_84
    +84 val_84
    +84 val_84
    +84 val_84
    +84 val_84
    +84 val_84
    +84 val_84
    +84 val_84
    +84 val_84
    +85 val_85
    +85 val_85
    +85 val_85
    +85 val_85
    +86 val_86
    +86 val_86
    +86 val_86
    +86 val_86
    +87 val_87
    +87 val_87
    +87 val_87
    +87 val_87
    +PREHOOK: query: CREATE INDEX src_index ON TABLE src(key) as 'BITMAP' WITH DEFERRED REBUILD
    +PREHOOK: type: CREATEINDEX
    +POSTHOOK: query: CREATE INDEX src_index ON TABLE src(key) as 'BITMAP' WITH DEFERRED REBUILD
    +POSTHOOK: type: CREATEINDEX
    +PREHOOK: query: ALTER INDEX src_index ON src REBUILD
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@src
    +PREHOOK: Output: default@default__src_src_index__
    +POSTHOOK: query: ALTER INDEX src_index ON src REBUILD
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@src
    +POSTHOOK: Output: default@default__src_src_index__
    +POSTHOOK: Lineage: default__src_src_index__._bitmaps EXPRESSION [(src)src.FieldSchema(name:ROW__OFFSET__INSIDE__BLOCK, type:bigint, comment:), ]
    +POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
    +POSTHOOK: Lineage: default__src_src_index__._offset SIMPLE [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
    +POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
    +PREHOOK: query: CREATE INDEX srcpart_index ON TABLE srcpart(key) as 'BITMAP' WITH DEFERRED REBUILD
    +PREHOOK: type: CREATEINDEX
    +POSTHOOK: query: CREATE INDEX srcpart_index ON TABLE srcpart(key) as 'BITMAP' WITH DEFERRED REBUILD
    +POSTHOOK: type: CREATEINDEX
    +POSTHOOK: Lineage: default__src_src_index__._bitmaps EXPRESSION [(src)src.FieldSchema(name:ROW__OFFSET__INSIDE__BLOCK, type:bigint, comment:), ]
    +POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
    +POSTHOOK: Lineage: default__src_src_index__._offset SIMPLE [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
    +POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
    +PREHOOK: query: ALTER INDEX srcpart_index ON srcpart REBUILD
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
    +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
    +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
    +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
    +PREHOOK: Output: default@default__srcpart_srcpart_index__@ds=2008-04-08/hr=11
    +PREHOOK: Output: default@default__srcpart_srcpart_index__@ds=2008-04-08/hr=12
    +PREHOOK: Output: default@default__srcpart_srcpart_index__@ds=2008-04-09/hr=11
    +PREHOOK: Output: default@default__srcpart_srcpart_index__@ds=2008-04-09/hr=12
    +POSTHOOK: query: ALTER INDEX srcpart_index ON srcpart REBUILD
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
    +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
    +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
    +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
    +POSTHOOK: Output: default@default__srcpart_srcpart_index__@ds=2008-04-08/hr=11
    +POSTHOOK: Output: default@default__srcpart_srcpart_index__@ds=2008-04-08/hr=12
    +POSTHOOK: Output: default@default__srcpart_srcpart_index__@ds=2008-04-09/hr=11
    +POSTHOOK: Output: default@default__srcpart_srcpart_index__@ds=2008-04-09/hr=12
    +POSTHOOK: Lineage: default__src_src_index__._bitmaps EXPRESSION [(src)src.FieldSchema(name:ROW__OFFSET__INSIDE__BLOCK, type:bigint, comment:), ]
    +POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
    +POSTHOOK: Lineage: default__src_src_index__._offset SIMPLE [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
    +POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
    +POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12)._bitmaps EXPRESSION [(srcpart)srcpart.FieldSchema(name:ROW__OFFSET__INSIDE__BLOCK, type:bigint, comment:), ]
    +POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12)._bucketname SIMPLE [(srcpart)srcpart.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
    +POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12)._offset SIMPLE [(srcpart)srcpart.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
    +POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
    +PREHOOK: query: -- automatic indexing
    +EXPLAIN SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key
    +PREHOOK: type: QUERY
    +POSTHOOK: query: -- automatic indexing
    +EXPLAIN SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key
    +POSTHOOK: type: QUERY
    +POSTHOOK: Lineage: default__src_src_index__._bitmaps EXPRESSION [(src)src.FieldSchema(name:ROW__OFFSET__INSIDE__BLOCK, type:bigint, comment:), ]
    +POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
    +POSTHOOK: Lineage: default__src_src_index__._offset SIMPLE [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
    +POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
    +POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12)._bitmaps EXPRESSION [(srcpart)srcpart.FieldSchema(name:ROW__OFFSET__INSIDE__BLOCK, type:bigint, comment:), ]
    +POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12)._bucketname SIMPLE [(srcpart)srcpart.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
    +POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12)._offset SIMPLE [(srcpart)srcpart.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
    +POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
    +ABSTRACT SYNTAX TREE:
    + (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src) a) (TOK_TABREF (TOK_TABNAME srcpart) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value))) (TOK_WHERE (AND (AND (AND (> (. (TOK_TABLE_OR_COL a) key) 80) (< (. (TOK_TABLE_OR_COL a) key) 100)) (> (. (TOK_TABLE_OR_COL b) key) 70)) (< (. (TOK_TABLE_OR_COL b) key) 90))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL a) key)))))
    +
    +STAGE DEPENDENCIES:
    + Stage-5 is a root stage
    + Stage-4 depends on stages: Stage-5
    + Stage-1 depends on stages: Stage-4, Stage-6
    + Stage-2 depends on stages: Stage-1
    + Stage-7 is a root stage
    + Stage-6 depends on stages: Stage-7
    + Stage-0 is a root stage
    +
    +STAGE PLANS:
    + Stage: Stage-5
    + Map Reduce
    + Alias -> Map Operator Tree:
    + tmp_index:ind0:default__srcpart_srcpart_index__
    + TableScan
    + alias: default__srcpart_srcpart_index__
    + filterExpr:
    + expr: (((key > 70) and (key < 90)) and (not EWAH_BITMAP_EMPTY(_bitmaps)))
    + type: boolean
    + Filter Operator
    + predicate:
    + expr: (((key > 70) and (key < 90)) and (not EWAH_BITMAP_EMPTY(_bitmaps)))
    + type: boolean
    + Select Operator
    + expressions:
    + expr: _bucketname
    + type: string
    + expr: _offset
    + type: bigint
    + expr: _bitmaps
    + type: array<bigint>
    + outputColumnNames: _col1, _col2, _col3
    + Select Operator
    + expressions:
    + expr: _col1
    + type: string
    + expr: _col2
    + type: bigint
    + outputColumnNames: _col0, _col1
    + Select Operator
    + expressions:
    + expr: _col0
    + type: string
    + expr: _col1
    + type: bigint
    + outputColumnNames: _col0, _col1
    + Group By Operator
    + aggregations:
    + expr: collect_set(_col1)
    + bucketGroup: false
    + keys:
    + expr: _col0
    + type: string
    + mode: hash
    + outputColumnNames: _col0, _col1
    + Reduce Output Operator
    + key expressions:
    + expr: _col0
    + type: string
    + sort order: +
    + Map-reduce partition columns:
    + expr: _col0
    + type: string
    + tag: -1
    + value expressions:
    + expr: _col1
    + type: array<bigint>
    + Reduce Operator Tree:
    + Group By Operator
    + aggregations:
    + expr: collect_set(VALUE._col0)
    + bucketGroup: false
    + keys:
    + expr: KEY._col0
    + type: string
    + mode: mergepartial
    + outputColumnNames: _col0, _col1
    + Select Operator
    + expressions:
    + expr: _col0
    + type: string
    + expr: _col1
    + type: array<bigint>
    + outputColumnNames: _col0, _col1
    + File Output Operator
    + compressed: false
    + GlobalTableId: 1
    + table:
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    +
    + Stage: Stage-4
    + Move Operator
    + files:
    + hdfs directory: true
    + destination: file:/var/folders/5V/5V4Zq77qGD4aSK9m8V3frVsFdRU/-Tmp-/salbiz/hive_2011-07-25_11-50-02_560_7042516595035703988/-mr-10003
    +
    + Stage: Stage-1
    + Map Reduce
    + Alias -> Map Operator Tree:
    + a
    + TableScan
    + alias: a
    + filterExpr:
    + expr: ((key > 80) and (key < 100))
    + type: boolean
    + Filter Operator
    + predicate:
    + expr: ((key > 80) and (key < 100))
    + type: boolean
    + Reduce Output Operator
    + key expressions:
    + expr: key
    + type: string
    + sort order: +
    + Map-reduce partition columns:
    + expr: key
    + type: string
    + tag: 0
    + value expressions:
    + expr: key
    + type: string
    + expr: value
    + type: string
    + b
    + TableScan
    + alias: b
    + filterExpr:
    + expr: ((key > 70) and (key < 90))
    + type: boolean
    + Filter Operator
    + predicate:
    + expr: ((key > 70) and (key < 90))
    + type: boolean
    + Reduce Output Operator
    + key expressions:
    + expr: key
    + type: string
    + sort order: +
    + Map-reduce partition columns:
    + expr: key
    + type: string
    + tag: 1
    + value expressions:
    + expr: key
    + type: string
    + Reduce Operator Tree:
    + Join Operator
    + condition map:
    + Inner Join 0 to 1
    + condition expressions:
    + 0 {VALUE._col0} {VALUE._col1}
    + 1 {VALUE._col0}
    + handleSkewJoin: false
    + outputColumnNames: _col0, _col1, _col4
    + Select Operator
    + expressions:
    + expr: _col0
    + type: string
    + expr: _col1
    + type: string
    + outputColumnNames: _col0, _col1
    + File Output Operator
    + compressed: false
    + GlobalTableId: 0
    + table:
    + input format: org.apache.hadoop.mapred.SequenceFileInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
    +
    + Stage: Stage-2
    + Map Reduce
    + Alias -> Map Operator Tree:
    + file:/var/folders/5V/5V4Zq77qGD4aSK9m8V3frVsFdRU/-Tmp-/salbiz/hive_2011-07-25_11-50-02_560_7042516595035703988/-mr-10002
    + Reduce Output Operator
    + key expressions:
    + expr: _col0
    + type: string
    + sort order: +
    + tag: -1
    + value expressions:
    + expr: _col0
    + type: string
    + expr: _col1
    + type: string
    + Reduce Operator Tree:
    + Extract
    + File Output Operator
    + compressed: false
    + GlobalTableId: 0
    + table:
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    +
    + Stage: Stage-7
    + Map Reduce
    + Alias -> Map Operator Tree:
    + tmp_index:ind0:default__src_src_index__
    + TableScan
    + alias: default__src_src_index__
    + filterExpr:
    + expr: (((key > 80) and (key < 100)) and (not EWAH_BITMAP_EMPTY(_bitmaps)))
    + type: boolean
    + Filter Operator
    + predicate:
    + expr: (((key > 80) and (key < 100)) and (not EWAH_BITMAP_EMPTY(_bitmaps)))
    + type: boolean
    + Select Operator
    + expressions:
    + expr: _bucketname
    + type: string
    + expr: _offset
    + type: bigint
    + expr: _bitmaps
    + type: array<bigint>
    + outputColumnNames: _col1, _col2, _col3
    + Select Operator
    + expressions:
    + expr: _col1
    + type: string
    + expr: _col2
    + type: bigint
    + outputColumnNames: _col0, _col1
    + Select Operator
    + expressions:
    + expr: _col0
    + type: string
    + expr: _col1
    + type: bigint
    + outputColumnNames: _col0, _col1
    + Group By Operator
    + aggregations:
    + expr: collect_set(_col1)
    + bucketGroup: false
    + keys:
    + expr: _col0
    + type: string
    + mode: hash
    + outputColumnNames: _col0, _col1
    + Reduce Output Operator
    + key expressions:
    + expr: _col0
    + type: string
    + sort order: +
    + Map-reduce partition columns:
    + expr: _col0
    + type: string
    + tag: -1
    + value expressions:
    + expr: _col1
    + type: array<bigint>
    + Reduce Operator Tree:
    + Group By Operator
    + aggregations:
    + expr: collect_set(VALUE._col0)
    + bucketGroup: false
    + keys:
    + expr: KEY._col0
    + type: string
    + mode: mergepartial
    + outputColumnNames: _col0, _col1
    + Select Operator
    + expressions:
    + expr: _col0
    + type: string
    + expr: _col1
    + type: array<bigint>
    + outputColumnNames: _col0, _col1
    + File Output Operator
    + compressed: false
    + GlobalTableId: 1
    + table:
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    +
    + Stage: Stage-6
    + Move Operator
    + files:
    + hdfs directory: true
    + destination: file:/var/folders/5V/5V4Zq77qGD4aSK9m8V3frVsFdRU/-Tmp-/salbiz/hive_2011-07-25_11-50-02_560_7042516595035703988/-mr-10004
    +
    + Stage: Stage-0
    + Fetch Operator
    + limit: -1
    +
    +
    +PREHOOK: query: SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@default__src_src_index__
    +PREHOOK: Input: default@default__srcpart_srcpart_index__@ds=2008-04-08/hr=11
    +PREHOOK: Input: default@default__srcpart_srcpart_index__@ds=2008-04-08/hr=12
    +PREHOOK: Input: default@default__srcpart_srcpart_index__@ds=2008-04-09/hr=11
    +PREHOOK: Input: default@default__srcpart_srcpart_index__@ds=2008-04-09/hr=12
    +PREHOOK: Input: default@src
    +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
    +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
    +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
    +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
    +PREHOOK: Output: file:/var/folders/5V/5V4Zq77qGD4aSK9m8V3frVsFdRU/-Tmp-/salbiz/hive_2011-07-25_11-50-03_417_3979280982191225396/-mr-10000
    +POSTHOOK: query: SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@default__src_src_index__
    +POSTHOOK: Input: default@default__srcpart_srcpart_index__@ds=2008-04-08/hr=11
    +POSTHOOK: Input: default@default__srcpart_srcpart_index__@ds=2008-04-08/hr=12
    +POSTHOOK: Input: default@default__srcpart_srcpart_index__@ds=2008-04-09/hr=11
    +POSTHOOK: Input: default@default__srcpart_srcpart_index__@ds=2008-04-09/hr=12
    +POSTHOOK: Input: default@src
    +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
    +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
    +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
    +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
    +POSTHOOK: Output: file:/var/folders/5V/5V4Zq77qGD4aSK9m8V3frVsFdRU/-Tmp-/salbiz/hive_2011-07-25_11-50-03_417_3979280982191225396/-mr-10000
    +POSTHOOK: Lineage: default__src_src_index__._bitmaps EXPRESSION [(src)src.FieldSchema(name:ROW__OFFSET__INSIDE__BLOCK, type:bigint, comment:), ]
    +POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
    +POSTHOOK: Lineage: default__src_src_index__._offset SIMPLE [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
    +POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
    +POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12)._bitmaps EXPRESSION [(srcpart)srcpart.FieldSchema(name:ROW__OFFSET__INSIDE__BLOCK, type:bigint, comment:), ]
    +POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12)._bucketname SIMPLE [(srcpart)srcpart.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
    +POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12)._offset SIMPLE [(srcpart)srcpart.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
    +POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
    +82 val_82
    +82 val_82
    +82 val_82
    +82 val_82
    +83 val_83
    +83 val_83
    +83 val_83
    +83 val_83
    +83 val_83
    +83 val_83
    +83 val_83
    +83 val_83
    +83 val_83
    +83 val_83
    +83 val_83
    +83 val_83
    +83 val_83
    +83 val_83
    +83 val_83
    +83 val_83
    +84 val_84
    +84 val_84
    +84 val_84
    +84 val_84
    +84 val_84
    +84 val_84
    +84 val_84
    +84 val_84
    +84 val_84
    +84 val_84
    +84 val_84
    +84 val_84
    +84 val_84
    +84 val_84
    +84 val_84
    +84 val_84
    +85 val_85
    +85 val_85
    +85 val_85
    +85 val_85
    +86 val_86
    +86 val_86
    +86 val_86
    +86 val_86
    +87 val_87
    +87 val_87
    +87 val_87
    +87 val_87
    +PREHOOK: query: DROP INDEX src_index on src
    +PREHOOK: type: DROPINDEX
    +POSTHOOK: query: DROP INDEX src_index on src
    +POSTHOOK: type: DROPINDEX
    +POSTHOOK: Lineage: default__src_src_index__._bitmaps EXPRESSION [(src)src.FieldSchema(name:ROW__OFFSET__INSIDE__BLOCK, type:bigint, comment:), ]
    +POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
    +POSTHOOK: Lineage: default__src_src_index__._offset SIMPLE [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
    +POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
    +POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12)._bitmaps EXPRESSION [(srcpart)srcpart.FieldSchema(name:ROW__OFFSET__INSIDE__BLOCK, type:bigint, comment:), ]
    +POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12)._bucketname SIMPLE [(srcpart)srcpart.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
    +POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12)._offset SIMPLE [(srcpart)srcpart.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
    +POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
    +PREHOOK: query: DROP INDEX srcpart_index on src
    +PREHOOK: type: DROPINDEX
    +POSTHOOK: query: DROP INDEX srcpart_index on src
    +POSTHOOK: type: DROPINDEX
    +POSTHOOK: Lineage: default__src_src_index__._bitmaps EXPRESSION [(src)src.FieldSchema(name:ROW__OFFSET__INSIDE__BLOCK, type:bigint, comment:), ]
    +POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
    +POSTHOOK: Lineage: default__src_src_index__._offset SIMPLE [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
    +POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
    +POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12)._bitmaps EXPRESSION [(srcpart)srcpart.FieldSchema(name:ROW__OFFSET__INSIDE__BLOCK, type:bigint, comment:), ]
    +POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12)._bucketname SIMPLE [(srcpart)srcpart.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
    +POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12)._offset SIMPLE [(srcpart)srcpart.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
    +POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]

    Added: hive/trunk/ql/src/test/results/clientpositive/index_auto_mult_tables_compact.q.out
    URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/index_auto_mult_tables_compact.q.out?rev=1150962&view=auto
    ==============================================================================
    --- hive/trunk/ql/src/test/results/clientpositive/index_auto_mult_tables_compact.q.out (added)
    +++ hive/trunk/ql/src/test/results/clientpositive/index_auto_mult_tables_compact.q.out Tue Jul 26 00:12:38 2011
    @@ -0,0 +1,564 @@
    +PREHOOK: query: -- try the query without indexing, with manual indexing, and with automatic indexing
    +
    +-- without indexing
    +EXPLAIN SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key
    +PREHOOK: type: QUERY
    +POSTHOOK: query: -- try the query without indexing, with manual indexing, and with automatic indexing
    +
    +-- without indexing
    +EXPLAIN SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key
    +POSTHOOK: type: QUERY
    +ABSTRACT SYNTAX TREE:
    + (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src) a) (TOK_TABREF (TOK_TABNAME srcpart) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value))) (TOK_WHERE (AND (AND (AND (> (. (TOK_TABLE_OR_COL a) key) 80) (< (. (TOK_TABLE_OR_COL a) key) 100)) (> (. (TOK_TABLE_OR_COL b) key) 70)) (< (. (TOK_TABLE_OR_COL b) key) 90))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL a) key)))))
    +
    +STAGE DEPENDENCIES:
    + Stage-1 is a root stage
    + Stage-2 depends on stages: Stage-1
    + Stage-0 is a root stage
    +
    +STAGE PLANS:
    + Stage: Stage-1
    + Map Reduce
    + Alias -> Map Operator Tree:
    + a
    + TableScan
    + alias: a
    + Filter Operator
    + predicate:
    + expr: ((key > 80) and (key < 100))
    + type: boolean
    + Reduce Output Operator
    + key expressions:
    + expr: key
    + type: string
    + sort order: +
    + Map-reduce partition columns:
    + expr: key
    + type: string
    + tag: 0
    + value expressions:
    + expr: key
    + type: string
    + expr: value
    + type: string
    + b
    + TableScan
    + alias: b
    + Filter Operator
    + predicate:
    + expr: ((key > 70) and (key < 90))
    + type: boolean
    + Reduce Output Operator
    + key expressions:
    + expr: key
    + type: string
    + sort order: +
    + Map-reduce partition columns:
    + expr: key
    + type: string
    + tag: 1
    + value expressions:
    + expr: key
    + type: string
    + Reduce Operator Tree:
    + Join Operator
    + condition map:
    + Inner Join 0 to 1
    + condition expressions:
    + 0 {VALUE._col0} {VALUE._col1}
    + 1 {VALUE._col0}
    + handleSkewJoin: false
    + outputColumnNames: _col0, _col1, _col4
    + Select Operator
    + expressions:
    + expr: _col0
    + type: string
    + expr: _col1
    + type: string
    + outputColumnNames: _col0, _col1
    + File Output Operator
    + compressed: false
    + GlobalTableId: 0
    + table:
    + input format: org.apache.hadoop.mapred.SequenceFileInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
    +
    + Stage: Stage-2
    + Map Reduce
    + Alias -> Map Operator Tree:
    + file:/var/folders/5V/5V4Zq77qGD4aSK9m8V3frVsFdRU/-Tmp-/salbiz/hive_2011-07-25_11-50-36_433_5542638095321427981/-mr-10002
    + Reduce Output Operator
    + key expressions:
    + expr: _col0
    + type: string
    + sort order: +
    + tag: -1
    + value expressions:
    + expr: _col0
    + type: string
    + expr: _col1
    + type: string
    + Reduce Operator Tree:
    + Extract
    + File Output Operator
    + compressed: false
    + GlobalTableId: 0
    + table:
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    +
    + Stage: Stage-0
    + Fetch Operator
    + limit: -1
    +
    +
    +PREHOOK: query: SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@src
    +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
    +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
    +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
    +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
    +PREHOOK: Output: file:/var/folders/5V/5V4Zq77qGD4aSK9m8V3frVsFdRU/-Tmp-/salbiz/hive_2011-07-25_11-50-36_581_3747634705901315299/-mr-10000
    +POSTHOOK: query: SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@src
    +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
    +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
    +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
    +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
    +POSTHOOK: Output: file:/var/folders/5V/5V4Zq77qGD4aSK9m8V3frVsFdRU/-Tmp-/salbiz/hive_2011-07-25_11-50-36_581_3747634705901315299/-mr-10000
    +82 val_82
    +82 val_82
    +82 val_82
    +82 val_82
    +83 val_83
    +83 val_83
    +83 val_83
    +83 val_83
    +83 val_83
    +83 val_83
    +83 val_83
    +83 val_83
    +83 val_83
    +83 val_83
    +83 val_83
    +83 val_83
    +83 val_83
    +83 val_83
    +83 val_83
    +83 val_83
    +84 val_84
    +84 val_84
    +84 val_84
    +84 val_84
    +84 val_84
    +84 val_84
    +84 val_84
    +84 val_84
    +84 val_84
    +84 val_84
    +84 val_84
    +84 val_84
    +84 val_84
    +84 val_84
    +84 val_84
    +84 val_84
    +85 val_85
    +85 val_85
    +85 val_85
    +85 val_85
    +86 val_86
    +86 val_86
    +86 val_86
    +86 val_86
    +87 val_87
    +87 val_87
    +87 val_87
    +87 val_87
    +PREHOOK: query: CREATE INDEX src_index ON TABLE src(key) as 'COMPACT' WITH DEFERRED REBUILD
    +PREHOOK: type: CREATEINDEX
    +POSTHOOK: query: CREATE INDEX src_index ON TABLE src(key) as 'COMPACT' WITH DEFERRED REBUILD
    +POSTHOOK: type: CREATEINDEX
    +PREHOOK: query: ALTER INDEX src_index ON src REBUILD
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@src
    +PREHOOK: Output: default@default__src_src_index__
    +POSTHOOK: query: ALTER INDEX src_index ON src REBUILD
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@src
    +POSTHOOK: Output: default@default__src_src_index__
    +POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
    +POSTHOOK: Lineage: default__src_src_index__._offsets EXPRESSION [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
    +POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
    +PREHOOK: query: CREATE INDEX srcpart_index ON TABLE srcpart(key) as 'COMPACT' WITH DEFERRED REBUILD
    +PREHOOK: type: CREATEINDEX
    +POSTHOOK: query: CREATE INDEX srcpart_index ON TABLE srcpart(key) as 'COMPACT' WITH DEFERRED REBUILD
    +POSTHOOK: type: CREATEINDEX
    +POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
    +POSTHOOK: Lineage: default__src_src_index__._offsets EXPRESSION [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
    +POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
    +PREHOOK: query: ALTER INDEX srcpart_index ON srcpart REBUILD
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
    +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
    +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
    +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
    +PREHOOK: Output: default@default__srcpart_srcpart_index__@ds=2008-04-08/hr=11
    +PREHOOK: Output: default@default__srcpart_srcpart_index__@ds=2008-04-08/hr=12
    +PREHOOK: Output: default@default__srcpart_srcpart_index__@ds=2008-04-09/hr=11
    +PREHOOK: Output: default@default__srcpart_srcpart_index__@ds=2008-04-09/hr=12
    +POSTHOOK: query: ALTER INDEX srcpart_index ON srcpart REBUILD
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
    +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
    +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
    +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
    +POSTHOOK: Output: default@default__srcpart_srcpart_index__@ds=2008-04-08/hr=11
    +POSTHOOK: Output: default@default__srcpart_srcpart_index__@ds=2008-04-08/hr=12
    +POSTHOOK: Output: default@default__srcpart_srcpart_index__@ds=2008-04-09/hr=11
    +POSTHOOK: Output: default@default__srcpart_srcpart_index__@ds=2008-04-09/hr=12
    +POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
    +POSTHOOK: Lineage: default__src_src_index__._offsets EXPRESSION [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
    +POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
    +POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12)._bucketname SIMPLE [(srcpart)srcpart.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
    +POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12)._offsets EXPRESSION [(srcpart)srcpart.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
    +POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
    +PREHOOK: query: -- automatic indexing
    +EXPLAIN SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key
    +PREHOOK: type: QUERY
    +POSTHOOK: query: -- automatic indexing
    +EXPLAIN SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key
    +POSTHOOK: type: QUERY
    +POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
    +POSTHOOK: Lineage: default__src_src_index__._offsets EXPRESSION [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
    +POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
    +POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12)._bucketname SIMPLE [(srcpart)srcpart.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
    +POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12)._offsets EXPRESSION [(srcpart)srcpart.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
    +POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
    +ABSTRACT SYNTAX TREE:
    + (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src) a) (TOK_TABREF (TOK_TABNAME srcpart) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value))) (TOK_WHERE (AND (AND (AND (> (. (TOK_TABLE_OR_COL a) key) 80) (< (. (TOK_TABLE_OR_COL a) key) 100)) (> (. (TOK_TABLE_OR_COL b) key) 70)) (< (. (TOK_TABLE_OR_COL b) key) 90))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL a) key)))))
    +
    +STAGE DEPENDENCIES:
    + Stage-5 is a root stage
    + Stage-8 depends on stages: Stage-5 , consists of Stage-7, Stage-6
    + Stage-7
    + Stage-4 depends on stages: Stage-7, Stage-6
    + Stage-1 depends on stages: Stage-4, Stage-9
    + Stage-2 depends on stages: Stage-1
    + Stage-6
    + Stage-10 is a root stage
    + Stage-13 depends on stages: Stage-10 , consists of Stage-12, Stage-11
    + Stage-12
    + Stage-9 depends on stages: Stage-12, Stage-11
    + Stage-11
    + Stage-0 is a root stage
    +
    +STAGE PLANS:
    + Stage: Stage-5
    + Map Reduce
    + Alias -> Map Operator Tree:
    + default__srcpart_srcpart_index__
    + TableScan
    + alias: default__srcpart_srcpart_index__
    + filterExpr:
    + expr: ((key > 70) and (key < 90))
    + type: boolean
    + Filter Operator
    + predicate:
    + expr: ((key > 70) and (key < 90))
    + type: boolean
    + Select Operator
    + expressions:
    + expr: _bucketname
    + type: string
    + expr: _offsets
    + type: array<bigint>
    + outputColumnNames: _col0, _col1
    + File Output Operator
    + compressed: false
    + GlobalTableId: 1
    + table:
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    +
    + Stage: Stage-8
    + Conditional Operator
    +
    + Stage: Stage-7
    + Move Operator
    + files:
    + hdfs directory: true
    + destination: file:/Users/salbiz/dev/hive/build/ql/scratchdir/hive_2011-07-25_11-51-35_494_7032523871014076058/-ext-10000
    +
    + Stage: Stage-4
    + Move Operator
    + files:
    + hdfs directory: true
    + destination: file:/var/folders/5V/5V4Zq77qGD4aSK9m8V3frVsFdRU/-Tmp-/salbiz/hive_2011-07-25_11-51-34_909_919679217594371878/-mr-10003
    +
    + Stage: Stage-1
    + Map Reduce
    + Alias -> Map Operator Tree:
    + a
    + TableScan
    + alias: a
    + filterExpr:
    + expr: ((key > 80) and (key < 100))
    + type: boolean
    + Filter Operator
    + predicate:
    + expr: ((key > 80) and (key < 100))
    + type: boolean
    + Reduce Output Operator
    + key expressions:
    + expr: key
    + type: string
    + sort order: +
    + Map-reduce partition columns:
    + expr: key
    + type: string
    + tag: 0
    + value expressions:
    + expr: key
    + type: string
    + expr: value
    + type: string
    + b
    + TableScan
    + alias: b
    + filterExpr:
    + expr: ((key > 70) and (key < 90))
    + type: boolean
    + Filter Operator
    + predicate:
    + expr: ((key > 70) and (key < 90))
    + type: boolean
    + Reduce Output Operator
    + key expressions:
    + expr: key
    + type: string
    + sort order: +
    + Map-reduce partition columns:
    + expr: key
    + type: string
    + tag: 1
    + value expressions:
    + expr: key
    + type: string
    + Reduce Operator Tree:
    + Join Operator
    + condition map:
    + Inner Join 0 to 1
    + condition expressions:
    + 0 {VALUE._col0} {VALUE._col1}
    + 1 {VALUE._col0}
    + handleSkewJoin: false
    + outputColumnNames: _col0, _col1, _col4
    + Select Operator
    + expressions:
    + expr: _col0
    + type: string
    + expr: _col1
    + type: string
    + outputColumnNames: _col0, _col1
    + File Output Operator
    + compressed: false
    + GlobalTableId: 0
    + table:
    + input format: org.apache.hadoop.mapred.SequenceFileInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
    +
    + Stage: Stage-2
    + Map Reduce
    + Alias -> Map Operator Tree:
    + file:/var/folders/5V/5V4Zq77qGD4aSK9m8V3frVsFdRU/-Tmp-/salbiz/hive_2011-07-25_11-51-34_909_919679217594371878/-mr-10002
    + Reduce Output Operator
    + key expressions:
    + expr: _col0
    + type: string
    + sort order: +
    + tag: -1
    + value expressions:
    + expr: _col0
    + type: string
    + expr: _col1
    + type: string
    + Reduce Operator Tree:
    + Extract
    + File Output Operator
    + compressed: false
    + GlobalTableId: 0
    + table:
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    +
    + Stage: Stage-6
    + Map Reduce
    + Alias -> Map Operator Tree:
    + file:/Users/salbiz/dev/hive/build/ql/scratchdir/hive_2011-07-25_11-51-35_494_7032523871014076058/-ext-10001
    + File Output Operator
    + compressed: false
    + GlobalTableId: 0
    + table:
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    +
    + Stage: Stage-10
    + Map Reduce
    + Alias -> Map Operator Tree:
    + default__src_src_index__
    + TableScan
    + alias: default__src_src_index__
    + filterExpr:
    + expr: ((key > 80) and (key < 100))
    + type: boolean
    + Filter Operator
    + predicate:
    + expr: ((key > 80) and (key < 100))
    + type: boolean
    + Select Operator
    + expressions:
    + expr: _bucketname
    + type: string
    + expr: _offsets
    + type: array<bigint>
    + outputColumnNames: _col0, _col1
    + File Output Operator
    + compressed: false
    + GlobalTableId: 1
    + table:
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    +
    + Stage: Stage-13
    + Conditional Operator
    +
    + Stage: Stage-12
    + Move Operator
    + files:
    + hdfs directory: true
    + destination: file:/Users/salbiz/dev/hive/build/ql/scratchdir/hive_2011-07-25_11-51-35_590_77690777312722941/-ext-10000
    +
    + Stage: Stage-9
    + Move Operator
    + files:
    + hdfs directory: true
    + destination: file:/var/folders/5V/5V4Zq77qGD4aSK9m8V3frVsFdRU/-Tmp-/salbiz/hive_2011-07-25_11-51-34_909_919679217594371878/-mr-10004
    +
    + Stage: Stage-11
    + Map Reduce
    + Alias -> Map Operator Tree:
    + file:/Users/salbiz/dev/hive/build/ql/scratchdir/hive_2011-07-25_11-51-35_590_77690777312722941/-ext-10001
    + File Output Operator
    + compressed: false
    + GlobalTableId: 0
    + table:
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    +
    + Stage: Stage-0
    + Fetch Operator
    + limit: -1
    +
    +
    +PREHOOK: query: SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@default__src_src_index__
    +PREHOOK: Input: default@default__srcpart_srcpart_index__@ds=2008-04-08/hr=11
    +PREHOOK: Input: default@default__srcpart_srcpart_index__@ds=2008-04-08/hr=12
    +PREHOOK: Input: default@default__srcpart_srcpart_index__@ds=2008-04-09/hr=11
    +PREHOOK: Input: default@default__srcpart_srcpart_index__@ds=2008-04-09/hr=12
    +PREHOOK: Input: default@src
    +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
    +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
    +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
    +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
    +PREHOOK: Output: file:/var/folders/5V/5V4Zq77qGD4aSK9m8V3frVsFdRU/-Tmp-/salbiz/hive_2011-07-25_11-51-35_695_2966341580180350713/-mr-10000
    +POSTHOOK: query: SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@default__src_src_index__
    +POSTHOOK: Input: default@default__srcpart_srcpart_index__@ds=2008-04-08/hr=11
    +POSTHOOK: Input: default@default__srcpart_srcpart_index__@ds=2008-04-08/hr=12
    +POSTHOOK: Input: default@default__srcpart_srcpart_index__@ds=2008-04-09/hr=11
    +POSTHOOK: Input: default@default__srcpart_srcpart_index__@ds=2008-04-09/hr=12
    +POSTHOOK: Input: default@src
    +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
    +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
    +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
    +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
    +POSTHOOK: Output: file:/var/folders/5V/5V4Zq77qGD4aSK9m8V3frVsFdRU/-Tmp-/salbiz/hive_2011-07-25_11-51-35_695_2966341580180350713/-mr-10000
    +POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
    +POSTHOOK: Lineage: default__src_src_index__._offsets EXPRESSION [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
    +POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
    +POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12)._bucketname SIMPLE [(srcpart)srcpart.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
    +POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12)._offsets EXPRESSION [(srcpart)srcpart.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
    +POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
    +82 val_82
    +82 val_82
    +82 val_82
    +82 val_82
    +83 val_83
    +83 val_83
    +83 val_83
    +83 val_83
    +83 val_83
    +83 val_83
    +83 val_83
    +83 val_83
    +83 val_83
    +83 val_83
    +83 val_83
    +83 val_83
    +83 val_83
    +83 val_83
    +83 val_83
    +83 val_83
    +84 val_84
    +84 val_84
    +84 val_84
    +84 val_84
    +84 val_84
    +84 val_84
    +84 val_84
    +84 val_84
    +84 val_84
    +84 val_84
    +84 val_84
    +84 val_84
    +84 val_84
    +84 val_84
    +84 val_84
    +84 val_84
    +85 val_85
    +85 val_85
    +85 val_85
    +85 val_85
    +86 val_86
    +86 val_86
    +86 val_86
    +86 val_86
    +87 val_87
    +87 val_87
    +87 val_87
    +87 val_87
    +PREHOOK: query: DROP INDEX src_index on src
    +PREHOOK: type: DROPINDEX
    +POSTHOOK: query: DROP INDEX src_index on src
    +POSTHOOK: type: DROPINDEX
    +POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
    +POSTHOOK: Lineage: default__src_src_index__._offsets EXPRESSION [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
    +POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
    +POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12)._bucketname SIMPLE [(srcpart)srcpart.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
    +POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12)._offsets EXPRESSION [(srcpart)srcpart.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
    +POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
    +PREHOOK: query: DROP INDEX srcpart_index on src
    +PREHOOK: type: DROPINDEX
    +POSTHOOK: query: DROP INDEX srcpart_index on src
    +POSTHOOK: type: DROPINDEX
    +POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
    +POSTHOOK: Lineage: default__src_src_index__._offsets EXPRESSION [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
    +POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
    +POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12)._bucketname SIMPLE [(srcpart)srcpart.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
    +POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12)._offsets EXPRESSION [(srcpart)srcpart.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
    +POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]

Related Discussions

Discussion Navigation
viewthread | post
Discussion Overview
groupcommits @
categorieshive, hadoop
postedJul 26, '11 at 12:13a
activeJul 26, '11 at 12:13a
posts2
users1
websitehive.apache.org

1 user in discussion

Jvs: 2 posts

People

Translate

site design / logo © 2021 Grokbase