FAQ
Repository: hive
Updated Branches:
   refs/heads/master 3038b05ed -> 968620932


http://git-wip-us.apache.org/repos/asf/hive/blob/96862093/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestHBaseAggregateStatsExtrapolation.java
----------------------------------------------------------------------
diff --git a/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestHBaseAggregateStatsExtrapolation.java b/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestHBaseAggregateStatsExtrapolation.java
new file mode 100644
index 0000000..f4e55ed
--- /dev/null
+++ b/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestHBaseAggregateStatsExtrapolation.java
@@ -0,0 +1,717 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.hadoop.hive.metastore.hbase;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.apache.hadoop.hbase.Cell;
+import org.apache.hadoop.hbase.client.HTableInterface;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.metastore.StatObjectConverter;
+import org.apache.hadoop.hive.metastore.api.AggrStats;
+import org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData;
+import org.apache.hadoop.hive.metastore.api.ColumnStatistics;
+import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData;
+import org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc;
+import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
+import org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData;
+import org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData;
+import org.apache.hadoop.hive.metastore.api.FieldSchema;
+import org.apache.hadoop.hive.metastore.api.LongColumnStatsData;
+import org.apache.hadoop.hive.metastore.api.Partition;
+import org.apache.hadoop.hive.metastore.api.SerDeInfo;
+import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
+import org.apache.hadoop.hive.metastore.api.StringColumnStatsData;
+import org.apache.hadoop.hive.metastore.api.Table;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+import org.mockito.Mock;
+import org.mockito.MockitoAnnotations;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+import java.util.SortedMap;
+import java.util.TreeMap;
+
+/**
+ * Exercises {@code HBaseStore.get_aggr_stats_for} against a mocked HBase table when column
+ * statistics are present for all, none, or only some of a table's partitions.
+ *
+ * <p>Every test follows the same recipe: create table {@code default.snp} with one data column
+ * and one partition column {@code ds}, add ten partitions {@code ds=0 .. ds=9}, write column
+ * statistics for a test-specific subset of those partitions, then request aggregate statistics
+ * for all ten partition names and compare the result with pinned expected values.
+ *
+ * <p>The shared recipe steps live in private helpers; each test method only states which
+ * partitions receive statistics, what those statistics are, and what the aggregate must be.
+ */
+public class TestHBaseAggregateStatsExtrapolation {
+  private static final Logger LOG = LoggerFactory
+      .getLogger(TestHBaseAggregateStatsExtrapolation.class.getName());
+
+  /** Database every test creates its table in. */
+  private static final String DB_NAME = "default";
+  /** Table name shared by all tests; the store is rebuilt before each test. */
+  private static final String TABLE_NAME = "snp";
+  /** Number of partitions (ds=0 .. ds=9) each test registers and later queries. */
+  private static final int PARTITION_COUNT = 10;
+
+  @Mock
+  HTableInterface htable;
+  private HBaseStore store;
+  SortedMap<String, Cell> rows = new TreeMap<>();
+
+  /** Timestamp captured in {@link #before()} and reused for every object a test creates. */
+  private long now;
+
+  // NDV will be 3 for the bitVectors
+  String bitVectors = "{0, 4, 5, 7}{0, 1}{0, 1, 2}{0, 1, 4}{0}{0, 2}{0, 3}{0, 2, 3, 4}{0, 1, 4}{0, 1}{0}{0, 1, 3, 8}{0, 2}{0, 2}{0, 9}{0, 1, 4}";
+
+  /**
+   * Builds a fresh store on top of the mocked table before each test and resets the stats cache
+   * counters.
+   *
+   * @throws IOException if {@code MockUtils.init} fails
+   */
+  @Before
+  public void before() throws IOException {
+    MockitoAnnotations.initMocks(this);
+    HiveConf conf = new HiveConf();
+    // NOTE(review): presumably turns the HBaseReadWrite cache off -- confirm against HBaseReadWrite.
+    conf.setBoolean(HBaseReadWrite.NO_CACHE_CONF, true);
+    store = MockUtils.init(conf, htable, rows);
+    store.backdoor().getStatsCache().resetCounters();
+    now = System.currentTimeMillis();
+  }
+
+  /** All ten partitions carry long stats with bit vectors. */
+  @Test
+  public void allPartitionsHaveBitVectorStatusLong() throws Exception {
+    StorageDescriptor sd = createPartitionedTable("col1", "long");
+    for (int i = 0; i < PARTITION_COUNT; i++) {
+      List<String> partVal = addPartition(sd, i);
+      writeStats(partVal, "col1", "long", longStatsData(i, 10 * i + 1, true));
+    }
+    ColumnStatisticsObj cso = aggregateSingleColumn("col1", "long", 10);
+    assertLongStats(cso, 1009, -1009, 45, 3);
+  }
+
+  /** All ten partitions carry decimal stats with bit vectors. */
+  @Test
+  public void allPartitionsHaveBitVectorStatusDecimal() throws Exception {
+    StorageDescriptor sd = createPartitionedTable("col1_decimal", "decimal");
+    for (int i = 0; i < PARTITION_COUNT; i++) {
+      List<String> partVal = addPartition(sd, i);
+      DecimalColumnStatsData stats = new DecimalColumnStatsData();
+      stats.setHighValue(StatObjectConverter.createThriftDecimal("" + (1000 + i)));
+      stats.setLowValue(StatObjectConverter.createThriftDecimal("" + (-1000 - i)));
+      stats.setNumNulls(i);
+      stats.setNumDVs(10 * i + 1);
+      stats.setBitVectors(bitVectors);
+      ColumnStatisticsData data = new ColumnStatisticsData();
+      data.setDecimalStats(stats);
+      writeStats(partVal, "col1_decimal", "decimal", data);
+    }
+    ColumnStatisticsObj cso = aggregateSingleColumn("col1_decimal", "decimal", 10);
+    DecimalColumnStatsData aggregated = cso.getStatsData().getDecimalStats();
+    Assert.assertEquals(1009, HBaseUtils.getDoubleValue(aggregated.getHighValue()), 0.01);
+    Assert.assertEquals(-1009, HBaseUtils.getDoubleValue(aggregated.getLowValue()), 0.01);
+    Assert.assertEquals(45, aggregated.getNumNulls());
+    Assert.assertEquals(3, aggregated.getNumDVs());
+  }
+
+  /** All ten partitions carry double stats with bit vectors. */
+  @Test
+  public void allPartitionsHaveBitVectorStatusDouble() throws Exception {
+    StorageDescriptor sd = createPartitionedTable("col1_double", "double");
+    for (int i = 0; i < PARTITION_COUNT; i++) {
+      List<String> partVal = addPartition(sd, i);
+      writeStats(partVal, "col1_double", "double", doubleStatsData(i, 10 * i + 1));
+    }
+    ColumnStatisticsObj cso = aggregateSingleColumn("col1_double", "double", 10);
+    assertDoubleStats(cso, 1009, -1009, 45, 3);
+  }
+
+  /** All ten partitions carry string stats with bit vectors. */
+  @Test
+  public void allPartitionsHaveBitVectorStatusString() throws Exception {
+    StorageDescriptor sd = createPartitionedTable("col1_string", "string");
+    for (int i = 0; i < PARTITION_COUNT; i++) {
+      List<String> partVal = addPartition(sd, i);
+      StringColumnStatsData stats = new StringColumnStatsData();
+      stats.setAvgColLen(i + 1);
+      stats.setMaxColLen(i + 10);
+      stats.setNumNulls(i);
+      stats.setNumDVs(10 * i + 1);
+      stats.setBitVectors(bitVectors);
+      ColumnStatisticsData data = new ColumnStatisticsData();
+      data.setStringStats(stats);
+      writeStats(partVal, "col1_string", "string", data);
+    }
+    ColumnStatisticsObj cso = aggregateSingleColumn("col1_string", "string", 10);
+    StringColumnStatsData aggregated = cso.getStatsData().getStringStats();
+    Assert.assertEquals(10, aggregated.getAvgColLen(), 0.01);
+    Assert.assertEquals(19, aggregated.getMaxColLen(), 0.01);
+    Assert.assertEquals(45, aggregated.getNumNulls());
+    Assert.assertEquals(3, aggregated.getNumDVs());
+  }
+
+  /** All ten partitions carry long stats, none of them with bit vectors. */
+  @Test
+  public void noPartitionsHaveBitVectorStatus() throws Exception {
+    StorageDescriptor sd = createPartitionedTable("col2", "long");
+    for (int i = 0; i < PARTITION_COUNT; i++) {
+      List<String> partVal = addPartition(sd, i);
+      writeStats(partVal, "col2", "long", longStatsData(i, 10 * i, false));
+    }
+    ColumnStatisticsObj cso = aggregateSingleColumn("col2", "long", 10);
+    assertLongStats(cso, 1009, -1009, 45, 90);
+  }
+
+  /** Only partitions 0, 1, 8 and 9 carry stats; the six in the middle have none. */
+  @Test
+  public void TwoEndsOfPartitionsHaveBitVectorStatus() throws Exception {
+    StorageDescriptor sd = createPartitionedTable("col3", "long");
+    for (int i = 0; i < PARTITION_COUNT; i++) {
+      List<String> partVal = addPartition(sd, i);
+      if (i < 2 || i > 7) {
+        writeStats(partVal, "col3", "long", longStatsData(i, 10 * i, true));
+      }
+    }
+    ColumnStatisticsObj cso = aggregateSingleColumn("col3", "long", 4);
+    assertLongStats(cso, 1010, -1010, 45, 3);
+  }
+
+  /** Only partitions 3, 4, 5 and 6 carry stats; both ends have none. */
+  @Test
+  public void MiddleOfPartitionsHaveBitVectorStatus() throws Exception {
+    StorageDescriptor sd = createPartitionedTable("col4", "long");
+    for (int i = 0; i < PARTITION_COUNT; i++) {
+      List<String> partVal = addPartition(sd, i);
+      if (i > 2 && i < 7) {
+        writeStats(partVal, "col4", "long", longStatsData(i, 10 * i, true));
+      }
+    }
+    ColumnStatisticsObj cso = aggregateSingleColumn("col4", "long", 4);
+    assertLongStats(cso, 1006, -1006, 45, 3);
+  }
+
+  /** Partitions 0, 2, 3, 5, 6 and 8 carry long stats; 1, 4, 7 and 9 have none. */
+  @Test
+  public void TwoEndsAndMiddleOfPartitionsHaveBitVectorStatusLong() throws Exception {
+    StorageDescriptor sd = createPartitionedTable("col5", "long");
+    for (int i = 0; i < PARTITION_COUNT; i++) {
+      List<String> partVal = addPartition(sd, i);
+      if (i == 0 || i == 2 || i == 3 || i == 5 || i == 6 || i == 8) {
+        writeStats(partVal, "col5", "long", longStatsData(i, 10 * i, true));
+      }
+    }
+    ColumnStatisticsObj cso = aggregateSingleColumn("col5", "long", 6);
+    assertLongStats(cso, 1010, -1010, 40, 3);
+  }
+
+  /** Partitions 0, 2, 3, 5, 6 and 8 carry double stats; 1, 4, 7 and 9 have none. */
+  @Test
+  public void TwoEndsAndMiddleOfPartitionsHaveBitVectorStatusDouble() throws Exception {
+    StorageDescriptor sd = createPartitionedTable("col5_double", "double");
+    for (int i = 0; i < PARTITION_COUNT; i++) {
+      List<String> partVal = addPartition(sd, i);
+      if (i == 0 || i == 2 || i == 3 || i == 5 || i == 6 || i == 8) {
+        writeStats(partVal, "col5_double", "double", doubleStatsData(i, 10 * i));
+      }
+    }
+    ColumnStatisticsObj cso = aggregateSingleColumn("col5_double", "double", 6);
+    assertDoubleStats(cso, 1010, -1010, 40, 3);
+  }
+
+  /**
+   * Creates table {@code default.snp} with a single data column and a string partition column
+   * {@code ds}, and registers it with the store.
+   *
+   * @param colName name of the single data column
+   * @param colType type string of the single data column
+   * @return the table's storage descriptor, used as the template for partition descriptors
+   * @throws Exception if the store rejects the table
+   */
+  private StorageDescriptor createPartitionedTable(String colName, String colType)
+      throws Exception {
+    List<FieldSchema> cols = new ArrayList<>();
+    cols.add(new FieldSchema(colName, colType, "nocomment"));
+    SerDeInfo serde = new SerDeInfo("serde", "seriallib", null);
+    StorageDescriptor sd = new StorageDescriptor(cols, "file:/tmp", "input", "output", false, 0,
+        serde, null, null, Collections.<String, String> emptyMap());
+    List<FieldSchema> partCols = new ArrayList<>();
+    partCols.add(new FieldSchema("ds", "string", ""));
+    // NOTE(review): (int) now narrows a millisecond timestamp; the value is never asserted on.
+    Table table = new Table(TABLE_NAME, DB_NAME, "me", (int) now, (int) now, 0, sd, partCols,
+        Collections.<String, String> emptyMap(), null, null, null);
+    store.createTable(table);
+    return sd;
+  }
+
+  /**
+   * Adds the partition whose single partition value is the decimal string of {@code i}.
+   *
+   * @param sd table storage descriptor to copy for the partition
+   * @param i partition index
+   * @return the partition value list, needed later to attach statistics to this partition
+   * @throws Exception if the store rejects the partition
+   */
+  private List<String> addPartition(StorageDescriptor sd, int i) throws Exception {
+    List<String> partVal = Arrays.asList("" + i);
+    StorageDescriptor psd = new StorageDescriptor(sd);
+    // NOTE(review): partVal is a List, so this renders as "ds=[0]" rather than "ds=0"; kept
+    // as-is because changing the stored location is outside the scope of this cleanup.
+    psd.setLocation("file:/tmp/default/hit/ds=" + partVal);
+    Partition part = new Partition(partVal, DB_NAME, TABLE_NAME, (int) now, (int) now, psd,
+        Collections.<String, String> emptyMap());
+    store.addPartition(part);
+    return partVal;
+  }
+
+  /**
+   * Wraps {@code data} in a partition-level {@link ColumnStatistics} object for one column and
+   * writes it to the store for the given partition.
+   *
+   * @param partVal partition value list as returned by {@link #addPartition}
+   * @param colName column the statistics describe
+   * @param colType type string of that column
+   * @param data the statistics payload
+   * @throws Exception if the store rejects the update
+   */
+  private void writeStats(List<String> partVal, String colName, String colType,
+      ColumnStatisticsData data) throws Exception {
+    ColumnStatisticsDesc desc = new ColumnStatisticsDesc(false, DB_NAME, TABLE_NAME);
+    desc.setLastAnalyzed(now);
+    // NOTE(review): as with the location, this yields "ds=[0]" because partVal is a List.
+    desc.setPartName("ds=" + partVal);
+    ColumnStatisticsObj obj = new ColumnStatisticsObj();
+    obj.setColName(colName);
+    obj.setColType(colType);
+    obj.setStatsData(data);
+    ColumnStatistics cs = new ColumnStatistics();
+    cs.setStatsDesc(desc);
+    cs.addToStatsObj(obj);
+    store.updatePartitionColumnStatistics(cs, partVal);
+  }
+
+  /**
+   * Builds long column statistics for partition {@code i}: high {@code 1000 + i}, low
+   * {@code -1000 - i}, {@code i} nulls and the given distinct-value count.
+   *
+   * @param i partition index
+   * @param numDVs distinct-value count to record
+   * @param withBitVectors whether to attach the shared {@link #bitVectors} string
+   * @return the statistics wrapped in a {@link ColumnStatisticsData}
+   */
+  private ColumnStatisticsData longStatsData(int i, long numDVs, boolean withBitVectors) {
+    LongColumnStatsData stats = new LongColumnStatsData();
+    stats.setHighValue(1000 + i);
+    stats.setLowValue(-1000 - i);
+    stats.setNumNulls(i);
+    stats.setNumDVs(numDVs);
+    if (withBitVectors) {
+      stats.setBitVectors(bitVectors);
+    }
+    ColumnStatisticsData data = new ColumnStatisticsData();
+    data.setLongStats(stats);
+    return data;
+  }
+
+  /**
+   * Builds double column statistics for partition {@code i}: high {@code 1000 + i}, low
+   * {@code -1000 - i}, {@code i} nulls, the given distinct-value count and the shared
+   * {@link #bitVectors} string.
+   *
+   * @param i partition index
+   * @param numDVs distinct-value count to record
+   * @return the statistics wrapped in a {@link ColumnStatisticsData}
+   */
+  private ColumnStatisticsData doubleStatsData(int i, long numDVs) {
+    DoubleColumnStatsData stats = new DoubleColumnStatsData();
+    stats.setHighValue(1000 + i);
+    stats.setLowValue(-1000 - i);
+    stats.setNumNulls(i);
+    stats.setNumDVs(numDVs);
+    stats.setBitVectors(bitVectors);
+    ColumnStatisticsData data = new ColumnStatisticsData();
+    data.setDoubleStats(stats);
+    return data;
+  }
+
+  /**
+   * Requests aggregate statistics for one column over partitions {@code ds=0 .. ds=9} and checks
+   * the parts common to every test: partitions found, exactly one result, column name and type.
+   *
+   * @param colName column to aggregate
+   * @param colType type string the aggregated result must report
+   * @param expectedPartsFound number of partitions the aggregate must report as found
+   * @return the single aggregated column statistics object, for type-specific assertions
+   * @throws Exception if the store call fails
+   */
+  private ColumnStatisticsObj aggregateSingleColumn(String colName, String colType,
+      int expectedPartsFound) throws Exception {
+    List<String> partNames = new ArrayList<>();
+    for (int i = 0; i < PARTITION_COUNT; i++) {
+      partNames.add("ds=" + i);
+    }
+    AggrStats aggrStats = store.get_aggr_stats_for(DB_NAME, TABLE_NAME, partNames,
+        Arrays.asList(colName));
+    Assert.assertEquals(expectedPartsFound, aggrStats.getPartsFound());
+    Assert.assertEquals(1, aggrStats.getColStatsSize());
+    ColumnStatisticsObj cso = aggrStats.getColStats().get(0);
+    Assert.assertEquals(colName, cso.getColName());
+    Assert.assertEquals(colType, cso.getColType());
+    return cso;
+  }
+
+  /**
+   * Asserts the long statistics inside {@code cso}. High and low are compared with a tolerance
+   * of 0.01; the null and distinct-value counts are compared exactly.
+   */
+  private static void assertLongStats(ColumnStatisticsObj cso, double high, double low,
+      long numNulls, long numDVs) {
+    LongColumnStatsData stats = cso.getStatsData().getLongStats();
+    Assert.assertEquals(high, stats.getHighValue(), 0.01);
+    Assert.assertEquals(low, stats.getLowValue(), 0.01);
+    Assert.assertEquals(numNulls, stats.getNumNulls());
+    Assert.assertEquals(numDVs, stats.getNumDVs());
+  }
+
+  /**
+   * Asserts the double statistics inside {@code cso}. High and low are compared with a tolerance
+   * of 0.01; the null and distinct-value counts are compared exactly.
+   */
+  private static void assertDoubleStats(ColumnStatisticsObj cso, double high, double low,
+      long numNulls, long numDVs) {
+    DoubleColumnStatsData stats = cso.getStatsData().getDoubleStats();
+    Assert.assertEquals(high, stats.getHighValue(), 0.01);
+    Assert.assertEquals(low, stats.getLowValue(), 0.01);
+    Assert.assertEquals(numNulls, stats.getNumNulls());
+    Assert.assertEquals(numDVs, stats.getNumDVs());
+  }
+}

http://git-wip-us.apache.org/repos/asf/hive/blob/96862093/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestHBaseAggregateStatsNDVUniformDist.java
----------------------------------------------------------------------
diff --git a/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestHBaseAggregateStatsNDVUniformDist.java b/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestHBaseAggregateStatsNDVUniformDist.java
new file mode 100644
index 0000000..62918be
--- /dev/null
+++ b/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestHBaseAggregateStatsNDVUniformDist.java
@@ -0,0 +1,581 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.hadoop.hive.metastore.hbase;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.apache.hadoop.hbase.Cell;
+import org.apache.hadoop.hbase.client.HTableInterface;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.metastore.StatObjectConverter;
+import org.apache.hadoop.hive.metastore.api.AggrStats;
+import org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData;
+import org.apache.hadoop.hive.metastore.api.ColumnStatistics;
+import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData;
+import org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc;
+import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
+import org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData;
+import org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData;
+import org.apache.hadoop.hive.metastore.api.FieldSchema;
+import org.apache.hadoop.hive.metastore.api.LongColumnStatsData;
+import org.apache.hadoop.hive.metastore.api.Partition;
+import org.apache.hadoop.hive.metastore.api.SerDeInfo;
+import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
+import org.apache.hadoop.hive.metastore.api.Table;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+import org.mockito.Mock;
+import org.mockito.MockitoAnnotations;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+import java.util.SortedMap;
+import java.util.TreeMap;
+
+public class TestHBaseAggregateStatsNDVUniformDist {
+ private static final Logger LOG = LoggerFactory
+ .getLogger(TestHBaseAggregateStatsNDVUniformDist.class.getName());
+
+ @Mock
+ HTableInterface htable;
+ private HBaseStore store;
+ SortedMap<String, Cell> rows = new TreeMap<>();
+
+ // NDV will be 3 for bitVectors[0] and 12 for bitVectors[1]
+ String bitVectors[] = {
+ "{0, 4, 5, 7}{0, 1}{0, 1, 2}{0, 1, 4}{0}{0, 2}{0, 3}{0, 2, 3, 4}{0, 1, 4}{0, 1}{0}{0, 1, 3, 8}{0, 2}{0, 2}{0, 9}{0, 1, 4}",
+ "{1, 2}{1, 2}{1, 2}{1, 2}{1, 2}{1, 2}{1, 2}{1, 2}{1, 2}{1, 2}{1, 2}{1, 2}{1, 2}{1, 2}{1, 2}{1, 2}" };
+
+ @Before
+ public void before() throws IOException {
+ MockitoAnnotations.initMocks(this);
+ HiveConf conf = new HiveConf();
+ conf.setBoolean(HBaseReadWrite.NO_CACHE_CONF, true);
+ conf.setBoolean(HiveConf.ConfVars.HIVE_METASTORE_STATS_NDV_DENSITY_FUNCTION.varname, true);
+ store = MockUtils.init(conf, htable, rows);
+ store.backdoor().getStatsCache().resetCounters();
+ }
+
+ private static interface Checker {
+ void checkStats(AggrStats aggrStats) throws Exception;
+ }
+
+ @Test
+ public void allPartitionsHaveBitVectorStatus() throws Exception {
+ String dbName = "default";
+ String tableName = "snp";
+ long now = System.currentTimeMillis();
+ List<FieldSchema> cols = new ArrayList<>();
+ cols.add(new FieldSchema("col1", "long", "nocomment"));
+ SerDeInfo serde = new SerDeInfo("serde", "seriallib", null);
+ StorageDescriptor sd = new StorageDescriptor(cols, "file:/tmp", "input", "output", false, 0,
+ serde, null, null, Collections.<String, String> emptyMap());
+ List<FieldSchema> partCols = new ArrayList<>();
+ partCols.add(new FieldSchema("ds", "string", ""));
+ Table table = new Table(tableName, dbName, "me", (int) now, (int) now, 0, sd, partCols,
+ Collections.<String, String> emptyMap(), null, null, null);
+ store.createTable(table);
+
+ List<List<String>> partVals = new ArrayList<>();
+ for (int i = 0; i < 10; i++) {
+ List<String> partVal = Arrays.asList("" + i);
+ partVals.add(partVal);
+ StorageDescriptor psd = new StorageDescriptor(sd);
+ psd.setLocation("file:/tmp/default/hit/ds=" + partVal);
+ Partition part = new Partition(partVal, dbName, tableName, (int) now, (int) now, psd,
+ Collections.<String, String> emptyMap());
+ store.addPartition(part);
+ ColumnStatistics cs = new ColumnStatistics();
+ ColumnStatisticsDesc desc = new ColumnStatisticsDesc(false, dbName, tableName);
+ desc.setLastAnalyzed(now);
+ desc.setPartName("ds=" + partVal);
+ cs.setStatsDesc(desc);
+ ColumnStatisticsObj obj = new ColumnStatisticsObj();
+ obj.setColName("col1");
+ obj.setColType("long");
+ ColumnStatisticsData data = new ColumnStatisticsData();
+ LongColumnStatsData dcsd = new LongColumnStatsData();
+ dcsd.setHighValue(1000 + i);
+ dcsd.setLowValue(-1000 - i);
+ dcsd.setNumNulls(i);
+ dcsd.setNumDVs(10 * i + 1);
+ dcsd.setBitVectors(bitVectors[0]);
+ data.setLongStats(dcsd);
+ obj.setStatsData(data);
+ cs.addToStatsObj(obj);
+ store.updatePartitionColumnStatistics(cs, partVal);
+ }
+
+ Checker statChecker = new Checker() {
+ @Override
+ public void checkStats(AggrStats aggrStats) throws Exception {
+ Assert.assertEquals(10, aggrStats.getPartsFound());
+ Assert.assertEquals(1, aggrStats.getColStatsSize());
+ ColumnStatisticsObj cso = aggrStats.getColStats().get(0);
+ Assert.assertEquals("col1", cso.getColName());
+ Assert.assertEquals("long", cso.getColType());
+ LongColumnStatsData lcsd = cso.getStatsData().getLongStats();
+ Assert.assertEquals(1009, lcsd.getHighValue(), 0.01);
+ Assert.assertEquals(-1009, lcsd.getLowValue(), 0.01);
+ Assert.assertEquals(45, lcsd.getNumNulls());
+ Assert.assertEquals(3, lcsd.getNumDVs());
+ }
+ };
+ List<String> partNames = new ArrayList<>();
+ for (int i = 0; i < 10; i++) {
+ partNames.add("ds=" + i);
+ }
+ AggrStats aggrStats = store.get_aggr_stats_for(dbName, tableName, partNames,
+ Arrays.asList("col1"));
+ statChecker.checkStats(aggrStats);
+ }
+
+ @Test
+ public void noPartitionsHaveBitVectorStatus() throws Exception {
+ String dbName = "default";
+ String tableName = "snp";
+ long now = System.currentTimeMillis();
+ List<FieldSchema> cols = new ArrayList<>();
+ cols.add(new FieldSchema("col2", "long", "nocomment"));
+ SerDeInfo serde = new SerDeInfo("serde", "seriallib", null);
+ StorageDescriptor sd = new StorageDescriptor(cols, "file:/tmp", "input", "output", false, 0,
+ serde, null, null, Collections.<String, String> emptyMap());
+ List<FieldSchema> partCols = new ArrayList<>();
+ partCols.add(new FieldSchema("ds", "string", ""));
+ Table table = new Table(tableName, dbName, "me", (int) now, (int) now, 0, sd, partCols,
+ Collections.<String, String> emptyMap(), null, null, null);
+ store.createTable(table);
+
+ List<List<String>> partVals = new ArrayList<>();
+ for (int i = 0; i < 10; i++) {
+ List<String> partVal = Arrays.asList("" + i);
+ partVals.add(partVal);
+ StorageDescriptor psd = new StorageDescriptor(sd);
+ psd.setLocation("file:/tmp/default/hit/ds=" + partVal);
+ Partition part = new Partition(partVal, dbName, tableName, (int) now, (int) now, psd,
+ Collections.<String, String> emptyMap());
+ store.addPartition(part);
+ ColumnStatistics cs = new ColumnStatistics();
+ ColumnStatisticsDesc desc = new ColumnStatisticsDesc(false, dbName, tableName);
+ desc.setLastAnalyzed(now);
+ desc.setPartName("ds=" + partVal);
+ cs.setStatsDesc(desc);
+ ColumnStatisticsObj obj = new ColumnStatisticsObj();
+ obj.setColName("col2");
+ obj.setColType("long");
+ ColumnStatisticsData data = new ColumnStatisticsData();
+ LongColumnStatsData dcsd = new LongColumnStatsData();
+ dcsd.setHighValue(1000 + i);
+ dcsd.setLowValue(-1000 - i);
+ dcsd.setNumNulls(i);
+ dcsd.setNumDVs(10 * i + 1);
+ data.setLongStats(dcsd);
+ obj.setStatsData(data);
+ cs.addToStatsObj(obj);
+ store.updatePartitionColumnStatistics(cs, partVal);
+ }
+
+ Checker statChecker = new Checker() {
+ @Override
+ public void checkStats(AggrStats aggrStats) throws Exception {
+ Assert.assertEquals(10, aggrStats.getPartsFound());
+ Assert.assertEquals(1, aggrStats.getColStatsSize());
+ ColumnStatisticsObj cso = aggrStats.getColStats().get(0);
+ Assert.assertEquals("col2", cso.getColName());
+ Assert.assertEquals("long", cso.getColType());
+ LongColumnStatsData lcsd = cso.getStatsData().getLongStats();
+ Assert.assertEquals(1009, lcsd.getHighValue(), 0.01);
+ Assert.assertEquals(-1009, lcsd.getLowValue(), 0.01);
+ Assert.assertEquals(45, lcsd.getNumNulls());
+ Assert.assertEquals(91, lcsd.getNumDVs());
+ }
+ };
+ List<String> partNames = new ArrayList<>();
+ for (int i = 0; i < 10; i++) {
+ partNames.add("ds=" + i);
+ }
+ AggrStats aggrStats = store.get_aggr_stats_for(dbName, tableName, partNames,
+ Arrays.asList("col2"));
+ statChecker.checkStats(aggrStats);
+ }
+
+ @Test
+ public void TwoEndsOfPartitionsHaveBitVectorStatus() throws Exception {
+ String dbName = "default";
+ String tableName = "snp";
+ long now = System.currentTimeMillis();
+ List<FieldSchema> cols = new ArrayList<>();
+ cols.add(new FieldSchema("col3", "long", "nocomment"));
+ SerDeInfo serde = new SerDeInfo("serde", "seriallib", null);
+ StorageDescriptor sd = new StorageDescriptor(cols, "file:/tmp", "input", "output", false, 0,
+ serde, null, null, Collections.<String, String> emptyMap());
+ List<FieldSchema> partCols = new ArrayList<>();
+ partCols.add(new FieldSchema("ds", "string", ""));
+ Table table = new Table(tableName, dbName, "me", (int) now, (int) now, 0, sd, partCols,
+ Collections.<String, String> emptyMap(), null, null, null);
+ store.createTable(table);
+
+ List<List<String>> partVals = new ArrayList<>();
+ for (int i = 0; i < 10; i++) {
+ List<String> partVal = Arrays.asList("" + i);
+ partVals.add(partVal);
+ StorageDescriptor psd = new StorageDescriptor(sd);
+ psd.setLocation("file:/tmp/default/hit/ds=" + partVal);
+ Partition part = new Partition(partVal, dbName, tableName, (int) now, (int) now, psd,
+ Collections.<String, String> emptyMap());
+ store.addPartition(part);
+ if (i < 2 || i > 7) {
+ ColumnStatistics cs = new ColumnStatistics();
+ ColumnStatisticsDesc desc = new ColumnStatisticsDesc(false, dbName, tableName);
+ desc.setLastAnalyzed(now);
+ desc.setPartName("ds=" + partVal);
+ cs.setStatsDesc(desc);
+ ColumnStatisticsObj obj = new ColumnStatisticsObj();
+ obj.setColName("col3");
+ obj.setColType("long");
+ ColumnStatisticsData data = new ColumnStatisticsData();
+ LongColumnStatsData dcsd = new LongColumnStatsData();
+ dcsd.setHighValue(1000 + i);
+ dcsd.setLowValue(-1000 - i);
+ dcsd.setNumNulls(i);
+ dcsd.setNumDVs(10 * i + 1);
+ dcsd.setBitVectors(bitVectors[i / 5]);
+ data.setLongStats(dcsd);
+ obj.setStatsData(data);
+ cs.addToStatsObj(obj);
+ store.updatePartitionColumnStatistics(cs, partVal);
+ }
+ }
+
+ Checker statChecker = new Checker() {
+ @Override
+ public void checkStats(AggrStats aggrStats) throws Exception {
+ Assert.assertEquals(4, aggrStats.getPartsFound());
+ Assert.assertEquals(1, aggrStats.getColStatsSize());
+ ColumnStatisticsObj cso = aggrStats.getColStats().get(0);
+ Assert.assertEquals("col3", cso.getColName());
+ Assert.assertEquals("long", cso.getColType());
+ LongColumnStatsData lcsd = cso.getStatsData().getLongStats();
+ Assert.assertEquals(1010, lcsd.getHighValue(), 0.01);
+ Assert.assertEquals(-1010, lcsd.getLowValue(), 0.01);
+ Assert.assertEquals(45, lcsd.getNumNulls());
+ Assert.assertEquals(12, lcsd.getNumDVs());
+ }
+ };
+ List<String> partNames = new ArrayList<>();
+ for (int i = 0; i < 10; i++) {
+ partNames.add("ds=" + i);
+ }
+ AggrStats aggrStats = store.get_aggr_stats_for(dbName, tableName, partNames,
+ Arrays.asList("col3"));
+ statChecker.checkStats(aggrStats);
+ }
+
+ @Test
+ public void MiddleOfPartitionsHaveBitVectorStatus() throws Exception {
+ String dbName = "default";
+ String tableName = "snp";
+ long now = System.currentTimeMillis();
+ List<FieldSchema> cols = new ArrayList<>();
+ cols.add(new FieldSchema("col4", "long", "nocomment"));
+ SerDeInfo serde = new SerDeInfo("serde", "seriallib", null);
+ StorageDescriptor sd = new StorageDescriptor(cols, "file:/tmp", "input", "output", false, 0,
+ serde, null, null, Collections.<String, String> emptyMap());
+ List<FieldSchema> partCols = new ArrayList<>();
+ partCols.add(new FieldSchema("ds", "string", ""));
+ Table table = new Table(tableName, dbName, "me", (int) now, (int) now, 0, sd, partCols,
+ Collections.<String, String> emptyMap(), null, null, null);
+ store.createTable(table);
+
+ List<List<String>> partVals = new ArrayList<>();
+ for (int i = 0; i < 10; i++) {
+ List<String> partVal = Arrays.asList("" + i);
+ partVals.add(partVal);
+ StorageDescriptor psd = new StorageDescriptor(sd);
+ psd.setLocation("file:/tmp/default/hit/ds=" + partVal);
+ Partition part = new Partition(partVal, dbName, tableName, (int) now, (int) now, psd,
+ Collections.<String, String> emptyMap());
+ store.addPartition(part);
+ if (i > 2 && i < 7) {
+ ColumnStatistics cs = new ColumnStatistics();
+ ColumnStatisticsDesc desc = new ColumnStatisticsDesc(false, dbName, tableName);
+ desc.setLastAnalyzed(now);
+ desc.setPartName("ds=" + partVal);
+ cs.setStatsDesc(desc);
+ ColumnStatisticsObj obj = new ColumnStatisticsObj();
+ obj.setColName("col4");
+ obj.setColType("long");
+ ColumnStatisticsData data = new ColumnStatisticsData();
+ LongColumnStatsData dcsd = new LongColumnStatsData();
+ dcsd.setHighValue(1000 + i);
+ dcsd.setLowValue(-1000 - i);
+ dcsd.setNumNulls(i);
+ dcsd.setNumDVs(10 * i + 1);
+ dcsd.setBitVectors(bitVectors[0]);
+ data.setLongStats(dcsd);
+ obj.setStatsData(data);
+ cs.addToStatsObj(obj);
+ store.updatePartitionColumnStatistics(cs, partVal);
+ }
+ }
+
+ Checker statChecker = new Checker() {
+ @Override
+ public void checkStats(AggrStats aggrStats) throws Exception {
+ Assert.assertEquals(4, aggrStats.getPartsFound());
+ Assert.assertEquals(1, aggrStats.getColStatsSize());
+ ColumnStatisticsObj cso = aggrStats.getColStats().get(0);
+ Assert.assertEquals("col4", cso.getColName());
+ Assert.assertEquals("long", cso.getColType());
+ LongColumnStatsData lcsd = cso.getStatsData().getLongStats();
+ Assert.assertEquals(1006, lcsd.getHighValue(), 0.01);
+ Assert.assertEquals(-1006, lcsd.getLowValue(), 0.01);
+ Assert.assertEquals(45, lcsd.getNumNulls());
+ Assert.assertEquals(3, lcsd.getNumDVs());
+ }
+ };
+ List<String> partNames = new ArrayList<>();
+ for (int i = 0; i < 10; i++) {
+ partNames.add("ds=" + i);
+ }
+ AggrStats aggrStats = store.get_aggr_stats_for(dbName, tableName, partNames,
+ Arrays.asList("col4"));
+ statChecker.checkStats(aggrStats);
+ }
+
+ @Test
+ public void TwoEndsAndMiddleOfPartitionsHaveBitVectorStatusLong() throws Exception {
+ String dbName = "default";
+ String tableName = "snp";
+ long now = System.currentTimeMillis();
+ List<FieldSchema> cols = new ArrayList<>();
+ cols.add(new FieldSchema("col5_long", "long", "nocomment"));
+ SerDeInfo serde = new SerDeInfo("serde", "seriallib", null);
+ StorageDescriptor sd = new StorageDescriptor(cols, "file:/tmp", "input", "output", false, 0,
+ serde, null, null, Collections.<String, String> emptyMap());
+ List<FieldSchema> partCols = new ArrayList<>();
+ partCols.add(new FieldSchema("ds", "string", ""));
+ Table table = new Table(tableName, dbName, "me", (int) now, (int) now, 0, sd, partCols,
+ Collections.<String, String> emptyMap(), null, null, null);
+ store.createTable(table);
+
+ List<List<String>> partVals = new ArrayList<>();
+ for (int i = 0; i < 10; i++) {
+ List<String> partVal = Arrays.asList("" + i);
+ partVals.add(partVal);
+ StorageDescriptor psd = new StorageDescriptor(sd);
+ psd.setLocation("file:/tmp/default/hit/ds=" + partVal);
+ Partition part = new Partition(partVal, dbName, tableName, (int) now, (int) now, psd,
+ Collections.<String, String> emptyMap());
+ store.addPartition(part);
+ if (i == 0 || i == 2 || i == 3 || i == 5 || i == 6 || i == 8) {
+ ColumnStatistics cs = new ColumnStatistics();
+ ColumnStatisticsDesc desc = new ColumnStatisticsDesc(false, dbName, tableName);
+ desc.setLastAnalyzed(now);
+ desc.setPartName("ds=" + partVal);
+ cs.setStatsDesc(desc);
+ ColumnStatisticsObj obj = new ColumnStatisticsObj();
+ obj.setColName("col5_long");
+ obj.setColType("long");
+ ColumnStatisticsData data = new ColumnStatisticsData();
+ LongColumnStatsData dcsd = new LongColumnStatsData();
+ dcsd.setHighValue(1000 + i);
+ dcsd.setLowValue(-1000 - i);
+ dcsd.setNumNulls(i);
+ dcsd.setNumDVs(10 * i + 1);
+ dcsd.setBitVectors(bitVectors[i / 5]);
+ data.setLongStats(dcsd);
+ obj.setStatsData(data);
+ cs.addToStatsObj(obj);
+ store.updatePartitionColumnStatistics(cs, partVal);
+ }
+ }
+
+ Checker statChecker = new Checker() {
+ @Override
+ public void checkStats(AggrStats aggrStats) throws Exception {
+ Assert.assertEquals(6, aggrStats.getPartsFound());
+ Assert.assertEquals(1, aggrStats.getColStatsSize());
+ ColumnStatisticsObj cso = aggrStats.getColStats().get(0);
+ Assert.assertEquals("col5_long", cso.getColName());
+ Assert.assertEquals("long", cso.getColType());
+ LongColumnStatsData lcsd = cso.getStatsData().getLongStats();
+ Assert.assertEquals(1010, lcsd.getHighValue(), 0.01);
+ Assert.assertEquals(-1010, lcsd.getLowValue(), 0.01);
+ Assert.assertEquals(40, lcsd.getNumNulls());
+ Assert.assertEquals(12, lcsd.getNumDVs());
+ }
+ };
+ List<String> partNames = new ArrayList<>();
+ for (int i = 0; i < 10; i++) {
+ partNames.add("ds=" + i);
+ }
+ AggrStats aggrStats = store.get_aggr_stats_for(dbName, tableName, partNames,
+ Arrays.asList("col5_long"));
+ statChecker.checkStats(aggrStats);
+ }
+
+ @Test
+ public void TwoEndsAndMiddleOfPartitionsHaveBitVectorStatusDecimal() throws Exception {
+ String dbName = "default";
+ String tableName = "snp";
+ long now = System.currentTimeMillis();
+ List<FieldSchema> cols = new ArrayList<>();
+ cols.add(new FieldSchema("col5_decimal", "decimal", "nocomment"));
+ SerDeInfo serde = new SerDeInfo("serde", "seriallib", null);
+ StorageDescriptor sd = new StorageDescriptor(cols, "file:/tmp", "input", "output", false, 0,
+ serde, null, null, Collections.<String, String> emptyMap());
+ List<FieldSchema> partCols = new ArrayList<>();
+ partCols.add(new FieldSchema("ds", "string", ""));
+ Table table = new Table(tableName, dbName, "me", (int) now, (int) now, 0, sd, partCols,
+ Collections.<String, String> emptyMap(), null, null, null);
+ store.createTable(table);
+
+ List<List<String>> partVals = new ArrayList<>();
+ for (int i = 0; i < 10; i++) {
+ List<String> partVal = Arrays.asList("" + i);
+ partVals.add(partVal);
+ StorageDescriptor psd = new StorageDescriptor(sd);
+ psd.setLocation("file:/tmp/default/hit/ds=" + partVal);
+ Partition part = new Partition(partVal, dbName, tableName, (int) now, (int) now, psd,
+ Collections.<String, String> emptyMap());
+ store.addPartition(part);
+ if (i == 0 || i == 2 || i == 3 || i == 5 || i == 6 || i == 8) {
+ ColumnStatistics cs = new ColumnStatistics();
+ ColumnStatisticsDesc desc = new ColumnStatisticsDesc(false, dbName, tableName);
+ desc.setLastAnalyzed(now);
+ desc.setPartName("ds=" + partVal);
+ cs.setStatsDesc(desc);
+ ColumnStatisticsObj obj = new ColumnStatisticsObj();
+ obj.setColName("col5_decimal");
+ obj.setColType("decimal");
+ ColumnStatisticsData data = new ColumnStatisticsData();
+ DecimalColumnStatsData dcsd = new DecimalColumnStatsData();
+ dcsd.setHighValue(StatObjectConverter.createThriftDecimal("" + (1000 + i)));
+ dcsd.setLowValue(StatObjectConverter.createThriftDecimal("" + (-1000 - i)));
+ dcsd.setNumNulls(i);
+ dcsd.setNumDVs(10 * i + 1);
+ dcsd.setBitVectors(bitVectors[i / 5]);
+ data.setDecimalStats(dcsd);
+ obj.setStatsData(data);
+ cs.addToStatsObj(obj);
+ store.updatePartitionColumnStatistics(cs, partVal);
+ }
+ }
+
+ Checker statChecker = new Checker() {
+ @Override
+ public void checkStats(AggrStats aggrStats) throws Exception {
+ Assert.assertEquals(6, aggrStats.getPartsFound());
+ Assert.assertEquals(1, aggrStats.getColStatsSize());
+ ColumnStatisticsObj cso = aggrStats.getColStats().get(0);
+ Assert.assertEquals("col5_decimal", cso.getColName());
+ Assert.assertEquals("decimal", cso.getColType());
+ DecimalColumnStatsData lcsd = cso.getStatsData().getDecimalStats();
+ Assert.assertEquals(1010, HBaseUtils.getDoubleValue(lcsd.getHighValue()), 0.01);
+ Assert.assertEquals(-1010, HBaseUtils.getDoubleValue(lcsd.getLowValue()), 0.01);
+ Assert.assertEquals(40, lcsd.getNumNulls());
+ Assert.assertEquals(12, lcsd.getNumDVs());
+ }
+ };
+ List<String> partNames = new ArrayList<>();
+ for (int i = 0; i < 10; i++) {
+ partNames.add("ds=" + i);
+ }
+ AggrStats aggrStats = store.get_aggr_stats_for(dbName, tableName, partNames,
+ Arrays.asList("col5_decimal"));
+ statChecker.checkStats(aggrStats);
+ }
+
+ @Test
+ public void TwoEndsAndMiddleOfPartitionsHaveBitVectorStatusDouble() throws Exception {
+ String dbName = "default";
+ String tableName = "snp";
+ long now = System.currentTimeMillis();
+ List<FieldSchema> cols = new ArrayList<>();
+ cols.add(new FieldSchema("col5_double", "double", "nocomment"));
+ SerDeInfo serde = new SerDeInfo("serde", "seriallib", null);
+ StorageDescriptor sd = new StorageDescriptor(cols, "file:/tmp", "input", "output", false, 0,
+ serde, null, null, Collections.<String, String> emptyMap());
+ List<FieldSchema> partCols = new ArrayList<>();
+ partCols.add(new FieldSchema("ds", "string", ""));
+ Table table = new Table(tableName, dbName, "me", (int) now, (int) now, 0, sd, partCols,
+ Collections.<String, String> emptyMap(), null, null, null);
+ store.createTable(table);
+
+ List<List<String>> partVals = new ArrayList<>();
+ for (int i = 0; i < 10; i++) {
+ List<String> partVal = Arrays.asList("" + i);
+ partVals.add(partVal);
+ StorageDescriptor psd = new StorageDescriptor(sd);
+ psd.setLocation("file:/tmp/default/hit/ds=" + partVal);
+ Partition part = new Partition(partVal, dbName, tableName, (int) now, (int) now, psd,
+ Collections.<String, String> emptyMap());
+ store.addPartition(part);
+ if (i == 0 || i == 2 || i == 3 || i == 5 || i == 6 || i == 8) {
+ ColumnStatistics cs = new ColumnStatistics();
+ ColumnStatisticsDesc desc = new ColumnStatisticsDesc(false, dbName, tableName);
+ desc.setLastAnalyzed(now);
+ desc.setPartName("ds=" + partVal);
+ cs.setStatsDesc(desc);
+ ColumnStatisticsObj obj = new ColumnStatisticsObj();
+ obj.setColName("col5_double");
+ obj.setColType("double");
+ ColumnStatisticsData data = new ColumnStatisticsData();
+ DoubleColumnStatsData dcsd = new DoubleColumnStatsData();
+ dcsd.setHighValue(1000 + i);
+ dcsd.setLowValue(-1000 - i);
+ dcsd.setNumNulls(i);
+ dcsd.setNumDVs(10 * i + 1);
+ dcsd.setBitVectors(bitVectors[i / 5]);
+ data.setDoubleStats(dcsd);
+ obj.setStatsData(data);
+ cs.addToStatsObj(obj);
+ store.updatePartitionColumnStatistics(cs, partVal);
+ }
+ }
+
+ Checker statChecker = new Checker() {
+ @Override
+ public void checkStats(AggrStats aggrStats) throws Exception {
+ Assert.assertEquals(6, aggrStats.getPartsFound());
+ Assert.assertEquals(1, aggrStats.getColStatsSize());
+ ColumnStatisticsObj cso = aggrStats.getColStats().get(0);
+ Assert.assertEquals("col5_double", cso.getColName());
+ Assert.assertEquals("double", cso.getColType());
+ DoubleColumnStatsData lcsd = cso.getStatsData().getDoubleStats();
+ Assert.assertEquals(1010, lcsd.getHighValue(), 0.01);
+ Assert.assertEquals(-1010, lcsd.getLowValue(), 0.01);
+ Assert.assertEquals(40, lcsd.getNumNulls());
+ Assert.assertEquals(12, lcsd.getNumDVs());
+ }
+ };
+ List<String> partNames = new ArrayList<>();
+ for (int i = 0; i < 10; i++) {
+ partNames.add("ds=" + i);
+ }
+ AggrStats aggrStats = store.get_aggr_stats_for(dbName, tableName, partNames,
+ Arrays.asList("col5_double"));
+ statChecker.checkStats(aggrStats);
+ }
+}

http://git-wip-us.apache.org/repos/asf/hive/blob/96862093/ql/src/test/results/clientpositive/tez/explainuser_1.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/explainuser_1.q.out b/ql/src/test/results/clientpositive/tez/explainuser_1.q.out
index b501f97..0eb9132 100644
--- a/ql/src/test/results/clientpositive/tez/explainuser_1.q.out
+++ b/ql/src/test/results/clientpositive/tez/explainuser_1.q.out
@@ -426,9 +426,9 @@ Stage-0
                                            <-Map 8 [SIMPLE_EDGE]
                                              SHUFFLE [RS_15]
                                                PartitionCols:_col0, _col1, _col2
- Group By Operator [GBY_14] (rows=1 width=101)
+ Group By Operator [GBY_14] (rows=2 width=101)
                                                  Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float
- Filter Operator [FIL_49] (rows=3 width=93)
+ Filter Operator [FIL_49] (rows=5 width=74)
                                                    predicate:((((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0.0))) and key is not null)
                                                    TableScan [TS_11] (rows=20 width=83)
                                                      default@cbo_t2,cbo_t2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"]
@@ -446,9 +446,9 @@ Stage-0
                                            <-Map 1 [SIMPLE_EDGE]
                                              SHUFFLE [RS_4]
                                                PartitionCols:_col0, _col1, _col2
- Group By Operator [GBY_3] (rows=1 width=101)
+ Group By Operator [GBY_3] (rows=2 width=101)
                                                  Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float
- Filter Operator [FIL_48] (rows=3 width=93)
+ Filter Operator [FIL_48] (rows=5 width=74)
                                                    predicate:((((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0.0))) and key is not null)
                                                    TableScan [TS_0] (rows=20 width=83)
                                                      default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"]
@@ -1201,11 +1201,11 @@ Stage-0
      Stage-1
        Reducer 3
        File Output Operator [FS_19]
- Select Operator [SEL_18] (rows=21 width=101)
+ Select Operator [SEL_18] (rows=36 width=101)
            Output:["_col0","_col1","_col2","_col3","_col4"]
- Filter Operator [FIL_17] (rows=21 width=101)
+ Filter Operator [FIL_17] (rows=36 width=101)
              predicate:((_col1 > 0) or (_col6 >= 0))
- Merge Join Operator [MERGEJOIN_28] (rows=21 width=101)
+ Merge Join Operator [MERGEJOIN_28] (rows=36 width=101)
                Conds:RS_14._col0=RS_15._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col6"]
              <-Map 5 [SIMPLE_EDGE]
                SHUFFLE [RS_15]
@@ -1219,25 +1219,25 @@ Stage-0
              <-Reducer 2 [SIMPLE_EDGE]
                SHUFFLE [RS_14]
                  PartitionCols:_col0
- Filter Operator [FIL_9] (rows=6 width=182)
+ Filter Operator [FIL_9] (rows=10 width=182)
                    predicate:(((_col1 + _col4) = 2) and ((_col4 + 1) = 2))
- Merge Join Operator [MERGEJOIN_27] (rows=25 width=182)
+ Merge Join Operator [MERGEJOIN_27] (rows=40 width=182)
                      Conds:RS_6._col0=RS_7._col0(Left Outer),Output:["_col0","_col1","_col2","_col3","_col4"]
                    <-Map 1 [SIMPLE_EDGE]
                      SHUFFLE [RS_6]
                        PartitionCols:_col0
- Select Operator [SEL_2] (rows=5 width=74)
+ Select Operator [SEL_2] (rows=9 width=82)
                          Output:["_col0","_col1","_col2"]
- Filter Operator [FIL_24] (rows=5 width=74)
+ Filter Operator [FIL_24] (rows=9 width=82)
                            predicate:((((c_int + 1) = 2) and ((c_int > 0) or (c_float >= 0.0))) and key is not null)
                            TableScan [TS_0] (rows=20 width=83)
                              default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"]
                    <-Map 4 [SIMPLE_EDGE]
                      SHUFFLE [RS_7]
                        PartitionCols:_col0
- Select Operator [SEL_5] (rows=5 width=71)
+ Select Operator [SEL_5] (rows=9 width=79)
                          Output:["_col0","_col1"]
- Filter Operator [FIL_25] (rows=5 width=74)
+ Filter Operator [FIL_25] (rows=9 width=82)
                            predicate:((((c_int + 1) = 2) and ((c_int > 0) or (c_float >= 0.0))) and key is not null)
                            TableScan [TS_3] (rows=20 width=83)
                              default@cbo_t2,cbo_t2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"]
@@ -1257,27 +1257,27 @@ Stage-0
      Stage-1
        Reducer 2
        File Output Operator [FS_14]
- Select Operator [SEL_13] (rows=24 width=101)
+ Select Operator [SEL_13] (rows=50 width=101)
            Output:["_col0","_col1","_col2","_col3","_col4"]
- Filter Operator [FIL_12] (rows=24 width=101)
+ Filter Operator [FIL_12] (rows=50 width=101)
              predicate:(((_col1 + _col4) = 2) and ((_col1 > 0) or (_col6 >= 0)) and ((_col4 + 1) = 2))
- Merge Join Operator [MERGEJOIN_19] (rows=72 width=101)
+ Merge Join Operator [MERGEJOIN_19] (rows=200 width=101)
                Conds:RS_8._col0=RS_9._col0(Right Outer),RS_8._col0=RS_10._col0(Right Outer),Output:["_col1","_col2","_col3","_col4","_col6"]
              <-Map 1 [SIMPLE_EDGE]
                SHUFFLE [RS_8]
                  PartitionCols:_col0
- Select Operator [SEL_2] (rows=6 width=77)
+ Select Operator [SEL_2] (rows=10 width=83)
                    Output:["_col0","_col1","_col2"]
- Filter Operator [FIL_17] (rows=6 width=77)
+ Filter Operator [FIL_17] (rows=10 width=83)
                      predicate:(((c_int + 1) = 2) and ((c_int > 0) or (c_float >= 0.0)))
                      TableScan [TS_0] (rows=20 width=83)
                        default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"]
              <-Map 3 [SIMPLE_EDGE]
                SHUFFLE [RS_9]
                  PartitionCols:_col0
- Select Operator [SEL_5] (rows=6 width=74)
+ Select Operator [SEL_5] (rows=10 width=80)
                    Output:["_col0","_col1"]
- Filter Operator [FIL_18] (rows=6 width=77)
+ Filter Operator [FIL_18] (rows=10 width=83)
                      predicate:(((c_int + 1) = 2) and ((c_int > 0) or (c_float >= 0.0)))
                      TableScan [TS_3] (rows=20 width=83)
                        default@cbo_t2,cbo_t2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"]
@@ -1509,53 +1509,53 @@ Stage-0
                                  Output:["_col0","_col1","_col2"]
                                  Filter Operator [FIL_31] (rows=1 width=101)
                                    predicate:((_col1 + _col4) >= 0)
- Merge Join Operator [MERGEJOIN_60] (rows=1 width=101)
+ Merge Join Operator [MERGEJOIN_60] (rows=2 width=101)
                                      Conds:RS_28._col0=RS_29._col0(Inner),Output:["_col0","_col1","_col2","_col4"]
                                    <-Reducer 10 [SIMPLE_EDGE]
                                      SHUFFLE [RS_29]
                                        PartitionCols:_col0
- Filter Operator [FIL_26] (rows=1 width=105)
+ Filter Operator [FIL_26] (rows=2 width=62)
                                          predicate:_col0 is not null
- Limit [LIM_24] (rows=1 width=105)
+ Limit [LIM_24] (rows=3 width=76)
                                            Number of rows:5
- Select Operator [SEL_23] (rows=1 width=105)
+ Select Operator [SEL_23] (rows=3 width=76)
                                              Output:["_col0","_col1"]
                                            <-Reducer 9 [SIMPLE_EDGE]
                                              SHUFFLE [RS_22]
- Select Operator [SEL_20] (rows=1 width=105)
+ Select Operator [SEL_20] (rows=3 width=76)
                                                  Output:["_col0","_col1","_col2","_col3"]
- Group By Operator [GBY_19] (rows=1 width=101)
+ Group By Operator [GBY_19] (rows=3 width=70)
                                                    Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2
                                                  <-Map 8 [SIMPLE_EDGE]
                                                    SHUFFLE [RS_18]
                                                      PartitionCols:_col0, _col1, _col2
- Group By Operator [GBY_17] (rows=1 width=101)
+ Group By Operator [GBY_17] (rows=3 width=70)
                                                        Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float
- Filter Operator [FIL_58] (rows=4 width=93)
+ Filter Operator [FIL_58] (rows=6 width=77)
                                                          predicate:(((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0.0)))
                                                          TableScan [TS_14] (rows=20 width=83)
                                                            default@cbo_t2,cbo_t2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"]
                                    <-Reducer 3 [SIMPLE_EDGE]
                                      SHUFFLE [RS_28]
                                        PartitionCols:_col0
- Filter Operator [FIL_12] (rows=1 width=97)
+ Filter Operator [FIL_12] (rows=2 width=54)
                                          predicate:_col0 is not null
- Limit [LIM_10] (rows=1 width=97)
+ Limit [LIM_10] (rows=3 width=68)
                                            Number of rows:5
- Select Operator [SEL_9] (rows=1 width=97)
+ Select Operator [SEL_9] (rows=3 width=68)
                                              Output:["_col0","_col1","_col2"]
                                            <-Reducer 2 [SIMPLE_EDGE]
                                              SHUFFLE [RS_8]
- Select Operator [SEL_6] (rows=1 width=97)
+ Select Operator [SEL_6] (rows=3 width=68)
                                                  Output:["_col0","_col1","_col2"]
- Group By Operator [GBY_5] (rows=1 width=101)
+ Group By Operator [GBY_5] (rows=3 width=70)
                                                    Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2
                                                  <-Map 1 [SIMPLE_EDGE]
                                                    SHUFFLE [RS_4]
                                                      PartitionCols:_col0, _col1, _col2
- Group By Operator [GBY_3] (rows=1 width=101)
+ Group By Operator [GBY_3] (rows=3 width=70)
                                                        Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float
- Filter Operator [FIL_56] (rows=4 width=93)
+ Filter Operator [FIL_56] (rows=6 width=77)
                                                          predicate:(((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0.0)))
                                                          TableScan [TS_0] (rows=20 width=83)
                                                            default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"]
@@ -1575,16 +1575,16 @@ Stage-0
      Stage-1
        Reducer 2
        File Output Operator [FS_12]
- Select Operator [SEL_11] (rows=6 width=4)
+ Select Operator [SEL_11] (rows=11 width=4)
            Output:["_col0"]
- Merge Join Operator [MERGEJOIN_17] (rows=6 width=4)
+ Merge Join Operator [MERGEJOIN_17] (rows=11 width=4)
              Conds:RS_8._col0=RS_9._col0(Left Semi),Output:["_col1"]
            <-Map 1 [SIMPLE_EDGE]
              SHUFFLE [RS_8]
                PartitionCols:_col0
- Select Operator [SEL_2] (rows=5 width=74)
+ Select Operator [SEL_2] (rows=9 width=82)
                  Output:["_col0","_col1"]
- Filter Operator [FIL_15] (rows=5 width=74)
+ Filter Operator [FIL_15] (rows=9 width=82)
                    predicate:((((c_int + 1) = 2) and ((c_int > 0) or (c_float >= 0.0))) and key is not null)
                    TableScan [TS_0] (rows=20 width=83)
                      default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"]
@@ -1615,27 +1615,27 @@ Stage-0
      Stage-1
        Reducer 2
        File Output Operator [FS_18]
- Select Operator [SEL_17] (rows=12 width=93)
+ Select Operator [SEL_17] (rows=16 width=93)
            Output:["_col0","_col1","_col2"]
- Merge Join Operator [MERGEJOIN_28] (rows=12 width=93)
+ Merge Join Operator [MERGEJOIN_28] (rows=16 width=93)
              Conds:RS_13._col0=RS_14._col0(Left Semi),RS_13._col0=RS_15._col0(Left Semi),Output:["_col0","_col1","_col2"]
            <-Map 1 [SIMPLE_EDGE]
              SHUFFLE [RS_13]
                PartitionCols:_col0
- Select Operator [SEL_2] (rows=5 width=74)
+ Select Operator [SEL_2] (rows=9 width=82)
                  Output:["_col0","_col1","_col2"]
- Filter Operator [FIL_25] (rows=5 width=74)
+ Filter Operator [FIL_25] (rows=9 width=82)
                    predicate:((((c_int + 1) = 2) and ((c_int > 0) or (c_float >= 0.0))) and key is not null)
                    TableScan [TS_0] (rows=20 width=83)
                      default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"]
            <-Map 3 [SIMPLE_EDGE]
              SHUFFLE [RS_14]
                PartitionCols:_col0
- Group By Operator [GBY_10] (rows=2 width=85)
+ Group By Operator [GBY_10] (rows=3 width=85)
                  Output:["_col0"],keys:_col0
- Select Operator [SEL_5] (rows=5 width=68)
+ Select Operator [SEL_5] (rows=9 width=75)
                    Output:["_col0"]
- Filter Operator [FIL_26] (rows=5 width=74)
+ Filter Operator [FIL_26] (rows=9 width=82)
                      predicate:((((c_int + 1) = 2) and ((c_int > 0) or (c_float >= 0.0))) and key is not null)
                      TableScan [TS_3] (rows=20 width=83)
                        default@cbo_t2,cbo_t2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"]

Search Discussions

  • Pxiong at Mar 27, 2016 at 7:14 pm
    Repository: hive
    Updated Branches:
       refs/heads/master 3038b05ed -> 968620932


    http://git-wip-us.apache.org/repos/asf/hive/blob/96862093/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestHBaseAggregateStatsExtrapolation.java
    ----------------------------------------------------------------------
    diff --git a/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestHBaseAggregateStatsExtrapolation.java b/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestHBaseAggregateStatsExtrapolation.java
    new file mode 100644
    index 0000000..f4e55ed
    --- /dev/null
    +++ b/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestHBaseAggregateStatsExtrapolation.java
    @@ -0,0 +1,717 @@
    +/**
    + * Licensed to the Apache Software Foundation (ASF) under one
    + * or more contributor license agreements. See the NOTICE file
    + * distributed with this work for additional information
    + * regarding copyright ownership. The ASF licenses this file
    + * to you under the Apache License, Version 2.0 (the
    + * "License"); you may not use this file except in compliance
    + * with the License. You may obtain a copy of the License at
    + *
    + * http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing,
    + * software distributed under the License is distributed on an
    + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
    + * KIND, either express or implied. See the License for the
    + * specific language governing permissions and limitations
    + * under the License.
    + */
    +package org.apache.hadoop.hive.metastore.hbase;
    +
    +import org.slf4j.Logger;
    +import org.slf4j.LoggerFactory;
    +import org.apache.hadoop.hbase.Cell;
    +import org.apache.hadoop.hbase.client.HTableInterface;
    +import org.apache.hadoop.hive.conf.HiveConf;
    +import org.apache.hadoop.hive.metastore.StatObjectConverter;
    +import org.apache.hadoop.hive.metastore.api.AggrStats;
    +import org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData;
    +import org.apache.hadoop.hive.metastore.api.ColumnStatistics;
    +import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData;
    +import org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc;
    +import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
    +import org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData;
    +import org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData;
    +import org.apache.hadoop.hive.metastore.api.FieldSchema;
    +import org.apache.hadoop.hive.metastore.api.LongColumnStatsData;
    +import org.apache.hadoop.hive.metastore.api.Partition;
    +import org.apache.hadoop.hive.metastore.api.SerDeInfo;
    +import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
    +import org.apache.hadoop.hive.metastore.api.StringColumnStatsData;
    +import org.apache.hadoop.hive.metastore.api.Table;
    +import org.junit.Assert;
    +import org.junit.Before;
    +import org.junit.Test;
    +import org.mockito.Mock;
    +import org.mockito.MockitoAnnotations;
    +
    +import java.io.IOException;
    +import java.util.ArrayList;
    +import java.util.Arrays;
    +import java.util.Collections;
    +import java.util.List;
    +import java.util.SortedMap;
    +import java.util.TreeMap;
    +
    +public class TestHBaseAggregateStatsExtrapolation {
    + private static final Logger LOG = LoggerFactory
    + .getLogger(TestHBaseAggregateStatsExtrapolation.class.getName());
    +
    + @Mock
    + HTableInterface htable; // Mockito mock; populated by initMocks(this) in before()
    + private HBaseStore store; // store under test; reassigned by before() ahead of every test
    + SortedMap<String, Cell> rows = new TreeMap<>(); // handed to MockUtils.init together with htable
    +
    + // Bit-vector string attached to the partition stats; tests that set it expect an aggregated NDV of 3
    + String bitVectors = "{0, 4, 5, 7}{0, 1}{0, 1, 2}{0, 1, 4}{0}{0, 2}{0, 3}{0, 2, 3, 4}{0, 1, 4}{0, 1}{0}{0, 1, 3, 8}{0, 2}{0, 2}{0, 9}{0, 1, 4}";
    +
    + /**
    +  * Per-test fixture: initialises the Mockito mocks, builds a configuration with
    +  * {@code HBaseReadWrite.NO_CACHE_CONF} switched on, creates the store through
    +  * {@code MockUtils.init} and resets the stats cache counters.
    +  *
    +  * @throws IOException declared by the original fixture and kept for compatibility
    +  */
    + @Before
    + public void before() throws IOException {
    +   MockitoAnnotations.initMocks(this);
    +
    +   final HiveConf hiveConf = new HiveConf();
    +   hiveConf.setBoolean(HBaseReadWrite.NO_CACHE_CONF, true);
    +
    +   this.store = MockUtils.init(hiveConf, this.htable, this.rows);
    +   this.store.backdoor().getStatsCache().resetCounters();
    + }
    +
    + /**
    +  * Callback that verifies one aggregated statistics result.
    +  * Each test supplies its own anonymous implementation holding the assertions.
    +  */
    + private interface Checker {
    +   /**
    +    * Runs the assertions against the given aggregate.
    +    *
    +    * @param aggrStats aggregate returned by the store
    +    * @throws Exception if a check cannot be carried out
    +    */
    +   void checkStats(AggrStats aggrStats) throws Exception;
    + }
    +
    + /**
    +  * Creates table {@code default.snp} with ten partitions ({@code ds=0..9}), stores long
    +  * column stats with bit vectors on every partition and checks the aggregate for
    +  * {@code col1}: 10 partitions found, high 1009, low -1009, 45 nulls (0+1+...+9), NDV 3.
    +  *
    +  * @throws Exception if the store rejects any of the calls
    +  */
    + @Test
    + public void allPartitionsHaveBitVectorStatusLong() throws Exception {
    +   String dbName = "default";
    +   String tableName = "snp";
    +   long now = System.currentTimeMillis();
    +   List<FieldSchema> cols = new ArrayList<>();
    +   cols.add(new FieldSchema("col1", "long", "nocomment"));
    +   SerDeInfo serde = new SerDeInfo("serde", "seriallib", null);
    +   StorageDescriptor sd = new StorageDescriptor(cols, "file:/tmp", "input", "output", false, 0,
    +       serde, null, null, Collections.<String, String> emptyMap());
    +   List<FieldSchema> partCols = new ArrayList<>();
    +   partCols.add(new FieldSchema("ds", "string", ""));
    +   Table table = new Table(tableName, dbName, "me", (int) now, (int) now, 0, sd, partCols,
    +       Collections.<String, String> emptyMap(), null, null, null);
    +   store.createTable(table);
    +
    +   for (int i = 0; i < 10; i++) {
    +     List<String> partVal = Arrays.asList("" + i);
    +     // Build the name from the scalar value: concatenating the List itself renders as
    +     // "ds=[0]", which does not match the "ds=0" names requested further down.
    +     String partName = "ds=" + i;
    +     StorageDescriptor psd = new StorageDescriptor(sd);
    +     psd.setLocation("file:/tmp/default/hit/" + partName);
    +     Partition part = new Partition(partVal, dbName, tableName, (int) now, (int) now, psd,
    +         Collections.<String, String> emptyMap());
    +     store.addPartition(part);
    +     ColumnStatistics cs = new ColumnStatistics();
    +     ColumnStatisticsDesc desc = new ColumnStatisticsDesc(false, dbName, tableName);
    +     desc.setLastAnalyzed(now);
    +     desc.setPartName(partName);
    +     cs.setStatsDesc(desc);
    +     ColumnStatisticsObj obj = new ColumnStatisticsObj();
    +     obj.setColName("col1");
    +     obj.setColType("long");
    +     ColumnStatisticsData data = new ColumnStatisticsData();
    +     LongColumnStatsData dcsd = new LongColumnStatsData();
    +     dcsd.setHighValue(1000 + i);
    +     dcsd.setLowValue(-1000 - i);
    +     dcsd.setNumNulls(i);
    +     dcsd.setNumDVs(10 * i + 1);
    +     dcsd.setBitVectors(bitVectors);
    +     data.setLongStats(dcsd);
    +     obj.setStatsData(data);
    +     cs.addToStatsObj(obj);
    +     store.updatePartitionColumnStatistics(cs, partVal);
    +   }
    +
    +   Checker statChecker = new Checker() {
    +     @Override
    +     public void checkStats(AggrStats aggrStats) throws Exception {
    +       Assert.assertEquals(10, aggrStats.getPartsFound());
    +       Assert.assertEquals(1, aggrStats.getColStatsSize());
    +       ColumnStatisticsObj cso = aggrStats.getColStats().get(0);
    +       Assert.assertEquals("col1", cso.getColName());
    +       Assert.assertEquals("long", cso.getColType());
    +       LongColumnStatsData lcsd = cso.getStatsData().getLongStats();
    +       Assert.assertEquals(1009, lcsd.getHighValue(), 0.01);
    +       Assert.assertEquals(-1009, lcsd.getLowValue(), 0.01);
    +       Assert.assertEquals(45, lcsd.getNumNulls());
    +       Assert.assertEquals(3, lcsd.getNumDVs());
    +     }
    +   };
    +   List<String> partNames = new ArrayList<>();
    +   for (int i = 0; i < 10; i++) {
    +     partNames.add("ds=" + i);
    +   }
    +   AggrStats aggrStats = store.get_aggr_stats_for(dbName, tableName, partNames,
    +       Arrays.asList("col1"));
    +   statChecker.checkStats(aggrStats);
    + }
    +
    + /**
    +  * Creates table {@code default.snp} with ten partitions ({@code ds=0..9}), stores decimal
    +  * column stats with bit vectors on every partition and checks the aggregate for
    +  * {@code col1_decimal}: 10 partitions found, high 1009, low -1009, 45 nulls, NDV 3.
    +  *
    +  * @throws Exception if the store rejects any of the calls
    +  */
    + @Test
    + public void allPartitionsHaveBitVectorStatusDecimal() throws Exception {
    +   String dbName = "default";
    +   String tableName = "snp";
    +   long now = System.currentTimeMillis();
    +   List<FieldSchema> cols = new ArrayList<>();
    +   cols.add(new FieldSchema("col1_decimal", "decimal", "nocomment"));
    +   SerDeInfo serde = new SerDeInfo("serde", "seriallib", null);
    +   StorageDescriptor sd = new StorageDescriptor(cols, "file:/tmp", "input", "output", false, 0,
    +       serde, null, null, Collections.<String, String> emptyMap());
    +   List<FieldSchema> partCols = new ArrayList<>();
    +   partCols.add(new FieldSchema("ds", "string", ""));
    +   Table table = new Table(tableName, dbName, "me", (int) now, (int) now, 0, sd, partCols,
    +       Collections.<String, String> emptyMap(), null, null, null);
    +   store.createTable(table);
    +
    +   for (int i = 0; i < 10; i++) {
    +     List<String> partVal = Arrays.asList("" + i);
    +     // Build the name from the scalar value: concatenating the List itself renders as
    +     // "ds=[0]", which does not match the "ds=0" names requested further down.
    +     String partName = "ds=" + i;
    +     StorageDescriptor psd = new StorageDescriptor(sd);
    +     psd.setLocation("file:/tmp/default/hit/" + partName);
    +     Partition part = new Partition(partVal, dbName, tableName, (int) now, (int) now, psd,
    +         Collections.<String, String> emptyMap());
    +     store.addPartition(part);
    +     ColumnStatistics cs = new ColumnStatistics();
    +     ColumnStatisticsDesc desc = new ColumnStatisticsDesc(false, dbName, tableName);
    +     desc.setLastAnalyzed(now);
    +     desc.setPartName(partName);
    +     cs.setStatsDesc(desc);
    +     ColumnStatisticsObj obj = new ColumnStatisticsObj();
    +     obj.setColName("col1_decimal");
    +     obj.setColType("decimal");
    +     ColumnStatisticsData data = new ColumnStatisticsData();
    +     DecimalColumnStatsData dcsd = new DecimalColumnStatsData();
    +     dcsd.setHighValue(StatObjectConverter.createThriftDecimal("" + (1000 + i)));
    +     dcsd.setLowValue(StatObjectConverter.createThriftDecimal("" + (-1000 - i)));
    +     dcsd.setNumNulls(i);
    +     dcsd.setNumDVs(10 * i + 1);
    +     dcsd.setBitVectors(bitVectors);
    +     data.setDecimalStats(dcsd);
    +     obj.setStatsData(data);
    +     cs.addToStatsObj(obj);
    +     store.updatePartitionColumnStatistics(cs, partVal);
    +   }
    +
    +   Checker statChecker = new Checker() {
    +     @Override
    +     public void checkStats(AggrStats aggrStats) throws Exception {
    +       Assert.assertEquals(10, aggrStats.getPartsFound());
    +       Assert.assertEquals(1, aggrStats.getColStatsSize());
    +       ColumnStatisticsObj cso = aggrStats.getColStats().get(0);
    +       Assert.assertEquals("col1_decimal", cso.getColName());
    +       Assert.assertEquals("decimal", cso.getColType());
    +       DecimalColumnStatsData lcsd = cso.getStatsData().getDecimalStats();
    +       Assert.assertEquals(1009, HBaseUtils.getDoubleValue(lcsd.getHighValue()), 0.01);
    +       Assert.assertEquals(-1009, HBaseUtils.getDoubleValue(lcsd.getLowValue()), 0.01);
    +       Assert.assertEquals(45, lcsd.getNumNulls());
    +       Assert.assertEquals(3, lcsd.getNumDVs());
    +     }
    +   };
    +   List<String> partNames = new ArrayList<>();
    +   for (int i = 0; i < 10; i++) {
    +     partNames.add("ds=" + i);
    +   }
    +   AggrStats aggrStats = store.get_aggr_stats_for(dbName, tableName, partNames,
    +       Arrays.asList("col1_decimal"));
    +   statChecker.checkStats(aggrStats);
    + }
    +
    + /**
    +  * Creates table {@code default.snp} with ten partitions ({@code ds=0..9}), stores double
    +  * column stats with bit vectors on every partition and checks the aggregate for
    +  * {@code col1_double}: 10 partitions found, high 1009, low -1009, 45 nulls, NDV 3.
    +  *
    +  * @throws Exception if the store rejects any of the calls
    +  */
    + @Test
    + public void allPartitionsHaveBitVectorStatusDouble() throws Exception {
    +   String dbName = "default";
    +   String tableName = "snp";
    +   long now = System.currentTimeMillis();
    +   List<FieldSchema> cols = new ArrayList<>();
    +   cols.add(new FieldSchema("col1_double", "double", "nocomment"));
    +   SerDeInfo serde = new SerDeInfo("serde", "seriallib", null);
    +   StorageDescriptor sd = new StorageDescriptor(cols, "file:/tmp", "input", "output", false, 0,
    +       serde, null, null, Collections.<String, String> emptyMap());
    +   List<FieldSchema> partCols = new ArrayList<>();
    +   partCols.add(new FieldSchema("ds", "string", ""));
    +   Table table = new Table(tableName, dbName, "me", (int) now, (int) now, 0, sd, partCols,
    +       Collections.<String, String> emptyMap(), null, null, null);
    +   store.createTable(table);
    +
    +   for (int i = 0; i < 10; i++) {
    +     List<String> partVal = Arrays.asList("" + i);
    +     // Build the name from the scalar value: concatenating the List itself renders as
    +     // "ds=[0]", which does not match the "ds=0" names requested further down.
    +     String partName = "ds=" + i;
    +     StorageDescriptor psd = new StorageDescriptor(sd);
    +     psd.setLocation("file:/tmp/default/hit/" + partName);
    +     Partition part = new Partition(partVal, dbName, tableName, (int) now, (int) now, psd,
    +         Collections.<String, String> emptyMap());
    +     store.addPartition(part);
    +     ColumnStatistics cs = new ColumnStatistics();
    +     ColumnStatisticsDesc desc = new ColumnStatisticsDesc(false, dbName, tableName);
    +     desc.setLastAnalyzed(now);
    +     desc.setPartName(partName);
    +     cs.setStatsDesc(desc);
    +     ColumnStatisticsObj obj = new ColumnStatisticsObj();
    +     obj.setColName("col1_double");
    +     obj.setColType("double");
    +     ColumnStatisticsData data = new ColumnStatisticsData();
    +     DoubleColumnStatsData dcsd = new DoubleColumnStatsData();
    +     dcsd.setHighValue(1000 + i);
    +     dcsd.setLowValue(-1000 - i);
    +     dcsd.setNumNulls(i);
    +     dcsd.setNumDVs(10 * i + 1);
    +     dcsd.setBitVectors(bitVectors);
    +     data.setDoubleStats(dcsd);
    +     obj.setStatsData(data);
    +     cs.addToStatsObj(obj);
    +     store.updatePartitionColumnStatistics(cs, partVal);
    +   }
    +
    +   Checker statChecker = new Checker() {
    +     @Override
    +     public void checkStats(AggrStats aggrStats) throws Exception {
    +       Assert.assertEquals(10, aggrStats.getPartsFound());
    +       Assert.assertEquals(1, aggrStats.getColStatsSize());
    +       ColumnStatisticsObj cso = aggrStats.getColStats().get(0);
    +       Assert.assertEquals("col1_double", cso.getColName());
    +       Assert.assertEquals("double", cso.getColType());
    +       DoubleColumnStatsData lcsd = cso.getStatsData().getDoubleStats();
    +       Assert.assertEquals(1009, lcsd.getHighValue(), 0.01);
    +       Assert.assertEquals(-1009, lcsd.getLowValue(), 0.01);
    +       Assert.assertEquals(45, lcsd.getNumNulls());
    +       Assert.assertEquals(3, lcsd.getNumDVs());
    +     }
    +   };
    +   List<String> partNames = new ArrayList<>();
    +   for (int i = 0; i < 10; i++) {
    +     partNames.add("ds=" + i);
    +   }
    +   AggrStats aggrStats = store.get_aggr_stats_for(dbName, tableName, partNames,
    +       Arrays.asList("col1_double"));
    +   statChecker.checkStats(aggrStats);
    + }
    +
    + /**
    +  * Creates table {@code default.snp} with ten partitions ({@code ds=0..9}), stores string
    +  * column stats with bit vectors on every partition and checks the aggregate for
    +  * {@code col1_string}: 10 partitions found, average length 10, maximum length 19,
    +  * 45 nulls, NDV 3.
    +  *
    +  * @throws Exception if the store rejects any of the calls
    +  */
    + @Test
    + public void allPartitionsHaveBitVectorStatusString() throws Exception {
    +   String dbName = "default";
    +   String tableName = "snp";
    +   long now = System.currentTimeMillis();
    +   List<FieldSchema> cols = new ArrayList<>();
    +   cols.add(new FieldSchema("col1_string", "string", "nocomment"));
    +   SerDeInfo serde = new SerDeInfo("serde", "seriallib", null);
    +   StorageDescriptor sd = new StorageDescriptor(cols, "file:/tmp", "input", "output", false, 0,
    +       serde, null, null, Collections.<String, String> emptyMap());
    +   List<FieldSchema> partCols = new ArrayList<>();
    +   partCols.add(new FieldSchema("ds", "string", ""));
    +   Table table = new Table(tableName, dbName, "me", (int) now, (int) now, 0, sd, partCols,
    +       Collections.<String, String> emptyMap(), null, null, null);
    +   store.createTable(table);
    +
    +   for (int i = 0; i < 10; i++) {
    +     List<String> partVal = Arrays.asList("" + i);
    +     // Build the name from the scalar value: concatenating the List itself renders as
    +     // "ds=[0]", which does not match the "ds=0" names requested further down.
    +     String partName = "ds=" + i;
    +     StorageDescriptor psd = new StorageDescriptor(sd);
    +     psd.setLocation("file:/tmp/default/hit/" + partName);
    +     Partition part = new Partition(partVal, dbName, tableName, (int) now, (int) now, psd,
    +         Collections.<String, String> emptyMap());
    +     store.addPartition(part);
    +     ColumnStatistics cs = new ColumnStatistics();
    +     ColumnStatisticsDesc desc = new ColumnStatisticsDesc(false, dbName, tableName);
    +     desc.setLastAnalyzed(now);
    +     desc.setPartName(partName);
    +     cs.setStatsDesc(desc);
    +     ColumnStatisticsObj obj = new ColumnStatisticsObj();
    +     obj.setColName("col1_string");
    +     obj.setColType("string");
    +     ColumnStatisticsData data = new ColumnStatisticsData();
    +     StringColumnStatsData dcsd = new StringColumnStatsData();
    +     dcsd.setAvgColLen(i + 1);
    +     dcsd.setMaxColLen(i + 10);
    +     dcsd.setNumNulls(i);
    +     dcsd.setNumDVs(10 * i + 1);
    +     dcsd.setBitVectors(bitVectors);
    +     data.setStringStats(dcsd);
    +     obj.setStatsData(data);
    +     cs.addToStatsObj(obj);
    +     store.updatePartitionColumnStatistics(cs, partVal);
    +   }
    +
    +   Checker statChecker = new Checker() {
    +     @Override
    +     public void checkStats(AggrStats aggrStats) throws Exception {
    +       Assert.assertEquals(10, aggrStats.getPartsFound());
    +       Assert.assertEquals(1, aggrStats.getColStatsSize());
    +       ColumnStatisticsObj cso = aggrStats.getColStats().get(0);
    +       Assert.assertEquals("col1_string", cso.getColName());
    +       Assert.assertEquals("string", cso.getColType());
    +       StringColumnStatsData lcsd = cso.getStatsData().getStringStats();
    +       Assert.assertEquals(10, lcsd.getAvgColLen(), 0.01);
    +       Assert.assertEquals(19, lcsd.getMaxColLen(), 0.01);
    +       Assert.assertEquals(45, lcsd.getNumNulls());
    +       Assert.assertEquals(3, lcsd.getNumDVs());
    +     }
    +   };
    +   List<String> partNames = new ArrayList<>();
    +   for (int i = 0; i < 10; i++) {
    +     partNames.add("ds=" + i);
    +   }
    +   AggrStats aggrStats = store.get_aggr_stats_for(dbName, tableName, partNames,
    +       Arrays.asList("col1_string"));
    +   statChecker.checkStats(aggrStats);
    + }
    +
    + /**
    +  * Creates table {@code default.snp} with ten partitions ({@code ds=0..9}) and stores long
    +  * column stats WITHOUT bit vectors on every partition. Checks the aggregate for
    +  * {@code col2}: 10 partitions found, high 1009, low -1009, 45 nulls, and NDV 90,
    +  * which equals the largest per-partition NDV written here (10 * 9).
    +  *
    +  * @throws Exception if the store rejects any of the calls
    +  */
    + @Test
    + public void noPartitionsHaveBitVectorStatus() throws Exception {
    +   String dbName = "default";
    +   String tableName = "snp";
    +   long now = System.currentTimeMillis();
    +   List<FieldSchema> cols = new ArrayList<>();
    +   cols.add(new FieldSchema("col2", "long", "nocomment"));
    +   SerDeInfo serde = new SerDeInfo("serde", "seriallib", null);
    +   StorageDescriptor sd = new StorageDescriptor(cols, "file:/tmp", "input", "output", false, 0,
    +       serde, null, null, Collections.<String, String> emptyMap());
    +   List<FieldSchema> partCols = new ArrayList<>();
    +   partCols.add(new FieldSchema("ds", "string", ""));
    +   Table table = new Table(tableName, dbName, "me", (int) now, (int) now, 0, sd, partCols,
    +       Collections.<String, String> emptyMap(), null, null, null);
    +   store.createTable(table);
    +
    +   for (int i = 0; i < 10; i++) {
    +     List<String> partVal = Arrays.asList("" + i);
    +     // Build the name from the scalar value: concatenating the List itself renders as
    +     // "ds=[0]", which does not match the "ds=0" names requested further down.
    +     String partName = "ds=" + i;
    +     StorageDescriptor psd = new StorageDescriptor(sd);
    +     psd.setLocation("file:/tmp/default/hit/" + partName);
    +     Partition part = new Partition(partVal, dbName, tableName, (int) now, (int) now, psd,
    +         Collections.<String, String> emptyMap());
    +     store.addPartition(part);
    +     ColumnStatistics cs = new ColumnStatistics();
    +     ColumnStatisticsDesc desc = new ColumnStatisticsDesc(false, dbName, tableName);
    +     desc.setLastAnalyzed(now);
    +     desc.setPartName(partName);
    +     cs.setStatsDesc(desc);
    +     ColumnStatisticsObj obj = new ColumnStatisticsObj();
    +     obj.setColName("col2");
    +     obj.setColType("long");
    +     ColumnStatisticsData data = new ColumnStatisticsData();
    +     LongColumnStatsData dcsd = new LongColumnStatsData();
    +     dcsd.setHighValue(1000 + i);
    +     dcsd.setLowValue(-1000 - i);
    +     dcsd.setNumNulls(i);
    +     dcsd.setNumDVs(10 * i);
    +     data.setLongStats(dcsd);
    +     obj.setStatsData(data);
    +     cs.addToStatsObj(obj);
    +     store.updatePartitionColumnStatistics(cs, partVal);
    +   }
    +
    +   Checker statChecker = new Checker() {
    +     @Override
    +     public void checkStats(AggrStats aggrStats) throws Exception {
    +       Assert.assertEquals(10, aggrStats.getPartsFound());
    +       Assert.assertEquals(1, aggrStats.getColStatsSize());
    +       ColumnStatisticsObj cso = aggrStats.getColStats().get(0);
    +       Assert.assertEquals("col2", cso.getColName());
    +       Assert.assertEquals("long", cso.getColType());
    +       LongColumnStatsData lcsd = cso.getStatsData().getLongStats();
    +       Assert.assertEquals(1009, lcsd.getHighValue(), 0.01);
    +       Assert.assertEquals(-1009, lcsd.getLowValue(), 0.01);
    +       Assert.assertEquals(45, lcsd.getNumNulls());
    +       Assert.assertEquals(90, lcsd.getNumDVs());
    +     }
    +   };
    +   List<String> partNames = new ArrayList<>();
    +   for (int i = 0; i < 10; i++) {
    +     partNames.add("ds=" + i);
    +   }
    +   AggrStats aggrStats = store.get_aggr_stats_for(dbName, tableName, partNames,
    +       Arrays.asList("col2"));
    +   statChecker.checkStats(aggrStats);
    + }
    +
    + /**
    +  * Creates table {@code default.snp} with ten partitions ({@code ds=0..9}) but stores long
    +  * column stats (with bit vectors) only on partitions 0, 1, 8 and 9. All ten partition
    +  * names are then requested for {@code col3}.
    +  *
    +  * Expected aggregate: 4 partitions found, high 1010, low -1010, 45 nulls, NDV 3.
    +  * The bounds and null count exceed what the four populated partitions hold directly;
    +  * presumably they come from the store's extrapolation -- confirm against the aggregator.
    +  *
    +  * @throws Exception if the store rejects any of the calls
    +  */
    + @Test
    + public void TwoEndsOfPartitionsHaveBitVectorStatus() throws Exception {
    +   String dbName = "default";
    +   String tableName = "snp";
    +   long now = System.currentTimeMillis();
    +   List<FieldSchema> cols = new ArrayList<>();
    +   cols.add(new FieldSchema("col3", "long", "nocomment"));
    +   SerDeInfo serde = new SerDeInfo("serde", "seriallib", null);
    +   StorageDescriptor sd = new StorageDescriptor(cols, "file:/tmp", "input", "output", false, 0,
    +       serde, null, null, Collections.<String, String> emptyMap());
    +   List<FieldSchema> partCols = new ArrayList<>();
    +   partCols.add(new FieldSchema("ds", "string", ""));
    +   Table table = new Table(tableName, dbName, "me", (int) now, (int) now, 0, sd, partCols,
    +       Collections.<String, String> emptyMap(), null, null, null);
    +   store.createTable(table);
    +
    +   for (int i = 0; i < 10; i++) {
    +     List<String> partVal = Arrays.asList("" + i);
    +     // Build the name from the scalar value: concatenating the List itself renders as
    +     // "ds=[0]", which does not match the "ds=0" names requested further down.
    +     String partName = "ds=" + i;
    +     StorageDescriptor psd = new StorageDescriptor(sd);
    +     psd.setLocation("file:/tmp/default/hit/" + partName);
    +     Partition part = new Partition(partVal, dbName, tableName, (int) now, (int) now, psd,
    +         Collections.<String, String> emptyMap());
    +     store.addPartition(part);
    +     // Only the two partitions at each end of the range get statistics.
    +     if (i < 2 || i > 7) {
    +       ColumnStatistics cs = new ColumnStatistics();
    +       ColumnStatisticsDesc desc = new ColumnStatisticsDesc(false, dbName, tableName);
    +       desc.setLastAnalyzed(now);
    +       desc.setPartName(partName);
    +       cs.setStatsDesc(desc);
    +       ColumnStatisticsObj obj = new ColumnStatisticsObj();
    +       obj.setColName("col3");
    +       obj.setColType("long");
    +       ColumnStatisticsData data = new ColumnStatisticsData();
    +       LongColumnStatsData dcsd = new LongColumnStatsData();
    +       dcsd.setHighValue(1000 + i);
    +       dcsd.setLowValue(-1000 - i);
    +       dcsd.setNumNulls(i);
    +       dcsd.setNumDVs(10 * i);
    +       dcsd.setBitVectors(bitVectors);
    +       data.setLongStats(dcsd);
    +       obj.setStatsData(data);
    +       cs.addToStatsObj(obj);
    +       store.updatePartitionColumnStatistics(cs, partVal);
    +     }
    +   }
    +
    +   Checker statChecker = new Checker() {
    +     @Override
    +     public void checkStats(AggrStats aggrStats) throws Exception {
    +       Assert.assertEquals(4, aggrStats.getPartsFound());
    +       Assert.assertEquals(1, aggrStats.getColStatsSize());
    +       ColumnStatisticsObj cso = aggrStats.getColStats().get(0);
    +       Assert.assertEquals("col3", cso.getColName());
    +       Assert.assertEquals("long", cso.getColType());
    +       LongColumnStatsData lcsd = cso.getStatsData().getLongStats();
    +       Assert.assertEquals(1010, lcsd.getHighValue(), 0.01);
    +       Assert.assertEquals(-1010, lcsd.getLowValue(), 0.01);
    +       Assert.assertEquals(45, lcsd.getNumNulls());
    +       Assert.assertEquals(3, lcsd.getNumDVs());
    +     }
    +   };
    +   List<String> partNames = new ArrayList<>();
    +   for (int i = 0; i < 10; i++) {
    +     partNames.add("ds=" + i);
    +   }
    +   AggrStats aggrStats = store.get_aggr_stats_for(dbName, tableName, partNames,
    +       Arrays.asList("col3"));
    +   statChecker.checkStats(aggrStats);
    + }
    +
    + /**
    +  * Creates ten partitions of which only the four in the middle (ds=3 .. ds=6) carry column
    +  * statistics for {@code col4}, then aggregates over all ten partition names.
    +  *
    +  * The stored partitions hold 3 + 4 + 5 + 6 = 18 nulls and bounds of at most +/-1006, while the
    +  * assertions expect 45 nulls with bounds of +/-1006, i.e. the null count is expected to be
    +  * scaled up to cover the partitions that have no statistics.
    +  */
    + @Test
    + public void MiddleOfPartitionsHaveBitVectorStatus() throws Exception {
    +   String dbName = "default";
    +   String tableName = "snp";
    +   long now = System.currentTimeMillis();
    +   List<FieldSchema> cols = new ArrayList<>();
    +   cols.add(new FieldSchema("col4", "long", "nocomment"));
    +   SerDeInfo serde = new SerDeInfo("serde", "seriallib", null);
    +   StorageDescriptor sd = new StorageDescriptor(cols, "file:/tmp", "input", "output", false, 0,
    +       serde, null, null, Collections.<String, String> emptyMap());
    +   List<FieldSchema> partCols = new ArrayList<>();
    +   partCols.add(new FieldSchema("ds", "string", ""));
    +   Table table = new Table(tableName, dbName, "me", (int) now, (int) now, 0, sd, partCols,
    +       Collections.<String, String> emptyMap(), null, null, null);
    +   store.createTable(table);
    +
    +   List<String> partNames = new ArrayList<>();
    +   for (int i = 0; i < 10; i++) {
    +     List<String> partVal = Arrays.asList(String.valueOf(i));
    +     // Derive the name from the value itself: concatenating the List would give "ds=[i]",
    +     // which does not match the "ds=i" names used for the aggregate request below.
    +     String partName = "ds=" + i;
    +     partNames.add(partName);
    +     StorageDescriptor psd = new StorageDescriptor(sd);
    +     psd.setLocation("file:/tmp/default/hit/" + partName);
    +     Partition part = new Partition(partVal, dbName, tableName, (int) now, (int) now, psd,
    +         Collections.<String, String> emptyMap());
    +     store.addPartition(part);
    +     // Only partitions 3, 4, 5 and 6 receive statistics.
    +     if (i > 2 && i < 7) {
    +       ColumnStatisticsDesc desc = new ColumnStatisticsDesc(false, dbName, tableName);
    +       desc.setLastAnalyzed(now);
    +       desc.setPartName(partName);
    +       LongColumnStatsData longStats = new LongColumnStatsData();
    +       longStats.setHighValue(1000 + i);
    +       longStats.setLowValue(-1000 - i);
    +       longStats.setNumNulls(i);
    +       longStats.setNumDVs(10 * i);
    +       longStats.setBitVectors(bitVectors);
    +       ColumnStatisticsData data = new ColumnStatisticsData();
    +       data.setLongStats(longStats);
    +       ColumnStatisticsObj obj = new ColumnStatisticsObj();
    +       obj.setColName("col4");
    +       obj.setColType("long");
    +       obj.setStatsData(data);
    +       ColumnStatistics cs = new ColumnStatistics();
    +       cs.setStatsDesc(desc);
    +       cs.addToStatsObj(obj);
    +       store.updatePartitionColumnStatistics(cs, partVal);
    +     }
    +   }
    +
    +   Checker statChecker = new Checker() {
    +     @Override
    +     public void checkStats(AggrStats aggrStats) throws Exception {
    +       Assert.assertEquals(4, aggrStats.getPartsFound());
    +       Assert.assertEquals(1, aggrStats.getColStatsSize());
    +       ColumnStatisticsObj cso = aggrStats.getColStats().get(0);
    +       Assert.assertEquals("col4", cso.getColName());
    +       Assert.assertEquals("long", cso.getColType());
    +       LongColumnStatsData aggregated = cso.getStatsData().getLongStats();
    +       Assert.assertEquals(1006, aggregated.getHighValue(), 0.01);
    +       Assert.assertEquals(-1006, aggregated.getLowValue(), 0.01);
    +       Assert.assertEquals(45, aggregated.getNumNulls());
    +       Assert.assertEquals(3, aggregated.getNumDVs());
    +     }
    +   };
    +   AggrStats aggrStats = store.get_aggr_stats_for(dbName, tableName, partNames,
    +       Arrays.asList("col4"));
    +   statChecker.checkStats(aggrStats);
    + }
    +
    + /**
    +  * Creates ten partitions of which six (ds=0, 2, 3, 5, 6 and 8) carry long column statistics
    +  * for {@code col5}, then aggregates over all ten partition names.
    +  *
    +  * The stored partitions hold 0 + 2 + 3 + 5 + 6 + 8 = 24 nulls and bounds of at most +/-1008,
    +  * while the assertions expect 40 nulls and bounds of +/-1010, i.e. values that reach beyond
    +  * what the partitions with statistics contain.
    +  */
    + @Test
    + public void TwoEndsAndMiddleOfPartitionsHaveBitVectorStatusLong() throws Exception {
    +   String dbName = "default";
    +   String tableName = "snp";
    +   long now = System.currentTimeMillis();
    +   List<FieldSchema> cols = new ArrayList<>();
    +   cols.add(new FieldSchema("col5", "long", "nocomment"));
    +   SerDeInfo serde = new SerDeInfo("serde", "seriallib", null);
    +   StorageDescriptor sd = new StorageDescriptor(cols, "file:/tmp", "input", "output", false, 0,
    +       serde, null, null, Collections.<String, String> emptyMap());
    +   List<FieldSchema> partCols = new ArrayList<>();
    +   partCols.add(new FieldSchema("ds", "string", ""));
    +   Table table = new Table(tableName, dbName, "me", (int) now, (int) now, 0, sd, partCols,
    +       Collections.<String, String> emptyMap(), null, null, null);
    +   store.createTable(table);
    +
    +   List<String> partNames = new ArrayList<>();
    +   for (int i = 0; i < 10; i++) {
    +     List<String> partVal = Arrays.asList(String.valueOf(i));
    +     // Derive the name from the value itself: concatenating the List would give "ds=[i]",
    +     // which does not match the "ds=i" names used for the aggregate request below.
    +     String partName = "ds=" + i;
    +     partNames.add(partName);
    +     StorageDescriptor psd = new StorageDescriptor(sd);
    +     psd.setLocation("file:/tmp/default/hit/" + partName);
    +     Partition part = new Partition(partVal, dbName, tableName, (int) now, (int) now, psd,
    +         Collections.<String, String> emptyMap());
    +     store.addPartition(part);
    +     // Statistics exist at both ends and in the middle, with gaps at 1, 4, 7 and 9.
    +     if (i == 0 || i == 2 || i == 3 || i == 5 || i == 6 || i == 8) {
    +       ColumnStatisticsDesc desc = new ColumnStatisticsDesc(false, dbName, tableName);
    +       desc.setLastAnalyzed(now);
    +       desc.setPartName(partName);
    +       LongColumnStatsData longStats = new LongColumnStatsData();
    +       longStats.setHighValue(1000 + i);
    +       longStats.setLowValue(-1000 - i);
    +       longStats.setNumNulls(i);
    +       longStats.setNumDVs(10 * i);
    +       longStats.setBitVectors(bitVectors);
    +       ColumnStatisticsData data = new ColumnStatisticsData();
    +       data.setLongStats(longStats);
    +       ColumnStatisticsObj obj = new ColumnStatisticsObj();
    +       obj.setColName("col5");
    +       obj.setColType("long");
    +       obj.setStatsData(data);
    +       ColumnStatistics cs = new ColumnStatistics();
    +       cs.setStatsDesc(desc);
    +       cs.addToStatsObj(obj);
    +       store.updatePartitionColumnStatistics(cs, partVal);
    +     }
    +   }
    +
    +   Checker statChecker = new Checker() {
    +     @Override
    +     public void checkStats(AggrStats aggrStats) throws Exception {
    +       Assert.assertEquals(6, aggrStats.getPartsFound());
    +       Assert.assertEquals(1, aggrStats.getColStatsSize());
    +       ColumnStatisticsObj cso = aggrStats.getColStats().get(0);
    +       Assert.assertEquals("col5", cso.getColName());
    +       Assert.assertEquals("long", cso.getColType());
    +       LongColumnStatsData aggregated = cso.getStatsData().getLongStats();
    +       Assert.assertEquals(1010, aggregated.getHighValue(), 0.01);
    +       Assert.assertEquals(-1010, aggregated.getLowValue(), 0.01);
    +       Assert.assertEquals(40, aggregated.getNumNulls());
    +       Assert.assertEquals(3, aggregated.getNumDVs());
    +     }
    +   };
    +   AggrStats aggrStats = store.get_aggr_stats_for(dbName, tableName, partNames,
    +       Arrays.asList("col5"));
    +   statChecker.checkStats(aggrStats);
    + }
    +
    + /**
    +  * Double-typed variant: ten partitions are created and six of them (ds=0, 2, 3, 5, 6 and 8)
    +  * carry double column statistics for {@code col5_double}; the aggregate is then requested for
    +  * all ten partition names.
    +  *
    +  * The stored partitions hold 0 + 2 + 3 + 5 + 6 + 8 = 24 nulls and bounds of at most +/-1008,
    +  * while the assertions expect 40 nulls and bounds of +/-1010, i.e. values that reach beyond
    +  * what the partitions with statistics contain.
    +  */
    + @Test
    + public void TwoEndsAndMiddleOfPartitionsHaveBitVectorStatusDouble() throws Exception {
    +   String dbName = "default";
    +   String tableName = "snp";
    +   long now = System.currentTimeMillis();
    +   List<FieldSchema> cols = new ArrayList<>();
    +   cols.add(new FieldSchema("col5_double", "double", "nocomment"));
    +   SerDeInfo serde = new SerDeInfo("serde", "seriallib", null);
    +   StorageDescriptor sd = new StorageDescriptor(cols, "file:/tmp", "input", "output", false, 0,
    +       serde, null, null, Collections.<String, String> emptyMap());
    +   List<FieldSchema> partCols = new ArrayList<>();
    +   partCols.add(new FieldSchema("ds", "string", ""));
    +   Table table = new Table(tableName, dbName, "me", (int) now, (int) now, 0, sd, partCols,
    +       Collections.<String, String> emptyMap(), null, null, null);
    +   store.createTable(table);
    +
    +   List<String> partNames = new ArrayList<>();
    +   for (int i = 0; i < 10; i++) {
    +     List<String> partVal = Arrays.asList(String.valueOf(i));
    +     // Derive the name from the value itself: concatenating the List would give "ds=[i]",
    +     // which does not match the "ds=i" names used for the aggregate request below.
    +     String partName = "ds=" + i;
    +     partNames.add(partName);
    +     StorageDescriptor psd = new StorageDescriptor(sd);
    +     psd.setLocation("file:/tmp/default/hit/" + partName);
    +     Partition part = new Partition(partVal, dbName, tableName, (int) now, (int) now, psd,
    +         Collections.<String, String> emptyMap());
    +     store.addPartition(part);
    +     // Statistics exist at both ends and in the middle, with gaps at 1, 4, 7 and 9.
    +     if (i == 0 || i == 2 || i == 3 || i == 5 || i == 6 || i == 8) {
    +       ColumnStatisticsDesc desc = new ColumnStatisticsDesc(false, dbName, tableName);
    +       desc.setLastAnalyzed(now);
    +       desc.setPartName(partName);
    +       DoubleColumnStatsData doubleStats = new DoubleColumnStatsData();
    +       doubleStats.setHighValue(1000 + i);
    +       doubleStats.setLowValue(-1000 - i);
    +       doubleStats.setNumNulls(i);
    +       doubleStats.setNumDVs(10 * i);
    +       doubleStats.setBitVectors(bitVectors);
    +       ColumnStatisticsData data = new ColumnStatisticsData();
    +       data.setDoubleStats(doubleStats);
    +       ColumnStatisticsObj obj = new ColumnStatisticsObj();
    +       obj.setColName("col5_double");
    +       obj.setColType("double");
    +       obj.setStatsData(data);
    +       ColumnStatistics cs = new ColumnStatistics();
    +       cs.setStatsDesc(desc);
    +       cs.addToStatsObj(obj);
    +       store.updatePartitionColumnStatistics(cs, partVal);
    +     }
    +   }
    +
    +   Checker statChecker = new Checker() {
    +     @Override
    +     public void checkStats(AggrStats aggrStats) throws Exception {
    +       Assert.assertEquals(6, aggrStats.getPartsFound());
    +       Assert.assertEquals(1, aggrStats.getColStatsSize());
    +       ColumnStatisticsObj cso = aggrStats.getColStats().get(0);
    +       Assert.assertEquals("col5_double", cso.getColName());
    +       Assert.assertEquals("double", cso.getColType());
    +       DoubleColumnStatsData aggregated = cso.getStatsData().getDoubleStats();
    +       Assert.assertEquals(1010, aggregated.getHighValue(), 0.01);
    +       Assert.assertEquals(-1010, aggregated.getLowValue(), 0.01);
    +       Assert.assertEquals(40, aggregated.getNumNulls());
    +       Assert.assertEquals(3, aggregated.getNumDVs());
    +     }
    +   };
    +   AggrStats aggrStats = store.get_aggr_stats_for(dbName, tableName, partNames,
    +       Arrays.asList("col5_double"));
    +   statChecker.checkStats(aggrStats);
    + }
    +}

    http://git-wip-us.apache.org/repos/asf/hive/blob/96862093/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestHBaseAggregateStatsNDVUniformDist.java
    ----------------------------------------------------------------------
    diff --git a/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestHBaseAggregateStatsNDVUniformDist.java b/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestHBaseAggregateStatsNDVUniformDist.java
    new file mode 100644
    index 0000000..62918be
    --- /dev/null
    +++ b/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestHBaseAggregateStatsNDVUniformDist.java
    @@ -0,0 +1,581 @@
    +/**
    + * Licensed to the Apache Software Foundation (ASF) under one
    + * or more contributor license agreements. See the NOTICE file
    + * distributed with this work for additional information
    + * regarding copyright ownership. The ASF licenses this file
    + * to you under the Apache License, Version 2.0 (the
    + * "License"); you may not use this file except in compliance
    + * with the License. You may obtain a copy of the License at
    + *
    + * http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing,
    + * software distributed under the License is distributed on an
    + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
    + * KIND, either express or implied. See the License for the
    + * specific language governing permissions and limitations
    + * under the License.
    + */
    +package org.apache.hadoop.hive.metastore.hbase;
    +
    +import org.slf4j.Logger;
    +import org.slf4j.LoggerFactory;
    +import org.apache.hadoop.hbase.Cell;
    +import org.apache.hadoop.hbase.client.HTableInterface;
    +import org.apache.hadoop.hive.conf.HiveConf;
    +import org.apache.hadoop.hive.metastore.StatObjectConverter;
    +import org.apache.hadoop.hive.metastore.api.AggrStats;
    +import org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData;
    +import org.apache.hadoop.hive.metastore.api.ColumnStatistics;
    +import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData;
    +import org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc;
    +import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
    +import org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData;
    +import org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData;
    +import org.apache.hadoop.hive.metastore.api.FieldSchema;
    +import org.apache.hadoop.hive.metastore.api.LongColumnStatsData;
    +import org.apache.hadoop.hive.metastore.api.Partition;
    +import org.apache.hadoop.hive.metastore.api.SerDeInfo;
    +import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
    +import org.apache.hadoop.hive.metastore.api.Table;
    +import org.junit.Assert;
    +import org.junit.Before;
    +import org.junit.Test;
    +import org.mockito.Mock;
    +import org.mockito.MockitoAnnotations;
    +
    +import java.io.IOException;
    +import java.util.ArrayList;
    +import java.util.Arrays;
    +import java.util.Collections;
    +import java.util.List;
    +import java.util.SortedMap;
    +import java.util.TreeMap;
    +
    +public class TestHBaseAggregateStatsNDVUniformDist {
    + private static final Logger LOG =
    +     LoggerFactory.getLogger(TestHBaseAggregateStatsNDVUniformDist.class.getName());
    +
    + // Mockito-injected table handle; handed to MockUtils.init() together with 'rows'.
    + @Mock
    + HTableInterface htable;
    + // Store under test, rebuilt before every test method.
    + private HBaseStore store;
    + // Sorted row map handed to MockUtils.init() alongside the mocked table.
    + SortedMap<String, Cell> rows = new TreeMap<>();
    +
    + // Serialized bit vectors attached to the partition statistics.
    + // NDV will be 3 for bitVectors[0] and 12 for bitVectors[1]
    + String[] bitVectors = {
    +     "{0, 4, 5, 7}{0, 1}{0, 1, 2}{0, 1, 4}{0}{0, 2}{0, 3}{0, 2, 3, 4}{0, 1, 4}{0, 1}{0}{0, 1, 3, 8}{0, 2}{0, 2}{0, 9}{0, 1, 4}",
    +     "{1, 2}{1, 2}{1, 2}{1, 2}{1, 2}{1, 2}{1, 2}{1, 2}{1, 2}{1, 2}{1, 2}{1, 2}{1, 2}{1, 2}{1, 2}{1, 2}" };
    +
    + /**
    +  * Per-test setup: initializes the Mockito mocks, builds a configuration with
    +  * {@code HBaseReadWrite.NO_CACHE_CONF} and the NDV density function setting both switched
    +  * on, creates the store through {@code MockUtils.init} and resets the stats cache counters.
    +  *
    +  * @throws IOException declared by the signature; which call may raise it is not visible here
    +  */
    + @Before
    + public void before() throws IOException {
    +   MockitoAnnotations.initMocks(this);
    +   HiveConf hiveConf = new HiveConf();
    +   hiveConf.setBoolean(HBaseReadWrite.NO_CACHE_CONF, true);
    +   hiveConf.setBoolean(
    +       HiveConf.ConfVars.HIVE_METASTORE_STATS_NDV_DENSITY_FUNCTION.varname, true);
    +   store = MockUtils.init(hiveConf, htable, rows);
    +   store.backdoor().getStatsCache().resetCounters();
    + }
    +
    + /** Callback used by the tests to verify an aggregated statistics result. */
    + private interface Checker {
    +   /**
    +    * Asserts the expectations of one test against the given aggregate.
    +    *
    +    * @param aggrStats aggregate returned by the store
    +    * @throws Exception if a check cannot be carried out
    +    */
    +   void checkStats(AggrStats aggrStats) throws Exception;
    + }
    +
    + /**
    +  * All ten partitions carry long column statistics for {@code col1}, each with
    +  * {@code bitVectors[0]} attached and a stored NDV of {@code 10 * i + 1}.
    +  *
    +  * The aggregate over all partitions is expected to report ten partitions found, bounds of
    +  * +/-1009 (the widest stored bounds), 45 nulls (0 + 1 + ... + 9) and an NDV of 3 rather than
    +  * any of the stored per-partition NDVs.
    +  */
    + @Test
    + public void allPartitionsHaveBitVectorStatus() throws Exception {
    +   String dbName = "default";
    +   String tableName = "snp";
    +   long now = System.currentTimeMillis();
    +   List<FieldSchema> cols = new ArrayList<>();
    +   cols.add(new FieldSchema("col1", "long", "nocomment"));
    +   SerDeInfo serde = new SerDeInfo("serde", "seriallib", null);
    +   StorageDescriptor sd = new StorageDescriptor(cols, "file:/tmp", "input", "output", false, 0,
    +       serde, null, null, Collections.<String, String> emptyMap());
    +   List<FieldSchema> partCols = new ArrayList<>();
    +   partCols.add(new FieldSchema("ds", "string", ""));
    +   Table table = new Table(tableName, dbName, "me", (int) now, (int) now, 0, sd, partCols,
    +       Collections.<String, String> emptyMap(), null, null, null);
    +   store.createTable(table);
    +
    +   List<String> partNames = new ArrayList<>();
    +   for (int i = 0; i < 10; i++) {
    +     List<String> partVal = Arrays.asList(String.valueOf(i));
    +     // Derive the name from the value itself: concatenating the List would give "ds=[i]",
    +     // which does not match the "ds=i" names used for the aggregate request below.
    +     String partName = "ds=" + i;
    +     partNames.add(partName);
    +     StorageDescriptor psd = new StorageDescriptor(sd);
    +     psd.setLocation("file:/tmp/default/hit/" + partName);
    +     Partition part = new Partition(partVal, dbName, tableName, (int) now, (int) now, psd,
    +         Collections.<String, String> emptyMap());
    +     store.addPartition(part);
    +
    +     ColumnStatisticsDesc desc = new ColumnStatisticsDesc(false, dbName, tableName);
    +     desc.setLastAnalyzed(now);
    +     desc.setPartName(partName);
    +     LongColumnStatsData longStats = new LongColumnStatsData();
    +     longStats.setHighValue(1000 + i);
    +     longStats.setLowValue(-1000 - i);
    +     longStats.setNumNulls(i);
    +     longStats.setNumDVs(10 * i + 1);
    +     longStats.setBitVectors(bitVectors[0]);
    +     ColumnStatisticsData data = new ColumnStatisticsData();
    +     data.setLongStats(longStats);
    +     ColumnStatisticsObj obj = new ColumnStatisticsObj();
    +     obj.setColName("col1");
    +     obj.setColType("long");
    +     obj.setStatsData(data);
    +     ColumnStatistics cs = new ColumnStatistics();
    +     cs.setStatsDesc(desc);
    +     cs.addToStatsObj(obj);
    +     store.updatePartitionColumnStatistics(cs, partVal);
    +   }
    +
    +   Checker statChecker = new Checker() {
    +     @Override
    +     public void checkStats(AggrStats aggrStats) throws Exception {
    +       Assert.assertEquals(10, aggrStats.getPartsFound());
    +       Assert.assertEquals(1, aggrStats.getColStatsSize());
    +       ColumnStatisticsObj cso = aggrStats.getColStats().get(0);
    +       Assert.assertEquals("col1", cso.getColName());
    +       Assert.assertEquals("long", cso.getColType());
    +       LongColumnStatsData aggregated = cso.getStatsData().getLongStats();
    +       Assert.assertEquals(1009, aggregated.getHighValue(), 0.01);
    +       Assert.assertEquals(-1009, aggregated.getLowValue(), 0.01);
    +       Assert.assertEquals(45, aggregated.getNumNulls());
    +       Assert.assertEquals(3, aggregated.getNumDVs());
    +     }
    +   };
    +   AggrStats aggrStats = store.get_aggr_stats_for(dbName, tableName, partNames,
    +       Arrays.asList("col1"));
    +   statChecker.checkStats(aggrStats);
    + }
    +
    + /**
    +  * All ten partitions carry long column statistics for {@code col2}, but none of them has a
    +  * bit vector attached; each stores an NDV of {@code 10 * i + 1}.
    +  *
    +  * The aggregate is expected to report ten partitions found, bounds of +/-1009, 45 nulls
    +  * (0 + 1 + ... + 9) and an NDV of 91, which equals the largest stored per-partition NDV.
    +  */
    + @Test
    + public void noPartitionsHaveBitVectorStatus() throws Exception {
    +   String dbName = "default";
    +   String tableName = "snp";
    +   long now = System.currentTimeMillis();
    +   List<FieldSchema> cols = new ArrayList<>();
    +   cols.add(new FieldSchema("col2", "long", "nocomment"));
    +   SerDeInfo serde = new SerDeInfo("serde", "seriallib", null);
    +   StorageDescriptor sd = new StorageDescriptor(cols, "file:/tmp", "input", "output", false, 0,
    +       serde, null, null, Collections.<String, String> emptyMap());
    +   List<FieldSchema> partCols = new ArrayList<>();
    +   partCols.add(new FieldSchema("ds", "string", ""));
    +   Table table = new Table(tableName, dbName, "me", (int) now, (int) now, 0, sd, partCols,
    +       Collections.<String, String> emptyMap(), null, null, null);
    +   store.createTable(table);
    +
    +   List<String> partNames = new ArrayList<>();
    +   for (int i = 0; i < 10; i++) {
    +     List<String> partVal = Arrays.asList(String.valueOf(i));
    +     // Derive the name from the value itself: concatenating the List would give "ds=[i]",
    +     // which does not match the "ds=i" names used for the aggregate request below.
    +     String partName = "ds=" + i;
    +     partNames.add(partName);
    +     StorageDescriptor psd = new StorageDescriptor(sd);
    +     psd.setLocation("file:/tmp/default/hit/" + partName);
    +     Partition part = new Partition(partVal, dbName, tableName, (int) now, (int) now, psd,
    +         Collections.<String, String> emptyMap());
    +     store.addPartition(part);
    +
    +     ColumnStatisticsDesc desc = new ColumnStatisticsDesc(false, dbName, tableName);
    +     desc.setLastAnalyzed(now);
    +     desc.setPartName(partName);
    +     // Deliberately no setBitVectors() call in this scenario.
    +     LongColumnStatsData longStats = new LongColumnStatsData();
    +     longStats.setHighValue(1000 + i);
    +     longStats.setLowValue(-1000 - i);
    +     longStats.setNumNulls(i);
    +     longStats.setNumDVs(10 * i + 1);
    +     ColumnStatisticsData data = new ColumnStatisticsData();
    +     data.setLongStats(longStats);
    +     ColumnStatisticsObj obj = new ColumnStatisticsObj();
    +     obj.setColName("col2");
    +     obj.setColType("long");
    +     obj.setStatsData(data);
    +     ColumnStatistics cs = new ColumnStatistics();
    +     cs.setStatsDesc(desc);
    +     cs.addToStatsObj(obj);
    +     store.updatePartitionColumnStatistics(cs, partVal);
    +   }
    +
    +   Checker statChecker = new Checker() {
    +     @Override
    +     public void checkStats(AggrStats aggrStats) throws Exception {
    +       Assert.assertEquals(10, aggrStats.getPartsFound());
    +       Assert.assertEquals(1, aggrStats.getColStatsSize());
    +       ColumnStatisticsObj cso = aggrStats.getColStats().get(0);
    +       Assert.assertEquals("col2", cso.getColName());
    +       Assert.assertEquals("long", cso.getColType());
    +       LongColumnStatsData aggregated = cso.getStatsData().getLongStats();
    +       Assert.assertEquals(1009, aggregated.getHighValue(), 0.01);
    +       Assert.assertEquals(-1009, aggregated.getLowValue(), 0.01);
    +       Assert.assertEquals(45, aggregated.getNumNulls());
    +       Assert.assertEquals(91, aggregated.getNumDVs());
    +     }
    +   };
    +   AggrStats aggrStats = store.get_aggr_stats_for(dbName, tableName, partNames,
    +       Arrays.asList("col2"));
    +   statChecker.checkStats(aggrStats);
    + }
    +
    + /**
    +  * Creates ten partitions of which only the two at each end (ds=0, 1, 8 and 9) carry long
    +  * column statistics for {@code col3}; partitions 0 and 1 use {@code bitVectors[0]} and
    +  * partitions 8 and 9 use {@code bitVectors[1]} (index {@code i / 5}).
    +  *
    +  * The stored partitions hold 0 + 1 + 8 + 9 = 18 nulls and bounds of at most +/-1009, while
    +  * the assertions expect 45 nulls, bounds of +/-1010 and an NDV of 12, i.e. values that reach
    +  * beyond what the partitions with statistics contain.
    +  */
    + @Test
    + public void TwoEndsOfPartitionsHaveBitVectorStatus() throws Exception {
    +   String dbName = "default";
    +   String tableName = "snp";
    +   long now = System.currentTimeMillis();
    +   List<FieldSchema> cols = new ArrayList<>();
    +   cols.add(new FieldSchema("col3", "long", "nocomment"));
    +   SerDeInfo serde = new SerDeInfo("serde", "seriallib", null);
    +   StorageDescriptor sd = new StorageDescriptor(cols, "file:/tmp", "input", "output", false, 0,
    +       serde, null, null, Collections.<String, String> emptyMap());
    +   List<FieldSchema> partCols = new ArrayList<>();
    +   partCols.add(new FieldSchema("ds", "string", ""));
    +   Table table = new Table(tableName, dbName, "me", (int) now, (int) now, 0, sd, partCols,
    +       Collections.<String, String> emptyMap(), null, null, null);
    +   store.createTable(table);
    +
    +   List<String> partNames = new ArrayList<>();
    +   for (int i = 0; i < 10; i++) {
    +     List<String> partVal = Arrays.asList(String.valueOf(i));
    +     // Derive the name from the value itself: concatenating the List would give "ds=[i]",
    +     // which does not match the "ds=i" names used for the aggregate request below.
    +     String partName = "ds=" + i;
    +     partNames.add(partName);
    +     StorageDescriptor psd = new StorageDescriptor(sd);
    +     psd.setLocation("file:/tmp/default/hit/" + partName);
    +     Partition part = new Partition(partVal, dbName, tableName, (int) now, (int) now, psd,
    +         Collections.<String, String> emptyMap());
    +     store.addPartition(part);
    +     // Only partitions 0, 1, 8 and 9 receive statistics.
    +     if (i < 2 || i > 7) {
    +       ColumnStatisticsDesc desc = new ColumnStatisticsDesc(false, dbName, tableName);
    +       desc.setLastAnalyzed(now);
    +       desc.setPartName(partName);
    +       LongColumnStatsData longStats = new LongColumnStatsData();
    +       longStats.setHighValue(1000 + i);
    +       longStats.setLowValue(-1000 - i);
    +       longStats.setNumNulls(i);
    +       longStats.setNumDVs(10 * i + 1);
    +       longStats.setBitVectors(bitVectors[i / 5]);
    +       ColumnStatisticsData data = new ColumnStatisticsData();
    +       data.setLongStats(longStats);
    +       ColumnStatisticsObj obj = new ColumnStatisticsObj();
    +       obj.setColName("col3");
    +       obj.setColType("long");
    +       obj.setStatsData(data);
    +       ColumnStatistics cs = new ColumnStatistics();
    +       cs.setStatsDesc(desc);
    +       cs.addToStatsObj(obj);
    +       store.updatePartitionColumnStatistics(cs, partVal);
    +     }
    +   }
    +
    +   Checker statChecker = new Checker() {
    +     @Override
    +     public void checkStats(AggrStats aggrStats) throws Exception {
    +       Assert.assertEquals(4, aggrStats.getPartsFound());
    +       Assert.assertEquals(1, aggrStats.getColStatsSize());
    +       ColumnStatisticsObj cso = aggrStats.getColStats().get(0);
    +       Assert.assertEquals("col3", cso.getColName());
    +       Assert.assertEquals("long", cso.getColType());
    +       LongColumnStatsData aggregated = cso.getStatsData().getLongStats();
    +       Assert.assertEquals(1010, aggregated.getHighValue(), 0.01);
    +       Assert.assertEquals(-1010, aggregated.getLowValue(), 0.01);
    +       Assert.assertEquals(45, aggregated.getNumNulls());
    +       Assert.assertEquals(12, aggregated.getNumDVs());
    +     }
    +   };
    +   AggrStats aggrStats = store.get_aggr_stats_for(dbName, tableName, partNames,
    +       Arrays.asList("col3"));
    +   statChecker.checkStats(aggrStats);
    + }
    +
    + /**
    +  * Creates ten partitions of which only the four in the middle (ds=3 .. ds=6) carry long
    +  * column statistics for {@code col4}, each with {@code bitVectors[0]} attached.
    +  *
    +  * The stored partitions hold 3 + 4 + 5 + 6 = 18 nulls, while the assertions expect 45 nulls,
    +  * bounds of +/-1006 and an NDV of 3, i.e. the null count is expected to be scaled up to
    +  * cover the partitions that have no statistics.
    +  */
    + @Test
    + public void MiddleOfPartitionsHaveBitVectorStatus() throws Exception {
    +   String dbName = "default";
    +   String tableName = "snp";
    +   long now = System.currentTimeMillis();
    +   List<FieldSchema> cols = new ArrayList<>();
    +   cols.add(new FieldSchema("col4", "long", "nocomment"));
    +   SerDeInfo serde = new SerDeInfo("serde", "seriallib", null);
    +   StorageDescriptor sd = new StorageDescriptor(cols, "file:/tmp", "input", "output", false, 0,
    +       serde, null, null, Collections.<String, String> emptyMap());
    +   List<FieldSchema> partCols = new ArrayList<>();
    +   partCols.add(new FieldSchema("ds", "string", ""));
    +   Table table = new Table(tableName, dbName, "me", (int) now, (int) now, 0, sd, partCols,
    +       Collections.<String, String> emptyMap(), null, null, null);
    +   store.createTable(table);
    +
    +   List<String> partNames = new ArrayList<>();
    +   for (int i = 0; i < 10; i++) {
    +     List<String> partVal = Arrays.asList(String.valueOf(i));
    +     // Derive the name from the value itself: concatenating the List would give "ds=[i]",
    +     // which does not match the "ds=i" names used for the aggregate request below.
    +     String partName = "ds=" + i;
    +     partNames.add(partName);
    +     StorageDescriptor psd = new StorageDescriptor(sd);
    +     psd.setLocation("file:/tmp/default/hit/" + partName);
    +     Partition part = new Partition(partVal, dbName, tableName, (int) now, (int) now, psd,
    +         Collections.<String, String> emptyMap());
    +     store.addPartition(part);
    +     // Only partitions 3, 4, 5 and 6 receive statistics.
    +     if (i > 2 && i < 7) {
    +       ColumnStatisticsDesc desc = new ColumnStatisticsDesc(false, dbName, tableName);
    +       desc.setLastAnalyzed(now);
    +       desc.setPartName(partName);
    +       LongColumnStatsData longStats = new LongColumnStatsData();
    +       longStats.setHighValue(1000 + i);
    +       longStats.setLowValue(-1000 - i);
    +       longStats.setNumNulls(i);
    +       longStats.setNumDVs(10 * i + 1);
    +       longStats.setBitVectors(bitVectors[0]);
    +       ColumnStatisticsData data = new ColumnStatisticsData();
    +       data.setLongStats(longStats);
    +       ColumnStatisticsObj obj = new ColumnStatisticsObj();
    +       obj.setColName("col4");
    +       obj.setColType("long");
    +       obj.setStatsData(data);
    +       ColumnStatistics cs = new ColumnStatistics();
    +       cs.setStatsDesc(desc);
    +       cs.addToStatsObj(obj);
    +       store.updatePartitionColumnStatistics(cs, partVal);
    +     }
    +   }
    +
    +   Checker statChecker = new Checker() {
    +     @Override
    +     public void checkStats(AggrStats aggrStats) throws Exception {
    +       Assert.assertEquals(4, aggrStats.getPartsFound());
    +       Assert.assertEquals(1, aggrStats.getColStatsSize());
    +       ColumnStatisticsObj cso = aggrStats.getColStats().get(0);
    +       Assert.assertEquals("col4", cso.getColName());
    +       Assert.assertEquals("long", cso.getColType());
    +       LongColumnStatsData aggregated = cso.getStatsData().getLongStats();
    +       Assert.assertEquals(1006, aggregated.getHighValue(), 0.01);
    +       Assert.assertEquals(-1006, aggregated.getLowValue(), 0.01);
    +       Assert.assertEquals(45, aggregated.getNumNulls());
    +       Assert.assertEquals(3, aggregated.getNumDVs());
    +     }
    +   };
    +   AggrStats aggrStats = store.get_aggr_stats_for(dbName, tableName, partNames,
    +       Arrays.asList("col4"));
    +   statChecker.checkStats(aggrStats);
    + }
    +
    + /**
    +  * Creates ten partitions of which six (ds=0, 2, 3, 5, 6 and 8) carry long column statistics
    +  * for {@code col5_long}; partitions 0, 2 and 3 use {@code bitVectors[0]} and partitions 5, 6
    +  * and 8 use {@code bitVectors[1]} (index {@code i / 5}).
    +  *
    +  * The stored partitions hold 0 + 2 + 3 + 5 + 6 + 8 = 24 nulls and bounds of at most +/-1008,
    +  * while the assertions expect 40 nulls, bounds of +/-1010 and an NDV of 12, i.e. values that
    +  * reach beyond what the partitions with statistics contain.
    +  */
    + @Test
    + public void TwoEndsAndMiddleOfPartitionsHaveBitVectorStatusLong() throws Exception {
    +   String dbName = "default";
    +   String tableName = "snp";
    +   long now = System.currentTimeMillis();
    +   List<FieldSchema> cols = new ArrayList<>();
    +   cols.add(new FieldSchema("col5_long", "long", "nocomment"));
    +   SerDeInfo serde = new SerDeInfo("serde", "seriallib", null);
    +   StorageDescriptor sd = new StorageDescriptor(cols, "file:/tmp", "input", "output", false, 0,
    +       serde, null, null, Collections.<String, String> emptyMap());
    +   List<FieldSchema> partCols = new ArrayList<>();
    +   partCols.add(new FieldSchema("ds", "string", ""));
    +   Table table = new Table(tableName, dbName, "me", (int) now, (int) now, 0, sd, partCols,
    +       Collections.<String, String> emptyMap(), null, null, null);
    +   store.createTable(table);
    +
    +   List<String> partNames = new ArrayList<>();
    +   for (int i = 0; i < 10; i++) {
    +     List<String> partVal = Arrays.asList(String.valueOf(i));
    +     // Derive the name from the value itself: concatenating the List would give "ds=[i]",
    +     // which does not match the "ds=i" names used for the aggregate request below.
    +     String partName = "ds=" + i;
    +     partNames.add(partName);
    +     StorageDescriptor psd = new StorageDescriptor(sd);
    +     psd.setLocation("file:/tmp/default/hit/" + partName);
    +     Partition part = new Partition(partVal, dbName, tableName, (int) now, (int) now, psd,
    +         Collections.<String, String> emptyMap());
    +     store.addPartition(part);
    +     // Statistics exist at both ends and in the middle, with gaps at 1, 4, 7 and 9.
    +     if (i == 0 || i == 2 || i == 3 || i == 5 || i == 6 || i == 8) {
    +       ColumnStatisticsDesc desc = new ColumnStatisticsDesc(false, dbName, tableName);
    +       desc.setLastAnalyzed(now);
    +       desc.setPartName(partName);
    +       LongColumnStatsData longStats = new LongColumnStatsData();
    +       longStats.setHighValue(1000 + i);
    +       longStats.setLowValue(-1000 - i);
    +       longStats.setNumNulls(i);
    +       longStats.setNumDVs(10 * i + 1);
    +       longStats.setBitVectors(bitVectors[i / 5]);
    +       ColumnStatisticsData data = new ColumnStatisticsData();
    +       data.setLongStats(longStats);
    +       ColumnStatisticsObj obj = new ColumnStatisticsObj();
    +       obj.setColName("col5_long");
    +       obj.setColType("long");
    +       obj.setStatsData(data);
    +       ColumnStatistics cs = new ColumnStatistics();
    +       cs.setStatsDesc(desc);
    +       cs.addToStatsObj(obj);
    +       store.updatePartitionColumnStatistics(cs, partVal);
    +     }
    +   }
    +
    +   Checker statChecker = new Checker() {
    +     @Override
    +     public void checkStats(AggrStats aggrStats) throws Exception {
    +       Assert.assertEquals(6, aggrStats.getPartsFound());
    +       Assert.assertEquals(1, aggrStats.getColStatsSize());
    +       ColumnStatisticsObj cso = aggrStats.getColStats().get(0);
    +       Assert.assertEquals("col5_long", cso.getColName());
    +       Assert.assertEquals("long", cso.getColType());
    +       LongColumnStatsData aggregated = cso.getStatsData().getLongStats();
    +       Assert.assertEquals(1010, aggregated.getHighValue(), 0.01);
    +       Assert.assertEquals(-1010, aggregated.getLowValue(), 0.01);
    +       Assert.assertEquals(40, aggregated.getNumNulls());
    +       Assert.assertEquals(12, aggregated.getNumDVs());
    +     }
    +   };
    +   AggrStats aggrStats = store.get_aggr_stats_for(dbName, tableName, partNames,
    +       Arrays.asList("col5_long"));
    +   statChecker.checkStats(aggrStats);
    + }
    +
    + /**
    +  * Decimal-typed variant: ten partitions are created and six of them (ds=0, 2, 3, 5, 6 and 8)
    +  * carry decimal column statistics for {@code col5_decimal}; partitions 0, 2 and 3 use
    +  * {@code bitVectors[0]} and partitions 5, 6 and 8 use {@code bitVectors[1]}.
    +  *
    +  * The stored partitions hold 0 + 2 + 3 + 5 + 6 + 8 = 24 nulls and bounds of at most +/-1008,
    +  * while the assertions expect 40 nulls, bounds of +/-1010 and an NDV of 12, i.e. values that
    +  * reach beyond what the partitions with statistics contain.
    +  */
    + @Test
    + public void TwoEndsAndMiddleOfPartitionsHaveBitVectorStatusDecimal() throws Exception {
    +   String dbName = "default";
    +   String tableName = "snp";
    +   long now = System.currentTimeMillis();
    +   List<FieldSchema> cols = new ArrayList<>();
    +   cols.add(new FieldSchema("col5_decimal", "decimal", "nocomment"));
    +   SerDeInfo serde = new SerDeInfo("serde", "seriallib", null);
    +   StorageDescriptor sd = new StorageDescriptor(cols, "file:/tmp", "input", "output", false, 0,
    +       serde, null, null, Collections.<String, String> emptyMap());
    +   List<FieldSchema> partCols = new ArrayList<>();
    +   partCols.add(new FieldSchema("ds", "string", ""));
    +   Table table = new Table(tableName, dbName, "me", (int) now, (int) now, 0, sd, partCols,
    +       Collections.<String, String> emptyMap(), null, null, null);
    +   store.createTable(table);
    +
    +   List<String> partNames = new ArrayList<>();
    +   for (int i = 0; i < 10; i++) {
    +     List<String> partVal = Arrays.asList(String.valueOf(i));
    +     // Derive the name from the value itself: concatenating the List would give "ds=[i]",
    +     // which does not match the "ds=i" names used for the aggregate request below.
    +     String partName = "ds=" + i;
    +     partNames.add(partName);
    +     StorageDescriptor psd = new StorageDescriptor(sd);
    +     psd.setLocation("file:/tmp/default/hit/" + partName);
    +     Partition part = new Partition(partVal, dbName, tableName, (int) now, (int) now, psd,
    +         Collections.<String, String> emptyMap());
    +     store.addPartition(part);
    +     // Statistics exist at both ends and in the middle, with gaps at 1, 4, 7 and 9.
    +     if (i == 0 || i == 2 || i == 3 || i == 5 || i == 6 || i == 8) {
    +       ColumnStatisticsDesc desc = new ColumnStatisticsDesc(false, dbName, tableName);
    +       desc.setLastAnalyzed(now);
    +       desc.setPartName(partName);
    +       DecimalColumnStatsData decimalStats = new DecimalColumnStatsData();
    +       decimalStats.setHighValue(StatObjectConverter.createThriftDecimal("" + (1000 + i)));
    +       decimalStats.setLowValue(StatObjectConverter.createThriftDecimal("" + (-1000 - i)));
    +       decimalStats.setNumNulls(i);
    +       decimalStats.setNumDVs(10 * i + 1);
    +       decimalStats.setBitVectors(bitVectors[i / 5]);
    +       ColumnStatisticsData data = new ColumnStatisticsData();
    +       data.setDecimalStats(decimalStats);
    +       ColumnStatisticsObj obj = new ColumnStatisticsObj();
    +       obj.setColName("col5_decimal");
    +       obj.setColType("decimal");
    +       obj.setStatsData(data);
    +       ColumnStatistics cs = new ColumnStatistics();
    +       cs.setStatsDesc(desc);
    +       cs.addToStatsObj(obj);
    +       store.updatePartitionColumnStatistics(cs, partVal);
    +     }
    +   }
    +
    +   Checker statChecker = new Checker() {
    +     @Override
    +     public void checkStats(AggrStats aggrStats) throws Exception {
    +       Assert.assertEquals(6, aggrStats.getPartsFound());
    +       Assert.assertEquals(1, aggrStats.getColStatsSize());
    +       ColumnStatisticsObj cso = aggrStats.getColStats().get(0);
    +       Assert.assertEquals("col5_decimal", cso.getColName());
    +       Assert.assertEquals("decimal", cso.getColType());
    +       DecimalColumnStatsData aggregated = cso.getStatsData().getDecimalStats();
    +       Assert.assertEquals(1010, HBaseUtils.getDoubleValue(aggregated.getHighValue()), 0.01);
    +       Assert.assertEquals(-1010, HBaseUtils.getDoubleValue(aggregated.getLowValue()), 0.01);
    +       Assert.assertEquals(40, aggregated.getNumNulls());
    +       Assert.assertEquals(12, aggregated.getNumDVs());
    +     }
    +   };
    +   AggrStats aggrStats = store.get_aggr_stats_for(dbName, tableName, partNames,
    +       Arrays.asList("col5_decimal"));
    +   statChecker.checkStats(aggrStats);
    + }
    +
    + @Test // Scenario: aggregate double-column stats over 10 partitions of which only 6 carry stats.
    + public void TwoEndsAndMiddleOfPartitionsHaveBitVectorStatusDouble() throws Exception {
    + String dbName = "default";
    + String tableName = "snp";
    + long now = System.currentTimeMillis();
    + List<FieldSchema> cols = new ArrayList<>();
    + cols.add(new FieldSchema("col5_double", "double", "nocomment")); // the single data column under test
    + SerDeInfo serde = new SerDeInfo("serde", "seriallib", null);
    + StorageDescriptor sd = new StorageDescriptor(cols, "file:/tmp", "input", "output", false, 0,
    + serde, null, null, Collections.<String, String> emptyMap());
    + List<FieldSchema> partCols = new ArrayList<>();
    + partCols.add(new FieldSchema("ds", "string", "")); // table is partitioned by one string key, "ds"
    + Table table = new Table(tableName, dbName, "me", (int) now, (int) now, 0, sd, partCols, // NOTE(review): (int) cast narrows the millisecond timestamp
    + Collections.<String, String> emptyMap(), null, null, null);
    + store.createTable(table);
    +
    + List<List<String>> partVals = new ArrayList<>();
    + for (int i = 0; i < 10; i++) { // create ten partitions with values "0".."9"
    + List<String> partVal = Arrays.asList("" + i);
    + partVals.add(partVal);
    + StorageDescriptor psd = new StorageDescriptor(sd);
    + psd.setLocation("file:/tmp/default/hit/ds=" + partVal); // NOTE(review): partVal is a List, so this yields e.g. "...ds=[0]"; path says "hit" while the table is "snp"
    + Partition part = new Partition(partVal, dbName, tableName, (int) now, (int) now, psd,
    + Collections.<String, String> emptyMap());
    + store.addPartition(part);
    + if (i == 0 || i == 2 || i == 3 || i == 5 || i == 6 || i == 8) { // stats are written for these six partitions only; 1, 4, 7 and 9 get none
    + ColumnStatistics cs = new ColumnStatistics();
    + ColumnStatisticsDesc desc = new ColumnStatisticsDesc(false, dbName, tableName);
    + desc.setLastAnalyzed(now);
    + desc.setPartName("ds=" + partVal); // NOTE(review): List concatenation again, giving "ds=[i]" rather than "ds=i"
    + cs.setStatsDesc(desc);
    + ColumnStatisticsObj obj = new ColumnStatisticsObj();
    + obj.setColName("col5_double");
    + obj.setColType("double");
    + ColumnStatisticsData data = new ColumnStatisticsData();
    + DoubleColumnStatsData dcsd = new DoubleColumnStatsData();
    + dcsd.setHighValue(1000 + i); // largest value recorded is 1008 (i == 8)
    + dcsd.setLowValue(-1000 - i); // smallest value recorded is -1008 (i == 8)
    + dcsd.setNumNulls(i); // recorded null counts sum to 0+2+3+5+6+8 = 24
    + dcsd.setNumDVs(10 * i + 1);
    + dcsd.setBitVectors(bitVectors[i / 5]); // integer division: index 0 for i in 0..4, index 1 for i in 5..9; bitVectors is declared outside this method
    + data.setDoubleStats(dcsd);
    + obj.setStatsData(data);
    + cs.addToStatsObj(obj);
    + store.updatePartitionColumnStatistics(cs, partVal);
    + }
    + }
    +
    + Checker statChecker = new Checker() { // expectations applied to the aggregated result below
    + @Override
    + public void checkStats(AggrStats aggrStats) throws Exception {
    + Assert.assertEquals(6, aggrStats.getPartsFound()); // equals the number of partitions that were given stats
    + Assert.assertEquals(1, aggrStats.getColStatsSize());
    + ColumnStatisticsObj cso = aggrStats.getColStats().get(0);
    + Assert.assertEquals("col5_double", cso.getColName());
    + Assert.assertEquals("double", cso.getColType());
    + DoubleColumnStatsData lcsd = cso.getStatsData().getDoubleStats();
    + Assert.assertEquals(1010, lcsd.getHighValue(), 0.01); // above the recorded maximum of 1008; presumably an extrapolated bound, confirm against the aggregator
    + Assert.assertEquals(-1010, lcsd.getLowValue(), 0.01); // below the recorded minimum of -1008; presumably extrapolated likewise
    + Assert.assertEquals(40, lcsd.getNumNulls()); // 40 == 24 * 10 / 6, i.e. consistent with scaling 6 partitions' nulls to 10; confirm
    + Assert.assertEquals(12, lcsd.getNumDVs()); // expected value depends on the bitVectors contents, which are not visible here
    + }
    + };
    + List<String> partNames = new ArrayList<>();
    + for (int i = 0; i < 10; i++) { // request all ten partitions, including the four without stats
    + partNames.add("ds=" + i);
    + }
    + AggrStats aggrStats = store.get_aggr_stats_for(dbName, tableName, partNames,
    + Arrays.asList("col5_double"));
    + statChecker.checkStats(aggrStats);
    + }
    +}

    http://git-wip-us.apache.org/repos/asf/hive/blob/96862093/ql/src/test/results/clientpositive/tez/explainuser_1.q.out
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/results/clientpositive/tez/explainuser_1.q.out b/ql/src/test/results/clientpositive/tez/explainuser_1.q.out
    index b501f97..0eb9132 100644
    --- a/ql/src/test/results/clientpositive/tez/explainuser_1.q.out
    +++ b/ql/src/test/results/clientpositive/tez/explainuser_1.q.out
    @@ -426,9 +426,9 @@ Stage-0
                                                <-Map 8 [SIMPLE_EDGE]
                                                  SHUFFLE [RS_15]
                                                    PartitionCols:_col0, _col1, _col2
    - Group By Operator [GBY_14] (rows=1 width=101)
    + Group By Operator [GBY_14] (rows=2 width=101)
                                                      Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float
    - Filter Operator [FIL_49] (rows=3 width=93)
    + Filter Operator [FIL_49] (rows=5 width=74)
                                                        predicate:((((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0.0))) and key is not null)
                                                        TableScan [TS_11] (rows=20 width=83)
                                                          default@cbo_t2,cbo_t2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"]
    @@ -446,9 +446,9 @@ Stage-0
                                                <-Map 1 [SIMPLE_EDGE]
                                                  SHUFFLE [RS_4]
                                                    PartitionCols:_col0, _col1, _col2
    - Group By Operator [GBY_3] (rows=1 width=101)
    + Group By Operator [GBY_3] (rows=2 width=101)
                                                      Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float
    - Filter Operator [FIL_48] (rows=3 width=93)
    + Filter Operator [FIL_48] (rows=5 width=74)
                                                        predicate:((((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0.0))) and key is not null)
                                                        TableScan [TS_0] (rows=20 width=83)
                                                          default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"]
    @@ -1201,11 +1201,11 @@ Stage-0
          Stage-1
            Reducer 3
            File Output Operator [FS_19]
    - Select Operator [SEL_18] (rows=21 width=101)
    + Select Operator [SEL_18] (rows=36 width=101)
                Output:["_col0","_col1","_col2","_col3","_col4"]
    - Filter Operator [FIL_17] (rows=21 width=101)
    + Filter Operator [FIL_17] (rows=36 width=101)
                  predicate:((_col1 > 0) or (_col6 >= 0))
    - Merge Join Operator [MERGEJOIN_28] (rows=21 width=101)
    + Merge Join Operator [MERGEJOIN_28] (rows=36 width=101)
                    Conds:RS_14._col0=RS_15._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col6"]
                  <-Map 5 [SIMPLE_EDGE]
                    SHUFFLE [RS_15]
    @@ -1219,25 +1219,25 @@ Stage-0
                  <-Reducer 2 [SIMPLE_EDGE]
                    SHUFFLE [RS_14]
                      PartitionCols:_col0
    - Filter Operator [FIL_9] (rows=6 width=182)
    + Filter Operator [FIL_9] (rows=10 width=182)
                        predicate:(((_col1 + _col4) = 2) and ((_col4 + 1) = 2))
    - Merge Join Operator [MERGEJOIN_27] (rows=25 width=182)
    + Merge Join Operator [MERGEJOIN_27] (rows=40 width=182)
                          Conds:RS_6._col0=RS_7._col0(Left Outer),Output:["_col0","_col1","_col2","_col3","_col4"]
                        <-Map 1 [SIMPLE_EDGE]
                          SHUFFLE [RS_6]
                            PartitionCols:_col0
    - Select Operator [SEL_2] (rows=5 width=74)
    + Select Operator [SEL_2] (rows=9 width=82)
                              Output:["_col0","_col1","_col2"]
    - Filter Operator [FIL_24] (rows=5 width=74)
    + Filter Operator [FIL_24] (rows=9 width=82)
                                predicate:((((c_int + 1) = 2) and ((c_int > 0) or (c_float >= 0.0))) and key is not null)
                                TableScan [TS_0] (rows=20 width=83)
                                  default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"]
                        <-Map 4 [SIMPLE_EDGE]
                          SHUFFLE [RS_7]
                            PartitionCols:_col0
    - Select Operator [SEL_5] (rows=5 width=71)
    + Select Operator [SEL_5] (rows=9 width=79)
                              Output:["_col0","_col1"]
    - Filter Operator [FIL_25] (rows=5 width=74)
    + Filter Operator [FIL_25] (rows=9 width=82)
                                predicate:((((c_int + 1) = 2) and ((c_int > 0) or (c_float >= 0.0))) and key is not null)
                                TableScan [TS_3] (rows=20 width=83)
                                  default@cbo_t2,cbo_t2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"]
    @@ -1257,27 +1257,27 @@ Stage-0
          Stage-1
            Reducer 2
            File Output Operator [FS_14]
    - Select Operator [SEL_13] (rows=24 width=101)
    + Select Operator [SEL_13] (rows=50 width=101)
                Output:["_col0","_col1","_col2","_col3","_col4"]
    - Filter Operator [FIL_12] (rows=24 width=101)
    + Filter Operator [FIL_12] (rows=50 width=101)
                  predicate:(((_col1 + _col4) = 2) and ((_col1 > 0) or (_col6 >= 0)) and ((_col4 + 1) = 2))
    - Merge Join Operator [MERGEJOIN_19] (rows=72 width=101)
    + Merge Join Operator [MERGEJOIN_19] (rows=200 width=101)
                    Conds:RS_8._col0=RS_9._col0(Right Outer),RS_8._col0=RS_10._col0(Right Outer),Output:["_col1","_col2","_col3","_col4","_col6"]
                  <-Map 1 [SIMPLE_EDGE]
                    SHUFFLE [RS_8]
                      PartitionCols:_col0
    - Select Operator [SEL_2] (rows=6 width=77)
    + Select Operator [SEL_2] (rows=10 width=83)
                        Output:["_col0","_col1","_col2"]
    - Filter Operator [FIL_17] (rows=6 width=77)
    + Filter Operator [FIL_17] (rows=10 width=83)
                          predicate:(((c_int + 1) = 2) and ((c_int > 0) or (c_float >= 0.0)))
                          TableScan [TS_0] (rows=20 width=83)
                            default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"]
                  <-Map 3 [SIMPLE_EDGE]
                    SHUFFLE [RS_9]
                      PartitionCols:_col0
    - Select Operator [SEL_5] (rows=6 width=74)
    + Select Operator [SEL_5] (rows=10 width=80)
                        Output:["_col0","_col1"]
    - Filter Operator [FIL_18] (rows=6 width=77)
    + Filter Operator [FIL_18] (rows=10 width=83)
                          predicate:(((c_int + 1) = 2) and ((c_int > 0) or (c_float >= 0.0)))
                          TableScan [TS_3] (rows=20 width=83)
                            default@cbo_t2,cbo_t2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"]
    @@ -1509,53 +1509,53 @@ Stage-0
                                      Output:["_col0","_col1","_col2"]
                                      Filter Operator [FIL_31] (rows=1 width=101)
                                        predicate:((_col1 + _col4) >= 0)
    - Merge Join Operator [MERGEJOIN_60] (rows=1 width=101)
    + Merge Join Operator [MERGEJOIN_60] (rows=2 width=101)
                                          Conds:RS_28._col0=RS_29._col0(Inner),Output:["_col0","_col1","_col2","_col4"]
                                        <-Reducer 10 [SIMPLE_EDGE]
                                          SHUFFLE [RS_29]
                                            PartitionCols:_col0
    - Filter Operator [FIL_26] (rows=1 width=105)
    + Filter Operator [FIL_26] (rows=2 width=62)
                                              predicate:_col0 is not null
    - Limit [LIM_24] (rows=1 width=105)
    + Limit [LIM_24] (rows=3 width=76)
                                                Number of rows:5
    - Select Operator [SEL_23] (rows=1 width=105)
    + Select Operator [SEL_23] (rows=3 width=76)
                                                  Output:["_col0","_col1"]
                                                <-Reducer 9 [SIMPLE_EDGE]
                                                  SHUFFLE [RS_22]
    - Select Operator [SEL_20] (rows=1 width=105)
    + Select Operator [SEL_20] (rows=3 width=76)
                                                      Output:["_col0","_col1","_col2","_col3"]
    - Group By Operator [GBY_19] (rows=1 width=101)
    + Group By Operator [GBY_19] (rows=3 width=70)
                                                        Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2
                                                      <-Map 8 [SIMPLE_EDGE]
                                                        SHUFFLE [RS_18]
                                                          PartitionCols:_col0, _col1, _col2
    - Group By Operator [GBY_17] (rows=1 width=101)
    + Group By Operator [GBY_17] (rows=3 width=70)
                                                            Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float
    - Filter Operator [FIL_58] (rows=4 width=93)
    + Filter Operator [FIL_58] (rows=6 width=77)
                                                              predicate:(((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0.0)))
                                                              TableScan [TS_14] (rows=20 width=83)
                                                                default@cbo_t2,cbo_t2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"]
                                        <-Reducer 3 [SIMPLE_EDGE]
                                          SHUFFLE [RS_28]
                                            PartitionCols:_col0
    - Filter Operator [FIL_12] (rows=1 width=97)
    + Filter Operator [FIL_12] (rows=2 width=54)
                                              predicate:_col0 is not null
    - Limit [LIM_10] (rows=1 width=97)
    + Limit [LIM_10] (rows=3 width=68)
                                                Number of rows:5
    - Select Operator [SEL_9] (rows=1 width=97)
    + Select Operator [SEL_9] (rows=3 width=68)
                                                  Output:["_col0","_col1","_col2"]
                                                <-Reducer 2 [SIMPLE_EDGE]
                                                  SHUFFLE [RS_8]
    - Select Operator [SEL_6] (rows=1 width=97)
    + Select Operator [SEL_6] (rows=3 width=68)
                                                      Output:["_col0","_col1","_col2"]
    - Group By Operator [GBY_5] (rows=1 width=101)
    + Group By Operator [GBY_5] (rows=3 width=70)
                                                        Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2
                                                      <-Map 1 [SIMPLE_EDGE]
                                                        SHUFFLE [RS_4]
                                                          PartitionCols:_col0, _col1, _col2
    - Group By Operator [GBY_3] (rows=1 width=101)
    + Group By Operator [GBY_3] (rows=3 width=70)
                                                            Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float
    - Filter Operator [FIL_56] (rows=4 width=93)
    + Filter Operator [FIL_56] (rows=6 width=77)
                                                              predicate:(((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0.0)))
                                                              TableScan [TS_0] (rows=20 width=83)
                                                                default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"]
    @@ -1575,16 +1575,16 @@ Stage-0
          Stage-1
            Reducer 2
            File Output Operator [FS_12]
    - Select Operator [SEL_11] (rows=6 width=4)
    + Select Operator [SEL_11] (rows=11 width=4)
                Output:["_col0"]
    - Merge Join Operator [MERGEJOIN_17] (rows=6 width=4)
    + Merge Join Operator [MERGEJOIN_17] (rows=11 width=4)
                  Conds:RS_8._col0=RS_9._col0(Left Semi),Output:["_col1"]
                <-Map 1 [SIMPLE_EDGE]
                  SHUFFLE [RS_8]
                    PartitionCols:_col0
    - Select Operator [SEL_2] (rows=5 width=74)
    + Select Operator [SEL_2] (rows=9 width=82)
                      Output:["_col0","_col1"]
    - Filter Operator [FIL_15] (rows=5 width=74)
    + Filter Operator [FIL_15] (rows=9 width=82)
                        predicate:((((c_int + 1) = 2) and ((c_int > 0) or (c_float >= 0.0))) and key is not null)
                        TableScan [TS_0] (rows=20 width=83)
                          default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"]
    @@ -1615,27 +1615,27 @@ Stage-0
          Stage-1
            Reducer 2
            File Output Operator [FS_18]
    - Select Operator [SEL_17] (rows=12 width=93)
    + Select Operator [SEL_17] (rows=16 width=93)
                Output:["_col0","_col1","_col2"]
    - Merge Join Operator [MERGEJOIN_28] (rows=12 width=93)
    + Merge Join Operator [MERGEJOIN_28] (rows=16 width=93)
                  Conds:RS_13._col0=RS_14._col0(Left Semi),RS_13._col0=RS_15._col0(Left Semi),Output:["_col0","_col1","_col2"]
                <-Map 1 [SIMPLE_EDGE]
                  SHUFFLE [RS_13]
                    PartitionCols:_col0
    - Select Operator [SEL_2] (rows=5 width=74)
    + Select Operator [SEL_2] (rows=9 width=82)
                      Output:["_col0","_col1","_col2"]
    - Filter Operator [FIL_25] (rows=5 width=74)
    + Filter Operator [FIL_25] (rows=9 width=82)
                        predicate:((((c_int + 1) = 2) and ((c_int > 0) or (c_float >= 0.0))) and key is not null)
                        TableScan [TS_0] (rows=20 width=83)
                          default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"]
                <-Map 3 [SIMPLE_EDGE]
                  SHUFFLE [RS_14]
                    PartitionCols:_col0
    - Group By Operator [GBY_10] (rows=2 width=85)
    + Group By Operator [GBY_10] (rows=3 width=85)
                      Output:["_col0"],keys:_col0
    - Select Operator [SEL_5] (rows=5 width=68)
    + Select Operator [SEL_5] (rows=9 width=75)
                        Output:["_col0"]
    - Filter Operator [FIL_26] (rows=5 width=74)
    + Filter Operator [FIL_26] (rows=9 width=82)
                          predicate:((((c_int + 1) = 2) and ((c_int > 0) or (c_float >= 0.0))) and key is not null)
                          TableScan [TS_3] (rows=20 width=83)
                            default@cbo_t2,cbo_t2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"]

Related Discussions

Discussion Navigation
viewthread | post
Discussion Overview
group: commits @
categories: hive, hadoop
posted: Mar 27, '16 at 7:14p
active: Mar 27, '16 at 7:14p
posts: 2
users: 1
website: hive.apache.org

1 user in discussion

Pxiong: 2 posts

People

Translate

site design / logo © 2022 Grokbase