FAQ
Repository: hive
Updated Branches:
   refs/heads/master d7f1b465b -> 1d2e5eed3


HIVE-11499: Datanucleus leaks classloaders when used using embedded metastore with HiveServer2 with UDFs (Vaibhav Gumashta reviewed by Thejas Nair)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/1d2e5eed
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/1d2e5eed
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/1d2e5eed

Branch: refs/heads/master
Commit: 1d2e5eed3eb755bcf8a6d2b85ad0aa9a0bb7b0d9
Parents: d7f1b46
Author: Vaibhav Gumashta <vgumashta@apache.org>
Authored: Tue Oct 13 18:49:30 2015 -0700
Committer: Vaibhav Gumashta <vgumashta@apache.org>
Committed: Tue Oct 13 18:49:30 2015 -0700

----------------------------------------------------------------------
  data/files/identity_udf.jar | Bin 0 -> 710 bytes
  .../apache/hive/jdbc/TestJdbcWithMiniHS2.java | 98 +++++++++++++++++--
  .../hive/metastore/HiveMetaStoreClient.java | 5 +
  .../hadoop/hive/metastore/IMetaStoreClient.java | 7 ++
  .../hadoop/hive/metastore/ObjectStore.java | 33 +++++++
  .../hadoop/hive/ql/session/SessionState.java | 18 ++++
  6 files changed, 155 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/1d2e5eed/data/files/identity_udf.jar
----------------------------------------------------------------------
diff --git a/data/files/identity_udf.jar b/data/files/identity_udf.jar
new file mode 100644
index 0000000..8170995
Binary files /dev/null and b/data/files/identity_udf.jar differ

http://git-wip-us.apache.org/repos/asf/hive/blob/1d2e5eed/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniHS2.java
----------------------------------------------------------------------
diff --git a/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniHS2.java b/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniHS2.java
index 8ba2a12..9c8cf02 100644
--- a/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniHS2.java
+++ b/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniHS2.java
@@ -24,6 +24,7 @@ import static org.junit.Assert.assertNotNull;
  import static org.junit.Assert.assertTrue;
  import static org.junit.Assert.fail;

+import java.lang.reflect.Field;
  import java.sql.Connection;
  import java.sql.DriverManager;
  import java.sql.ResultSet;
@@ -51,17 +52,23 @@ import org.apache.hadoop.fs.Path;
  import org.apache.hadoop.fs.permission.FsPermission;
  import org.apache.hadoop.hive.conf.HiveConf;
  import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
+import org.apache.hadoop.hive.metastore.ObjectStore;
  import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
  import org.apache.hive.jdbc.miniHS2.MiniHS2;
+import org.datanucleus.ClassLoaderResolver;
+import org.datanucleus.NucleusContext;
+import org.datanucleus.api.jdo.JDOPersistenceManagerFactory;
  import org.junit.After;
  import org.junit.AfterClass;
+import org.junit.Assert;
  import org.junit.Before;
  import org.junit.BeforeClass;
  import org.junit.Test;

  public class TestJdbcWithMiniHS2 {
    private static MiniHS2 miniHS2 = null;
- private static Path dataFilePath;
+ private static String dataFileDir;
+ private static Path kvDataFilePath;
    private static final String tmpDir = System.getProperty("test.tmp.dir");

    private Connection hs2Conn = null;
@@ -72,9 +79,8 @@ public class TestJdbcWithMiniHS2 {
      HiveConf conf = new HiveConf();
      conf.setBoolVar(ConfVars.HIVE_SUPPORT_CONCURRENCY, false);
      miniHS2 = new MiniHS2(conf);
- String dataFileDir = conf.get("test.data.files").replace('\\', '/')
- .replace("c:", "");
- dataFilePath = new Path(dataFileDir, "kv1.txt");
+ dataFileDir = conf.get("test.data.files").replace('\\', '/').replace("c:", "");
+ kvDataFilePath = new Path(dataFileDir, "kv1.txt");
      Map<String, String> confOverlay = new HashMap<String, String>();
      miniHS2.start(confOverlay);
    }
@@ -114,7 +120,7 @@ public class TestJdbcWithMiniHS2 {

      // load data
      stmt.execute("load data local inpath '"
- + dataFilePath.toString() + "' into table " + tableName);
+ + kvDataFilePath.toString() + "' into table " + tableName);

      ResultSet res = stmt.executeQuery("SELECT * FROM " + tableName);
      assertTrue(res.next());
@@ -135,7 +141,7 @@ public class TestJdbcWithMiniHS2 {

      // load data
      stmt.execute("load data local inpath '"
- + dataFilePath.toString() + "' into table " + tableName);
+ + kvDataFilePath.toString() + "' into table " + tableName);

      ResultSet res = stmt.executeQuery("SELECT * FROM " + tableName);
      assertTrue(res.next());
@@ -723,4 +729,84 @@ public class TestJdbcWithMiniHS2 {
        fail("Not expecting exception: " + e);
      }
    }
+
+ /**
+ * Tests that DataNucleus' NucleusContext.classLoaderResolverMap clears cached class objects (& hence
+ * doesn't leak classloaders) on closing any session
+ *
+ * @throws Exception
+ */
+ @Test
+ public void testAddJarDataNucleusUnCaching() throws Exception {
+ Path jarFilePath = new Path(dataFileDir, "identity_udf.jar");
+ Connection conn = getConnection(miniHS2.getJdbcURL(), "foo", "bar");
+ String tableName = "testAddJar";
+ Statement stmt = conn.createStatement();
+ stmt.execute("SET hive.support.concurrency = false");
+ // Create table
+ stmt.execute("DROP TABLE IF EXISTS " + tableName);
+ stmt.execute("CREATE TABLE " + tableName + " (key INT, value STRING)");
+ // Load data
+ stmt.execute("LOAD DATA LOCAL INPATH '" + kvDataFilePath.toString() + "' INTO TABLE "
+ + tableName);
+ ResultSet res = stmt.executeQuery("SELECT * FROM " + tableName);
+ // Ensure table is populated
+ assertTrue(res.next());
+
+ int mapSizeBeforeClose;
+ int mapSizeAfterClose;
+ // Add the jar file
+ stmt.execute("ADD JAR " + jarFilePath.toString());
+ // Create a temporary function using the jar
+ stmt.execute("CREATE TEMPORARY FUNCTION func AS 'IdentityStringUDF'");
+ // Execute the UDF
+ stmt.execute("SELECT func(value) from " + tableName);
+ mapSizeBeforeClose = getNucleusClassLoaderResolverMapSize();
+ System.out
+ .println("classLoaderResolverMap size before connection close: " + mapSizeBeforeClose);
+ // Cache size should be > 0 now
+ Assert.assertTrue(mapSizeBeforeClose > 0);
+ conn.close();
+ mapSizeAfterClose = getNucleusClassLoaderResolverMapSize();
+ System.out.println("classLoaderResolverMap size after connection close: " + mapSizeAfterClose);
+ // Cache size should be 0 now
+ Assert.assertTrue("Failed; NucleusContext classLoaderResolverMap size: " + mapSizeAfterClose,
+ mapSizeAfterClose == 0);
+ }
+
+ @SuppressWarnings("unchecked")
+ private int getNucleusClassLoaderResolverMapSize() {
+ Field classLoaderResolverMap;
+ Field pmf;
+ JDOPersistenceManagerFactory jdoPmf = null;
+ NucleusContext nc = null;
+ Map<String, ClassLoaderResolver> cMap;
+ try {
+ pmf = ObjectStore.class.getDeclaredField("pmf");
+ if (pmf != null) {
+ pmf.setAccessible(true);
+ jdoPmf = (JDOPersistenceManagerFactory) pmf.get(null);
+ if (jdoPmf != null) {
+ nc = jdoPmf.getNucleusContext();
+ }
+ }
+ } catch (Exception e) {
+ System.out.println(e);
+ }
+ if (nc != null) {
+ try {
+ classLoaderResolverMap = NucleusContext.class.getDeclaredField("classLoaderResolverMap");
+ if (classLoaderResolverMap != null) {
+ classLoaderResolverMap.setAccessible(true);
+ cMap = (Map<String, ClassLoaderResolver>) classLoaderResolverMap.get(nc);
+ if (cMap != null) {
+ return cMap.size();
+ }
+ }
+ } catch (Exception e) {
+ System.out.println(e);
+ }
+ }
+ return -1;
+ }
  }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/hive/blob/1d2e5eed/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java b/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java
index 8e32966..d92c284 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java
@@ -302,6 +302,11 @@ public class HiveMetaStoreClient implements IMetaStoreClient {
    }

    @Override
+ public boolean isLocalMetaStore() {
+ return localMetaStore;
+ }
+
+ @Override
    public boolean isCompatibleWith(HiveConf conf) {
      if (currentMetaVars == null) {
        return false; // recreate

http://git-wip-us.apache.org/repos/asf/hive/blob/1d2e5eed/metastore/src/java/org/apache/hadoop/hive/metastore/IMetaStoreClient.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/IMetaStoreClient.java b/metastore/src/java/org/apache/hadoop/hive/metastore/IMetaStoreClient.java
index 77820ae..f3a23f5 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/IMetaStoreClient.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/IMetaStoreClient.java
@@ -104,6 +104,13 @@ public interface IMetaStoreClient {
    void setHiveAddedJars(String addedJars);

    /**
+ * Returns true if the current client is using an in process metastore (local metastore).
+ *
+ * @return
+ */
+ boolean isLocalMetaStore();
+
+ /**
     * Tries to reconnect this MetaStoreClient to the MetaStore.
     */
    void reconnect() throws MetaException;

http://git-wip-us.apache.org/repos/asf/hive/blob/1d2e5eed/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java b/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java
index 31f8ccf..136eff5 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java
@@ -21,6 +21,7 @@ package org.apache.hadoop.hive.metastore;
  import static org.apache.commons.lang.StringUtils.join;

  import java.io.IOException;
+import java.lang.reflect.Field;
  import java.net.InetAddress;
  import java.net.URI;
  import java.nio.ByteBuffer;
@@ -154,6 +155,9 @@ import org.apache.hadoop.hive.metastore.partition.spec.PartitionSpecProxy;
  import org.apache.hadoop.util.StringUtils;
  import org.apache.hive.common.util.HiveStringUtils;
  import org.apache.thrift.TException;
+import org.datanucleus.ClassLoaderResolver;
+import org.datanucleus.NucleusContext;
+import org.datanucleus.api.jdo.JDOPersistenceManagerFactory;
  import org.datanucleus.store.rdbms.exceptions.MissingTableException;

  import com.google.common.collect.Lists;
@@ -7656,4 +7660,33 @@ public class ObjectStore implements RawStore, Configurable {
    public void putFileMetadata(List<Long> fileIds, List<ByteBuffer> metadata) {
      // Not supported for now.
    }
+
+ /**
+ * Removed cached classloaders from DataNucleus
+ * DataNucleus caches classloaders in NucleusContext.
+ * In UDFs, this can result in classloaders not getting GCed resulting in PermGen leaks.
+ * This is particularly an issue when using embedded metastore with HiveServer2,
+ * since the current classloader gets modified with each new add jar,
+ * becoming the classloader for downstream classes, which DataNucleus ends up using.
+ * The NucleusContext cache gets freed up only on calling a close on it.
+ * We're not closing NucleusContext since it does a bunch of other things which we don't want.
+ * We're not clearing the cache HashMap by calling HashMap#clear to avoid concurrency issues.
+ */
+ public static void unCacheDataNucleusClassLoaders() {
+ PersistenceManagerFactory pmf = ObjectStore.getPMF();
+ if ((pmf != null) && (pmf instanceof JDOPersistenceManagerFactory)) {
+ JDOPersistenceManagerFactory jdoPmf = (JDOPersistenceManagerFactory) pmf;
+ NucleusContext nc = jdoPmf.getNucleusContext();
+ try {
+ Field classLoaderResolverMap =
+ NucleusContext.class.getDeclaredField("classLoaderResolverMap");
+ classLoaderResolverMap.setAccessible(true);
+ classLoaderResolverMap.set(nc, new HashMap<String, ClassLoaderResolver>());
+ LOG.debug("Removed cached classloaders from DataNucleus NucleusContext");
+ } catch (Exception e) {
+ LOG.warn(e);
+ }
+ }
+ }
+
  }

http://git-wip-us.apache.org/repos/asf/hive/blob/1d2e5eed/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java b/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java
index 41b4bb1..92ac209 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java
@@ -23,6 +23,8 @@ import java.io.File;
  import java.io.IOException;
  import java.io.InputStream;
  import java.io.PrintStream;
+import java.lang.reflect.InvocationTargetException;
+import java.lang.reflect.Method;
  import java.net.URI;
  import java.net.URISyntaxException;
  import java.net.URLClassLoader;
@@ -53,6 +55,7 @@ import org.apache.hadoop.fs.permission.FsPermission;
  import org.apache.hadoop.hive.common.JavaUtils;
  import org.apache.hadoop.hive.conf.HiveConf;
  import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
+import org.apache.hadoop.hive.metastore.ObjectStore;
  import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
  import org.apache.hadoop.hive.ql.MapRedStats;
  import org.apache.hadoop.hive.ql.exec.Registry;
@@ -1537,6 +1540,21 @@ public class SessionState {
      closeSparkSession();
      registry.closeCUDFLoaders();
      dropSessionPaths(conf);
+ unCacheDataNucleusClassLoaders();
+ }
+
+ private void unCacheDataNucleusClassLoaders() {
+ try {
+ Hive threadLocalHive = Hive.get(conf);
+ if ((threadLocalHive != null) && (threadLocalHive.getMSC() != null)
+ && (threadLocalHive.getMSC().isLocalMetaStore())) {
+ if (conf.getVar(ConfVars.METASTORE_RAW_STORE_IMPL).equals(ObjectStore.class.getName())) {
+ ObjectStore.unCacheDataNucleusClassLoaders();
+ }
+ }
+ } catch (Exception e) {
+ LOG.info(e);
+ }
    }

    public void closeSparkSession() {

Search Discussions

Discussion Posts

Follow ups

Related Discussions

Discussion Navigation
viewthread | post
posts ‹ prev | 1 of 2 | next ›
Discussion Overview
groupcommits @
categorieshive, hadoop
postedOct 14, '15 at 1:49a
activeOct 14, '15 at 1:56a
posts2
users1
websitehive.apache.org

1 user in discussion

Vgumashta: 2 posts

People

Translate

site design / logo © 2021 Grokbase