FAQ
Author: ekoifman
Date: Mon Mar 30 18:10:29 2015
New Revision: 1670162

URL: http://svn.apache.org/r1670162
Log:
HIVE-10066 Hive on Tez job submission through WebHCat doesn't ship Tez artifacts (Eugene Koifman, reviewed by Thejas Nair)

Modified:
     hive/trunk/hcatalog/src/test/e2e/templeton/deployers/config/webhcat/webhcat-site.xml
     hive/trunk/hcatalog/src/test/e2e/templeton/deployers/env.sh
     hive/trunk/hcatalog/webhcat/svr/src/main/config/webhcat-default.xml
     hive/trunk/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/AppConfig.java
     hive/trunk/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/HiveDelegator.java
     hive/trunk/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/TempletonDelegator.java
     hive/trunk/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/JobSubmissionConstants.java
     hive/trunk/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/LaunchMapper.java
     hive/trunk/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/TrivialExecService.java

Modified: hive/trunk/hcatalog/src/test/e2e/templeton/deployers/config/webhcat/webhcat-site.xml
URL: http://svn.apache.org/viewvc/hive/trunk/hcatalog/src/test/e2e/templeton/deployers/config/webhcat/webhcat-site.xml?rev=1670162&r1=1670161&r2=1670162&view=diff
==============================================================================
--- hive/trunk/hcatalog/src/test/e2e/templeton/deployers/config/webhcat/webhcat-site.xml (original)
+++ hive/trunk/hcatalog/src/test/e2e/templeton/deployers/config/webhcat/webhcat-site.xml Mon Mar 30 18:10:29 2015
@@ -35,7 +35,7 @@

      <property>
          <name>templeton.libjars</name>
- <value>${env.TEMPLETON_HOME}/../lib/zookeeper-3.4.5.jar</value>
+ <value>${env.TEMPLETON_HOME}/../lib/zookeeper-3.4.6.jar,${env.TEMPLETON_HOME}/../lib/hive-common-1.2.0-SNAPSHOT.jar</value>
          <description>Jars to add to the classpath.</description>
      </property>

@@ -69,6 +69,11 @@
              shipped to the target node in the cluster to execute Pig job which uses
              HCat, Hive query, etc.</description>
      </property>
+
+ <property>
+ <name>templeton.hive.extra.files</name>
+ <value>${env.TEZ_CLIENT_HOME}/conf/tez-site.xml,${env.TEZ_CLIENT_HOME}/,${env.TEZ_CLIENT_HOME}/lib</value>
+ </property>
      <property>
          <name>templeton.hcat.home</name>
          <value>apache-hive-${env.HIVE_VERSION}-bin.tar.gz/apache-hive-${env.HIVE_VERSION}-bin/hcatalog</value>
@@ -101,7 +106,7 @@
      </property>

      <property>
- <!--\,thrift://127.0.0.1:9933-->
+ <!--\,thrift://127.0.0.1:9933,,hive.execution.engine=tez-->
          <name>templeton.hive.properties</name>
          <value>hive.metastore.uris=thrift://localhost:9933,hive.metastore.sasl.enabled=false</value>
      </property>

Modified: hive/trunk/hcatalog/src/test/e2e/templeton/deployers/env.sh
URL: http://svn.apache.org/viewvc/hive/trunk/hcatalog/src/test/e2e/templeton/deployers/env.sh?rev=1670162&r1=1670161&r2=1670162&view=diff
==============================================================================
--- hive/trunk/hcatalog/src/test/e2e/templeton/deployers/env.sh (original)
+++ hive/trunk/hcatalog/src/test/e2e/templeton/deployers/env.sh Mon Mar 30 18:10:29 2015
@@ -36,6 +36,10 @@ if [ -z ${PIG_VERSION} ]; then
    export PIG_VERSION=0.12.2-SNAPSHOT
  fi

+if [ -z ${TEZ_VERSION} ]; then
+ export TEZ_VERSION=0.5.3
+fi
+
  #Root of project source tree
  if [ -z ${PROJ_HOME} ]; then
    export PROJ_HOME=/Users/${USER}/dev/hive
@@ -46,6 +50,7 @@ if [ -z ${HADOOP_HOME} ]; then
    export HADOOP_HOME=/Users/${USER}/dev/hwxhadoop/hadoop-dist/target/hadoop-${HADOOP_VERSION}
  fi

+export TEZ_CLIENT_HOME=/Users/ekoifman/dev/apache-tez-client-${TEZ_VERSION}
  #Make sure Pig is built for the Hadoop version you are running
  export PIG_TAR_PATH=/Users/${USER}/dev/pig-${PIG_VERSION}-src/build
  #this is part of Pig distribution

Modified: hive/trunk/hcatalog/webhcat/svr/src/main/config/webhcat-default.xml
URL: http://svn.apache.org/viewvc/hive/trunk/hcatalog/webhcat/svr/src/main/config/webhcat-default.xml?rev=1670162&r1=1670161&r2=1670162&view=diff
==============================================================================
--- hive/trunk/hcatalog/webhcat/svr/src/main/config/webhcat-default.xml (original)
+++ hive/trunk/hcatalog/webhcat/svr/src/main/config/webhcat-default.xml Mon Mar 30 18:10:29 2015
@@ -39,7 +39,7 @@

    <property>
      <name>templeton.libjars</name>
- <value>${env.TEMPLETON_HOME}/share/webhcat/svr/lib/zookeeper-3.4.3.jar</value>
+ <value>${env.TEMPLETON_HOME}/../lib/zookeeper-3.4.6.jar,${env.TEMPLETON_HOME}/../lib/hive-common-1.2.0-SNAPSHOT.jar</value>
      <description>Jars to add to the classpath.</description>
    </property>

@@ -106,7 +106,20 @@
    <property>
      <name>templeton.hive.path</name>
      <value>hive-0.11.0.tar.gz/hive-0.11.0/bin/hive</value>
- <description>The path to the Hive executable.</description>
+ <description>The path to the Hive executable. Applies only if templeton.hive.archive is defined.</description>
+ </property>
+
+ <property>
+ <name>templeton.hive.extra.files</name>
+ <value>/tez-client/conf/tez-site.xml,/tez-client/,/tez-client/lib</value>
+ <description>The resources in this list will be localized to the node running LaunchMapper and added to HADOOP_CLASSPATH
+ before launching 'hive' command. If the path /foo/bar is a directory, the contents of the entire dir will be localized
+ and ./bar/* will be added to HADOOP_CLASSPATH. Note that since classpath path processing does not recurse into subdirectories,
+ the paths in this property may be overlapping. In the example above, "./tez-site.xml:./tez-client/*:./lib/*" will be added to
+ HADOOP_CLASSPATH.
+ This can be used to specify config files, Tez artifacts, etc. This will be sent as the -files option of the hadoop jar command, thus
+ each path is interpreted by Generic Option Parser. It can be local or hdfs path.
+ </description>
    </property>

    <property>

Modified: hive/trunk/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/AppConfig.java
URL: http://svn.apache.org/viewvc/hive/trunk/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/AppConfig.java?rev=1670162&r1=1670161&r2=1670162&view=diff
==============================================================================
--- hive/trunk/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/AppConfig.java (original)
+++ hive/trunk/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/AppConfig.java Mon Mar 30 18:10:29 2015
@@ -154,6 +154,11 @@ public class AppConfig extends Configura
    public static final String HADOOP_MR_AM_JAVA_OPTS = "yarn.app.mapreduce.am.command-opts";
    public static final String HADOOP_MR_AM_MEMORY_MB = "yarn.app.mapreduce.am.resource.mb";
    public static final String UNIT_TEST_MODE = "templeton.unit.test.mode";
+ /**
+ * comma-separated list of artifacts to add to HADOOP_CLASSPATH env var in
+ * LaunchMapper before launching Hive command
+ */
+ public static final String HIVE_EXTRA_FILES = "templeton.hive.extra.files";


    private static final Log LOG = LogFactory.getLog(AppConfig.class);

Modified: hive/trunk/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/HiveDelegator.java
URL: http://svn.apache.org/viewvc/hive/trunk/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/HiveDelegator.java?rev=1670162&r1=1670161&r2=1670162&view=diff
==============================================================================
--- hive/trunk/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/HiveDelegator.java (original)
+++ hive/trunk/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/HiveDelegator.java Mon Mar 30 18:10:29 2015
@@ -27,6 +27,7 @@ import java.util.List;
  import java.util.Map;

  import org.apache.commons.exec.ExecuteException;
+import org.apache.hadoop.fs.Path;
  import org.apache.hive.hcatalog.templeton.tool.JobSubmissionConstants;
  import org.apache.hive.hcatalog.templeton.tool.TempletonControllerJob;
  import org.apache.hive.hcatalog.templeton.tool.TempletonUtils;
@@ -117,7 +118,7 @@ public class HiveDelegator extends Launc
    private List<String> makeBasicArgs(String execute, String srcFile, String otherFiles,
                                           String statusdir, String completedUrl,
                                           boolean enablelog)
- throws URISyntaxException, FileNotFoundException, IOException,
+ throws URISyntaxException, IOException,
      InterruptedException
    {
      ArrayList<String> args = new ArrayList<String>();
@@ -142,6 +143,30 @@ public class HiveDelegator extends Launc
        args.add(appConf.hiveArchive());
      }

+ //ship additional artifacts, for example for Tez
+ String extras = appConf.get(AppConfig.HIVE_EXTRA_FILES);
+ if(extras != null && extras.length() > 0) {
+ boolean foundFiles = false;
+ for(int i = 0; i < args.size(); i++) {
+ if(FILES.equals(args.get(i))) {
+ String value = args.get(i + 1);
+ args.set(i + 1, value + "," + extras);
+ foundFiles = true;
+ }
+ }
+ if(!foundFiles) {
+ args.add(FILES);
+ args.add(extras);
+ }
+ String[] extraFiles = appConf.getStrings(AppConfig.HIVE_EXTRA_FILES);
+ StringBuilder extraFileNames = new StringBuilder();
+ //now tell LaunchMapper which files it should add to HADOOP_CLASSPATH
+ for(String file : extraFiles) {
+ Path p = new Path(file);
+ extraFileNames.append(p.getName()).append(",");
+ }
+ addDef(args, JobSubmissionConstants.HADOOP_CLASSPATH_EXTRAS, extraFileNames.toString());
+ }
      return args;
    }
  }

Modified: hive/trunk/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/TempletonDelegator.java
URL: http://svn.apache.org/viewvc/hive/trunk/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/TempletonDelegator.java?rev=1670162&r1=1670161&r2=1670162&view=diff
==============================================================================
--- hive/trunk/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/TempletonDelegator.java (original)
+++ hive/trunk/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/TempletonDelegator.java Mon Mar 30 18:10:29 2015
@@ -28,6 +28,10 @@ public class TempletonDelegator {
     * http://hadoop.apache.org/docs/r1.0.4/commands_manual.html#Generic+Options
     */
    public static final String ARCHIVES = "-archives";
+ /**
+ * http://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-common/CommandsManual.html#Generic_Options
+ */
+ public static final String FILES = "-files";

    protected AppConfig appConf;


Modified: hive/trunk/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/JobSubmissionConstants.java
URL: http://svn.apache.org/viewvc/hive/trunk/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/JobSubmissionConstants.java?rev=1670162&r1=1670161&r2=1670162&view=diff
==============================================================================
--- hive/trunk/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/JobSubmissionConstants.java (original)
+++ hive/trunk/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/JobSubmissionConstants.java Mon Mar 30 18:10:29 2015
@@ -31,6 +31,12 @@ public interface JobSubmissionConstants
    public static final String EXIT_FNAME = "exit";
    public static final int WATCHER_TIMEOUT_SECS = 10;
    public static final int KEEP_ALIVE_MSEC = 60 * 1000;
+ /**
+ * A comma-separated list of files to be added to HADOOP_CLASSPATH in
+ * {@link org.apache.hive.hcatalog.templeton.tool.LaunchMapper}. Used to localize additional
+ * artifacts for job submission requests.
+ */
+ public static final String HADOOP_CLASSPATH_EXTRAS = "templeton.hadoop.classpath.extras";
    /*
     * The = sign in the string for TOKEN_FILE_ARG_PLACEHOLDER is required because
     * org.apache.hadoop.util.GenericOptionsParser.preProcessForWindows() prepares

Modified: hive/trunk/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/LaunchMapper.java
URL: http://svn.apache.org/viewvc/hive/trunk/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/LaunchMapper.java?rev=1670162&r1=1670161&r2=1670162&view=diff
==============================================================================
--- hive/trunk/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/LaunchMapper.java (original)
+++ hive/trunk/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/LaunchMapper.java Mon Mar 30 18:10:29 2015
@@ -21,6 +21,7 @@ package org.apache.hive.hcatalog.templet
  import org.apache.commons.logging.Log;
  import org.apache.commons.logging.LogFactory;
  import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
  import org.apache.hadoop.fs.FileSystem;
  import org.apache.hadoop.fs.Path;
  import org.apache.hadoop.hive.common.classification.InterfaceAudience;
@@ -33,7 +34,6 @@ import org.apache.hadoop.mapreduce.Mappe
  import org.apache.hadoop.security.UserGroupInformation;
  import org.apache.hadoop.util.Shell;
  import org.apache.hadoop.util.StringUtils;
-import org.apache.hive.hcatalog.templeton.AppConfig;
  import org.apache.hive.hcatalog.templeton.BadParam;
  import org.apache.hive.hcatalog.templeton.LauncherDelegator;

@@ -115,6 +115,32 @@ public class LaunchMapper extends Mapper
        }
      }
    }
+ private static void handleHadoopClasspathExtras(Configuration conf, Map<String, String> env)
+ throws IOException {
+ if(!TempletonUtils.isset(conf.get(JobSubmissionConstants.HADOOP_CLASSPATH_EXTRAS))) {
+ return;
+ }
+ LOG.debug(HADOOP_CLASSPATH_EXTRAS + "=" + conf.get(HADOOP_CLASSPATH_EXTRAS));
+ String[] files = conf.getStrings(HADOOP_CLASSPATH_EXTRAS);
+ StringBuilder paths = new StringBuilder();
+ FileSystem fs = FileSystem.getLocal(conf);//these have been localized already
+ for(String f : files) {
+ Path p = new Path(f);
+ FileStatus fileStatus = fs.getFileStatus(p);
+ paths.append(f);
+ if(fileStatus.isDirectory()) {
+ paths.append(File.separator).append("*");
+ }
+ paths.append(File.pathSeparator);
+ }
+ paths.setLength(paths.length() - 1);
+ if(TempletonUtils.isset(System.getenv("HADOOP_CLASSPATH"))) {
+ env.put("HADOOP_CLASSPATH", System.getenv("HADOOP_CLASSPATH") + File.pathSeparator + paths);
+ }
+ else {
+ env.put("HADOOP_CLASSPATH", paths.toString());
+ }
+ }
    protected Process startJob(Context context, String user, String overrideClasspath)
      throws IOException, InterruptedException {
      Configuration conf = context.getConfiguration();
@@ -135,6 +161,7 @@ public class LaunchMapper extends Mapper
      Map<String, String> env = TempletonUtils.hadoopUserEnv(user, overrideClasspath);
      handlePigEnvVars(conf, env);
      handleSqoop(conf, env);
+ handleHadoopClasspathExtras(conf, env);
      List<String> jarArgsList = new LinkedList<String>(Arrays.asList(jarArgs));
      handleTokenFile(jarArgsList, JobSubmissionConstants.TOKEN_FILE_ARG_PLACEHOLDER, "mapreduce.job.credentials.binary");
      handleTokenFile(jarArgsList, JobSubmissionConstants.TOKEN_FILE_ARG_PLACEHOLDER_TEZ, "tez.credentials.path");

Modified: hive/trunk/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/TrivialExecService.java
URL: http://svn.apache.org/viewvc/hive/trunk/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/TrivialExecService.java?rev=1670162&r1=1670161&r2=1670162&view=diff
==============================================================================
--- hive/trunk/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/TrivialExecService.java (original)
+++ hive/trunk/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/TrivialExecService.java Mon Mar 30 18:10:29 2015
@@ -76,32 +76,31 @@ final class TrivialExecService {
      }
    }
    /**
- * Print files and directories in current directory. Will list files in the sub-directory (only 1 level deep)
- * time honored tradition in WebHCat of borrowing from Oozie
+ * Print files and directories in current {@code dir}.
     */
- private static void printContentsOfDir(String dir) {
+ private static StringBuilder printContentsOfDir(String dir, int depth, StringBuilder sb) {
+ StringBuilder indent = new StringBuilder();
+ for(int i = 0; i < depth; i++) {
+ indent.append("--");
+ }
      File folder = new File(dir);
- StringBuilder sb = new StringBuilder("Files in '").append(dir).append("' dir:").append(folder.getAbsolutePath()).append('\n');
+ sb.append(indent).append("Files in '").append(dir).append("' dir:").append(folder.getAbsolutePath()).append('\n');

      File[] listOfFiles = folder.listFiles();
+ if(listOfFiles == null) {
+ return sb;
+ }
      for (File fileName : listOfFiles) {
        if (fileName.isFile()) {
- sb.append("File: ").append(fileName.getName()).append('\n');
+ sb.append(indent).append("File: ").append(fileName.getName()).append('\n');
        }
        else if (fileName.isDirectory()) {
- sb.append("Dir: ").append(fileName.getName()).append('\n');
- File subDir = new File(fileName.getName());
- File[] moreFiles = subDir.listFiles();
- for (File subFileName : moreFiles) {
- if (subFileName.isFile()) {
- sb.append("--File: ").append(subFileName.getName()).append('\n');
- }
- else if (subFileName.isDirectory()) {
- sb.append("--Dir: ").append(subFileName.getName()).append('\n');
- }
- }
+ printContentsOfDir(fileName.getName(), depth+1, sb);
        }
      }
- LOG.info(sb.toString());
+ return sb;
+ }
+ private static void printContentsOfDir(String dir) {
+ LOG.info(printContentsOfDir(dir, 0, new StringBuilder()).toString());
    }
  }

Search Discussions

Related Discussions

Discussion Navigation
viewthread | post
posts ‹ prev | 1 of 1 | next ›
Discussion Overview
groupcommits @
categorieshive, hadoop
postedMar 30, '15 at 6:10p
activeMar 30, '15 at 6:10p
posts1
users1
websitehive.apache.org

1 user in discussion

Ekoifman: 1 post

People

Translate

site design / logo © 2021 Grokbase