On Tue, Mar 26, 2013 at 8:42 AM, RAKESH THERANI wrote:
Hey Lenni,
Thanks for the prompt reply.
Configuration: core-default.xml, core-site.xml, mapred-default.xml,
mapred-site.xml, yarn-default.xml, yarn-site.xml, hdfs-default.xml,
hdfs-site.xml
Key Value
dfs.datanode.data.dir file://${hadoop.tmp.dir}/dfs/data
dfs.namenode.checkpoint.txns 40000
s3.replication 3
mapreduce.output.fileoutputformat.compress.type RECORD
mapreduce.jobtracker.jobhistory.lru.cache.size 5
dfs.datanode.failed.volumes.tolerated 0
hadoop.http.filter.initializers
org.apache.hadoop.http.lib.StaticUserWebFilter
mapreduce.cluster.temp.dir ${hadoop.tmp.dir}/mapred/temp
mapreduce.reduce.shuffle.memory.limit.percent 0.25
yarn.nodemanager.keytab /etc/krb5.keytab
dfs.https.server.keystore.resource ssl-server.xml
mapreduce.reduce.skip.maxgroups 0
dfs.domain.socket.path /var/run/hdfs-sockets/dn
hadoop.http.authentication.kerberos.keytab ${user.home}/hadoop.keytab
yarn.nodemanager.localizer.client.thread-count 5
ha.failover-controller.new-active.rpc-timeout.ms 60000
mapreduce.framework.name local
ha.health-monitor.check-interval.ms 1000
io.file.buffer.size 4096
dfs.namenode.checkpoint.period 3600
mapreduce.task.tmp.dir ./tmp
ipc.client.kill.max 10
yarn.resourcemanager.scheduler.class
org.apache.hadoop.yarn.server.resourcemanager.scheduler.fifo.FifoScheduler
mapreduce.jobtracker.taskcache.levels 2
s3.stream-buffer-size 4096
dfs.namenode.secondary.http-address 0.0.0.0:50090
dfs.namenode.decommission.interval 30
dfs.namenode.http-address dcp-xs-hdfs-namenode-01:50070
mapreduce.task.files.preserve.failedtasks false
dfs.encrypt.data.transfer false
dfs.datanode.address 0.0.0.0:50010
hadoop.http.authentication.token.validity 36000
hadoop.security.group.mapping.ldap.search.filter.group (objectClass=group)
dfs.client.failover.max.attempts 15
kfs.client-write-packet-size 65536
yarn.admin.acl *
yarn.resourcemanager.application-tokens.master-key-rolling-interval-secs
86400
dfs.client.failover.connection.retries.on.timeouts 0
mapreduce.map.sort.spill.percent 0.80
file.stream-buffer-size 4096
dfs.webhdfs.enabled false
ipc.client.connection.maxidletime 10000
mapreduce.jobtracker.persist.jobstatus.hours 1
dfs.datanode.ipc.address 0.0.0.0:50020
yarn.nodemanager.address 0.0.0.0:0
yarn.app.mapreduce.am.job.task.listener.thread-count 30
dfs.client.read.shortcircuit true
dfs.namenode.safemode.extension 30000
ha.zookeeper.parent-znode /hadoop-ha
yarn.nodemanager.container-executor.class
org.apache.hadoop.yarn.server.nodemanager.DefaultContainerExecutor
io.skip.checksum.errors false
yarn.resourcemanager.scheduler.client.thread-count 50
hadoop.http.authentication.kerberos.principal HTTP/[email protected]
mapreduce.reduce.log.level INFO
fs.s3.maxRetries 4
hadoop.kerberos.kinit.command kinit
yarn.nodemanager.process-kill-wait.ms 2000
dfs.namenode.name.dir.restore false
mapreduce.jobtracker.handler.count 10
yarn.app.mapreduce.client-am.ipc.max-retries 1
dfs.client.use.datanode.hostname false
hadoop.util.hash.type murmur
io.seqfile.lazydecompress true
dfs.datanode.dns.interface default
yarn.nodemanager.disk-health-checker.min-healthy-disks 0.25
mapreduce.job.maxtaskfailures.per.tracker 3
mapreduce.tasktracker.healthchecker.script.timeout 600000
hadoop.security.group.mapping.ldap.search.attr.group.name cn
fs.df.interval 60000
dfs.namenode.kerberos.internal.spnego.principal
${dfs.web.authentication.kerberos.principal}
mapreduce.job.reduce.shuffle.consumer.plugin.class
org.apache.hadoop.mapreduce.task.reduce.Shuffle
mapreduce.jobtracker.address local
mapreduce.tasktracker.tasks.sleeptimebeforesigkill 5000
dfs.journalnode.rpc-address 0.0.0.0:8485
mapreduce.job.acl-view-job
dfs.client.block.write.replace-datanode-on-failure.policy DEFAULT
dfs.namenode.replication.interval 3
dfs.namenode.num.checkpoints.retained 2
mapreduce.tasktracker.http.address 0.0.0.0:50060
yarn.resourcemanager.scheduler.address 0.0.0.0:8030
dfs.datanode.directoryscan.threads 1
hadoop.security.group.mapping.ldap.ssl false
mapreduce.task.merge.progress.records 10000
dfs.heartbeat.interval 3
net.topology.script.number.args 100
mapreduce.local.clientfactory.class.name
org.apache.hadoop.mapred.LocalClientFactory
dfs.client-write-packet-size 65536
io.native.lib.available true
dfs.client.failover.connection.retries 0
yarn.nodemanager.disk-health-checker.interval-ms 120000
dfs.blocksize 134217728
yarn.resourcemanager.container-tokens.master-key-rolling-interval-secs
86400
mapreduce.jobhistory.webapp.address 0.0.0.0:19888
yarn.resourcemanager.resource-tracker.client.thread-count 50
dfs.blockreport.initialDelay 0
ha.health-monitor.rpc-timeout.ms 45000
mapreduce.reduce.markreset.buffer.percent 0.0
dfs.ha.tail-edits.period 60
mapreduce.admin.user.env
LD_LIBRARY_PATH=$HADOOP_COMMON_HOME/lib/native
yarn.resourcemanager.client.thread-count 50
yarn.nodemanager.health-checker.script.timeout-ms 1200000
file.bytes-per-checksum 512
dfs.replication.max 512
dfs.namenode.max.extra.edits.segments.retained 10000
io.map.index.skip 0
mapreduce.task.timeout 600000
dfs.datanode.du.reserved 0
dfs.support.append true
ftp.blocksize 67108864
dfs.client.file-block-storage-locations.num-threads 10
yarn.nodemanager.container-manager.thread-count 20
ipc.server.listen.queue.size 128
yarn.resourcemanager.amliveliness-monitor.interval-ms 1000
hadoop.ssl.hostname.verifier DEFAULT
mapreduce.tasktracker.dns.interface default
hadoop.security.group.mapping.ldap.search.attr.member member
mapreduce.tasktracker.outofband.heartbeat false
mapreduce.job.userlog.retain.hours 24
yarn.nodemanager.resource.memory-mb 8192
dfs.namenode.delegation.token.renew-interval 86400000
hadoop.ssl.keystores.factory.class
org.apache.hadoop.security.ssl.FileBasedKeyStoresFactory
dfs.datanode.sync.behind.writes false
dfs.client.read.shortcircuit.skip.checksum false
mapreduce.map.maxattempts 4
dfs.datanode.handler.count 10
hadoop.ssl.require.client.cert false
ftp.client-write-packet-size 65536
ipc.server.tcpnodelay false
mapreduce.reduce.shuffle.retry-delay.max.ms 60000
mapreduce.task.profile.reduces 0-2
ha.health-monitor.connect-retry-interval.ms 1000
hadoop.fuse.connection.timeout 300
dfs.permissions.superusergroup supergroup
mapreduce.jobtracker.jobhistory.task.numberprogresssplits 12
fs.ftp.host.port 21
mapreduce.map.speculative true
dfs.datanode.data.dir.perm 700
mapreduce.client.submit.file.replication 10
s3native.blocksize 67108864
mapreduce.job.ubertask.maxmaps 9
dfs.namenode.replication.min 1
mapreduce.cluster.acls.enabled false
hadoop.security.uid.cache.secs 14400
yarn.nodemanager.localizer.fetch.thread-count 4
map.sort.class org.apache.hadoop.util.QuickSort
fs.trash.checkpoint.interval 0
dfs.namenode.name.dir file://${hadoop.tmp.dir}/dfs/name
yarn.app.mapreduce.am.staging-dir /tmp/hadoop-yarn/staging
fs.AbstractFileSystem.file.impl org.apache.hadoop.fs.local.LocalFs
yarn.nodemanager.env-whitelist
JAVA_HOME,HADOOP_COMMON_HOME,HADOOP_HDFS_HOME,HADOOP_CONF_DIR,YARN_HOME
dfs.image.compression.codec org.apache.hadoop.io.compress.DefaultCodec
mapreduce.job.reduces 1
mapreduce.job.complete.cancel.delegation.tokens true
hadoop.security.group.mapping.ldap.search.filter.user
(&(objectClass=user)(sAMAccountName={0}))
yarn.nodemanager.sleep-delay-before-sigkill.ms 250
mapreduce.tasktracker.healthchecker.interval 60000
mapreduce.jobtracker.heartbeats.in.second 100
hadoop.security.auth_to_local DEFAULT
kfs.bytes-per-checksum 512
mapreduce.jobtracker.persist.jobstatus.dir /jobtracker/jobsInfo
dfs.namenode.backup.http-address 0.0.0.0:50105
hadoop.rpc.protection authentication
dfs.namenode.https-address dcp-xs-hdfs-namenode-01:50470
ftp.stream-buffer-size 4096
dfs.ha.log-roll.period 120
yarn.resourcemanager.admin.client.thread-count 1
file.client-write-packet-size 65536
hadoop.http.authentication.simple.anonymous.allowed true
yarn.nodemanager.log.retain-seconds 10800
dfs.datanode.drop.cache.behind.reads false
dfs.image.transfer.bandwidthPerSec 0
ha.failover-controller.cli-check.rpc-timeout.ms 20000
mapreduce.tasktracker.instrumentation
org.apache.hadoop.mapred.TaskTrackerMetricsInst
io.mapfile.bloom.size 1048576
dfs.ha.fencing.ssh.connect-timeout 30000
s3.bytes-per-checksum 512
fs.automatic.close true
fs.trash.interval 1
hadoop.security.authentication simple
fs.defaultFS hdfs://dcp-xs-hdfs-namenode-01:8020
hadoop.ssl.server.conf ssl-server.xml
ipc.client.connect.max.retries 10
yarn.resourcemanager.delayed.delegation-token.removal-interval-ms
30000
dfs.journalnode.http-address 0.0.0.0:8480
mapreduce.jobtracker.taskscheduler
org.apache.hadoop.mapred.JobQueueTaskScheduler
mapreduce.job.speculative.speculativecap 0.1
yarn.am.liveness-monitor.expiry-interval-ms 600000
mapreduce.output.fileoutputformat.compress false
net.topology.node.switch.mapping.impl
org.apache.hadoop.net.ScriptBasedMapping
dfs.namenode.replication.considerLoad true
dfs.namenode.audit.loggers default
mapreduce.job.counters.max 120
yarn.resourcemanager.address 0.0.0.0:8032
dfs.client.block.write.retries 3
yarn.resourcemanager.nm.liveness-monitor.interval-ms 1000
io.map.index.interval 128
mapred.child.java.opts -Xmx200m
mapreduce.tasktracker.local.dir.minspacestart 0
mapreduce.client.progressmonitor.pollinterval 1000
dfs.client.https.keystore.resource ssl-client.xml
mapreduce.jobtracker.tasktracker.maxblacklists 4
mapreduce.job.queuename default
yarn.nodemanager.localizer.address 0.0.0.0:8040
io.mapfile.bloom.error.rate 0.005
mapreduce.job.split.metainfo.maxsize 10000000
yarn.nodemanager.delete.thread-count 4
ipc.client.tcpnodelay false
dfs.https.port 50470
yarn.app.mapreduce.am.resource.mb 1536
dfs.datanode.dns.nameserver default
mapreduce.map.output.compress.codec
org.apache.hadoop.io.compress.DefaultCodec
dfs.namenode.accesstime.precision 3600000
mapreduce.map.log.level INFO
io.seqfile.compress.blocksize 1000000
mapreduce.tasktracker.taskcontroller
org.apache.hadoop.mapred.DefaultTaskController
hadoop.security.groups.cache.secs 300
mapreduce.job.end-notification.max.attempts 5
yarn.nodemanager.webapp.address 0.0.0.0:8042
mapreduce.jobtracker.expire.trackers.interval 600000
yarn.resourcemanager.webapp.address 0.0.0.0:8088
yarn.nodemanager.health-checker.interval-ms 600000
hadoop.security.authorization false
mapreduce.job.map.output.collector.class
org.apache.hadoop.mapred.MapTask$MapOutputBuffer
fs.ftp.host 0.0.0.0
yarn.app.mapreduce.am.scheduler.heartbeat.interval-ms 1000
mapreduce.ifile.readahead true
ha.zookeeper.session-timeout.ms 5000
mapreduce.tasktracker.taskmemorymanager.monitoringinterval 5000
mapreduce.reduce.shuffle.parallelcopies 5
mapreduce.map.skip.maxrecords 0
dfs.https.enable false
mapreduce.reduce.shuffle.read.timeout 180000
mapreduce.output.fileoutputformat.compress.codec
org.apache.hadoop.io.compress.DefaultCodec
mapreduce.jobtracker.instrumentation
org.apache.hadoop.mapred.JobTrackerMetricsInst
yarn.nodemanager.remote-app-log-dir-suffix logs
dfs.blockreport.intervalMsec 21600000
mapreduce.reduce.speculative true
mapreduce.jobhistory.keytab /etc/security/keytab/jhs.service.keytab
dfs.datanode.balance.bandwidthPerSec 1048576
file.blocksize 67108864
yarn.resourcemanager.admin.address 0.0.0.0:8033
yarn.resourcemanager.resource-tracker.address 0.0.0.0:8031
mapreduce.tasktracker.local.dir.minspacekill 0
mapreduce.jobtracker.staging.root.dir ${hadoop.tmp.dir}/mapred/staging
mapreduce.jobtracker.retiredjobs.cache.size 1000
ipc.client.connect.max.retries.on.timeouts 45
ha.zookeeper.acl world:anyone:rwcda
yarn.nodemanager.local-dirs ${hadoop.tmp.dir}/nm-local-dir
mapreduce.reduce.shuffle.connect.timeout 180000
dfs.block.access.key.update.interval 600
dfs.block.access.token.lifetime 600
mapreduce.job.end-notification.retry.attempts 5
mapreduce.jobtracker.system.dir ${hadoop.tmp.dir}/mapred/system
yarn.nodemanager.admin-env MALLOC_ARENA_MAX=$MALLOC_ARENA_MAX
mapreduce.jobtracker.jobhistory.block.size 3145728
yarn.log-aggregation.retain-seconds -1
mapreduce.tasktracker.indexcache.mb 10
dfs.namenode.checkpoint.check.period 60
dfs.client.block.write.replace-datanode-on-failure.enable true
dfs.datanode.directoryscan.interval 21600
yarn.nodemanager.container-monitor.interval-ms 3000
dfs.default.chunk.view.size 32768
mapreduce.job.speculative.slownodethreshold 1.0
mapreduce.job.reduce.slowstart.completedmaps 0.05
hadoop.security.instrumentation.requires.admin false
dfs.namenode.safemode.min.datanodes 0
hadoop.http.authentication.signature.secret.file
${user.home}/hadoop-http-auth-signature-secret
mapreduce.reduce.maxattempts 4
yarn.nodemanager.localizer.cache.target-size-mb 10240
s3native.replication 3
dfs.datanode.https.address 0.0.0.0:50475
mapreduce.reduce.skip.proc.count.autoincr true
file.replication 1
hadoop.hdfs.configuration.version 1
ipc.client.idlethreshold 4000
hadoop.tmp.dir /tmp/hadoop-${user.name}
mapreduce.jobhistory.address 0.0.0.0:10020
mapreduce.jobtracker.restart.recover false
mapreduce.cluster.local.dir ${hadoop.tmp.dir}/mapred/local
yarn.ipc.serializer.type protocolbuffers
dfs.namenode.decommission.nodes.per.interval 5
dfs.namenode.delegation.key.update-interval 86400000
fs.s3.buffer.dir ${hadoop.tmp.dir}/s3
dfs.namenode.support.allow.format true
yarn.nodemanager.remote-app-log-dir /tmp/logs
hadoop.work.around.non.threadsafe.getpwuid false
dfs.ha.automatic-failover.enabled false
mapreduce.jobtracker.persist.jobstatus.active true
dfs.namenode.logging.level info
yarn.nodemanager.log-dirs ${yarn.log.dir}/userlogs
ha.health-monitor.sleep-after-disconnect.ms 1000
dfs.namenode.checkpoint.edits.dir ${dfs.namenode.checkpoint.dir}
hadoop.rpc.socket.factory.class.default
org.apache.hadoop.net.StandardSocketFactory
yarn.resourcemanager.keytab /etc/krb5.keytab
dfs.datanode.http.address 0.0.0.0:50075
mapreduce.task.profile false
dfs.namenode.edits.dir ${dfs.namenode.name.dir}
hadoop.fuse.timer.period 5
mapreduce.map.skip.proc.count.autoincr true
fs.AbstractFileSystem.viewfs.impl org.apache.hadoop.fs.viewfs.ViewFs
mapreduce.job.speculative.slowtaskthreshold 1.0
s3native.stream-buffer-size 4096
yarn.nodemanager.delete.debug-delay-sec 0
dfs.secondary.namenode.kerberos.internal.spnego.principal
${dfs.web.authentication.kerberos.principal}
dfs.namenode.safemode.threshold-pct 0.999f
mapreduce.ifile.readahead.bytes 4194304
yarn.scheduler.maximum-allocation-mb 8192
s3native.bytes-per-checksum 512
mapreduce.job.committer.setup.cleanup.needed true
kfs.replication 3
yarn.nodemanager.log-aggregation.compression-type none
hadoop.http.authentication.type simple
dfs.client.failover.sleep.base.millis 500
yarn.nodemanager.heartbeat.interval-ms 1000
hadoop.jetty.logs.serve.aliases true
ha.failover-controller.graceful-fence.rpc-timeout.ms 5000
mapreduce.reduce.shuffle.input.buffer.percent 0.70
dfs.datanode.max.transfer.threads 4096
mapreduce.task.io.sort.mb 100
mapreduce.reduce.merge.inmem.threshold 1000
dfs.namenode.handler.count 10
hadoop.ssl.client.conf ssl-client.xml
yarn.resourcemanager.container.liveness-monitor.interval-ms 600000
mapreduce.client.completion.pollinterval 5000
yarn.nodemanager.vmem-pmem-ratio 2.1
yarn.app.mapreduce.client.max-retries 3
hadoop.ssl.enabled false
fs.AbstractFileSystem.hdfs.impl org.apache.hadoop.fs.Hdfs
mapreduce.tasktracker.reduce.tasks.maximum 2
mapreduce.reduce.input.buffer.percent 0.0
kfs.stream-buffer-size 4096
dfs.namenode.invalidate.work.pct.per.iteration 0.32f
yarn.app.mapreduce.am.command-opts -Xmx1024m
dfs.bytes-per-checksum 512
dfs.replication 3
mapreduce.shuffle.ssl.file.buffer.size 65536
dfs.permissions.enabled true
mapreduce.jobtracker.maxtasks.perjob -1
dfs.datanode.use.datanode.hostname false
mapreduce.task.userlog.limit.kb 0
dfs.namenode.fs-limits.max-directory-items 0
s3.client-write-packet-size 65536
dfs.client.failover.sleep.max.millis 15000
mapreduce.job.maps 2
dfs.namenode.fs-limits.max-component-length 0
mapreduce.map.output.compress false
s3.blocksize 67108864
dfs.namenode.edits.journal-plugin.qjournal
org.apache.hadoop.hdfs.qjournal.client.QuorumJournalManager
kfs.blocksize 67108864
dfs.client.https.need-auth false
yarn.scheduler.minimum-allocation-mb 1024
ftp.replication 3
mapreduce.input.fileinputformat.split.minsize 0
fs.s3n.block.size 67108864
yarn.ipc.rpc.class org.apache.hadoop.yarn.ipc.HadoopYarnProtoRPC
dfs.namenode.num.extra.edits.retained 1000000
hadoop.http.staticuser.user dr.who
yarn.nodemanager.localizer.cache.cleanup.interval-ms 600000
mapreduce.job.jvm.numtasks 1
mapreduce.task.profile.maps 0-2
mapreduce.shuffle.port 8080
mapreduce.reduce.shuffle.merge.percent 0.66
mapreduce.jobtracker.http.address 0.0.0.0:50030
mapreduce.task.skip.start.attempts 2
mapreduce.task.io.sort.factor 10
dfs.namenode.checkpoint.dir file://${hadoop.tmp.dir}/dfs/namesecondary
tfile.fs.input.buffer.size 262144
tfile.io.chunk.size 1048576
fs.s3.block.size 67108864
io.serializations
org.apache.hadoop.io.serializer.WritableSerialization,org.apache.hadoop.io.serializer.avro.AvroSpecificSerialization,org.apache.hadoop.io.serializer.avro.AvroReflectSerialization
yarn.resourcemanager.max-completed-applications 10000
mapreduce.jobhistory.principal jhs/[email protected]
mapreduce.job.end-notification.retry.interval 1
dfs.namenode.backup.address 0.0.0.0:50100
dfs.block.access.token.enable false
io.seqfile.sorter.recordlimit 1000000
s3native.client-write-packet-size 65536
ftp.bytes-per-checksum 512
hadoop.security.group.mapping
org.apache.hadoop.security.JniBasedUnixGroupsMappingWithFallback
dfs.client.domain.socket.data.traffic false
dfs.client.file-block-storage-locations.timeout 500
mapreduce.job.end-notification.max.retry.interval 5
yarn.acl.enable true
yarn.nm.liveness-monitor.expiry-interval-ms 600000
mapreduce.tasktracker.map.tasks.maximum 2
dfs.namenode.max.objects 0
dfs.namenode.delegation.token.max-lifetime 604800000
mapreduce.job.hdfs-servers ${fs.defaultFS}
yarn.application.classpath
$HADOOP_CONF_DIR,$HADOOP_COMMON_HOME/share/hadoop/common/*,$HADOOP_COMMON_HOME/share/hadoop/common/lib/*,$HADOOP_HDFS_HOME/share/hadoop/hdfs/*,$HADOOP_HDFS_HOME/share/hadoop/hdfs/lib/*,$YARN_HOME/share/hadoop/yarn/*,$YARN_HOME/share/hadoop/yarn/lib/*,$YARN_HOME/share/hadoop/mapreduce/*,$YARN_HOME/share/hadoop/mapreduce/lib/*
mapreduce.tasktracker.dns.nameserver default
dfs.datanode.hdfs-blocks-metadata.enabled true
yarn.nodemanager.aux-services.mapreduce.shuffle.class
org.apache.hadoop.mapred.ShuffleHandler
dfs.datanode.readahead.bytes 4193404
mapreduce.job.ubertask.maxreduces 1
dfs.image.compress false
mapreduce.shuffle.ssl.enabled false
yarn.log-aggregation-enable false
mapreduce.tasktracker.report.address 127.0.0.1:0
mapreduce.tasktracker.http.threads 40
dfs.stream-buffer-size 4096
tfile.fs.output.buffer.size 262144
fs.permissions.umask-mode 022
yarn.resourcemanager.am.max-retries 1
ha.failover-controller.graceful-fence.connection.retries 1
dfs.datanode.drop.cache.behind.writes false
mapreduce.job.ubertask.enable false
hadoop.common.configuration.version 0.23.0
dfs.namenode.replication.work.multiplier.per.iteration 2
mapreduce.job.acl-modify-job
io.seqfile.local.dir ${hadoop.tmp.dir}/io/local
fs.s3.sleepTimeSeconds 10
mapreduce.client.output.filter FAILED
Command-line Flags
--dump_ir=false
--module_output=
--be_port=22000
--classpath=
--hostname=dcp-xs-hdfs-datanode-02
--ipaddress=172.16.3.154
--keytab_file=
--planservice_host=localhost
--planservice_port=20000
--principal=
--max_row_batches=0
--randomize_scan_ranges=false
--num_disks=0
--num_threads_per_disk=1
--read_size=8388608
--enable_webserver=true
--use_statestore=true
--nn=dcp-xs-hdfs-namenode-01
--nn_port=8020
--serialize_batch=false
--status_report_interval=5
--abort_on_config_error=true
--be_service_threads=64
--beeswax_port=21000
--default_query_options=
--fe_service_threads=64
--heap_profile_dir=
--hs2_port=21050
--load_catalog_at_startup=false
--log_mem_usage_interval=0
--mem_limit=-1
--query_log_size=25
--use_planservice=false
--statestore_subscriber_timeout_seconds=10
--state_store_host=dcp-xs-hdfs-namenode-01
--state_store_port=24000
--state_store_subscriber_port=23000
--kerberos_reinit_interval=60
--sasl_path=/usr/lib/sasl2:/usr/lib64/sasl2:/usr/local/lib/sasl2:/usr/lib/x86_64-linux-gnu/sasl2
--web_log_bytes=1048576
--log_filename=impalad
--rpc_cnxn_attempts=10
--rpc_cnxn_retry_interval_ms=2000
--enable_webserver_doc_root=true
--webserver_doc_root=/usr/lib/impala
--webserver_interface=
--webserver_port=25000
--flagfile=/run/cloudera-scm-agent/process/93-impala-IMPALAD/impala-conf/impalad_flags
--fromenv=
--tryfromenv=
--undefok=
--tab_completion_columns=80
--tab_completion_word=
--help=false
--helpfull=false
--helpmatch=
--helpon=
--helppackage=false
--helpshort=false
--helpxml=false
--version=false
--alsologtoemail=
--alsologtostderr=false
--drop_log_memory=true
--log_backtrace_at=
--log_dir=/var/log/impalad
--log_link=
--log_prefix=true
--logbuflevel=-1
--logbufsecs=30
--logemaillevel=999
--logmailer=/bin/mail
--logtostderr=false
--max_log_size=200
--minloglevel=0
--stderrthreshold=2
--stop_logging_if_full_disk=false
--symbolize_stacktrace=true
--v=1
--vmodule=
Regards,
Rakesh T.
On Tue, Mar 26, 2013 at 9:05 PM, Lenni Kuff wrote:
Thanks Rakesh,
It would help to gather a little more configuration info about your setup.
Can you please send me the output from the "varz" tab of the impalad debug
webpage? You can access this page on any one of the hosts Impala is running
on by going to:
http://<hostname>:25000/varz
For example:
http://dcp-xs-hdfs-datanode-01:25000/varz
Thanks,
Lenni
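For reference, the same output can also be captured from the command line; a
minimal sketch, assuming the default debug webserver port of 25000 (the
hostname below is just an example from this thread):
# save the impalad configuration dump for inspection
curl http://dcp-xs-hdfs-datanode-01:25000/varz > impalad-varz.txt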
On Tue, Mar 26, 2013 at 8:17 AM, RAKESH THERANI <[email protected]> wrote:
Hi Lenni,
Thanks for the reply. (dfs.client.read.shortcircuit) is enabled in CM.
Versions are CDH4.2+ and CM4.5+.
I am able to start impala-shell on the data nodes.
I am creating the table on the name node in Hive.
On the data nodes I am able to access tables created on the name node via
impala-shell.
I am running the following commands on the data nodes:
[email protected]: impala-shell
[Not connected] > connect dcp-xs-hdfs-datanode-01;
Connected to dcp-xs-hdfs-datanode-01:21000
[dcp-xs-hdfs-datanode-01:21000] >
Regards,
Rakesh T.
On Tue, Mar 26, 2013 at 8:30 PM, Lenni Kuff wrote:
Thanks for the information, Rakesh. It looks like your DataNode/hdfs-site.xml
configuration is correct, so the configuration problem appears to be on the
Impala service side.
In CM, can you go to the Impala service configuration page? In the left pane
select "Service Wide" -> "Performance". You should be presented with a
checkbox for "Enable HDFS Short Circuit Read
(dfs.client.read.shortcircuit)". If this is not enabled, check the box and
restart the Impala service.
Could you also confirm the CM and CDH versions you are running? Keep in mind
Impala currently only supports CDH4.2+ and CM4.5+.
Thanks,
Lenni
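One way to double-check what configuration the Impala role actually received
is to inspect the files CM deploys alongside the flagfile; a minimal sketch,
assuming the process directory shown in the --flagfile path from the varz
output earlier in this thread:
# list the config files CM deployed for this impalad role
ls /run/cloudera-scm-agent/process/93-impala-IMPALAD/impala-conf/
# search the deployed config for the short-circuit setting
grep -r "dfs.client.read.shortcircuit" /run/cloudera-scm-agent/process/93-impala-IMPALAD/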
On Tue, Mar 26, 2013 at 3:24 AM, RAKESH THERANI <[email protected]> wrote:
Hi,
Below is the configuration on the datanodes in hdfs-site.xml:
<?xml version="1.0" encoding="UTF-8"?>
<!--Autogenerated by Cloudera CM on 2013-03-14T10:34:05.989Z-->
<configuration>
<property>
<name>dfs.namenode.name.dir</name>
<value>file:///dfs/nn</value>
</property>
<property>
<name>dfs.https.address</name>
<value>dcp-xs-hdfs-namenode-01:50470</value>
</property>
<property>
<name>dfs.https.port</name>
<value>50470</value>
</property>
<property>
<name>dfs.namenode.http-address</name>
<value>dcp-xs-hdfs-namenode-01:50070</value>
</property>
<property>
<name>dfs.replication</name>
<value>3</value>
</property>
<property>
<name>dfs.blocksize</name>
<value>134217728</value>
</property>
<property>
<name>dfs.client.use.datanode.hostname</name>
<value>false</value>
</property>
<property>
<name>fs.permissions.umask-mode</name>
<value>022</value>
</property>
<property>
<name>dfs.client.read.shortcircuit</name>
<value>true</value>
</property>
<property>
<name>dfs.domain.socket.path</name>
<value>/var/run/hdfs-socket/dn._PORT</value>
</property>
<property>
<name>dfs.client.read.shortcircuit.skip.checksum</name>
<value>false</value>
</property>
<property>
<name>dfs.client.domain.socket.data.traffic</name>
<value>false</value>
</property>
<property>
<name>dfs.datanode.hdfs-blocks-metadata.enabled</name>
<value>true</value>
</property>
</configuration>
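A quick sanity check for the socket setting above (a sketch; HDFS replaces
the _PORT token with the DataNode's TCP port, so only the parent directory
has to exist and be writable by the DataNode user):
# confirm the domain socket parent directory exists on each datanode
ls -ld /var/run/hdfs-socket/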
Kindly help me on this.
Regards,
Rakesh T.
On Tue, Mar 26, 2013 at 3:23 PM, RAKESH THERANI wrote:
Hi Lenni,
I have made the changes in hdfs-site.xml as given in the following link.
After executing the impalad command on the datanodes, it gives the following
error:
impalad
log4j:WARN No appenders could be found for logger
(org.apache.hadoop.metrics2.lib.MutableMetricsFactory).
log4j:WARN Please initialize the log4j system properly.
log4j:WARN See http://logging.apache.org/log4j/1.2/faq.html#noconfig for more info.
E0326 04:51:05.750805 14391 impala-server.cc:573] ERROR: short-circuit local reads is disabled because
- dfs.client.read.shortcircuit is not enabled.
E0326 04:51:05.751220 14391 impala-server.cc:575] Impala is aborted due to improper configurations.
Still, I can run impala-shell on the data nodes.
Regards,
Rakesh T.
On Mon, Mar 25, 2013 at 10:39 PM, Lenni Kuff <[email protected]> wrote:
Hi Rakesh,
From the error you are getting, it appears Impala is not configured properly
to use direct reads. Please take a look at the following page for details on
how to properly configure the Impala services:
https://ccp.cloudera.com/display/IMPALA10BETADOC/Configuring+Impala+for+Performance
After making the changes on that page, restart the Impala services and you
should be good to go. Let us know if you are still having trouble after
updating the configuration.
Thanks,
Lenni
Software Engineer - Cloudera
On Mon, Mar 25, 2013 at 8:41 AM, RAKESH THERANI <[email protected]> wrote:
Hi Lenni,
After executing the following commands, I get this output:
On the name node:
ps aux | grep impala
root 10309 0.0 0.0 9384 932 pts/1 S+ 10:37 0:00 grep --color=auto impala
impala 16763 0.0 0.0 233852 720 ? Sl Mar14 9:54 /usr/lib/impala/sbin/statestored --flagfile=/run/cloudera-scm-agent/process/32-impala-STATESTORE/impala-conf/state_store_flags
impala 16773 0.4 31.2 3200024 1264052 ? Sl Mar14 65:30 /usr/lib/impala/sbin/impalad --flagfile=/run/cloudera-scm-agent/process/31-impala-IMPALAD/impala-conf/impalad_flags
On a data node:
ps aux | grep impala
impala 28640 0.5 65.4 4453024 2644800 ? Sl Mar19 52:32 /usr/lib/impala/sbin/impalad --flagfile=/run/cloudera-scm-agent/process/29-impala-IMPALAD/impala-conf/impalad_flags
root 62849 0.0 0.0 9384 932 pts/1 S+ 10:37 0:00 grep --color=auto impala
When I try to start the impalad service on a datanode, it gives the
following error:
impalad
log4j:WARN No appenders could be found for logger
(org.apache.hadoop.metrics2.lib.MutableMetricsFactory).
log4j:WARN Please initialize the log4j system properly.
log4j:WARN See http://logging.apache.org/log4j/1.2/faq.html#noconfig for more info.
E0325 10:36:26.031052 62803 impala-server.cc:573] ERROR: short-circuit local reads is disabled because
- dfs.client.read.shortcircuit is not enabled.
E0325 10:36:26.031447 62803 impala-server.cc:575] Impala is aborted due to improper configurations.
But if I run impala-shell on the data nodes, it starts up fine:
impala-shell
Welcome to the Impala shell. Press TAB twice to see a list of
available commands.
Copyright (c) 2012 Cloudera, Inc. All rights reserved.
(Build version: Impala v0.6 (720f93c) built on Sat Feb 23 18:52:43 PST 2013)
[Not connected] >
[Not connected] >
[Not connected] >
Can you please help me on this?
Regards,
Rakesh T.
On Fri, Mar 22, 2013 at 8:42 PM, Lenni Kuff <[email protected]> wrote:
Hi Rakesh,
You would not connect to a DataNode or the NameNode; you would connect to an
impalad service. Every impalad service provides a frontend interface that
you can connect to and submit queries. It shouldn't matter which impalad in
your cluster you use: just type "connect <hostname>:21000" in the
impala-shell, where <hostname> is a machine running an impalad.
As a general cluster configuration note, we recommend you run the NameNode
service on a separate machine from the DataNodes and impalad/statestored
instances due to its high memory requirements.
Thanks,
Lenni
Software Engineer - Cloudera
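As a concrete illustration of the advice above, a minimal sketch connecting
to one of the datanodes named in this thread (assuming your impala-shell
version supports the -i option, which takes a host:port at startup):
# connect straight to an impalad frontend when launching the shell
impala-shell -i dcp-xs-hdfs-datanode-02:21000
# or, equivalently, from inside an already-running shell:
#   [Not connected] > connect dcp-xs-hdfs-datanode-02:21000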
On Fri, Mar 22, 2013 at 1:56 AM, RAKESH THERANI <[email protected]> wrote:
Hi Vikas,
I am connecting using impala-shell. My queries execute properly in Hive as
well as in impala-shell.
My question is: when executing impala-shell, do I need to connect to a
datanode or the namenode?
Regards,
Rakesh T
On Thu, Mar 21, 2013 at 10:07 PM, Vikas Singh <[email protected]> wrote:
Hi Rakesh,
From the shell prompt "[Not connected] > ", it seems like you are not
connected to any impalad. You need to execute the "connect" command to
connect to an impalad instance and then execute your query (please type
"help" at the impala-shell prompt to get more information on the various
supported commands).
But before doing that, please confirm that you can execute the same query
using the Hive shell, to ensure that Hive is set up correctly.
Vikas
On Thu, Mar 21, 2013 at 1:56 AM, RAKESH THERANI <[email protected]> wrote:
Hi Alan,
I have installed CDH4 and Impala 0.6 using the automated installation in
Cloudera Manager.
I have created a setup of 1 name node and 4 data nodes. I have created a
table on the name node which consumes approx. 15G of space on the name node.
All data from the name node is replicated to all data nodes in the cluster.
I am running impala-shell on the data nodes and connecting to those same
data nodes.
I just wanted to know: how does Impala process the data in a distributed way?
Regards,
Rakesh T.
On Thu, Mar 21, 2013 at 8:22 AM, Alan <[email protected]> wrote:
Hi Rakesh,
Can you tell us a bit more about the problem? Were you able to execute any
query at all (such as "select 1")? How did you install Impala (which
version)? Did you see anything in the log?
Thanks,
Alan
On Monday, March 18, 2013 2:17:27 AM UTC-7, RAKESH THERANI wrote:
Hi,
While executing a query on a datanode, I am getting the following error:
Query: select count(1) from test_data
Error communicating with impalad: TSocket read 0 bytes
[Not connected] >
Regards,
Rakesh T.