hadoopspring-dataspring-integrationspring-data-hadoop

Spring data - hadoop connectivity


I'm trying out Spring Data - Hadoop for executing the MR code on a remote cluster from my local machine's IDE

Hadoop 1.1.2, Spring 3.2.4, Spring-Data-Hadoop 1.0.0

My bean configuration file viz. applicationContext.xml is as follows :

        <?xml version="1.0" encoding="UTF-8"?>
<beans xmlns="http://www.springframework.org/schema/beans"
    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:hdp="http://www.springframework.org/schema/hadoop"
    xmlns:context="http://www.springframework.org/schema/context"
    xsi:schemaLocation="http://www.springframework.org/schema/beans http://www.springframework.org/schema/beans/spring-beans.xsd 
    http://www.springframework.org/schema/hadoop http://www.springframework.org/schema/hadoop/spring-hadoop.xsd
    http://www.springframework.org/schema/context http://www.springframework.org/schema/context/spring-context-3.2.xsd">

    <context:property-placeholder location="resources/hadoop.properties" />

    <hdp:configuration file-system-uri="${hd.fs}" job-tracker-uri="${hd.jobtracker.uri}">

    </hdp:configuration>

    <hdp:job id="wc-job" mapper="com.hadoop.basics.WordCounter.WCMapper"
        reducer="com.hadoop.basics.WordCounter.WCReducer" input-path="${wordcount.input.path}"
        output-path="${wordcount.output.path}" user="bigdata">
    </hdp:job>

    <hdp:job-runner id="myjobs-runner" job-ref="wc-job"
        run-at-startup="true" />

    <hdp:resource-loader id="resourceLoader" uri="${hd.fs}"
        user="bigdata" />   
</beans>

hadoop.properties

hd.fs=hdfs://cloudx-843-770:9000
hd.jobtracker.uri=cloudx-843-770:9001

wordcount.input.path=/scratchpad/input/Childhood_days.txt
wordcount.output.path=/scratchpad/output

The java class which I'm doing 'Run as ...'

package com.hadoop.basics;

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.springframework.context.support.AbstractApplicationContext;
import org.springframework.context.support.ClassPathXmlApplicationContext;

public class WordCounter {

    private static IntWritable one = new IntWritable(1);

    public class WCMapper extends Mapper<Text, Text, Text, IntWritable> {

        @Override
        protected void map(
                Text key,
                Text value,
                org.apache.hadoop.mapreduce.Mapper<Text, Text, Text, IntWritable>.Context context)
                throws IOException, InterruptedException {
            // TODO Auto-generated method stub
            StringTokenizer strTokenizer = new StringTokenizer(value.toString());
            Text token = new Text();

            while (strTokenizer.hasMoreTokens()) {
                token.set(strTokenizer.nextToken());
                context.write(token, one);
            }

        }
    }

    public class WCReducer extends
            Reducer<Text, IntWritable, Text, IntWritable> {
        @Override
        protected void reduce(
                Text key,
                Iterable<IntWritable> values,
                org.apache.hadoop.mapreduce.Reducer<Text, IntWritable, Text, IntWritable>.Context context)
                throws IOException, InterruptedException {
            // TODO Auto-generated method stub

            int sum = 0;

            for (IntWritable value : values) {
                sum += value.get();
            }

            context.write(key, new IntWritable(sum));
        }
    }

    public static void main(String[] args) {
        AbstractApplicationContext context = new ClassPathXmlApplicationContext(
                "applicationContext.xml", WordCounter.class);
        System.out.println("Word Count Application Running");
        context.registerShutdownHook();
    }
}

The output is :

Aug 22, 2013 9:59:02 AM org.springframework.context.support.AbstractApplicationContext prepareRefresh
INFO: Refreshing org.springframework.context.support.ClassPathXmlApplicationContext@1815338: startup date [Thu Aug 22 09:59:02 IST 2013]; root of context hierarchy
Aug 22, 2013 9:59:03 AM org.springframework.beans.factory.xml.XmlBeanDefinitionReader loadBeanDefinitions
INFO: Loading XML bean definitions from class path resource [com/hadoop/basics/applicationContext.xml]
Aug 22, 2013 9:59:03 AM org.springframework.core.io.support.PropertiesLoaderSupport loadProperties
INFO: Loading properties file from class path resource [resources/hadoop.properties]
Aug 22, 2013 9:59:03 AM org.springframework.beans.factory.support.DefaultListableBeanFactory preInstantiateSingletons
INFO: Pre-instantiating singletons in org.springframework.beans.factory.support.DefaultListableBeanFactory@7c197e: defining beans [org.springframework.context.support.PropertySourcesPlaceholderConfigurer#0,hadoopConfiguration,wc-job,myjobs-runner,resourceLoader]; root of factory hierarchy
Aug 22, 2013 9:59:03 AM org.springframework.data.hadoop.mapreduce.JobExecutor$2 run
INFO: Starting job [wc-job]
Aug 22, 2013 9:59:03 AM org.apache.hadoop.security.UserGroupInformation doAs
SEVERE: PriviledgedActionException as:bigdata via 298790 cause:org.apache.hadoop.ipc.RemoteException: User: 298790 is not allowed to impersonate bigdata
Aug 22, 2013 9:59:03 AM org.springframework.data.hadoop.mapreduce.JobExecutor$2 run
WARNING: Cannot start job [wc-job]
org.apache.hadoop.ipc.RemoteException: User: 298790 is not allowed to impersonate bigdata
    at org.apache.hadoop.ipc.Client.call(Client.java:1107)
    at org.apache.hadoop.ipc.RPC$Invoker.invoke(RPC.java:229)
    at org.apache.hadoop.mapred.$Proxy2.getProtocolVersion(Unknown Source)
    at org.apache.hadoop.ipc.RPC.getProxy(RPC.java:411)
    at org.apache.hadoop.mapred.JobClient.createRPCProxy(JobClient.java:499)
    at org.apache.hadoop.mapred.JobClient.init(JobClient.java:490)
    at org.apache.hadoop.mapred.JobClient.<init>(JobClient.java:473)
    at org.apache.hadoop.mapreduce.Job$1.run(Job.java:513)
    at java.security.AccessController.doPrivileged(Native Method)
    at javax.security.auth.Subject.doAs(Unknown Source)
    at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1149)
    at org.apache.hadoop.mapreduce.Job.connect(Job.java:511)
    at org.apache.hadoop.mapreduce.Job.submit(Job.java:499)
    at org.apache.hadoop.mapreduce.Job.waitForCompletion(Job.java:530)
    at org.springframework.data.hadoop.mapreduce.JobExecutor$2.run(JobExecutor.java:197)
    at org.springframework.core.task.SyncTaskExecutor.execute(SyncTaskExecutor.java:49)
    at org.springframework.data.hadoop.mapreduce.JobExecutor.startJobs(JobExecutor.java:168)
    at org.springframework.data.hadoop.mapreduce.JobExecutor.startJobs(JobExecutor.java:160)
    at org.springframework.data.hadoop.mapreduce.JobRunner.call(JobRunner.java:52)
    at org.springframework.data.hadoop.mapreduce.JobRunner.afterPropertiesSet(JobRunner.java:44)
    at org.springframework.beans.factory.support.AbstractAutowireCapableBeanFactory.invokeInitMethods(AbstractAutowireCapableBeanFactory.java:1541)
    at org.springframework.beans.factory.support.AbstractAutowireCapableBeanFactory.initializeBean(AbstractAutowireCapableBeanFactory.java:1479)
    at org.springframework.beans.factory.support.AbstractAutowireCapableBeanFactory.doCreateBean(AbstractAutowireCapableBeanFactory.java:521)
    at org.springframework.beans.factory.support.AbstractAutowireCapableBeanFactory.createBean(AbstractAutowireCapableBeanFactory.java:458)
    at org.springframework.beans.factory.support.AbstractBeanFactory$1.getObject(AbstractBeanFactory.java:295)
    at org.springframework.beans.factory.support.DefaultSingletonBeanRegistry.getSingleton(DefaultSingletonBeanRegistry.java:223)
    at org.springframework.beans.factory.support.AbstractBeanFactory.doGetBean(AbstractBeanFactory.java:292)
    at org.springframework.beans.factory.support.AbstractBeanFactory.getBean(AbstractBeanFactory.java:194)
    at org.springframework.beans.factory.support.DefaultListableBeanFactory.preInstantiateSingletons(DefaultListableBeanFactory.java:628)
    at org.springframework.context.support.AbstractApplicationContext.finishBeanFactoryInitialization(AbstractApplicationContext.java:932)
    at org.springframework.context.support.AbstractApplicationContext.refresh(AbstractApplicationContext.java:479)
    at org.springframework.context.support.ClassPathXmlApplicationContext.<init>(ClassPathXmlApplicationContext.java:197)
    at org.springframework.context.support.ClassPathXmlApplicationContext.<init>(ClassPathXmlApplicationContext.java:172)
    at org.springframework.context.support.ClassPathXmlApplicationContext.<init>(ClassPathXmlApplicationContext.java:158)
    at com.hadoop.basics.WordCounter.main(WordCounter.java:58)

Aug 22, 2013 9:59:03 AM org.springframework.beans.factory.support.DefaultSingletonBeanRegistry destroySingletons
INFO: Destroying singletons in org.springframework.beans.factory.support.DefaultListableBeanFactory@7c197e: defining beans [org.springframework.context.support.PropertySourcesPlaceholderConfigurer#0,hadoopConfiguration,wc-job,myjobs-runner,resourceLoader]; root of factory hierarchy
Exception in thread "main" org.springframework.beans.factory.BeanCreationException: Error creating bean with name 'myjobs-runner': Invocation of init method failed; nested exception is java.lang.IllegalStateException: org.apache.hadoop.ipc.RemoteException: User: 298790 is not allowed to impersonate bigdata
    at org.springframework.beans.factory.support.AbstractAutowireCapableBeanFactory.initializeBean(AbstractAutowireCapableBeanFactory.java:1482)
    at org.springframework.beans.factory.support.AbstractAutowireCapableBeanFactory.doCreateBean(AbstractAutowireCapableBeanFactory.java:521)
    at org.springframework.beans.factory.support.AbstractAutowireCapableBeanFactory.createBean(AbstractAutowireCapableBeanFactory.java:458)
    at org.springframework.beans.factory.support.AbstractBeanFactory$1.getObject(AbstractBeanFactory.java:295)
    at org.springframework.beans.factory.support.DefaultSingletonBeanRegistry.getSingleton(DefaultSingletonBeanRegistry.java:223)
    at org.springframework.beans.factory.support.AbstractBeanFactory.doGetBean(AbstractBeanFactory.java:292)
    at org.springframework.beans.factory.support.AbstractBeanFactory.getBean(AbstractBeanFactory.java:194)
    at org.springframework.beans.factory.support.DefaultListableBeanFactory.preInstantiateSingletons(DefaultListableBeanFactory.java:628)
    at org.springframework.context.support.AbstractApplicationContext.finishBeanFactoryInitialization(AbstractApplicationContext.java:932)
    at org.springframework.context.support.AbstractApplicationContext.refresh(AbstractApplicationContext.java:479)
    at org.springframework.context.support.ClassPathXmlApplicationContext.<init>(ClassPathXmlApplicationContext.java:197)
    at org.springframework.context.support.ClassPathXmlApplicationContext.<init>(ClassPathXmlApplicationContext.java:172)
    at org.springframework.context.support.ClassPathXmlApplicationContext.<init>(ClassPathXmlApplicationContext.java:158)
    at com.hadoop.basics.WordCounter.main(WordCounter.java:58)
Caused by: java.lang.IllegalStateException: org.apache.hadoop.ipc.RemoteException: User: 298790 is not allowed to impersonate bigdata
    at org.springframework.data.hadoop.mapreduce.JobExecutor$2.run(JobExecutor.java:209)
    at org.springframework.core.task.SyncTaskExecutor.execute(SyncTaskExecutor.java:49)
    at org.springframework.data.hadoop.mapreduce.JobExecutor.startJobs(JobExecutor.java:168)
    at org.springframework.data.hadoop.mapreduce.JobExecutor.startJobs(JobExecutor.java:160)
    at org.springframework.data.hadoop.mapreduce.JobRunner.call(JobRunner.java:52)
    at org.springframework.data.hadoop.mapreduce.JobRunner.afterPropertiesSet(JobRunner.java:44)
    at org.springframework.beans.factory.support.AbstractAutowireCapableBeanFactory.invokeInitMethods(AbstractAutowireCapableBeanFactory.java:1541)
    at org.springframework.beans.factory.support.AbstractAutowireCapableBeanFactory.initializeBean(AbstractAutowireCapableBeanFactory.java:1479)
    ... 13 more
Caused by: org.apache.hadoop.ipc.RemoteException: User: 298790 is not allowed to impersonate bigdata
    at org.apache.hadoop.ipc.Client.call(Client.java:1107)
    at org.apache.hadoop.ipc.RPC$Invoker.invoke(RPC.java:229)
    at org.apache.hadoop.mapred.$Proxy2.getProtocolVersion(Unknown Source)
    at org.apache.hadoop.ipc.RPC.getProxy(RPC.java:411)
    at org.apache.hadoop.mapred.JobClient.createRPCProxy(JobClient.java:499)
    at org.apache.hadoop.mapred.JobClient.init(JobClient.java:490)
    at org.apache.hadoop.mapred.JobClient.<init>(JobClient.java:473)
    at org.apache.hadoop.mapreduce.Job$1.run(Job.java:513)
    at java.security.AccessController.doPrivileged(Native Method)
    at javax.security.auth.Subject.doAs(Unknown Source)
    at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1149)
    at org.apache.hadoop.mapreduce.Job.connect(Job.java:511)
    at org.apache.hadoop.mapreduce.Job.submit(Job.java:499)
    at org.apache.hadoop.mapreduce.Job.waitForCompletion(Job.java:530)
    at org.springframework.data.hadoop.mapreduce.JobExecutor$2.run(JobExecutor.java:197)
    ... 20 more

As is obvious, the user 298790(my local Windows machine user) is not recognized on the cluster - that's why in the config. file

  1. I specified user="bigdata" in the job's configuration as mentioned in the doc.
  2. The doc. also mentions :

SHDP obeys the HDFS permissions, using the identity of the current user (by default) for interacting with the file system. In particular, the HdfsResourceLoader considers when doing pattern matching, only the files that its suppose to see and does not perform any privileged action. It is possible however to specify a different user, meaning the ResourceLoader interacts with HDFS using that user's rights - however this obeys the user impersonation rules As per the api, I decided to use HdfsResourceLoader but couldn't find any example or even configuration in the documentation - can anyone provide any pointers?

  1. As per Hadoop Secure Impersonation, I believe that I need to add my Windows user 298790 to the remote cluster machine(Ubuntu)user's group and also my Windows host name which I find infeasible in case of large no. of users and changing Windows client machines. In case my assumption is correct, what can be done to avoid adding and configuring all these users?

/Adding the changes to core-site.xml/

<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>

<!-- Put site-specific property overrides in this file. -->

<configuration>

<property>
<name>fs.default.name</name>
<value>hdfs://cloudx-843-770:9000</value>
</property>

<property>
<name>hadoop.proxyuser.298790.groups</name>
<value>bigdata</value>
<description>Allow the superuser bigdatato impersonate any members of the group bigdata</description>
</property>

<property>
<name>hadoop.proxyuser.298790.hosts</name>
<value>*</value>
<description>The superuser can connect only from INFVA03351 to impersonate a user</description>
</property>

</configuration>

I restarted all the Hadoop processes but the error persists.

Then, I decided to create a new user viz. 298790 on the remote Ubuntu machine and add it to the group bigdata for impersonation purposes :

    root@cloudx-843-770:/home/bigdata# useradd -G bigdata 298790
    root@cloudx-843-770:/home/bigdata#
    root@cloudx-843-770:/home/bigdata#
    root@cloudx-843-770:/home/bigdata# usermod -G bigdata 298790
    root@cloudx-843-770:/home/bigdata#
    root@cloudx-843-770:/home/bigdata# su 298790
    $ groups
    298790 bigdata
root@cloudx-843-770:/home/bigdata#
root@cloudx-843-770:/home/bigdata# cat /etc/passwd
root:x:0:0:root:/root:/bin/bash
daemon:x:1:1:daemon:/usr/sbin:/bin/sh
bin:x:2:2:bin:/bin:/bin/sh
sys:x:3:3:sys:/dev:/bin/sh
sync:x:4:65534:sync:/bin:/bin/sync
games:x:5:60:games:/usr/games:/bin/sh
man:x:6:12:man:/var/cache/man:/bin/sh
lp:x:7:7:lp:/var/spool/lpd:/bin/sh
mail:x:8:8:mail:/var/mail:/bin/sh
news:x:9:9:news:/var/spool/news:/bin/sh
uucp:x:10:10:uucp:/var/spool/uucp:/bin/sh
proxy:x:13:13:proxy:/bin:/bin/sh
www-data:x:33:33:www-data:/var/www:/bin/sh
backup:x:34:34:backup:/var/backups:/bin/sh
list:x:38:38:Mailing List Manager:/var/list:/bin/sh
irc:x:39:39:ircd:/var/run/ircd:/bin/sh
gnats:x:41:41:Gnats Bug-Reporting System (admin):/var/lib/gnats:/bin/sh
nobody:x:65534:65534:nobody:/nonexistent:/bin/sh
libuuid:x:100:101::/var/lib/libuuid:/bin/sh
syslog:x:101:103::/home/syslog:/bin/false
mysql:x:102:105:MySQL Server,,,:/nonexistent:/bin/false
messagebus:x:103:106::/var/run/dbus:/bin/false
whoopsie:x:104:107::/nonexistent:/bin/false
landscape:x:105:110::/var/lib/landscape:/bin/false
sshd:x:106:65534::/var/run/sshd:/usr/sbin/nologin
tomcat6:x:107:113::/usr/share/tomcat6:/bin/false
coesystem:x:1000:1000:coesystem,,,:/home/coesystem:/bin/bash
hpcc:x:999:1001:hpcc Runtime User:/home/hpcc:/bin/sh
hduser:x:1001:1002:hduser,1,1,1,1:/home/hduser:/bin/bash
bigdata:x:1002:1003:Big Data,1,1,1,1:/home/bigdata:/bin/bash
298790:x:1003:1004::/home/298790:/bin/sh

But now when I attempt to stop(and then start) the cluster, it asks for password for all processes :

bigdata@cloudx-843-770:~/hadoop_ecosystem/apache_hadoop/hadoop-1.1.2/bin$ stop-all.sh
Warning: $HADOOP_HOME is deprecated.

stopping jobtracker
bigdata@localhost's password:
localhost: stopping tasktracker
stopping namenode
bigdata@localhost's password:
localhost: stopping datanode
bigdata@localhost's password:
localhost: stopping secondarynamenode

And now the error is slightly modified - it first fails to connect and then to impersonate :

Aug 22, 2013 5:14:17 PM org.springframework.context.support.AbstractApplicationContext prepareRefresh
INFO: Refreshing org.springframework.context.support.ClassPathXmlApplicationContext@922804: startup date [Thu Aug 22 17:14:17 IST 2013]; root of context hierarchy
Aug 22, 2013 5:14:17 PM org.springframework.beans.factory.xml.XmlBeanDefinitionReader loadBeanDefinitions
INFO: Loading XML bean definitions from class path resource [com/hadoop/basics/applicationContext.xml]
Aug 22, 2013 5:14:17 PM org.springframework.core.io.support.PropertiesLoaderSupport loadProperties
INFO: Loading properties file from class path resource [resources/hadoop.properties]
Aug 22, 2013 5:14:17 PM org.springframework.beans.factory.support.DefaultListableBeanFactory preInstantiateSingletons
INFO: Pre-instantiating singletons in org.springframework.beans.factory.support.DefaultListableBeanFactory@7c197e: defining beans [org.springframework.context.support.PropertySourcesPlaceholderConfigurer#0,hadoopConfiguration,wc-job,myjobs-runner,resourceLoader]; root of factory hierarchy
Aug 22, 2013 5:14:18 PM org.springframework.data.hadoop.mapreduce.JobExecutor$2 run
INFO: Starting job [wc-job]
Aug 22, 2013 5:14:20 PM org.apache.hadoop.ipc.Client$Connection handleConnectionFailure
INFO: Retrying connect to server: cloudx-843-770/172.25.37.135:9001. Already tried 0 time(s); retry policy is RetryUpToMaximumCountWithFixedSleep(maxRetries=10, sleepTime=1 SECONDS)
Aug 22, 2013 5:14:22 PM org.apache.hadoop.ipc.Client$Connection handleConnectionFailure
INFO: Retrying connect to server: cloudx-843-770/172.25.37.135:9001. Already tried 1 time(s); retry policy is RetryUpToMaximumCountWithFixedSleep(maxRetries=10, sleepTime=1 SECONDS)
Aug 22, 2013 5:14:24 PM org.apache.hadoop.ipc.Client$Connection handleConnectionFailure
INFO: Retrying connect to server: cloudx-843-770/172.25.37.135:9001. Already tried 2 time(s); retry policy is RetryUpToMaximumCountWithFixedSleep(maxRetries=10, sleepTime=1 SECONDS)
Aug 22, 2013 5:14:26 PM org.apache.hadoop.ipc.Client$Connection handleConnectionFailure
INFO: Retrying connect to server: cloudx-843-770/172.25.37.135:9001. Already tried 3 time(s); retry policy is RetryUpToMaximumCountWithFixedSleep(maxRetries=10, sleepTime=1 SECONDS)
Aug 22, 2013 5:14:36 PM org.apache.hadoop.security.UserGroupInformation doAs
SEVERE: PriviledgedActionException as:bigdata via 298790 cause:org.apache.hadoop.ipc.RemoteException: User: 298790 is not allowed to impersonate bigdata
Aug 22, 2013 5:14:36 PM org.springframework.data.hadoop.mapreduce.JobExecutor$2 run
WARNING: Cannot start job [wc-job]
org.apache.hadoop.ipc.RemoteException: User: 298790 is not allowed to impersonate bigdata
    at org.apache.hadoop.ipc.Client.call(Client.java:1107)
    at org.apache.hadoop.ipc.RPC$Invoker.invoke(RPC.java:229)
    at org.apache.hadoop.mapred.$Proxy2.getProtocolVersion(Unknown Source)
    at org.apache.hadoop.ipc.RPC.getProxy(RPC.java:411)
    at org.apache.hadoop.mapred.JobClient.createRPCProxy(JobClient.java:499)
    at org.apache.hadoop.mapred.JobClient.init(JobClient.java:490)
    at org.apache.hadoop.mapred.JobClient.<init>(JobClient.java:473)
    at org.apache.hadoop.mapreduce.Job$1.run(Job.java:513)
    at java.security.AccessController.doPrivileged(Native Method)
    at javax.security.auth.Subject.doAs(Unknown Source)
    at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1149)
    at org.apache.hadoop.mapreduce.Job.connect(Job.java:511)
    at org.apache.hadoop.mapreduce.Job.submit(Job.java:499)
    at org.apache.hadoop.mapreduce.Job.waitForCompletion(Job.java:530)
    at org.springframework.data.hadoop.mapreduce.JobExecutor$2.run(JobExecutor.java:197)
    at org.springframework.core.task.SyncTaskExecutor.execute(SyncTaskExecutor.java:49)
    at org.springframework.data.hadoop.mapreduce.JobExecutor.startJobs(JobExecutor.java:168)
    at org.springframework.data.hadoop.mapreduce.JobExecutor.startJobs(JobExecutor.java:160)
    at org.springframework.data.hadoop.mapreduce.JobRunner.call(JobRunner.java:52)
    at org.springframework.data.hadoop.mapreduce.JobRunner.afterPropertiesSet(JobRunner.java:44)
    at org.springframework.beans.factory.support.AbstractAutowireCapableBeanFactory.invokeInitMethods(AbstractAutowireCapableBeanFactory.java:1541)
    at org.springframework.beans.factory.support.AbstractAutowireCapableBeanFactory.initializeBean(AbstractAutowireCapableBeanFactory.java:1479)
    at org.springframework.beans.factory.support.AbstractAutowireCapableBeanFactory.doCreateBean(AbstractAutowireCapableBeanFactory.java:521)
    at org.springframework.beans.factory.support.AbstractAutowireCapableBeanFactory.createBean(AbstractAutowireCapableBeanFactory.java:458)
    at org.springframework.beans.factory.support.AbstractBeanFactory$1.getObject(AbstractBeanFactory.java:295)
    at org.springframework.beans.factory.support.DefaultSingletonBeanRegistry.getSingleton(DefaultSingletonBeanRegistry.java:223)
    at org.springframework.beans.factory.support.AbstractBeanFactory.doGetBean(AbstractBeanFactory.java:292)
    at org.springframework.beans.factory.support.AbstractBeanFactory.getBean(AbstractBeanFactory.java:194)
    at org.springframework.beans.factory.support.DefaultListableBeanFactory.preInstantiateSingletons(DefaultListableBeanFactory.java:628)
    at org.springframework.context.support.AbstractApplicationContext.finishBeanFactoryInitialization(AbstractApplicationContext.java:932)
    at org.springframework.context.support.AbstractApplicationContext.refresh(AbstractApplicationContext.java:479)
    at org.springframework.context.support.ClassPathXmlApplicationContext.<init>(ClassPathXmlApplicationContext.java:197)
    at org.springframework.context.support.ClassPathXmlApplicationContext.<init>(ClassPathXmlApplicationContext.java:172)
    at org.springframework.context.support.ClassPathXmlApplicationContext.<init>(ClassPathXmlApplicationContext.java:158)
    at com.hadoop.basics.WordCounter.main(WordCounter.java:58)

Aug 22, 2013 5:14:36 PM org.springframework.beans.factory.support.DefaultSingletonBeanRegistry destroySingletons
INFO: Destroying singletons in org.springframework.beans.factory.support.DefaultListableBeanFactory@7c197e: defining beans [org.springframework.context.support.PropertySourcesPlaceholderConfigurer#0,hadoopConfiguration,wc-job,myjobs-runner,resourceLoader]; root of factory hierarchy
Exception in thread "main" org.springframework.beans.factory.BeanCreationException: Error creating bean with name 'myjobs-runner': Invocation of init method failed; nested exception is java.lang.IllegalStateException: org.apache.hadoop.ipc.RemoteException: User: 298790 is not allowed to impersonate bigdata
    at org.springframework.beans.factory.support.AbstractAutowireCapableBeanFactory.initializeBean(AbstractAutowireCapableBeanFactory.java:1482)
    at org.springframework.beans.factory.support.AbstractAutowireCapableBeanFactory.doCreateBean(AbstractAutowireCapableBeanFactory.java:521)
    at org.springframework.beans.factory.support.AbstractAutowireCapableBeanFactory.createBean(AbstractAutowireCapableBeanFactory.java:458)
    at org.springframework.beans.factory.support.AbstractBeanFactory$1.getObject(AbstractBeanFactory.java:295)
    at org.springframework.beans.factory.support.DefaultSingletonBeanRegistry.getSingleton(DefaultSingletonBeanRegistry.java:223)
    at org.springframework.beans.factory.support.AbstractBeanFactory.doGetBean(AbstractBeanFactory.java:292)
    at org.springframework.beans.factory.support.AbstractBeanFactory.getBean(AbstractBeanFactory.java:194)
    at org.springframework.beans.factory.support.DefaultListableBeanFactory.preInstantiateSingletons(DefaultListableBeanFactory.java:628)
    at org.springframework.context.support.AbstractApplicationContext.finishBeanFactoryInitialization(AbstractApplicationContext.java:932)
    at org.springframework.context.support.AbstractApplicationContext.refresh(AbstractApplicationContext.java:479)
    at org.springframework.context.support.ClassPathXmlApplicationContext.<init>(ClassPathXmlApplicationContext.java:197)
    at org.springframework.context.support.ClassPathXmlApplicationContext.<init>(ClassPathXmlApplicationContext.java:172)
    at org.springframework.context.support.ClassPathXmlApplicationContext.<init>(ClassPathXmlApplicationContext.java:158)
    at com.hadoop.basics.WordCounter.main(WordCounter.java:58)
Caused by: java.lang.IllegalStateException: org.apache.hadoop.ipc.RemoteException: User: 298790 is not allowed to impersonate bigdata
    at org.springframework.data.hadoop.mapreduce.JobExecutor$2.run(JobExecutor.java:209)
    at org.springframework.core.task.SyncTaskExecutor.execute(SyncTaskExecutor.java:49)
    at org.springframework.data.hadoop.mapreduce.JobExecutor.startJobs(JobExecutor.java:168)
    at org.springframework.data.hadoop.mapreduce.JobExecutor.startJobs(JobExecutor.java:160)
    at org.springframework.data.hadoop.mapreduce.JobRunner.call(JobRunner.java:52)
    at org.springframework.data.hadoop.mapreduce.JobRunner.afterPropertiesSet(JobRunner.java:44)
    at org.springframework.beans.factory.support.AbstractAutowireCapableBeanFactory.invokeInitMethods(AbstractAutowireCapableBeanFactory.java:1541)
    at org.springframework.beans.factory.support.AbstractAutowireCapableBeanFactory.initializeBean(AbstractAutowireCapableBeanFactory.java:1479)
    ... 13 more
Caused by: org.apache.hadoop.ipc.RemoteException: User: 298790 is not allowed to impersonate bigdata
    at org.apache.hadoop.ipc.Client.call(Client.java:1107)
    at org.apache.hadoop.ipc.RPC$Invoker.invoke(RPC.java:229)
    at org.apache.hadoop.mapred.$Proxy2.getProtocolVersion(Unknown Source)
    at org.apache.hadoop.ipc.RPC.getProxy(RPC.java:411)
    at org.apache.hadoop.mapred.JobClient.createRPCProxy(JobClient.java:499)
    at org.apache.hadoop.mapred.JobClient.init(JobClient.java:490)
    at org.apache.hadoop.mapred.JobClient.<init>(JobClient.java:473)
    at org.apache.hadoop.mapreduce.Job$1.run(Job.java:513)
    at java.security.AccessController.doPrivileged(Native Method)
    at javax.security.auth.Subject.doAs(Unknown Source)
    at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1149)
    at org.apache.hadoop.mapreduce.Job.connect(Job.java:511)
    at org.apache.hadoop.mapreduce.Job.submit(Job.java:499)
    at org.apache.hadoop.mapreduce.Job.waitForCompletion(Job.java:530)
    at org.springframework.data.hadoop.mapreduce.JobExecutor$2.run(JobExecutor.java:197)
    ... 20 more

Solution

  • If you ever wish to impersonate a user as another, Apache Hadoop requires that your service-side configuration allows such a thing.

    This means, that if you're running as "foo", and you want to actually submit the job as "bar", then your NameNode/JobTracker requires its loaded core-site.xml config to be allowing "foo" to proxy other users, and typically something like the below must be present in the NameNode's/JobTracker's core-site.xml:

    <property>
      <name>hadoop.proxyuser.foo.groups</name>
      <value>*</value>
    </property>
    
    <property>
      <name>hadoop.proxyuser.foo.hosts</name>
      <value>*</value>
    </property>
    

    This would allow user foo to impersonate any other user (* for groups), and do so when submitting from any host (* for hosts).

    While it isn't necessary, it is definitely recommended that defined users and groups need to exist on the NameNode for things such as permissions, group resolution, etc. to work in a proper fashion. More on that here: http://www.cloudera.com/blog/2012/03/authorization-and-authentication-in-hadoop/

    The documentation at http://static.springsource.org/spring-hadoop/docs/1.0.x/reference/html/security.html#security:kerberos could be clearer in addressing this.