9.在yarn运行wordcount程序
1.WordMapper:
public class WordcountMap extends Mapper<LongWritable, Text, Text, IntWritable> {

    // Reuse Writable instances across map() calls to avoid allocating two
    // objects per input token (standard Hadoop mapper idiom).
    private final Text outKey = new Text();
    private static final IntWritable ONE = new IntWritable(1);

    /**
     * Splits each input line into words and emits a (word, 1) pair per word.
     *
     * @param key     byte offset of the line within the input split (unused)
     * @param value   one line of input text
     * @param context Hadoop context used to emit intermediate pairs
     * @throws IOException          if emitting a pair fails
     * @throws InterruptedException if the task is interrupted
     */
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // 1. Get the raw line.
        String line = value.toString();
        // 2. Split on runs of whitespace. "\\s+" avoids emitting empty tokens
        //    when words are separated by multiple spaces — the original
        //    split(" ") counted those empty strings as words.
        String[] words = line.split("\\s+");
        // 3. Emit (word, 1) for every non-empty token. Leading whitespace
        //    still yields one empty leading token, so guard against it.
        for (String word : words) {
            if (!word.isEmpty()) {
                outKey.set(word);
                context.write(outKey, ONE);
            }
        }
    }
}
2.Wordreducer
public class WordcountReduce extends Reducer<Text, IntWritable, Text, IntWritable> {

    // Reused output value — avoids one allocation per distinct word
    // (standard Hadoop reducer idiom).
    private final IntWritable result = new IntWritable();

    /**
     * Sums all the partial counts emitted by the mapper for a single word
     * and writes the (word, total) pair.
     *
     * @param key     the word being reduced
     * @param values  all partial counts (each is 1 from the mapper)
     * @param context Hadoop context used to emit the final pair
     * @throws IOException          if emitting the pair fails
     * @throws InterruptedException if the task is interrupted
     */
    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        // Accumulate the total number of occurrences for this word.
        int sum = 0;
        for (IntWritable count : values) {
            sum += count.get();
        }
        // Emit the final (word, total) pair.
        result.set(sum);
        context.write(key, result);
    }
}
3.WordDrive
public class WordcountDriver {

    /**
     * Configures and submits the word-count job to the cluster.
     *
     * <p>Usage: {@code hadoop jar wc.jar com.mr.wc.WordcountDriver [inputPath outputPath]}.
     * When no arguments are supplied, falls back to the original hard-coded
     * paths, so existing invocations keep working.
     *
     * @param args optional input path (args[0]) and output path (args[1])
     * @throws IOException            on HDFS/job-submission I/O errors
     * @throws ClassNotFoundException if the mapper/reducer classes cannot be loaded
     * @throws InterruptedException   if waiting for completion is interrupted
     */
    public static void main(String[] args)
            throws IOException, ClassNotFoundException, InterruptedException {
        // Paths are now overridable from the command line instead of being
        // hard-coded (the original always used /in1019 and /out02).
        String input = args.length > 0 ? args[0] : "/in1019";
        String output = args.length > 1 ? args[1] : "/out02";

        // Build the job from the cluster configuration on the classpath.
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);

        // Locate the jar that contains this driver class.
        job.setJarByClass(WordcountDriver.class);

        // Wire up the custom mapper and reducer.
        job.setMapperClass(WordcountMap.class);
        job.setReducerClass(WordcountReduce.class);

        // Map-output key/value types.
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);

        // Final (reducer) output key/value types.
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        // Job input/output paths. NOTE(review): the output path must not
        // already exist on HDFS, or job submission fails.
        FileInputFormat.setInputPaths(job, new Path(input));
        FileOutputFormat.setOutputPath(job, new Path(output));

        // Block until the job finishes, then propagate success/failure to
        // the shell via the exit code (the original only printed the flag).
        boolean success = job.waitForCompletion(true);
        System.out.println(success);
        System.exit(success ? 0 : 1);
    }
}
4.导出jar包
File->Export->JAR file
5.在yarn集群运行
hadoop jar wc.jar com.mr.wc.WordcountDriver
6.在集群上运行
1)需要配置 MapReduce 的运行框架。mapreduce.framework.name 默认值为 local(本地运行),必须改为 yarn 才能提交到集群,否则作业会一直在本地运行。该属性应配置在 mapred-site.xml 中(不是 hdfs-site.xml):
mapred-site.xml
<configuration>
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
</configuration>
注意:修改之前先备份原配置文件(模板),这样改乱了还可以随时恢复。