MapReduce之wordCount计数
1. 新建一个words.txt,上传到hadoop服务器的HDFS根目录(例如执行:hadoop fs -put words.txt /,与代码中的输入路径/words.txt对应)。
2. 打开eclipse,编写代码。
(1)WCMapper.java
import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
/**
 * Map phase of WordCount: for each input line, emits (word, 1) for every
 * whitespace-separated token.
 *
 * Input key is the byte offset of the line (unused); input value is the
 * line text.
 */
public class WCMapper extends Mapper<LongWritable, Text, Text, LongWritable> {

    // Reused output objects — allocating a new Text/LongWritable per token
    // creates needless GC pressure; reuse is the standard Hadoop idiom.
    private final Text word = new Text();
    private static final LongWritable ONE = new LongWritable(1);

    /**
     * Tokenizes one line and writes a (word, 1) pair per token.
     *
     * @param key     byte offset of the line within the split (unused)
     * @param value   the line of text
     * @param context used to emit (Text, LongWritable) pairs
     */
    @Override
    protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, LongWritable>.Context context)
            throws IOException, InterruptedException {
        String line = value.toString();
        // Split on runs of whitespace so tabs and repeated spaces are
        // handled, not just a single ' '.
        for (String w : line.split("\\s+")) {
            if (w.isEmpty()) {
                // Leading whitespace produces an empty first token — skip it.
                continue;
            }
            word.set(w);
            context.write(word, ONE);
        }
    }
}
(2)WCReducer.java
import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
/**
 * Reduce phase of WordCount: sums all counts emitted for a single word and
 * writes (word, total).
 */
public class WCReducer extends Reducer<Text, LongWritable, Text, LongWritable> {

    // Reused output value — avoids allocating a fresh LongWritable per key,
    // the standard Hadoop idiom.
    private final LongWritable result = new LongWritable();

    /**
     * Sums the counts for one word.
     *
     * @param key     the word
     * @param v2s     all counts emitted for this word by the mappers
     * @param context used to emit the (word, total) pair
     */
    @Override
    protected void reduce(Text key, Iterable<LongWritable> v2s,
            Reducer<Text, LongWritable, Text, LongWritable>.Context context)
            throws IOException, InterruptedException {
        long counter = 0;
        for (LongWritable count : v2s) {
            counter += count.get();
        }
        result.set(counter);
        context.write(key, result);
    }
}
(3)WordCount.java
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
/**
 * Driver for the WordCount job.
 *
 * Usage: hadoop jar wc.jar WordCount [inputPath outputPath]
 * When no arguments are given, falls back to the original hard-coded HDFS
 * paths (/words.txt and /wcount312), so existing invocations keep working.
 */
public class WordCount {

    public static void main(String[] args) throws Exception {
        // Optional CLI paths; defaults preserve the original behavior.
        String input = args.length > 0 ? args[0] : "/words.txt";
        String output = args.length > 1 ? args[1] : "/wcount312";

        Job job = Job.getInstance(new Configuration());
        // Lets Hadoop locate the jar containing the job classes.
        job.setJarByClass(WordCount.class);

        job.setMapperClass(WCMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(LongWritable.class);

        // Summation is associative and commutative, so the reducer doubles
        // as a combiner, shrinking map output before the shuffle.
        job.setCombinerClass(WCReducer.class);

        job.setReducerClass(WCReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);

        FileInputFormat.setInputPaths(job, new Path(input)); // on HDFS
        FileOutputFormat.setOutputPath(job, new Path(output));

        // Propagate job success/failure to the shell via the exit code
        // instead of discarding waitForCompletion's result.
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
3. 打包成jar文件wc.jar,移动到/usr/local/src/demo/下。
4. 执行jar文件:hadoop jar /usr/local/src/demo/wc.jar WordCount(若jar包的manifest未指定Main-Class,必须在命令中给出主类名)。
5. 查看运行结果:hadoop fs -cat /wcount312/part-r-00000