博客
关于我
强烈建议你试试无所不能的chatGPT,快点击我
WordCount 远程集群源码
阅读量:6950 次
发布时间:2019-06-27

本文共 2892 字,大约阅读时间需要 9 分钟。

package test;

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

/**
 * Word-count MapReduce job whose driver is pre-configured to talk to a remote
 * HDFS instance, so it can be launched without a local cluster configuration.
 *
 * <p>The mapper emits {@code (token, 1)} for every whitespace-separated token
 * of each input value; the reducer (also installed as the combiner) sums the
 * counts per token.
 */
public class WordCount {

  /** Input and output paths used when no paths are supplied on the command line. */
  private static final String[] DEFAULT_PATHS = {"input", "out"};

  /** Splits each input value into tokens and emits {@code (token, 1)} per token. */
  public static class TokenizerMapper extends Mapper<Object, Text, Text, IntWritable> {

    private static final IntWritable one = new IntWritable(1);

    // Reused for every emitted token instead of allocating a new Text each time.
    private final Text word = new Text();

    /**
     * Tokenizes {@code value} with a default {@link StringTokenizer} and writes
     * one {@code (token, 1)} pair per token.
     *
     * @param key     input key; not read by this mapper
     * @param value   the text to tokenize
     * @param context sink for the emitted pairs
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    public void map(Object key, Text value, Context context)
        throws IOException, InterruptedException {
      StringTokenizer itr = new StringTokenizer(value.toString());
      while (itr.hasMoreTokens()) {
        word.set(itr.nextToken());
        context.write(word, one);
      }
    }
  }

  /** Sums all counts received for a key and emits {@code (key, sum)}. */
  public static class IntSumReducer extends Reducer<Text, IntWritable, Text, IntWritable> {

    // Reused output holder, overwritten on every reduce call.
    private final IntWritable result = new IntWritable();

    /**
     * Adds up {@code values} and writes the total under {@code key}.
     *
     * @param key     the token being counted
     * @param values  partial counts for that token
     * @param context sink for the emitted pair
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    public void reduce(Text key, Iterable<IntWritable> values, Context context)
        throws IOException, InterruptedException {
      int sum = 0;
      for (IntWritable val : values) {
        sum += val.get();
      }
      result.set(sum);
      context.write(key, result);
    }
  }

  /**
   * Configures and submits the job, then exits with 0 on success or 1 on
   * failure (2 when the wrong number of paths is given).
   *
   * <p>Generic Hadoop options in {@code args} are applied to the configuration.
   * The remaining arguments must be exactly an input path and an output path;
   * when no paths are given, {@code input} and {@code out} are used.
   *
   * @param args generic Hadoop options optionally followed by {@code <in> <out>}
   * @throws Exception if job setup or execution fails
   */
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();

    // Default filesystem URI; it is automatically prepended to the file paths
    // given below.
    conf.set("fs.defaultFS", "hdfs://192.168.2.35:9000/");
    conf.set("hadoop.job.user", "hadoop");
    // NOTE(review): purpose of the "Master" key is not evident from this file — confirm.
    conf.set("Master", "1234");
    // A JobTracker address (IP and port) can be specified via "mapred.job.tracker",
    // e.g. "Master:9001"; the host name "Master" can be configured in /etc/hosts.

    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length == 0) {
      // No paths on the command line: fall back to the built-in defaults.
      otherArgs = DEFAULT_PATHS;
    }
    if (otherArgs.length != 2) {
      System.err.println("Usage: wordcount <in> <out>");
      System.exit(2);
    }

    // Job.getInstance replaces the deprecated Job(Configuration, String) constructor.
    Job job = Job.getInstance(conf, "wordcount");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}

 

转载地址:http://cwuil.baihongyu.com/

你可能感兴趣的文章
一篇就让你懂线程池原理
查看>>
MySql事务和锁
查看>>
iOS面试题06-其他
查看>>
区块链软件公司:供应链高成本地运用区块链技术是否值得?
查看>>
[译]应用内搜索功能实现 Android TV应用程序手册教程十三
查看>>
ListenableFuture的使用总结
查看>>
用canvas画一个刮刮卡
查看>>
swig-python初试
查看>>
文字透明使用背景色
查看>>
大家好
查看>>
部署桌面虚拟化时要考虑的因素
查看>>
cacti 安装部署
查看>>
整体管理
查看>>
Allegro16.6导出位号图
查看>>
mycat err:java.sql.SQLNonTransientException: find no Route:select日志报错
查看>>
Centos7.4源码搭建zabbix3.4.11企业级监控
查看>>
yumi引导盘制作
查看>>
Objective C类方法load和initialize的区别
查看>>
【高德地图API】从零开始学高德JS API(五)路线规划——驾车|公交|步行
查看>>
LINUX中nagios客户端安装步骤及遇到问题
查看>>