- 主页 > 生活百科 > >
package cn.mr.dedup;
import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
/**
 * Map stage of the de-duplication job: emits every input value as the output key,
 * paired with a {@link NullWritable} value.
 *
 * <p>Example input pairs: {@code <0,2018-3-3 c><11,2018-3-4 d>}
 * <br>Example output pairs: {@code <2018-3-3 c,null> <2018-3-4 d,null>}
 */
public class DedupMapper extends Mapper<LongWritable, Text, Text, NullWritable> {

    /**
     * Writes the incoming value unchanged as the output key.
     *
     * @param key     input key supplied by the framework; not used by this mapper
     * @param value   input value, emitted as the output key
     * @param context task context used to emit the output pair
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // Emit the value directly. The previous version routed it through a static field
        // that was merely re-pointed at `value`, which added shared mutable state and a
        // dead `new Text()` allocation without changing what gets written.
        context.write(value, NullWritable.get());
    }
}
package cn.mr.dedup;
import java.io.IOException;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
public class DedupReducer extends Reducer<Text, NullWritable, Text, NullWritable> {
// <2018-3-3 c,null> <2018-3-4 d,null><2018-3-4 d,null>
@Override
protected void reduce(Text key, Iterable<NullWritable> values, Context context)
【Hadoop数据去重】throws IOException, InterruptedException {
context.write(key, NullWritable.get());
}
}
package cn.mr.dedup;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class DedupRunner {
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
Configuration conf = new Configuration();
Job job = Job.getInstance(conf);
job.setJarByClass(DedupRunner.class);
job.setMapperClass(DedupMapper.class);
job.setReducerClass(DedupReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(NullWritable.class);
FileInputFormat.setInputPaths(job, new Path("D:\Dedup\input"));
// 指定处理完成之后的结果所保存的位置
FileOutputFormat.setOutputPath(job, new Path("D:\Dedup\output"));
job.waitForCompletion(true);
}
}
推荐阅读
-
月光科技|再次缺德友商,除了华为今年屏幕都不错,王腾把卢伟冰的活都干了
-
问董秘|是一块非常大的蛋糕,请问公司在...,投资者提问:我们关注到减糖市场是一个大趋势
-
直播吧|年薪约300万欧,葡媒:泰达接近一千万欧签下蒂基尼奥-苏亚雷斯
-
梦到好多小蛇是什么寓意 梦到好多小蛇预示着什么意思还有警察
-
身份证丢失怎么坐火车 身份证丢失怎么坐火车高铁派出所临时身份证有效期
-
楚汉|刘备去世前,将兵权交给了李严,他为何不交给诸葛亮?
-
-
香水掩盖他的罪恶,却被她闻出了端倪:62岁她如何揭穿他的阴谋?
-
-
【迪拜】迪拜王妃沦为生育机器?原配禁足41年生12娃,哈雅出逃不做金丝雀
-
-
月叔说育儿|正确打开智慧只需两招,孩子会越来越聪明,宝宝大脑需要良性刺激
-
#豆宝麻麻TB#会“越长越丑”,无论现在有多漂亮,有这2种习惯的孩子
-
确诊|山东新增境外输入确诊病例2例:烟台报告,来自菲律宾
-
汽车之家|且行且珍惜!,瑞虎7车主的感叹:生活不易
-
小航健康故事|就得保护好血管,这几种食物能帮助你!,要想预防心血管疾病
-
-
-
-