MapReduce编程之统计每个订单价格最高的商品信息
orders.txt文件内容如下(每行三个字段,依次为订单ID、商品ID、价格,字段之间以空格分隔):
Order_0000001 Pdt_01 222.8
Order_0000001 Pdt_05 25.8
Order_0000002 Pdt_03 522.8
Order_0000002 Pdt_04 122.4
Order_0000002 Pdt_05 722.4
Order_0000003 Pdt_01 222.8
Order_0000003 Pdt_02 1000.8
Order_0000003 Pdt_03 999.8
要求:统计每个订单价格最高的那个商品的信息
代码实现:
自定义数据类型:
package com.miao.order;
import org.apache.hadoop.io.WritableComparable;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
/**
* @ClassName OrderBean
* @Description TODO 自定义数据类型,
* @Date 2021-04-28 22:50:31
* @Create By Miao
*/
public class OrderBean implements WritableComparable<OrderBean> {
private String orderId;
private String pid;
private double price;
public void setAll(String orderId,String pid,double price){
this.setOrderId(orderId);
this.setPid(pid);
this.setPrice(price);
}
public String getOrderId() {
return orderId;
}
public void setOrderId(String orderId) {
this.orderId = orderId;
}
public String getPid() {
return pid;
}
public void setPid(String pid) {
this.pid = pid;
}
public double getPrice() {
return price;
}
public void setPrice(double price) {
this.price = price;
}
@Override
public String toString() {
return this.orderId+"\t"+this.pid+"\t"+this.price;
}
//只用来做排序
public int compareTo(OrderBean o) {
//先比较订单是否一致
int comp = this.getOrderId().compareTo(o.getOrderId());
//如果相等,价格降序排序
if(comp == 0){
return -Double.valueOf(this.getPrice()).compareTo(Double.valueOf(o.getPrice()));
}
return comp;
}
public void write(DataOutput out) throws IOException {
out.writeUTF(this.orderId);
out.writeUTF(this.pid);
out.writeDouble(this.price);
}
public void readFields(DataInput in) throws IOException {
this.orderId = in.readUTF();
this.pid = in.readUTF();
this.price = in.readDouble();
}
}
自定义分组比较器:
package com.miao.order;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;
/**
 * Grouping comparator that treats two {@link OrderBean} keys as the same group
 * whenever their order ids are equal, regardless of product id or price.
 *
 * <p>Created 2021-04-28 22:50:31.
 *
 * @author Miao
 */
public class UserGroup extends WritableComparator {

    /** Registers {@link OrderBean} as the key class and asks the parent to create key instances. */
    public UserGroup() {
        super(OrderBean.class, true);
    }

    /**
     * Compares two keys by order id only.
     *
     * @param a the first key; must be an {@link OrderBean}
     * @param b the second key; must be an {@link OrderBean}
     * @return the result of comparing the two order id strings
     */
    @Override
    public int compare(WritableComparable a, WritableComparable b) {
        final OrderBean left = (OrderBean) a;
        final OrderBean right = (OrderBean) b;
        final String leftOrderId = left.getOrderId();
        return leftOrderId.compareTo(right.getOrderId());
    }
}
测试:
package com.miao.order;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import java.io.IOException;
/**
* @ClassName OrderTopMR
* @Description TODO
* @Date 2021-04-28 22:50:31
* @Create By Miao
*/
public class OrderTopMR extends Configured implements Tool {
//构建、配置、提交Job
public int run(String[] args) throws Exception {
/**
* step1:构建Job
*/
//实例化一个MapReduce的Job对象
Job job = Job.getInstance(this.getConf(),"orderTop");
//指定允许jar包运行的类
job.setJarByClass(OrderTopMR.class);
/**
* step2:配置Job
*/
//Input:配置输入
//指定输入类的类型
job.setInputFormatClass(TextInputFormat.class);//可以不指定,默认就是TextInputFormat
//指定输入源
Path inputPath = new Path("D:\\Study\\idea\\MavenProject\\text\\orders.txt");
TextInputFormat.setInputPaths(job,inputPath);
//Map:配置Map
job.setMapperClass(OrderMapper.class); //设置调用的Mapper类
job.setMapOutputKeyClass(OrderBean.class); //设置Key2的类型
job.setMapOutputValueClass(NullWritable.class); //设置Value2的类型
//Shuffle:配置Shuffle
job.setGroupingComparatorClass(UserGroup.class); //设置分组器
//Reduce:配置Reduce
job.setReducerClass(OrderReducer.class); //设置调用reduce的类
job.setOutputKeyClass(OrderBean.class); //设置Key3的类型
job.setOutputValueClass(NullWritable.class); //设置Value3的类型
//Output:配置输出
//指定输出类的类型
job.setOutputFormatClass(TextOutputFormat.class);//默认就是TextOutputFormat
//设置输出的路径
Path outputPath = new Path("D:\\Study\\idea\\MavenProject\\output\\order");
//判断输出是否存在,存在就删除
FileSystem fs = FileSystem.get(this.getConf());
if(fs.exists(outputPath)){
fs.delete(outputPath,true);
}
TextOutputFormat.setOutputPath(job,outputPath);
/**
* step3:提交Job
*/
return job.waitForCompletion(true) ? 0 : -1;
}
//程序的入口方法
public static void main(String[] args) throws Exception {
//构建配置管理对象
Configuration conf = new Configuration();
//通过工具类的run方法调用当前类的实例的run方法
int status = ToolRunner.run(conf, new OrderTopMR(), args);
//退出程序
System.exit(status);
}
public static class OrderMapper extends Mapper<LongWritable,Text,OrderBean, NullWritable>{
//Key2
OrderBean outputKey = new OrderBean();
//Value2
NullWritable outputValue = NullWritable.get();
@Override
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
String[] items = value.toString().split("\\s+");
this.outputKey.setAll(items[0],items[1],Double.parseDouble(items[2]));
//输出
context.write(this.outputKey,this.outputValue);
}
}
public static class OrderReducer extends Reducer<OrderBean, NullWritable,OrderBean, NullWritable>{
@Override
protected void reduce(OrderBean key, Iterable<NullWritable> values, Context context) throws IOException, InterruptedException {
//输出每个订单价格最高的那一条
context.write(key,NullWritable.get());
//for (NullWritable value : values) {
// context.write(key,value);
//}
}
}
}
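运行结果(输出文件中每行依次为订单ID、商品ID、价格,以制表符分隔):
Order_0000001	Pdt_01	222.8
Order_0000002	Pdt_05	722.4
Order_0000003	Pdt_02	1000.8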

本文介绍如何利用MapReduce编程统计每个订单中价格最高的商品信息,通过解析orders.txt文件并应用自定义数据类型及分组比较器,最终得到每个订单内价格最高的商品详情。

被折叠的 条评论
为什么被折叠?



