// Based on the code from WordCount.java
// Archana Molasi (molasia)
// Nikitha Chettiar (nikchett)
// This program performs the basic statistical operations of minimum, maximum, average, and standard deviation on the input data.

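// Input format (as the Mapper below assumes): one numeric value per line.
// Output: four records keyed "Minimum", "Maximum", "Average", and "Standard Deviation".
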
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

public class Statistics {
    public static class Map extends Mapper<LongWritable, Text, Text, DoubleWritable> {
        private final Text word = new Text("Values"); // constant output key shared by all records

        @Override
        public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            String line = value.toString().trim();
            if (line.isEmpty())
                return; // skip blank lines instead of failing in Double.parseDouble
            double decimal = Double.parseDouble(line);
            context.write(word, new DoubleWritable(decimal)); // emit the pair <"Values", decimal value>
        }
    }

    public static class Reduce extends Reducer<Text, DoubleWritable, Text, DoubleWritable> {
        // Running aggregates. The maximum starts at -Double.MAX_VALUE because
        // Double.MIN_VALUE is the smallest *positive* double, not the most negative one.
        private double sum = 0.0, mini = Double.MAX_VALUE, maxi = -Double.MAX_VALUE,
                sumSquare = 0.0, standardDev = 0.0, avg = 0.0;
        private int count = 0;
        private final DoubleWritable result = new DoubleWritable();
        private final DoubleWritable resultMini = new DoubleWritable();
        private final DoubleWritable resultMaxi = new DoubleWritable();
        private final DoubleWritable resultSD = new DoubleWritable();
        private final Text minimum = new Text("Minimum");
        private final Text maximum = new Text("Maximum");
        private final Text average = new Text("Average");
        private final Text standardDeviation = new Text("Standard Deviation");
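
        // Every map output shares the single key "Values", so one reduce call
        // receives the whole dataset and the aggregates above cover every record.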
        @Override
        public void reduce(Text key, Iterable<DoubleWritable> values, Context context) throws IOException, InterruptedException {
            for (DoubleWritable val : values) {
                double v = val.get();
                count += 1;
                sum += v;
                sumSquare += v * v;
                // track the minimum value
                if (v < mini)
                    mini = v;
                // track the maximum value
                if (v > maxi)
                    maxi = v;
            }

            // calculation of average
            avg = sum / count;
            // calculation of (population) standard deviation
            standardDev = Math.sqrt((sumSquare + (avg * avg * count) - (2 * avg * sum)) / count);
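            // Why this works: expanding sum((x - avg)^2) gives
            //   sum(x^2) - 2*avg*sum(x) + count*avg^2,
            // which is exactly the numerator above; dividing by count yields the
            // population variance, and the square root is the standard deviation.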
            result.set(avg);
            resultMini.set(mini);
            resultMaxi.set(maxi);
            resultSD.set(standardDev);
            context.write(minimum, resultMini);
            context.write(maximum, resultMaxi);
            context.write(average, result);
            context.write(standardDeviation, resultSD);
        }
    }
    // Driver program
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); // get the remaining (non-generic) args
        if (otherArgs.length != 2) {
            System.err.println("Usage: Statistics <in> <out>");
            System.exit(2);
        }
        // create a job named "statistics"; Job.getInstance replaces the deprecated Job(conf, name) constructor
        Job job = Job.getInstance(conf, "statistics");
        job.setJarByClass(Statistics.class);
        job.setMapperClass(Map.class);
        job.setReducerClass(Reduce.class);
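        // No combiner is set: this Reducer emits four summary records rather than
        // re-reducible partial sums, so it cannot safely double as a combiner.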
        // set output key type
        job.setOutputKeyClass(Text.class);
        // set output value type
        job.setOutputValueClass(DoubleWritable.class);
        // set the HDFS path of the input data
        FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
        // set the HDFS path for the output
        FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
        // wait for job completion
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
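
// Example invocation (jar name and HDFS paths are illustrative, following the
// standard WordCount tutorial workflow this program is based on):
//   $ hadoop com.sun.tools.javac.Main Statistics.java
//   $ jar cf statistics.jar Statistics*.class
//   $ hadoop jar statistics.jar Statistics /user/hduser/input /user/hduser/output
// The output directory then contains one line per statistic:
//   Minimum             <min>
//   Maximum             <max>
//   Average             <mean>
//   Standard Deviation  <stddev>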