Skip to content

Commit 922082a

Browse files
Nikitha Chettiar authored and committed
added project
1 parent ec5ed5e commit 922082a

10 files changed

+1218
-0
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
// Based on the code from WordCount.java
2+
// Archana Molasi (molasia)
3+
// Nikitha Chettiar (nikchett)
4+
// This program performs basic statistic operations of min, max, average and standard deviation on the input data.
5+
6+
import java.io.IOException;
7+
import java.util.StringTokenizer;
8+
import org.apache.hadoop.conf.Configuration;
9+
import org.apache.hadoop.fs.Path;
10+
import org.apache.hadoop.io.*;
11+
import org.apache.hadoop.mapreduce.Job;
12+
import org.apache.hadoop.mapreduce.Mapper;
13+
import org.apache.hadoop.mapreduce.Reducer;
14+
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
15+
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
16+
import org.apache.hadoop.util.GenericOptionsParser;
17+
18+
public class Statistics {
19+
public static class Map extends Mapper<LongWritable, Text, Text, DoubleWritable>{
20+
private Text word = new Text("Values"); // type of output key
21+
public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
22+
Double decimal = Double.parseDouble(value.toString());
23+
DoubleWritable decimalValue = new DoubleWritable(decimal);
24+
context.write(word, decimalValue); // create a pair <keyword, decimal value>
25+
}
26+
}
27+
28+
public static class Reduce
29+
extends Reducer<Text, DoubleWritable,Text, DoubleWritable> {
30+
double sum = 0.0, mini = Double.MAX_VALUE, maxi = Double.MIN_VALUE, sumSquare = 0.0, standardDev = 0.0, avg = 0.0 ;
31+
private DoubleWritable result = new DoubleWritable();
32+
private DoubleWritable resultMini = new DoubleWritable();
33+
private DoubleWritable resultMaxi = new DoubleWritable();
34+
private DoubleWritable resultSD = new DoubleWritable();
35+
private DoubleWritable md = new DoubleWritable();
36+
private Text minimum = new Text("Minimum");
37+
private Text maximum = new Text("Maximum");
38+
private Text average = new Text("Average");
39+
private Text standardDeviation = new Text("Standard Deviation");
40+
private int count = 0;
41+
public void reduce(Text key, Iterable<DoubleWritable> values, Context context) throws IOException, InterruptedException {
42+
for (DoubleWritable val : values) {
43+
count += 1;
44+
sum += val.get();
45+
sumSquare += val.get() * val.get();
46+
// checking for minimum value
47+
if (val.get() < mini)
48+
mini = val.get();
49+
// checking for maximum value
50+
if (val.get() > maxi)
51+
maxi = val.get();
52+
}
53+
54+
//calculation of average
55+
avg = sum/count;
56+
//calculation of standard deviation
57+
standardDev = Math.sqrt((sumSquare+(avg*avg*count)-(2*avg*sum))/count);
58+
result.set(avg);
59+
resultMini.set(mini);
60+
resultMaxi.set(maxi);
61+
resultSD.set(standardDev);
62+
context.write(minimum, resultMini);
63+
context.write(maximum, resultMaxi);
64+
context.write(average, result);
65+
context.write(standardDeviation, resultSD);
66+
67+
}
68+
}
69+
// Driver program
70+
public static void main(String[] args) throws Exception {
71+
Configuration conf = new Configuration();
72+
String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); // get all args
73+
if (otherArgs.length != 2) {
74+
System.err.println("Usage: Statistics <in> <out>");
75+
System.exit(2);
76+
}
77+
// create a job with name "statistics"
78+
Job job = new Job(conf, "statistics");
79+
job.setJarByClass(Statistics.class);
80+
job.setMapperClass(Map.class);
81+
job.setReducerClass(Reduce.class);
82+
// set output key type
83+
job.setOutputKeyClass(Text.class);
84+
// set output value type
85+
job.setOutputValueClass(DoubleWritable.class);
86+
//set the HDFS path of the input data
87+
FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
88+
// set the HDFS path for the output
89+
FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
90+
//Wait till job completion
91+
System.exit(job.waitForCompletion(true) ? 0 : 1);
92+
}
93+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
// Based on the code from WordCount.java
2+
// Archana Molasi (molasia)
3+
// Nikitha Chettiar (nikchett)
4+
// This program performs basic statistic operations of min, max, average and standard deviation on the input data.
5+
6+
import java.io.IOException;
7+
import java.util.StringTokenizer;
8+
import org.apache.hadoop.conf.Configuration;
9+
import org.apache.hadoop.fs.Path;
10+
import org.apache.hadoop.io.*;
11+
import org.apache.hadoop.mapreduce.Job;
12+
import org.apache.hadoop.mapreduce.Mapper;
13+
import org.apache.hadoop.mapreduce.Reducer;
14+
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
15+
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
16+
import org.apache.hadoop.util.GenericOptionsParser;
17+
18+
public class Statistics {
19+
public static class Map extends Mapper<LongWritable, Text, Text, DoubleWritable>{
20+
private Text word = new Text("Values"); // type of output key
21+
public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
22+
Double decimal = Double.parseDouble(value.toString());
23+
DoubleWritable decimalValue = new DoubleWritable(decimal);
24+
context.write(word, decimalValue); // create a pair <keyword, decimal value>
25+
}
26+
}
27+
28+
public static class Reduce
29+
extends Reducer<Text, DoubleWritable,Text, DoubleWritable> {
30+
double sum = 0.0, mini = Double.MAX_VALUE, maxi = Double.MIN_VALUE, sumSquare = 0.0, standardDev = 0.0, avg = 0.0 ;
31+
private DoubleWritable result = new DoubleWritable();
32+
private DoubleWritable resultMini = new DoubleWritable();
33+
private DoubleWritable resultMaxi = new DoubleWritable();
34+
private DoubleWritable resultSD = new DoubleWritable();
35+
private DoubleWritable md = new DoubleWritable();
36+
private Text minimum = new Text("Minimum");
37+
private Text maximum = new Text("Maximum");
38+
private Text average = new Text("Average");
39+
private Text standardDeviation = new Text("Standard Deviation");
40+
private int count = 0;
41+
public void reduce(Text key, Iterable<DoubleWritable> values, Context context) throws IOException, InterruptedException {
42+
for (DoubleWritable val : values) {
43+
count += 1;
44+
sum += val.get();
45+
sumSquare += val.get() * val.get();
46+
// checking for minimum value
47+
if (val.get() < mini)
48+
mini = val.get();
49+
// checking for maximum value
50+
if (val.get() > maxi)
51+
maxi = val.get();
52+
}
53+
54+
55+
avg = sum/count;
56+
//calculation of standard deviation
57+
standardDev = Math.sqrt((sumSquare+(avg*avg*count)-(2*avg*sum))/count);
58+
result.set(avg);
59+
resultMini.set(mini);
60+
resultMaxi.set(maxi);
61+
resultSD.set(standardDev);
62+
context.write(minimum, resultMini);
63+
context.write(maximum, resultMaxi);
64+
context.write(average, result);
65+
context.write(standardDeviation, resultSD);
66+
67+
}
68+
}
69+
// Driver program
70+
public static void main(String[] args) throws Exception {
71+
Configuration conf = new Configuration();
72+
String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); // get all args
73+
if (otherArgs.length != 2) {
74+
System.err.println("Usage: Statistics <in> <out>");
75+
System.exit(2);
76+
}
77+
// create a job with name "statistics"
78+
Job job = new Job(conf, "statistics");
79+
job.setJarByClass(Statistics.class);
80+
job.setMapperClass(Map.class);
81+
job.setReducerClass(Reduce.class);
82+
// set output key type
83+
job.setOutputKeyClass(Text.class);
84+
// set output value type
85+
job.setOutputValueClass(DoubleWritable.class);
86+
//set the HDFS path of the input data
87+
FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
88+
// set the HDFS path for the output
89+
FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
90+
//Wait till job completion
91+
System.exit(job.waitForCompletion(true) ? 0 : 1);
92+
}
93+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
#!/bin/sh
# Build script: compiles Statistics.java against the Hadoop 1.1.2 jars,
# packages the classes into statistics.jar, and installs the jar into
# $HADOOP_HOME/bin.

# Stop on the first failure so a broken compile never produces/installs a stale jar.
set -e

# Ensure the output directory for compiled classes exists.
if [ ! -d classes ]; then
    mkdir classes
fi

# Compile Statistics (quote $HADOOP_HOME so paths with spaces work).
javac -classpath "$HADOOP_HOME/hadoop-core-1.1.2.jar:$HADOOP_HOME/lib/commons-cli-1.2.jar" -d ./classes Statistics.java

# Create the Jar
jar -cvf statistics.jar -C ./classes/ .

# Copy the jar file to the Hadoop distributions
cp statistics.jar "$HADOOP_HOME/bin/"
13+
+13
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
#!/bin/sh
# Build script: compiles Statistics.java against the Hadoop 1.1.2 jars,
# packages the classes into statistics.jar, and installs the jar into
# $HADOOP_HOME/bin.

# Stop on the first failure so a broken compile never produces/installs a stale jar.
set -e

# Ensure the output directory for compiled classes exists.
if [ ! -d classes ]; then
    mkdir classes
fi

# Compile Statistics (quote $HADOOP_HOME so paths with spaces work).
javac -classpath "$HADOOP_HOME/hadoop-core-1.1.2.jar:$HADOOP_HOME/lib/commons-cli-1.2.jar" -d ./classes Statistics.java

# Create the Jar
jar -cvf statistics.jar -C ./classes/ .

# Copy the jar file to the Hadoop distributions
cp statistics.jar "$HADOOP_HOME/bin/"
13+
Binary file not shown.
Binary file not shown.
Binary file not shown.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
#!/bin/sh
# Cleanup script: removes build artifacts and the installed jars.
# -f lets each rm succeed quietly even when its target is already gone,
# so the script can be re-run safely; quotes protect paths with spaces.

rm -rf classes
rm -f statistics.jar
rm -rf output
rm -f "$HADOOP_HOME/bin/statistics.jar"
rm -f "$HADOOP_HOME-standalone/bin/statistics.jar"
6+

0 commit comments

Comments
 (0)