Driver Class:
package org.puneetha.patternMatching;

import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

/**
 * MapReduce driver that lists a directory, keeps only the files whose names
 * match the reducer-output naming scheme ({@code part-r-NNNNN}) as job input,
 * and deletes every other file in that directory.
 */
public class WordcountDriver extends Configured implements Tool {

    /**
     * Matches Hadoop reducer output files such as {@code part-r-00000}.
     * Compiled once as a proper regex. The original recompiled the pattern on
     * every loop iteration and used the shell glob {@code "part-r-*"} as a
     * regex — there the {@code *} quantifies the trailing hyphen, so with
     * {@code find()} it matched any filename merely containing "part-r".
     */
    private static final Pattern PART_FILE_PATTERN = Pattern.compile("^part-r-\\d+");

    /**
     * Configures and runs the job.
     *
     * @param args args[0] = input path, args[1] = output path of a previous job
     *             whose part files are filtered into this job's input
     * @return 0 on job success, 1 on failure
     * @throws Exception on HDFS or job-submission errors
     */
    @Override
    public int run(String[] args) throws Exception {
        Job job = Job.getInstance(getConf());
        /*
         * ... Other Driver class code ...
         */
        Path inputFilePath = new Path(args[0]);
        Path outputFilePath = new Path(args[1]);

        // List all the filenames in the directory being filtered.
        FileSystem fs = FileSystem.newInstance(getConf());
        FileStatus[] statusList = fs.listStatus(outputFilePath);
        if (statusList != null) {
            for (FileStatus status : statusList) {
                String filename = status.getPath().getName();
                // Use the listed file's own path. The original rebuilt the path
                // under inputFilePath even though the listing came from
                // outputFilePath, so it added to the job — and recursively
                // deleted — paths in the wrong directory.
                Path fullFilePath = status.getPath();
                Matcher matcher = PART_FILE_PATTERN.matcher(filename);
                if (matcher.find()) {
                    System.out.println("Matched => " + filename);
                    // Matched part files become input splits for this job.
                    FileInputFormat.addInputPath(job, fullFilePath);
                } else {
                    System.out.println("Not Matched => " + filename);
                    // NOTE(review): destructive — recursively deletes any
                    // non-part file (e.g. _SUCCESS); confirm this is intended.
                    fs.delete(fullFilePath, true);
                }
            }
        }
        return job.waitForCompletion(true) ? 0 : 1;
    }

    public static void main(String[] args) throws Exception {
        int res = ToolRunner.run(new WordcountDriver(), args);
        System.exit(res);
    }
}
I want MapReduce code for Hadoop that matches a particular pattern. Suppose, for example, I capture a set of packets with tcpdump and load them into Wireshark; I then want to analyze that data in Hadoop. How can this be done? Please help — the data generated is nearly 1.50 GB. Please suggest an algorithm or MapReduce code for this. Thank you.
Thanks Puneetha,
I am an experienced Java developer. This MapReduce material helped me. Your painting is very good.