package com.csv;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.Closeable;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.Date;
import java.util.List;
import java.util.PriorityQueue;
import org.apache.log4j.Logger;
import com.thread.CheckMemoryUses;
/**
 * Sorts a large comma-separated file with an external merge sort.
 *
 * The input is read in chunks of at most {@link #MAX_LINE_READ} non-blank
 * lines. Each chunk is sorted in memory and written to a temporary file
 * under the {@code tmp} directory; the sorted chunks are then merged into
 * {@code <fileName>_sorted}.
 *
 * Lines are ordered by their first column (a {@code dd-MMM-yyyy} date) when
 * the compare index is 0, and by their second column (a number) for any
 * other compare index.
 *
 * An instance is not safe for concurrent use: it shares one date parser and
 * one set of temporary/output file names.
 *
 * @author atul.kumar
 *
 */
public class ExternalMergeSort {
    /** Maximum number of lines held in memory (and written) per chunk. */
    public static final int MAX_LINE_READ = 5000;
    public static final Logger LOG = Logger.getLogger(ExternalMergeSort.class);

    private final String fileName;
    private final int compareIndex;
    // Created once per sorter instead of once per parsed line;
    // SimpleDateFormat is not thread-safe, hence an instance field.
    private final SimpleDateFormat dateFormat = new SimpleDateFormat(
            "dd-MMM-yyyy");

    /**
     * @param fileName
     *            path of the file to sort
     * @param compareIndex
     *            0 to order by the date column, anything else to order by
     *            the amount column
     */
    public ExternalMergeSort(String fileName, int compareIndex) {
        this.fileName = fileName;
        this.compareIndex = compareIndex;
    }

    /**
     * Builds the path of the temporary chunk file with the given index.
     *
     * @param index
     *            zero-based chunk number
     * @return path of the chunk file below the {@code tmp} directory
     */
    private String generateFileName(int index) {
        return "tmp" + File.separator + this.fileName + "_" + "chunk" + "_"
                + index;
    }

    /**
     * Will generate temporary files. The parent directory is created when it
     * is missing and the file is registered for deletion at JVM exit.
     *
     * @param index
     *            zero-based chunk number
     * @return the chunk file (not yet written)
     */
    private File generateFile(int index) {
        File file = new File(generateFileName(index));
        File parent = file.getParentFile();
        // mkdirs() returns false when the directory already exists, so only
        // complain when it is still missing afterwards.
        if (parent != null && !parent.mkdirs() && !parent.isDirectory()) {
            LOG.warn("Could not create temporary directory " + parent);
        }
        file.deleteOnExit();
        return file;
    }

    /**
     * This method will be responsible for reading file, and creating some
     * sorted temporary files from that.
     *
     * After that this will merge all this file in out put files. Failures
     * are logged through {@link #LOG}; no exception is propagated.
     *
     * @see #mergeFiles(int)
     */
    public void externalMerge() {
        try {
            int chunkCount = splitIntoSortedChunks();
            mergeFiles(chunkCount);
        } catch (Exception ex) {
            LOG.error("External merge sort of '" + this.fileName
                    + "' failed", ex);
        }
    }

    /**
     * Reads the input file and writes it out as sorted chunk files.
     *
     * @return number of chunk files written (0 for an input without any
     *         non-blank line)
     * @throws IOException
     *             if the input cannot be read or a chunk cannot be written
     */
    private int splitIntoSortedChunks() throws IOException {
        BufferedReader br = new BufferedReader(new FileReader(this.fileName));
        try {
            List<CustomLine> chunk = new ArrayList<CustomLine>(MAX_LINE_READ);
            int chunkCount = 0;
            String line;
            while ((line = br.readLine()) != null) {
                if (line.trim().length() == 0) {
                    continue;
                }
                chunk.add(new CustomLine(line));
                if (chunk.size() == MAX_LINE_READ) {
                    writeSortedChunk(chunk, chunkCount);
                    chunkCount++;
                    chunk.clear();
                }
            }
            // Remaining lines when the last chunk is not full
            if (!chunk.isEmpty()) {
                writeSortedChunk(chunk, chunkCount);
                chunkCount++;
            }
            return chunkCount;
        } finally {
            closeQuietly(br);
        }
    }

    /**
     * Sorts one chunk by the configured column and writes it to its
     * temporary file. The chunk must use the same order as the merge step,
     * otherwise the merged output is not sorted.
     */
    private void writeSortedChunk(List<CustomLine> chunk, int index)
            throws IOException {
        Collections.sort(chunk, new CustomLine(compareIndex));
        writeLines(generateFile(index), chunk);
    }

    /** Writes every line followed by a {@code '\n'} to the target file. */
    private void writeLines(File target, List<CustomLine> lines)
            throws IOException {
        BufferedWriter bw = new BufferedWriter(new FileWriter(target));
        try {
            for (CustomLine custLine : lines) {
                bw.write(custLine.line);
                bw.write('\n');
            }
            // Flush inside the try block so that a write error is reported
            // to the caller instead of being hidden by the quiet close.
            bw.flush();
        } finally {
            closeQuietly(bw);
        }
    }

    /**
     * Sorts the whole file in memory by the configured column and writes the
     * result to {@code <fileName>_sorted}. Blank lines are dropped. Failures
     * are logged through {@link #LOG}; no exception is propagated.
     */
    public void simpleSort() {
        try {
            List<CustomLine> listOfLines = new ArrayList<CustomLine>();
            BufferedReader br = new BufferedReader(new FileReader(
                    this.fileName));
            try {
                String line;
                while ((line = br.readLine()) != null) {
                    if (line.trim().length() == 0) {
                        continue;
                    }
                    listOfLines.add(new CustomLine(line));
                }
            } finally {
                closeQuietly(br);
            }
            Collections.sort(listOfLines, new CustomLine(compareIndex));
            writeLines(new File(this.fileName + "_sorted"), listOfLines);
        } catch (Exception ex) {
            LOG.error("In-memory sort of '" + this.fileName + "' failed", ex);
        }
    }

    /**
     * This will be responsible for creating readers for all sorted temporary
     * files at last this will sort these files and write output. The readers
     * are always closed and the chunk files removed afterwards.
     *
     * @param numOfFiles
     *            Total number for sorted temporary files (chunk indexes
     *            0 .. numOfFiles - 1)
     * @throws IOException
     *             if a chunk cannot be read or the output cannot be written
     * @see #sortFilesAndWriteOutput(List)
     */
    private void mergeFiles(int numOfFiles) throws IOException {
        List<BufferedReader> listOfBufferedReader = new ArrayList<BufferedReader>(
                numOfFiles);
        try {
            for (int index = 0; index < numOfFiles; index++) {
                listOfBufferedReader.add(new BufferedReader(new FileReader(
                        generateFileName(index))));
            }
            sortFilesAndWriteOutput(listOfBufferedReader);
        } finally {
            for (BufferedReader reader : listOfBufferedReader) {
                closeQuietly(reader);
            }
            for (int index = 0; index < numOfFiles; index++) {
                // Best effort: a file that cannot be deleted here is still
                // registered with deleteOnExit() by generateFile().
                new File(generateFileName(index)).delete();
            }
        }
    }

    /**
     * This will sort all files by reading first line of each files, then
     * repeatedly writing the smallest pending line and replacing it with the
     * next line of the file it came from.
     *
     * Lines with equal keys are written in chunk order, so lines that
     * compare equal keep their relative order from the input file.
     *
     * @param listOfBufferedReader
     *            this is list of BufferedReader for all temporary files
     * @throws IOException
     *             if a chunk cannot be read or the output cannot be written
     */
    private void sortFilesAndWriteOutput(
            List<BufferedReader> listOfBufferedReader) throws IOException {
        final Comparator<CustomLine> keyOrder = new CustomLine(compareIndex);
        Comparator<CustomLine> mergeOrder = new Comparator<CustomLine>() {
            public int compare(CustomLine first, CustomLine second) {
                int byKey = keyOrder.compare(first, second);
                if (byKey != 0) {
                    return byKey;
                }
                // Equal keys: the earlier chunk holds the earlier input lines
                if (first.indexForFileName < second.indexForFileName) {
                    return -1;
                }
                return first.indexForFileName == second.indexForFileName ? 0
                        : 1;
            }
        };
        // PriorityQueue rejects an initial capacity below 1
        PriorityQueue<CustomLine> pendingLines = new PriorityQueue<CustomLine>(
                Math.max(1, listOfBufferedReader.size()), mergeOrder);
        // Read first line from each file
        for (int index = 0; index < listOfBufferedReader.size(); index++) {
            String line = listOfBufferedReader.get(index).readLine();
            if (line != null) {
                pendingLines.add(new CustomLine(line, index));
            }
        }
        BufferedWriter bw = new BufferedWriter(new FileWriter(this.fileName
                + "_sorted"));
        try {
            CustomLine custLine;
            while ((custLine = pendingLines.poll()) != null) {
                bw.write(custLine.line);
                bw.write('\n');
                // Refill from the file whose line was just written
                int indexForFileName = custLine.indexForFileName;
                String line = listOfBufferedReader.get(indexForFileName)
                        .readLine();
                if (line != null) {
                    pendingLines.add(new CustomLine(line, indexForFileName));
                }
            }
            bw.flush();
        } finally {
            closeQuietly(bw);
        }
    }

    /** Closes the resource, logging (not propagating) a failure to close. */
    private static void closeQuietly(Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (IOException ex) {
            LOG.warn("Failed to close " + resource, ex);
        }
    }

    /** Compares two values with {@code null} ordered before any value. */
    private static <T extends Comparable<T>> int compareNullsFirst(T first,
            T second) {
        if (first == null) {
            return second == null ? 0 : -1;
        }
        if (second == null) {
            return 1;
        }
        return first.compareTo(second);
    }

    /** Formats a duration as unpadded {@code hours:minutes:seconds}. */
    private static String formatElapsed(long millis) {
        long totalSeconds = millis / 1000;
        return (totalSeconds / 3600) + ":" + ((totalSeconds / 60) % 60) + ":"
                + (totalSeconds % 60);
    }

    /**
     * Sorts {@code input_csv.csv} by its amount column and logs the elapsed
     * time and the value reported by the memory monitor thread.
     *
     * @param args
     *            not used
     */
    public static void main(String[] args) {
        CheckMemoryUses checkMemory = new CheckMemoryUses();
        checkMemory.setDaemon(true);
        checkMemory.start();
        // External Merge Sorting Algorithms
        ExternalMergeSort sortLargeFiles = new ExternalMergeSort(
                "input_csv.csv", 1);
        long start = System.currentTimeMillis();
        sortLargeFiles.externalMerge();
        // Elapsed time is taken after the sort has finished
        LOG.debug(formatElapsed(System.currentTimeMillis() - start));
        LOG.debug(checkMemory.getMaxAllocatedMemory());
    }

    /**
     * One parsed line of the file. An instance created with
     * {@link #CustomLine(int)} carries no line and only acts as the
     * comparator for the other instances.
     */
    class CustomLine implements Comparator<CustomLine> {
        // Presently not used
        // private String name;
        private Date date;
        private Double amount;
        private String line;
        private Class<?> compareClass;
        private int indexForFileName;

        /**
         * Creates a comparator-only instance.
         *
         * @param compareIndex
         *            0 compares by date; any other value compares by amount
         */
        public CustomLine(int compareIndex) {
            if (compareIndex == 0) {
                this.compareClass = Date.class;
            } else if (compareIndex == 1) {
                this.compareClass = Double.class;
            }
        }

        /**
         * Orders two lines by date or by amount, depending on how this
         * comparator was created. A missing date or amount sorts first.
         */
        public int compare(CustomLine o1, CustomLine o2) {
            if (compareClass == Date.class) {
                return compareNullsFirst(o1.date, o2.date);
            }
            return compareNullsFirst(o1.amount, o2.amount);
        }

        /**
         * Parses a line of the form {@code date,amount[,...]}. A date that
         * is missing or cannot be parsed is stored as {@code null}; an
         * amount that is missing or cannot be parsed is stored as -1.
         *
         * @param line
         *            the raw line, kept unchanged for output
         */
        public CustomLine(String line) {
            this.line = line;
            String[] split = line.split(",");
            // split() drops trailing empty fields, so a line such as ","
            // yields no columns at all.
            if (split.length > 0) {
                try {
                    this.date = dateFormat.parse(split[0].trim());
                } catch (ParseException e) {
                    LOG.warn("Unparseable date in line: " + line);
                }
            }
            this.amount = -1D;
            if (split.length > 1) {
                try {
                    this.amount = Double.parseDouble(split[1].trim());
                } catch (NumberFormatException e) {
                    // keep the -1 default for an unparseable amount
                }
            }
        }

        /**
         * Parses a line and remembers which chunk file it was read from.
         *
         * @param line
         *            the raw line
         * @param indexForFileName
         *            index of the chunk file the line came from
         */
        public CustomLine(String line, int indexForFileName) {
            this(line);
            this.indexForFileName = indexForFileName;
        }

        @Override
        public boolean equals(Object other) {
            if (other == null) {
                return false;
            }
            if (other == this) {
                return true;
            }
            if (this.getClass() != other.getClass()) {
                return false;
            }
            CustomLine otherMyClass = (CustomLine) other;
            // line is null for comparator-only instances
            if (this.line == null) {
                return otherMyClass.line == null;
            }
            return this.line.equals(otherMyClass.line);
        }

        @Override
        public int hashCode() {
            return this.line == null ? 0 : this.line.hashCode();
        }
    }
}
Good article! Could you also provide the source code for a 1D barcode generator in Java? Thank you, please help!
Reply | Delete
Could you please show me how to time the merge segment of the code?
Reply | Delete
That will depend on the total [number of files generated x max lines read in one chunk].
Delete