Commit 17d59ce5 authored by Morales Hernandez, Mario's avatar Morales Hernandez, Mario
Browse files

splitASCII cpp file optimized to save 92% of time

The splitASCII cpp file has been optimized to speed up the reading
process. Splitting a large file should no longer take a long time.


Former-commit-id: 46f0b324
parent 75c387e5
Loading
Loading
Loading
Loading
+201 −119
Original line number Diff line number Diff line
@@ -6,13 +6,14 @@
#include <dirent.h>
#include <sys/stat.h>
#include <mpi.h>
#include <math.h>

const int NFILES = 552;
const std::string IS_MANN = "NO";
const std::string IS_RMAP = "NO";
const int NFILES = 7;
const std::string IS_MANN = "YES";
const std::string IS_RMAP = "YES";

const std::string TRITON_DIR = "/home/mario/fork_tritonmpi/tritonmpi";
const std::string INPUT_DEM = TRITON_DIR + "/input/dem/asc/rasterEbro2x2.asc";
const std::string TRITON_DIR = "/home/user/fork_tritonmpi/tritonmpi";
const std::string INPUT_DEM = TRITON_DIR + "/input/dem/asc/case03.dem";
const std::string INPUT_MANN = TRITON_DIR + "/input/mann/asc/case03.mann";
const std::string INPUT_RMAP = TRITON_DIR + "/input/runoff/case03_runoff.rmap";

@@ -35,7 +36,7 @@ std::vector<std::string> split(const std::string &s, char delim)
}


void split_dem_to_bin(const std::string& casename_dem, const int start_index, const int end_index, const int idx, const long ncols) {
void split_dem_to_bin(const std::string& casename_dem, const int start_idx, const int end_idx, const std::vector<int> line_numbers, const long ncols) {
    
	std::ifstream input(INPUT_DEM);
   if (!input.is_open()) {
@@ -45,17 +46,19 @@ void split_dem_to_bin(const std::string& casename_dem, const int start_index, co

   std::string line;
   int line_count = -6;
	 long i = 0;
	 int nrows_local=end_index-start_index;
	 double *arr = new double [nrows_local*ncols];
	int start_index= ((start_idx == 0) ? 0 : line_numbers[start_idx - 1]);
	int end_index=line_numbers[end_idx - 1];
	int nrows_total=end_index-start_index;

    while (std::getline(input, line)) {
        if (line_count < 0) {
            // Skip the header lines
   double *arr = new double [nrows_total*ncols];

   // Skip lines until reaching start_index
   while (line_count < start_index  && std::getline(input, line)) {
   	line_count++;
            continue;
   }
        if (line_count >= start_index && line_count < end_index) {

	long i = 0;
   while (line_count < end_index && std::getline(input, line)) {
		std::vector<std::string> row = split(line, ' ');
		std::string val;
		std::vector<std::string>::iterator strit = row.begin();
@@ -66,12 +69,17 @@ void split_dem_to_bin(const std::string& casename_dem, const int start_index, co
			arr[(ncols * i) + j] = (val.find(".") != std::string::npos) ? (double)atof(val.c_str()) : (double)atoi(val.c_str());
		}
		i++;
        }
		line_count++;
	}

	input.close();

	int sum=0;
	for (int idx = start_idx; idx < end_idx; idx++) {
		start_index = (idx == 0) ? 0 : line_numbers[idx - 1] ;
		end_index = line_numbers[idx];
	 	int nrows_local=end_index-start_index;

		 std::string outfile = casename_dem + "_" + (idx < 10 ? "0" : "") + std::to_string(idx) + ".dem";
		 std::ofstream output(outfile, std::ios::binary);

@@ -85,32 +93,41 @@ void split_dem_to_bin(const std::string& casename_dem, const int start_index, co
		 output.write((char*) &put_rows_value, sizeof(double));
		 output.write((char*) &put_cols_value, sizeof(double));

	 output.write((char*)&arr[0], nrows_local*ncols * sizeof(double));
		 output.write((char*)&arr[sum], nrows_local*ncols * sizeof(double));
		 output.close();

    delete[] arr; 

		 sum+=nrows_local*ncols;
		 std::cout << "Split ASCII DEM file and converted to BIN " << outfile << std::endl;

	 }
	 delete[] arr; 

void split_mann_to_bin(const std::string& casename_mann, const int start_index, const int end_index, const int idx, const long ncols) {
}

    std::ifstream input(INPUT_MANN);

void split_mann_to_bin(const std::string& casename_mann, const int start_idx, const int end_idx, const std::vector<int> line_numbers, const long ncols) {
    
	std::ifstream input(INPUT_MANN);
   if (!input.is_open()) {
        std::cerr << "Error opening MANN file" << std::endl;
       std::cerr << "Error opening input MANN file." << std::endl;
       return;
   }

   std::string line;
   int line_count = 0;
	 long i = 0;
	 int nrows_local=end_index-start_index;
	 double *arr = new double [nrows_local*ncols];
	int start_index= ((start_idx == 0) ? 0 : line_numbers[start_idx - 1]);
	int end_index=line_numbers[end_idx - 1];
	int nrows_total=end_index-start_index;

    while (std::getline(input, line)) {
        if (line_count >= start_index && line_count < end_index) {
   double *arr = new double [nrows_total*ncols];

    // Skip lines until reaching start_index
    while (line_count < start_index  && std::getline(input, line)) {
        line_count++;
    }

	long i = 0;
   while (line_count < end_index && std::getline(input, line)) {
		std::vector<std::string> row = split(line, ' ');
		std::string val;
		std::vector<std::string>::iterator strit = row.begin();
@@ -121,13 +138,16 @@ void split_mann_to_bin(const std::string& casename_mann, const int start_index,
			arr[(ncols * i) + j] = (val.find(".") != std::string::npos) ? (double)atof(val.c_str()) : (double)atoi(val.c_str());
		}
		i++;
        }

		line_count++;
	}

	input.close();

	int sum=0;
	for (int idx = start_idx; idx < end_idx; idx++) {
		start_index = (idx == 0) ? 0 : line_numbers[idx - 1] ;
		end_index = line_numbers[idx];
	 	int nrows_local=end_index-start_index;

		 std::string outfile = casename_mann + "_" + (idx < 10 ? "0" : "") + std::to_string(idx) + ".mann";
		 std::ofstream output(outfile, std::ios::binary);
@@ -142,19 +162,86 @@ void split_mann_to_bin(const std::string& casename_mann, const int start_index,
		 output.write((char*) &put_rows_value, sizeof(double));
		 output.write((char*) &put_cols_value, sizeof(double));

	 output.write((char*)&arr[0], nrows_local*ncols * sizeof(double));
		 output.write((char*)&arr[sum], nrows_local*ncols * sizeof(double));
		 output.close();

		 sum+=nrows_local*ncols;
		 std::cout << "Split ASCII MANN file and converted to BIN " << outfile << std::endl;

	 }
	 delete[] arr; 

    output.close();
}

	 std::cout << "Split ASCII MANN file and converted to BIN " << outfile << std::endl;
/**
 * Reads, in a single pass over INPUT_RMAP, the rows that belong to output
 * chunks [start_idx, end_idx) and writes one binary ".rmap" file per chunk.
 *
 * Chunk k covers input rows
 *   [ (k == 0 ? 0 : line_numbers[k - 1]), line_numbers[k] ).
 * Each output file is named <casename_rmap>_<k>.rmap (k is prefixed with "0"
 * when it is below 10) and contains two ints (row count, column count)
 * followed by the cell values as ints, one row after another.
 *
 * @param casename_rmap  Prefix used to build the output file names.
 * @param start_idx      First chunk index handled by this call (inclusive).
 * @param end_idx        One past the last chunk index handled by this call.
 * @param line_numbers   End row (exclusive) of every chunk, indexed by chunk.
 * @param ncols          Number of values stored per row.
 *
 * Errors (unreadable input, invalid range, unopenable output) are reported on
 * std::cerr and the function returns without throwing.
 */
void split_rmap_to_bin(const std::string& casename_rmap, const int start_idx, const int end_idx, const std::vector<int> line_numbers, const long ncols) {

	std::ifstream input(INPUT_RMAP);
	if (!input.is_open()) {
		std::cerr << "Error opening input RMAP file." << std::endl;
		return;
	}

	// A caller with no chunks assigned has nothing to read or write; bail out
	// before indexing line_numbers with an out-of-range position.
	if (start_idx >= end_idx) {
		return;
	}
	if (start_idx < 0 || static_cast<std::size_t>(end_idx) > line_numbers.size() || ncols < 0) {
		std::cerr << "Error: invalid chunk range or column count for RMAP split." << std::endl;
		return;
	}

	const int first_row = (start_idx == 0) ? 0 : line_numbers[start_idx - 1];
	const int last_row = line_numbers[end_idx - 1];
	if (last_row < first_row) {
		std::cerr << "Error: invalid chunk range or column count for RMAP split." << std::endl;
		return;
	}

	const std::size_t row_width = static_cast<std::size_t>(ncols);
	const std::size_t total_rows = static_cast<std::size_t>(last_row - first_row);

	// Zero-initialised and owned by the vector: short rows or a truncated
	// input file yield zeros instead of garbage, and every return path
	// releases the buffer.
	std::vector<int> cells(total_rows * row_width, 0);

	std::string line;
	int line_count = 0;

	// Skip lines until reaching the first row of the first requested chunk.
	while (line_count < first_row && std::getline(input, line)) {
		line_count++;
	}

	std::size_t row_idx = 0;
	while (line_count < last_row && std::getline(input, line)) {
		const std::vector<std::string> row = split(line, ' ');
		std::size_t col = 0;
		for (const std::string& val : row) {
			// Tokens beyond ncols are ignored so a malformed row cannot
			// spill into the next row or past the end of the buffer.
			if (col >= row_width) {
				break;
			}
			cells[row_width * row_idx + col] = (val.find('.') != std::string::npos)
				? static_cast<int>(atof(val.c_str()))
				: static_cast<int>(atoi(val.c_str()));
			++col;
		}
		++row_idx;
		++line_count;
	}

	input.close();

	// Offset (in cells) of the current chunk inside the buffer; size_t keeps
	// large grids from overflowing an int.
	std::size_t offset = 0;
	for (int idx = start_idx; idx < end_idx; idx++) {
		const int chunk_begin = (idx == 0) ? 0 : line_numbers[idx - 1];
		const int chunk_end = line_numbers[idx];
		const int nrows_local = chunk_end - chunk_begin;

		if (nrows_local < 0) {
			std::cerr << "Error: invalid chunk range or column count for RMAP split." << std::endl;
			return;
		}
		const std::size_t chunk_cells = static_cast<std::size_t>(nrows_local) * row_width;
		if (offset + chunk_cells > cells.size()) {
			std::cerr << "Error: invalid chunk range or column count for RMAP split." << std::endl;
			return;
		}

		std::string outfile = casename_rmap + "_" + (idx < 10 ? "0" : "") + std::to_string(idx) + ".rmap";
		std::ofstream output(outfile, std::ios::binary);

		if (!output.is_open()) {
			std::cerr << "Error opening file: " << outfile << std::endl;
			return;
		}

		// Header: number of rows and columns of this chunk, stored as ints.
		const int put_rows_value = nrows_local;
		const int put_cols_value = static_cast<int>(ncols);

		output.write(reinterpret_cast<const char*>(&put_rows_value), sizeof(int));
		output.write(reinterpret_cast<const char*>(&put_cols_value), sizeof(int));

		output.write(reinterpret_cast<const char*>(cells.data() + offset),
		             static_cast<std::streamsize>(chunk_cells * sizeof(int)));
		output.close();

		offset += chunk_cells;
		std::cout << "Split ASCII RMAP file and converted to BIN " << outfile << std::endl;
	}
}

void split_rmap_to_bin(const std::string& casename_rmap, const int start_index, const int end_index, const int idx, const long ncols) {

@@ -182,8 +269,11 @@ void split_rmap_to_bin(const std::string& casename_rmap, const int start_index,
					arr[(ncols * i) + j] = (val.find(".") != std::string::npos) ? (int)atof(val.c_str()) : (int)atoi(val.c_str());
				}
				i++;
        }else if (line_count >= end_index) {
            break;  // Stop reading the file once the desired lines have been processed
        }


        line_count++;
    }

@@ -329,7 +419,7 @@ int main(int argc, char** argv) {

    int nlines = nrows / NFILES;
    int rem = nrows % NFILES;
    std::vector<int> line_numbers(NFILES - 1);
    std::vector<int> line_numbers(NFILES);
    int sum = 0;

    for (int i = 0; i < NFILES - 1; i++) {
@@ -341,37 +431,29 @@ int main(int argc, char** argv) {
            sum++;
        }
    }
	 line_numbers[NFILES - 1] = nrows;

	int num_lines = line_numbers.size();
	int lines_per_process = (num_lines + size - 1) / size;
	int start_idx = rank * lines_per_process;
	int end_idx = std::min((rank + 1) * lines_per_process, num_lines);
	
	for (int idx = start_idx; idx < end_idx; idx++) {
		int i = line_numbers[idx];
		int start_index = (idx == 0) ? 0 : line_numbers[idx - 1] ;
		split_dem_to_bin(casename_dem, start_index, i, idx,ncols);
	if(rank==size-1){ //last file
		end_idx=num_lines;
	}

	split_dem_to_bin(casename_dem, start_idx, end_idx, line_numbers, ncols );
	if (IS_MANN == "YES") {
      	split_mann_to_bin(casename_mann, start_index, i, idx, ncols);
   	split_mann_to_bin(casename_mann, start_idx, end_idx, line_numbers, ncols);
	}
   if (IS_RMAP == "YES") {
      	split_rmap_to_bin(casename_rmap, start_index, i, idx, ncols);
      }
   	split_rmap_to_bin(casename_rmap, start_idx, end_idx, line_numbers, ncols);
	}

	

	 MPI_Barrier(MPI_COMM_WORLD);

	 if(rank==size-1){
	 	int idx=num_lines;
	 	int start_index = line_numbers[idx - 1] ;
    	split_dem_to_bin(casename_dem, start_index, nrows, idx,ncols);
		if (IS_MANN == "YES") {
      	split_mann_to_bin(casename_mann, start_index, nrows, idx,ncols);
    	}
    	if (IS_RMAP == "YES") {
      	split_rmap_to_bin(casename_rmap, start_index, nrows, idx,ncols);
    	}
	 }

	 if(rank==0){
    	std::cout << "ASCII files generated" << std::endl;