Large data file hw assignment

I am stuck on a homework assignment where I filter out bad data flags that say -9999 from a massive weather data file, and create a new file from that, I omitted the bad data records however, the next problem basically requires me to omit the columns of data I do not want and I am not sure how to omit those columns
Prompt:
1. (10 Points) Using the original AL Weather Station Data file find all records that have a bad data flag (-9999) for either the PRCP, TMAX or TMIN fields. Produce a new data file (call it Filtered_AL_Weather_Station.txt ) that omits those records with bad data flags. This new file will be used in problem 2. NOTE: The temperatures (TMAX, TMIN) are given in tenths of a degree Celsius. e.g 83 is 8.3 degrees C. (I have already done this program so just ignore it)

2. (15 Points) Using the filtered data file from problem 1, create another file (weather_station_five_column.txt) with only the following five columns of information: (This is the problem I am stuck on.)

STATION NAME DATE PRCP TMAX TMIN

Separate the date fields by inserting spaces and convert the temperatures from Celsius to Fahrenheit.

IMPORTANT: You will need to use the string conversion functions to convert the string type numbers to float or double. The functions are stof and stod. To convert a string to float do this:

string s_tmax; // string type for TMAX
float tmax; // float type for TMAX

// Convert string to float
tmax = stof(s_tmax);

Here is a sample of the output. (You may left justify station name if you like.)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
#include <iostream>
#include <fstream>
#include <iomanip>
#include <string>

using namespace std;

int main(void)
{
	// Variables

	string dataline = "";
	string station = "";

	string tmax_s = "", tmin_s = "", prcp_s = "", date_s = "";

	unsigned int pos_station_name = 0;
	unsigned int pos_date = 0;	
	unsigned int pos_tmax = 0;
	unsigned int pos_tmin = 0;
    unsigned int pos_prcp = 0;
	//unsigned int bad_records = 0;

	float tmax = 0, tmin = 0, prcp = 0, date = 0;

	ifstream infile;
	ofstream outfile;

	cout << "WEATHER STATION DATA" << endl << endl;
	cout << "Open the data file." << endl << endl;
	infile.open("/Users/adam/desktop/temp/Filtered_AL_Weather_Station.txt");

    if (!infile) 
	{
        cout << "Unable to open the input file. " << endl;
	}
	else
	{
	    cout << "Data file opened." << endl;
	}

    outfile.open("/Users/adam/desktop/temp/weather_station_five_column.txt");
    
    if (!outfile) 
	{
        cout << "Unable to open the output file. " << endl;
	}
	else
	{
	    cout << "Output file opened." << endl;
	}
    
	// Use headers to find max and min temp columns
    getline(infile, dataline);
    outfile << dataline << endl;
    
    pos_station_name = dataline.find("STATION_NAME");
    if (pos_station_name <= dataline.length())
    {
        cout << "STATION_NAME begins at column: " << pos_station_name << endl;
    }
    else
    {
        cout << "STATION_NAME not found." << endl;
        return 2;
    }
    pos_date = dataline.find("DATE");
    if (pos_date <= dataline.length())
    {
        cout << "DATE begins at column: " << pos_date << endl;
    }
    else
    {
        cout << "DATE not found." << endl;
        return 2;
    }
    pos_prcp = dataline.find("PRCP");
    if (pos_prcp <= dataline.length())
    {
        cout << "PRCP begins at column: " << pos_prcp << endl;
    }
    else
    {
        cout << "PRCP not found." << endl;
        return 2;
    }
    
    
    pos_tmax = dataline.find("TMAX");
    if (pos_tmax <= dataline.length())
    {
        cout << "TMAX begins at column: " << pos_tmax << endl;
    }
    else
    {
        cout << "TMAX not found." << endl;
        return 2;
    }

    pos_tmin = dataline.find("TMIN");
    if (pos_tmin <= dataline.length())
    {
        cout << "TMIN begins at column: " << pos_tmin << endl;
    }
    else
    {
        cout << "TMIN not found." << endl;
        return 2;
    }


    getline(infile, dataline);
    

    cout << left << setw(10) <<"STATION\t\t" <<setw(10) << "DATE\t\t" << setw(10) <<"PRCP\t\t" << setw(10) << "TMAX\t\t" << setw(10) << "TMIN" << endl;

	// Output the table to the file
	outfile << left << setw(10) <<"STATION\t\t" <<setw(10) << "DATE\t\t" << setw(10) <<"PRCP\t\t" << setw(10) << "TMAX\t\t" << setw(10) << "TMIN" << endl;
	while (!infile.eof())
	{
        getline(infile, dataline);

        //extract tmax and tmin
        cout << setw(10) << station << "\t\t" << date << "\t\t" << tmax << "\t\t" << tmin << endl;
        outfile << setw(10) << station << "\t\t" << date << "\t\t" << tmax << "\t\t" << tmin << endl;
        
        station = dataline.substr(pos_station_name, 5);
        date_s = dataline.substr(pos_date, 5);
        prcp_s = dataline.substr(pos_prcp, 5);
        tmax_s = dataline.substr(pos_tmax, 5);
        tmin_s = dataline.substr(pos_tmin, 5);
        
        
        date = stof(date_s);
        prcp = stof(prcp_s);
        tmax = stof(tmax_s); // string to float
        tmin = stof(tmin_s);
        infile >> station >> prcp >> tmax >> tmin;


	} // End While 

	// Close the files
	infile.close();
	outfile.close();


	cout << "\n\n";
	return 0;
}

My filtered weather data txt file output file I use for problem #2 I omitted the bad date flags -9999 for TMAX and TMIN:

STATION           STATION_NAME                                       DATE     MDPR     DAPR     PRCP     SNWD     SNOW     TMAX     TMIN     AWND     
----------------- -------------------------------------------------- -------- -------- -------- -------- -------- -------- -------- -------- -------- 
GHCND:USC00010505                        BANKHEAD LOCK AND DAM AL US 20180301 -9999    -9999    0.15     0.0      -9999    82       53       -9999    
GHCND:USC00010505                        BANKHEAD LOCK AND DAM AL US 20180302 -9999    -9999    0.45     0.0      -9999    83       44       -9999    
GHCND:USC00010505                        BANKHEAD LOCK AND DAM AL US 20180303 -9999    -9999    0.00     0.0      -9999    83       33       -9999    

I can't include the full file because it has 4000+ of these entries but my goal is to omit MDPR, SNWD, SNOW, AWWND and just have STATION DATE PRCP TMAX TMIN

What the output is supposed to look like except lined up of course



              STATION NAME                                   DATE           PCRP  TMAX  TMIN
                           BANKHEAD LOCK AND DAM 2018 03 01      0.15  46.76  41.54
                           BANKHEAD LOCK AND DAM 2018 03 02      0.45  46.94  39.92
                           BANKHEAD LOCK AND DAM 2018 03 03      0.00  46.94  37.94
                           BANKHEAD LOCK AND DAM 2018 03 04      0.00  46.76  39.94


My output:But my output file is:

STATION           STATION_NAME                                       DATE     MDPR     DAPR     PRCP     SNWD     SNOW     TMAX     TMIN     AWND     
STATION		 DATE		    PRCP		    TMAX		    TMIN      
          		0		0		0
GHCND:USC00010505		20180		82		53
          		20180		82		53
          		20180		82		53
          		20180		82		53
          		20180		82		53
          		20180		82		53
          		20180		82		53
          		20180		82		53
Last edited on
You might take adavantage of the apparently tightly structured source data files and do something like the following to extract what data you want out of it.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
#include <iostream>
#include <fstream>
#include <iomanip>
#include <string>

using namespace std;

int main(void)
{
    string dataline = "";
    string station = "";
    
    string tmax_s = "", tmin_s = "", prcp_s = "", date_s = "";
    
    double tmax = 0, tmin = 0, prcp = 0, date = 0;
    
    ifstream infile;
    ofstream outfile;
    
    infile.open("000 copy.txt");
    
    if (!infile)
    {
        cout << "Unable to open the input file.\n";
        return -99;
    }
    
    outfile.open("000 copy output.txt");
    
    if (!outfile)
    {
        cout << "Unable to open the output file.\n";
        return -98;
    }
    
    cout << "WEATHER STATION DATA\n";
    
    // FILE HEADING STUFF
    for(int i = 0; i < 3; i++)
    {
        std::string heading;
        getline(infile, heading);
        std::cout << heading << '\n';
    }
    
    // DATA LINES
    while (getline(infile, dataline))
    {
        std::cout << dataline << '\n';
        date_s = dataline.substr(45, 9);
        std::cout << "Date: " << date_s << '\n';
        
        std::cout << " *** tmin: " << stod(dataline.substr(67, 4)) << '\n';
        
        // ... ETC
        
        std::cout << '\n';
    }
    
    infile.close();
    outfile.close();
    
    cout << "End\n";
    return 0;
}


WEATHER STATION DATA
STATION STATION_NAME DATE MDPR DAPR PRCP SNWD SNOW TMAX TMIN AWND
-------------------------------------------------------------------
-------- -------- -------- -------- -------- -------- -------- -------- --------
GHCND:USC00010505 BANKHEAD LOCK AND DAM AL US 20180301 -9999 -9999 0.15 0.0 -9999 82 53 -9999
Date:  20180301
 *** tmin: 0.15

GHCND:USC00010505 BANKHEAD LOCK AND DAM AL US 20180302 -9999 -9999 0.45 0.0 -9999 83 44 -9999
Date:  20180302
 *** tmin: 0.45

GHCND:USC00010505 BANKHEAD LOCK AND DAM AL US 20180303 -9999 -9999 0.00 0.0 -9999 83 33 -9999
Date:  20180303
 *** tmin: 0

End
Program ended with exit code: 0
And moving along easier lines:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
#include <iostream>
#include <fstream>
#include <iomanip>
#include <string>

using namespace std;

int main(void)
{
    string dataline = "";
    string station = "";
    
    string tmax_s = "", tmin_s = "", prcp_s = "", date_s = "";
    
    double tmax = 0, tmin = 0, prcp = 0, date = 0;
    
    ifstream infile;
    ofstream outfile;
    
    infile.open("000 copy.txt");
    
    if (!infile)
    {
        cout << "Unable to open the input file.\n";
        return -99;
    }
    
    outfile.open("000 copy output.txt");
    
    if (!outfile)
    {
        cout << "Unable to open the output file.\n";
        return -98;
    }
    
    cout << "WEATHER STATION DATA\n";
    
    // FILE HEADING STUFF
    for(int i = 0; i < 3; i++)
    {
        std::string heading;
        getline(infile, heading);
        std::cout << heading << '\n';
    }
    
    // DATA LINES
    std::string crap;
    
    while (
        infile
           >> crap >> crap >> crap >> crap >> crap >> crap >> crap
           >> date_s
           >> crap >> crap
           >> tmin >> tmax
           >> crap >> crap >> crap >> crap
           )
    {
        std::cout << dataline << '\n';
        std::cout << "Date: " << date_s << '\n';
        
        std::cout << " *** tmin: " << tmin << '\n';
        std::cout << " *** tmax: " << tmax << '\n';
        
        // ... ETC
        
        std::cout << '\n';
    }
    
    infile.close();
    outfile.close();
    
    cout << "End\n";
    return 0;
}
When posting file data, please use output tags so that spaces/tabs etc are kept. You're posted data can't be used as the columns don't line up as spaces/tabs have been removed. Also when outputting data to a file that is going to be used as input data it's better not not to use tabs and just use spaces.

For std::string, you don't need to initialise with "".
Last edited on
Really? We can only thank the lucky stars @gigacapybara used code tags.
As a first revision, perhaps... Note that this isn't tested as need proper formatted data and conversion c/f isn't done.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
#include <iostream>
#include <fstream>
#include <iomanip>
#include <string>

using namespace std;

int main(void)
{
	string dataline;

	cout << "WEATHER STATION DATA\n\n";
	cout << "Open the data file.\n\n";

	ifstream infile("/Users/adam/desktop/temp/Filtered_AL_Weather_Station.txt");

	if (!infile)
		return (cout << "Unable to open the input file.\n"), 1;

	ofstream outfile("/Users/adam/desktop/temp/weather_station_five_column.txt");

	if (!outfile)
		return (cout << "Unable to open the output file.\n"), 1;

	// Use headers to find max and min temp columns
	getline(infile, dataline);	// Get header line
	outfile << dataline << '\n';

	const auto pos_station_name {dataline.find("STATION_NAME")};

	if (pos_station_name == string::npos)
		return (cout << "STATION_NAME not found.\n"), 2;

	const auto pos_date {dataline.find("DATE")};

	if (pos_date == string::npos)
		return (cout << "DATE not found.\n"), 2;

	const auto pos_prcp {dataline.find("PRCP")};

	if (pos_prcp == string::npos)
		return (cout << "PRCP not found.\n"), 2;

	const auto pos_tmax {dataline.find("TMAX")};

	if (pos_tmax == string::npos)
		return (cout << "TMAX not found.\n"), 2;

	const auto pos_tmin {dataline.find("TMIN")};

	if (pos_tmin == string::npos)
		return (cout << "TMIN not found.\n"), 2;

	getline(infile, dataline);	// Skip underlines line

	cout << left << setw(10) << "STATION\t\t" << setw(10) << "DATE\t\t" << setw(10) << "PRCP\t\t" << setw(10) << "TMAX\t\t" << setw(10) << "TMIN" << endl;

	// Output the table to the file
	outfile << left << setw(10) << "STATION\t\t" << setw(10) << "DATE\t\t" << setw(10) << "PRCP\t\t" << setw(10) << "TMAX\t\t" << setw(10) << "TMIN" << endl;

	while (getline(infile, dataline)) {
		const auto station {dataline.substr(pos_station_name, 18)};
		const auto date_s {dataline.substr(pos_date, 8)};
		const auto prcp {stof(dataline.substr(pos_prcp, 5))};
		const auto tmax {stof(dataline.substr(pos_tmax, 5))};
		const auto tmin {stof(dataline.substr(pos_tmin, 5))};

		//cout << setw(10) << station << "\t\t" << date_s << "\t\t" << prcp << "\t\t" << tmax << "\t\t" << tmin << '\n';
		outfile << setw(10) << station << "\t\t" << date_s << "\t\t" << prcp << "\t\t" << tmax << "\t\t" << tmin << '\n';
	}
}

is -9999 valid in any entry that you want to keep? If not, you can really, really simplify this thing... here is a rather simple utility program I use that removes the lines that match the input args (any of them). You can get rid of that and hard code "-9999". It also has line numbers you woudn't want (unless to debug). If you need specific columns (because your sentinel is valid in other fields?), this is reduced too far for that. If you reverse the logic, only the removed lines would be shown, to see what you are not getting.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
int main(int argc, char**argv)
{   
   ifstream ifs(argv[1]);
   char line[10000];
   bool in{};
   int ctr{};
   uint64_t i;
   while(ifs.getline(line,10000))   
   {	 
     ++ctr;
     in = false;
     for(i = 2; i < argc; i++)
	   in |= (bool)(strstr(line,argv[i]));
     if(!in)
	 cout <<ctr<<"\t\t"<< line << endl;
   }
}
Last edited on
@seeplus The proper output file should look like this except lined up.

              STATION NAME                                   DATE           PCRP  TMAX  TMIN
                           BANKHEAD LOCK AND DAM 2018 03 01      0.15  46.76  41.54
                           BANKHEAD LOCK AND DAM 2018 03 02      0.45  46.94  39.92
                           BANKHEAD LOCK AND DAM 2018 03 03      0.00  46.94  37.94
                           BANKHEAD LOCK AND DAM 2018 03 04      0.00  46.76  39.94


But my output file is:

STATION           STATION_NAME                                       DATE     MDPR     DAPR     PRCP     SNWD     SNOW     TMAX     TMIN     AWND     
STATION		 DATE		    PRCP		    TMAX		    TMIN      
          		0		0		0
GHCND:USC00010505		20180		82		53
          		20180		82		53
          		20180		82		53
          		20180		82		53
          		20180		82		53
          		20180		82		53
          		20180		82		53
          		20180		82		53

Sorry about that does it make more sense now?
Last edited on
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
#include <iostream>
#include <fstream>
#include <iomanip>
#include <string>

int main()
{
    std::ifstream infile;
    infile.open("000 copy.txt");
    if (!infile)
    {
        std::cout << "Unable to open the input file\n";
        return -99;
    }
    
    std::ofstream outfile;
    outfile.open("000 copy output.txt");
    if (!outfile)
    {
        std::cout << "Unable to open the output file\n";
        return -98;
    }
    
    outfile << "WEATHER STATION DATA\n\n";
    
    // SOURCE FILE HEADING STUFF
    std::string heading;
    getline(infile, heading);
    outfile << heading << '\n';
    
    std::string underline;
    getline(infile, underline);
    outfile << underline << '\n';
    
    // DATA LINE PROCESSING
    std::string station, station_name, date;
    int mdpr{0}, dapr{0};
    double prcp{0}, snwd(0), snow{0};
    int tmax{0}, tmin{0}, awnd{0};
    
    std::string temp;
    while ( infile >> station )
    {
        station_name.clear();
        for(int i =0; i < 6; i++)
        {
            infile >> temp;
            station_name += (temp + ' ');
        }
        infile
        >> date >> mdpr >> dapr >> prcp >> snwd >> snow >> tmax >> tmin >> awnd;
        
        // NOW THAT ALL ITEMS ARE KNOWN PLACE ALL PROCESSING/FILTERING HERE
        // BUT IN THE MEANTIME JUST PUT THE INFO/CONVERSION INTO THE output FILE
        
        outfile
        << std::setw(17) << station
        << std::setw(52) << std::right << station_name
        << std::setw(8) << date << ' '
        
        << std::setw(8) << std::left << mdpr << ' '
        << std::setw(8) << std::left << dapr << ' '
        << std::setw(8) << std::left << prcp << ' '
        << std::setw(8) << std::left << snwd << ' '
        << std::setw(8) << std::left << snow << ' '
        << std::setw(8) << std::left << ( (tmax * 0.18) + 32 ) << ' '
        << std::setw(8) << std::left << ( (tmin * 0.18) + 32 ) << ' '
        << std::setw(8) << std::left << awnd
        << '\n';
    }
    
    infile.close();
    outfile.close();
    std::cout << "End\n";
    
    return 0;
}


input file:

STATION           STATION_NAME                                       DATE     MDPR     DAPR     PRCP     SNWD     SNOW     TMAX     TMIN     AWND
----------------- -------------------------------------------------- -------- -------- -------- -------- -------- -------- -------- -------- --------
GHCND:USC00010505                        BANKHEAD LOCK AND DAM AL US 20180301 -9999    -9999    0.15     0.0      -9999    82       53       -9999
GHCND:USC00010505                        BANKHEAD LOCK AND DAM AL US 20180302 -9999    -9999    0.45     0.0      -9999    83       44       -9999
GHCND:USC00010505                        BANKHEAD LOCK AND DAM AL US 20180303 -9999    -9999    0.00     0.0      -9999    83       33       -9999



output file:
WEATHER STATION DATA

STATION           STATION_NAME                                       DATE     MDPR     DAPR     PRCP     SNWD     SNOW     TMAX     TMIN     AWND
----------------- -------------------------------------------------- -------- -------- -------- -------- -------- -------- -------- -------- --------
GHCND:USC00010505                        BANKHEAD LOCK AND DAM AL US 20180301 -9999    -9999    0.15     0        -9999    46.76    41.54    -9999   
GHCND:USC00010505                        BANKHEAD LOCK AND DAM AL US 20180302 -9999    -9999    0.45     0        -9999    46.94    39.92    -9999   
GHCND:USC00010505                        BANKHEAD LOCK AND DAM AL US 20180303 -9999    -9999    0        0        -9999    46.94    37.94    -9999   

@gigacapybara. I was asking about the input file format to the program - not the output. In your program, what's the format of Filtered_AL_Weather_Station.txt ??

Provide a few lines as an example
@gigacapybara
One of the factors involved in analyzing this structured data input is, once you can capture individual variable you can decide whether to ignore it for output, send it to the output file unchanged or process it beforehand then send that to output.
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
#include <iostream>
#include <fstream>
#include <iomanip>
#include <string>

int main()
{
    std::ifstream infile;
    infile.open("wethr_stn.txt");
    if (!infile)
    {
        std::cout << "Unable to open the input file\n";
        return -99;
    }
    
    std::ofstream outfile;
    outfile.open("wethr_stn_output.txt");
    if (!outfile)
    {
        std::cout << "Unable to open the output file\n";
        return -98;
    }
    
    outfile << "WEATHER STATION DATA\n\n";
    
    // SOURCE FILE HEADING STUFF
    std::string heading;
    getline(infile, heading);
    
    heading =   "STATION_NAME                                        DATE     PRCP     TMAX     TMIN";
    outfile << heading << '\n';
    
    std::string underline;
    getline(infile, underline);
    underline = "--------------------------------------------------- -------- -------- -------- --------";
    outfile << underline << '\n';
    
    // DATA LINE PROCESSING
    std::string station, station_name, date;
    int mdpr{0}, dapr{0};
    double prcp{0}, snwd(0), snow{0};
    int tmax{0}, tmin{0}, awnd{0};
    
    std::string temp;
    while ( infile >> station )
    {
        station_name.clear();
        for(int i =0; i < 6; i++)
        {
            infile >> temp;
            station_name += (temp + ' ');
        }
        infile
        >> date >> mdpr >> dapr >> prcp >> snwd >> snow >> tmax >> tmin >> awnd;
        
        // find all records that have a bad data flag (-9999)
        // for either the PRCP, TMAX or TMIN fields
        // STATION NAME DATE PRCP TMAX TMIN
        
        if( (prcp == -9999)|| (tmax == -9999) || (tmin == -9999) )
        {
            // DO NOTHING
        }
        else
        {
            outfile
            // << std::setw(17) << station
            << std::setw(52) << std::right << station_name
            << std::setw(8) << date << ' '
            
            // << std::setw(8) << std::left << mdpr << ' '
            // << std::setw(8) << std::left << dapr << ' '
            << std::setw(8) << std::left << prcp << ' '
            // << std::setw(8) << std::left << snwd << ' '
            // << std::setw(8) << std::left << snow << ' '
            << std::setw(8) << std::left << ( (tmax * 0.18) + 32 ) << ' '
            << std::setw(8) << std::left << ( (tmin * 0.18) + 32 ) << ' '
            // << std::setw(8) << std::left << awnd
            << '\n';
        }
    }
    
    infile.close();
    outfile.close();
    std::cout << "End\n";
    
    return 0;
}


made-up input:

STATION           STATION_NAME                                       DATE     MDPR     DAPR     PRCP     SNWD     SNOW     TMAX     TMIN     AWND
----------------- -------------------------------------------------- -------- -------- -------- -------- -------- -------- -------- -------- --------
GHCND:USC00010505                        BANKHEAD LOCK AND DAM AL US 20180301 45       -9999    0.15     0.0      -9999    82       53       -9999
GHCND:USC00010505                        BANKHEAD LOCK AND DAM AL US 20180302 62       31       0.45     0.0      -9999    83       44       -9999
GHCND:USC00010505                        BANKHEAD LOCK AND DAM AL US 20180303 102      -9999    0.00     0.0      -9999    83       33       -9999
GHCND:USC00010505                        BANKHEAD LOCK AND DAM AL US 20180305 -9999    -9999    0.15     0.0      -9999    82       53       22
GHCND:USC00010505                        BANKHEAD LOCK AND DAM AL US 20180312 -78      -82      0.45     0.0      -9999    83       44       -9999
GHCND:USC00010505                        BANKHEAD LOCK AND DAM AL US 20180313 -9999    -9999    0.00     0.0      -9999    83       33       -9999
GHCND:USC00010505                        BANKHEAD LOCK AND DAM AL US 20180311 12       -9999    0.15     0.0      -9999    82       53       -9999
GHCND:USC00010505                        BANKHEAD LOCK AND DAM AL US 20180302 -9999    -9999    0.45     0.0      88       83       44       98
GHCND:USC00010505                        BANKHEAD LOCK AND DAM AL US 20180303 19       22       0.00     0.0      77       83       33       66



output:

WEATHER STATION DATA

STATION_NAME                                        DATE     PRCP     TMAX     TMIN
--------------------------------------------------- -------- -------- -------- --------
                        BANKHEAD LOCK AND DAM AL US 20180301 0.15     46.76    41.54    
                        BANKHEAD LOCK AND DAM AL US 20180302 0.45     46.94    39.92    
                        BANKHEAD LOCK AND DAM AL US 20180303 0        46.94    37.94    
                        BANKHEAD LOCK AND DAM AL US 20180305 0.15     46.76    41.54    
                        BANKHEAD LOCK AND DAM AL US 20180312 0.45     46.94    39.92    
                        BANKHEAD LOCK AND DAM AL US 20180313 0        46.94    37.94    
                        BANKHEAD LOCK AND DAM AL US 20180311 0.15     46.76    41.54    
                        BANKHEAD LOCK AND DAM AL US 20180302 0.45     46.94    39.92    
                        BANKHEAD LOCK AND DAM AL US 20180303 0        46.94    37.94    

If this really is how the input looks like:

STATION           STATION_NAME
----------------- -------------------------------------------------- ...
GHCND:USC00010505                        BANKHEAD LOCK AND DAM AL US ...

Then I'd guess that the format is fixed-width fields, with headers.

1
2
3
4
5
6
7
8
std::string line;
while ( infile.getline( line ) ) {
  if (line starts with dashed line) break;
}
// print desired header-block to outfile

Entry entry;
while ( infile >> entry  && entry.cut( outfile ) );

I'd say infile >> entry reads a line, picks expected substring fields, and stores in members as necessary.

The entry.cut( outfile ) writes to stream only the desired fields in desired format.
(The "print header" could be a static member of Entry? Or two -- one for "full" and another for "cut".)
Last edited on
Thank you @againtry you really helped me understand the program better, I make things way more complicated than they need to be.
When working with fixed fields, another approach is to work out the start/length of each field from the line of ---. Then use this to obtain the data of each line into a vector. Then you can process/display each data line based upon the start/len of each element.

As a starter, consider:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
#include <iostream>
#include <fstream>
#include <string>
#include <vector>
#include <iomanip>
#include <initializer_list>

struct Elem {
	size_t strt {};
	size_t len {};
};

using Line = std::vector<Elem>;
using Data = std::vector<std::string>;

Line elemPos(std::string& line, char elem = '-') {
	Line l;

	for (size_t elend {}, strt {}; elend != std::string::npos && strt != std::string::npos; strt = line.find_first_not_of(' ', elend + 1)) {
		elend = line.find_first_not_of(elem, strt);
		l.emplace_back(strt, elend != std::string::npos ? elend - strt : (line.length() - strt));
	}

	return l;
}

Data getLine(std::string& lin, const Line& elem) {
	Data data;

	for (auto [s, l] : elem) {
		const auto s1 {lin.find_first_not_of(' ', s)};

		l = lin.find_last_not_of(' ', s + l) - s + 1;
		data.emplace_back(lin.data() + s1, l - (s1 - s));
	}

	return data;
}

void display(const Data& data, const Line& elems, const std::initializer_list<int>& cols)
{
	for (const auto cl : cols)
		std::cout << std::left << std::setw(elems[cl].len) << data[cl] << ' ';

	std::cout << '\n';
}

int main()
{
	std::ifstream weath("weather.txt");

	if (!weath)
		return (std::cout << "Cannot open input file\n"), 1;

	std::string header, line;
	const std::initializer_list cols {1, 2, 5, 8, 9};

	std::getline(weath, header);
	std::getline(weath, line);

	const auto elem {elemPos(line)};

	display(getLine(header, elem), elem, cols);
	display(getLine(line, elem), elem, cols);

	for (std::string l; std::getline(weath, l); )
		display(getLine(l, elem), elem, cols);
}


which given input file of:


STATION           STATION_NAME                                       DATE     MDPR     DAPR     PRCP     SNWD     SNOW     TMAX     TMIN     AWND     
----------------- -------------------------------------------------- -------- -------- -------- -------- -------- -------- -------- -------- --------  
GHCND:USC00010505                        BANKHEAD LOCK AND DAM AL US 20180301 -9999    -9999    0.15     0.0      -9999    82       53       -9999    
GHCND:USC00010505                        BANKHEAD LOCK AND DAM AL US 20180302 -9999    -9999    0.45     0.0      -9999    83       44       -9999    
GHCND:USC00010505                        BANKHEAD LOCK AND DAM AL US 20180303 -9999    -9999    0.00     0.0      -9999    83       33       -9999  


produces:


STATION_NAME                                       DATE     PRCP     TMAX     TMIN
-------------------------------------------------- -------- -------- -------- --------
BANKHEAD LOCK AND DAM AL US                        20180301 0.15     82       53
BANKHEAD LOCK AND DAM AL US                        20180302 0.45     83       44
BANKHEAD LOCK AND DAM AL US                        20180303 0.00     83       33


Just something to think about.
Last edited on
Topic archived. No new replies allowed.