Assigning string tokens to an array

Good day,

I need to dynamically build a control file for SQL*Loader, but when tokenizing the values of a line read from a file I am unable to assign the last value to a variable, even though I can print the actual token value while in the loop.

Below is the code snippet I have; I would appreciate all the help I can get:

// Fragment: writes the fixed preamble of a SQL*Loader control file to `wfile`,
// then splits `line` on commas and writes one decoded column name per token.
// The declarations of line, token, wfile, headElement and arrayCountHeader are
// not part of this fragment.
//headElement declared as headElement[42]
//getCol function is used to decode column keys present in line being read
istringstream ss(line);

// Fixed preamble of the control file.
wfile << "LOAD DATA " << endl <<
"INFILE * " << endl <<
"APPEND INTO TABLE cdruser.x " << endl <<
"FIELDS TERMINATED BY ',' " << endl <<
"(" << endl;

// One iteration per comma-separated token of `line`.
while (getline(ss, token, ','))
{
//cout << arrayCountHeader << " " <<token<<endl;
// NOTE(review): arrayCountHeader is not checked against the array size before
// it is used as an index; more tokens than array slots would write past the end.
headElement[arrayCountHeader] = getCol(token);
// NOTE(review): `t1` is not declared anywhere in this fragment; `token` was
// presumably intended.
cout<<token<< " col " <<getCol(t1)<<endl;
// True only for the last slot of headElement, so the closing text below is
// written only when the line supplies enough tokens to fill the whole array.
if (arrayCountHeader == (sizeof(headElement) / sizeof(headElement[0])) - 1)
{
wfile << headElement[arrayCountHeader] <<",FILENAME) BEGINDATA " << endl;
wfile.close();
}

// NOTE(review): there is no `else`, so these statements also run for the last
// element and append it a second time, followed by a comma. The file is
// re-opened in append mode and closed again on every iteration.
wfile.open("test.txt", std::ios_base::app | std::ios_base::out);
wfile << headElement[arrayCountHeader] << "," << endl;
wfile.close();

arrayCountHeader ++;

}

file structure:
100,101,102,103,120,200,201,202,203,204,206,209,2191,240,300,301,400,401,402,403,4201,4202,4203,500,620,622,624,626,627,628,631,700,704,709,741,742,800,6200,
6201,6202,6203,6204

Regards,
Alas, that's not quite enough information to help. Could you make a small program (complete with main() that compiles) that reproduces the problem?

Usually the cause of such things is a mismatch between attempts to read from file, action taken when the file is correctly read, and the condition that notices failed attempts.

Another thing that catches my eye is the way you are handling 'arrayCountHeader' -- I'm not sure what you are trying to do with it or how the size of your 'headElement' array has anything to do with it...

One other issue is the fact that you are repeatedly opening and closing a file to append a small chunk of data to it at a time. Don't do that. Open your file before any handling loop, process your data and append to it, then close it. Any external process waiting on the file will get the data quickly enough. (More quickly, actually.)
Hi Duaos,

Thanks for the reply. Here's the updated code, which now includes main() and minimizes the opening and closing of the file being worked with (please note there are two files: one for reading data and the other for writing a temp file):

#include <iostream>
#include <ios>
#include <fstream>
#include <string>
#include <sstream>
#include <cstddef>

using namespace std;
string lineRef;
char *my_file;

// Translates the raw value of a record element into its display text.
//
// Only element "100" is encoded: the values "0" through "6" each map to a
// fixed label and anything else yields "Invalid". For every other element
// the value is handed back unchanged.
string decodeElement(string element, string Value)
{
    // Elements other than "100" carry no encoding; pass the value through.
    if (element != "100")
        return Value;

    // Labels for element "100", indexed by the numeric value of the code.
    static const char* const labels[] = {
        "0-Unique",
        "1-Initial",
        "2-Intermediate",
        "3-Final",
        "4-Converted to Unique - time",
        "5-Converted to Unique - duration",
        "6-Aggrgated - normal"
    };

    // A valid code is exactly one character in the range '0'..'6'.
    if (Value.size() == 1 && Value[0] >= '0' && Value[0] <= '6')
        return labels[Value[0] - '0'];

    return "Invalid";
}

// Maps a vendor element key (for example "100") to the name of the database
// column it represents.
//
// token: the key exactly as it appears in the input; the comparison is an
//        exact string match, so surrounding whitespace is significant.
// Returns the column name, or an empty string when the key is not known.
string getCol(string token)
{
    // One row per known key. Adding a column means adding one row here.
    struct ColumnEntry
    {
        const char* key;
        const char* name;
    };

    static const ColumnEntry columns[] = {
        { "100",  "UNICITY_TYPE" },
        { "101",  "CALL_TYPE" },
        { "102",  "RECORD_TYPE" },
        { "103",  "TRAFFIC_TYPE" },
        { "120",  "CALLING_MSISDN_TYPE_NUM" },
        { "200",  "CALLING_MSISDN" },
        { "201",  "ORIG_CALLING_NUM_PLAN" },
        { "202",  "CALLING_IMSI" },
        { "203",  "CALLING_CELL_ID" },
        { "204",  "CALLING_IMEI" },
        { "206",  "CALLING_NUM_INT_FORMAT" },
        { "209",  "CALLING_IP_PDP" },
        { "2191", "URL" },
        { "240",  "FORMAT_RESULT" },
        { "300",  "STARTING_TIME" },
        { "301",  "STARTING_DATE" },
        { "400",  "DURATION" },
        { "401",  "LOCAL_SEQ_NUM" },
        { "402",  "INTER_SEQ_NUM" },
        { "403",  "CHARGING_ID" },
        { "4201", "DATA_VOL_UPLINK" },
        { "4202", "DATA_VOL_DOWNLINK" },
        { "4203", "TOTAL_DATA_VOL" },
        { "500",  "CHARGE_TYPE" },
        { "620",  "SWITCH_ID" },
        { "622",  "GGSN_IP" },
        { "624",  "SGSN_IP" },
        { "626",  "ACCESS_POINT_NI" },
        { "627",  "ACCESS_POINT_OI" },
        { "628",  "NODE_ID" },
        { "631",  "PLMN" },
        { "700",  "TRANS_SMS_HANDLE" },
        { "704",  "CAUSE_RECORD_CLOSE" },
        { "709",  "RAT_TYPE" },
        { "741",  "RATING_GROUP" },
        { "742",  "SERVICE_ID" },
        { "6200", "UNPACKER_NAME" },
        { "6201", "UNPACKER_VERSION" },
        { "6202", "PLATFORM_MANUFACTURER" },
        { "6203", "PLATFORM_MODEL" },
        { "6204", "PLATFORM_VERSION" },
        { "800",  "STATE" }
    };

    const size_t columnCount = sizeof(columns) / sizeof(columns[0]);

    // The table is small, so a linear scan is sufficient.
    for (size_t i = 0; i < columnCount; ++i)
    {
        if (token == columns[i].key)
            return columns[i].name;
    }

    // Unknown key: callers receive an empty name.
    return string();
}

// Strips leading and trailing spaces and tabs from `s` in place and also
// returns a copy of the trimmed text. A string made up only of spaces and
// tabs becomes empty. Other whitespace (such as '\r' or '\n') is kept.
string trim(string& s)
{
    const string blanks = " \t";

    const size_t first = s.find_first_not_of(blanks);
    if (first == string::npos)
    {
        // Nothing but blanks (or already empty).
        s.clear();
        return s;
    }

    // A non-blank character exists, so `last` is valid and >= first.
    const size_t last = s.find_last_not_of(blanks);
    s = s.substr(first, last - first + 1);
    return s;
}

// Builds a SQL*Loader control file, "test.txt", from the element keys listed
// on the second line of the input file named on the command line.
//
// Each comma-separated key on that line is decoded with getCol() and written
// as one column of the control file; the list is closed with
// ",FILENAME) BEGINDATA " however many keys the line holds.
//
// Returns 0 on success and 1 on a usage error, when a file cannot be opened,
// when the input has no second line, or when the line holds more keys than
// headElement can store.
int main(int argc, char *argv[]){

    if (argc != 2)
    {
        cout<< "\nUsage: " << argv[0] << " <filename>" <<endl;
        return(1);
    }

    my_file = argv[1];

    // The input is read in a single pass. An earlier line-counting pass fed a
    // variable that was never used, and before C++11 re-opening a stream that
    // had reached end-of-file did not reset its error state.
    ifstream ofile(my_file);
    if (!ofile.is_open())
    {
        cout << "Unable to open file";
        return 1;
    }

    // The element keys live on the second line of the input.
    const int headerRow = 2;
    string line;
    bool headerFound = false;
    for (int rownum = 1; getline(ofile, line); ++rownum)
    {
        if (rownum == headerRow)
        {
            headerFound = true;
            break;
        }
    }
    ofile.close();

    if (!headerFound)
    {
        cout << "Input file has no header line (line " << headerRow << ")" << endl;
        return 1;
    }

    lineRef = line;

    // Files with CRLF line endings leave a '\r' on the end of the line, which
    // would make the last key fail to match anything in getCol().
    if (!line.empty() && line[line.size() - 1] == '\r')
        line.erase(line.size() - 1);

    //get header elements per order in file
    string headElement[42];
    const size_t capacity = sizeof(headElement) / sizeof(headElement[0]);
    size_t columnCount = 0;

    istringstream ss(line);
    string token;
    while (getline(ss, token, ','))
    {
        // Refuse to write past the end of headElement.
        if (columnCount == capacity)
        {
            cout << "Too many header elements (maximum " << capacity << ")" << endl;
            return 1;
        }

        // getCol() yields an empty name for a key it does not know; the name
        // is stored and written as returned.
        headElement[columnCount] = getCol(token);
        cout<<token<< " col " <<headElement[columnCount]<<endl;
        ++columnCount;
    }

    // The output is opened only once the header has parsed successfully, so a
    // failed run does not leave a partial control file behind.
    ofstream wfile("test.txt");
    if (!wfile.is_open())
    {
        cout << "Unable to open test.txt for writing" << endl;
        return 1;
    }

    wfile << "LOAD DATA \n"
             "INFILE * \n"
             "APPEND INTO TABLE cdruser.x \n"
             "FIELDS TERMINATED BY ',' \n"
             "(\n";

    // Every column is followed by a comma; all but the last also end the line,
    // so the closing text lands on the same line as the final column.
    for (size_t i = 0; i < columnCount; ++i)
    {
        wfile << headElement[i] << ",";
        if (i + 1 < columnCount)
            wfile << "\n";
    }
    wfile << "FILENAME) BEGINDATA \n";

    wfile.close();
    return 0;
}


As for the first question, I'm using 'arrayCountHeader' to assign each token read to the array, as well as to validate the next item in the array against the array size while in the loop.
Before we go further, I'm pretty sure either I'm missing something important or you are.

Given a file with those numbers, you need to convert those numbers into things like "PLATFORM_MODEL"? (Because that's what it looks like you are trying to do.)

Typically a database will dump a fixed-format CSV file. Each column represents a value. So the "PLATFORM_MODEL" column will contain the numbers (or whatever the ID string is) for the "platform model" associated with the record.

You've got to create a SQL query that does what? Selects certain columns?
The purpose of the exercise is to create a SQL*Loader control file to be able to insert data into the database. So what's present in the file is a bunch of numbers that need to be decoded based on the specification given by the vendor.

What's not present in the dummy file I gave was the actual data (the numbers you are referring to).

Hope it makes a bit more sense now
Almost. So your input file is just a single record, containing the vendor-specific numbers that represent column values, and you want to turn that into a single SQL*Loader control file?

What's special about line 2 of your input data?

Sorry to keep asking so many dumb questions, but your (non-functional) code only helped so much.

The huge if..else if... statement needs to be simplified. If you plan to reuse this program, the data encoded in that statement should be a file itself, which you can load into a std::map <unsigned, string>. Then your input file (the one with all the numbers) will parameterize the map to produce the FILE LOAD.

For example, given the vendor data lookup file:

100 UNICITY_TYPE
101 CALL_TYPE
102 RECORD_TYPE
103 TRAFFIC_TYPE
120 CALLING_MSISDN_TYPE_NUM
...

and the input file:

101,103

you now have enough information to produce a SQL*Loader control file like:


LOAD DATA
INFILE *
APPEND INTO TABLE quux
FIELDS TERMINATED BY ','
(
CALL_TYPE,
TRAFFIC_TYPE,
FILENAME
)
BEGINDATA
...

To begin, first load the lookup table file into a std::map:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
// Maps a vendor element code to the column name it stands for.
typedef std::map <unsigned, string> lookup_type;

// Loads a lookup table from a text file of whitespace-separated
// "<code> <name>" pairs.
//
// Reading stops at the first pair that cannot be extracted completely (end of
// file or a malformed entry); pairs read before that point are kept. A later
// pair with the same code replaces the earlier one. A file that cannot be
// opened yields an empty table.
lookup_type load_lookup( const std::string& filename )
{
  lookup_type table;
  ifstream in( filename.c_str() );

  unsigned code;
  string name;
  while (in >> code >> name)
    table[ code ] = name;

  return table;
}

Now you can use it to create your control file. Again, make yourself a function.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
bool write_control_file(
  ofstream& f,  // f will have to have already been opened for writing
  const string& control,  // the string with vendor numbers (that you read from your input file)
  const string& table_name,  // the name of the table to manipulate with this control file
  const lookup_type& lookup )  // our vendor code lookup table
{
  f << "LOAD DATA\n"
       "INFILE *\n"
       "APPEND INTO TABLE " << table_name << "\n"
       "FIELDS TERMINATED BY ','\n"
       "(\n";

  istringstream ss( control );
  string s;
  while (getline( ss, s, ',' ))
  {
    istringstream ss( s );
    unsigned code;
    ss >> code;
    if (!ss.eof()) return false;  // file is corrupt!
    if (map.count( code ) != 1)
      f << "UNKNOWN,\n";  // or however you wish to handle this error
    else
      f << map[ code ] << ",\n";
  }

  f << "FILENAME\n"
       ")\n"
       "BEGINDATA\n";

  return true;
}

The structure of this code is to maintain flexibility and make it easy to handle changes.

Hope this helps.
Topic archived. No new replies allowed.