Extract Values from a complicated string

I am looking for a function to extract values from a string

Here are three examples of the string (each one is passed to the function as a single line):

-------------------------

[{"cid":"PWER","data":[{"1507785593000":410}],"sid":"796519","units":"kWm","age":2},{"cid":"PWER_SUB","data":[{"1507785585000":108}],"sid":"796726","units":null,"age":10},{"cid":"PWER_SUB","data":[{"1507785593000":102}],"sid":"795702","units":null,"age":2},{"cid":"PWER_GAC","data":[{"1507785593000":2713}],"sid":"731610","units":null,"age":2}]

--------------------------------------------

[{"cid":"PWER","data":[{"1507785686000":409}],"sid":"796519","units":"kWm","age":15},{"cid":"PWER_SUB","data":[{"1507785682000":110}],"sid":"796726","units":null,"age":19},{"cid":"PWER_SUB","data":[{"1507785693000":102}],"sid":"795702","units":null,"age":8},{"cid":"PWER_GAC","data":[{"1507785693000":2692}],"sid":"731610","units":null,"age":8}]

-------------------------------------------------

[{"cid":"PWER","data":[{"1507785734000":412}],"sid":"796519","units":"kWm","age":1},{"cid":"PWER_SUB","data":[{"1507785730000":108}],"sid":"796726","units":null,"age":5},{"cid":"PWER_SUB","data":[{"1507785734000":107}],"sid":"795702","units":null,"age":1},{"cid":"PWER_GAC","data":[{"1507785730000":2647}],"sid":"731610","units":null,"age":5}]


--------------------------------------------------------------

Using the last one as the example

The values I am after, are "412", "108", "107" and "2647"

The "sid" values ("sid":"796519", "sid":"796726", etc.) are constant; these are my markers. The values before them change, e.g. "[{"1507785734000":412}]".
The larger value (e.g. "1507785734000") changes but is fixed in length. The value after it is the one of interest, and it varies in length, e.g. "412", "108", "107" and "2647".

So ideally I pass this string into a function and it extracts the data as shown below:

Function input

[{"cid":"PWER","data":[{"1507785734000":412}],"sid":"796519","units":"kWm","age":1},{"cid":"PWER_SUB","data":[{"1507785730000":108}],"sid":"796726","units":null,"age":5},{"cid":"PWER_SUB","data":[{"1507785734000":107}],"sid":"795702","units":null,"age":1},{"cid":"PWER_GAC","data":[{"1507785730000":2647}],"sid":"731610","units":null,"age":5}]


Function Magic -->

Result
"sid 796519" = 412
"sid 796726" = 108
"sid 795702" = 107
"sid 731610" = 2647


If anyone could help, it would be appreciated.
I am at the "Hello World" stage of learning.
Thanks
The string is just a JSON structure. Use a JSON parser such as https://github.com/nlohmann/json
This uses the standard regular expressions library. http://en.cppreference.com/w/cpp/regex
This may be hard to grasp for a beginner in C++ who is also unfamiliar with regular expressions.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
#include <iostream>
#include <string>
#include <regex>
#include <vector>
#include <utility>

// Collects every ( sid, data value ) pair found in `line`.
// A pair is recognised by the text  ":<digits>}],"sid":"<digits>  - the first run
// of digits is the data value, the second is the sid. Pairs are returned in the
// order in which they appear; a pair whose numbers cannot be converted to int is skipped.
std::vector< std::pair<int,int> > parse_line( const std::string& line )
{
    // capture 1: data value just before }],   capture 2: digits of the sid
    static const std::regex re( R"xxx(.*?":(\d+)\}\],"sid":"(\d+))xxx" ) ;

    std::vector< std::pair<int,int> > result ;

    const std::sregex_iterator no_more_matches ;
    std::sregex_iterator match( line.begin(), line.end(), re ) ;
    while( match != no_more_matches )
    {
        const std::smatch& groups = *match ;
        try
        {
            const int sid = std::stoi( groups[2].str() ) ;
            const int value = std::stoi( groups[1].str() ) ;
            result.emplace_back( sid, value ) ;
        }
        catch( ... ) {} // conversion failed (e.g. number too large for int): skip this pair

        ++match ;
    }

    return result ;
}

// Quick manual check: parses one sample record set and prints each sid with its data value.
int main()
{
    // the sample is deliberately spread over several lines; only this one string is tried
    const std::string sample =
R"***(
[{"cid":"PWER","data":[{"1507785734000":412}],"sid":"796519","units":"kWm","age":1},
{"cid":"PWER_SUB","data":[{"1507785730000":108}],"sid":"796726","units":null,"age":5},
{"cid":"PWER_SUB","data":[{"1507785734000":107}],"sid":"795702","units":null,"age":1},
{"cid":"PWER_GAC","data":[{"1507785730000":2647}],"sid":"731610","units":null,"age":5}]
)***" ;

    const std::vector< std::pair<int,int> > readings = parse_line(sample) ;

    for( const auto& reading : readings )
    {
        std::cout << "sid " << reading.first << "  data: " << reading.second << '\n' ;
    }
}

http://coliru.stacked-crooked.com/a/4ce0ea22ad56895c
http://rextester.com/FAB31009
Last edited on
Very longwinded compared with the slick regex version above, but hopefully clear.
(Stores more than is necessary, for future adaptations.)

Basic parse produces individual strings. Use std::stoi() if you want to convert anything into an int.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
#include <iostream>
#include <iomanip>
#include <string>
#include <vector>
using namespace std;

// Structs and prototypes
// One record taken from the input string. Every field holds the raw text of the
// token it was read from; nothing is converted to a number here.
struct Item
{
   std::string cid;          // value stored under "cid"
   std::string dataPart1;    // key of the entry inside "data" (the long number)
   std::string dataPart2;    // value of that entry - the figure of interest
   std::string sid;          // value stored under "sid"
   std::string units;        // value stored under "units" (may be the text null)
   std::string age;          // value stored under "age"
};

// Forward declarations: main() calls addItems() before the definitions further down
void addItems( string s, vector<Item> &V );                // Parses s, splits it into Items and appends them to V
string removeChars( string s, string getRidOf );           // Returns s without any character that occurs in getRidOf
vector<string> tokenise( string s, string delimiters );    // Breaks s into pieces at every character found in delimiters

//======================================================================

int main()
{
   vector<Item> V;                                         // To hold results

   // Raw string input (because " would otherwise mean lots of escape characters)
   string s = R"([{"cid":"PWER","data":[{"1507785734000":412}],"sid":"796519","units":"kWm","age":1},
                  {"cid":"PWER_SUB","data":[{"1507785730000":108}],"sid":"796726","units":null,"age":5},
                  {"cid":"PWER_SUB","data":[{"1507785734000":107}],"sid":"795702","units":null,"age":1},
                  {"cid":"PWER_GAC","data":[{"1507785730000":2647}],"sid":"731610","units":null,"age":5}])";

   addItems( s, V );                                       // "Function magic" to get the items out of this string

   #define SP << '\t' <<
   cout << "Full data (for checking):\n";
   for ( Item e : V ) cout << setw( 12 ) << e.cid SP e.dataPart1 SP e.dataPart2 SP e.sid SP e.units SP e.age << '\n';

   cout << "\nRequired form:\n";
   for ( Item e : V ) cout << "\"sid " << e.sid << "\" = " << e.dataPart2 << '\n';
}

//======================================================================

// Parses the string and splits it into Items, appending each one to V.
//
// Once brackets, braces, quotes and whitespace are removed and the text is split
// at , and : every record is expected to contribute exactly 11 tokens:
//    cid <cid> data <dataPart1> <dataPart2> sid <sid> units <units> age <age>
// The key tokens are skipped by position; they are not compared against the
// expected names. A trailing group of fewer than 11 tokens (truncated or blank
// input) is ignored rather than read past the end of the token vector.
void addItems( string s, vector<Item> &V )
{
   // Remove all unnecessary characters. Tabs and line breaks go too, so input
   // spread over several lines cannot leave a newline inside a stored value.
   s = removeChars( s, " \t\r\n[]{}\"" );

   vector<string> tokens = tokenise( s, ",:" );       // Split at any occurrence of , or :

   const size_t TOKENS_PER_ITEM = 11;

   // Only enter the loop while a complete group of tokens is still available
   for ( size_t i = 0; i + TOKENS_PER_ITEM <= tokens.size(); i += TOKENS_PER_ITEM )
   {
      Item item;
      item.cid       = tokens[i +  1];                // i + 0 is the key "cid"
      item.dataPart1 = tokens[i +  3];                // i + 2 is the key "data"
      item.dataPart2 = tokens[i +  4];
      item.sid       = tokens[i +  6];                // i + 5 is the key "sid"
      item.units     = tokens[i +  8];                // i + 7 is the key "units"
      item.age       = tokens[i + 10];                // i + 9 is the key "age"
      V.push_back( item );
   }
}

//======================================================================

// Returns a copy of s from which every character that occurs in getRidOf has
// been dropped; the remaining characters keep their original order.
string removeChars( string s, string getRidOf )
{
   string kept;

   // Copy s one run at a time: each run ends just before the next unwanted character
   string::size_type runStart = 0;
   while ( runStart < s.size() )
   {
      string::size_type runEnd = s.find_first_of( getRidOf, runStart );
      if ( runEnd == string::npos ) runEnd = s.size();      // No further unwanted character: run goes to the end

      kept.append( s, runStart, runEnd - runStart );
      runStart = runEnd + 1;                                 // Step over the unwanted character
   }

   return kept;
}

//======================================================================

// Breaks s into tokens at every character that occurs in delimiters.
//
// Two delimiters in a row (or a delimiter at the very start) produce an empty
// token; a delimiter at the very end does not add a trailing empty token.
// An empty s gives an empty vector.
vector<string> tokenise( string s, string delimiters )
{
   vector<string> result;

   // Positions use the string's own unsigned size type, so the result of
   // find_first_of (including npos) is never narrowed into an int
   string::size_type pstart = 0;
   while ( pstart < s.size() )
   {
      const string::size_type pend = s.find_first_of( delimiters, pstart );
      if ( pend == string::npos )
      {
         result.push_back( s.substr( pstart ) );              // Last token: everything that is left
         break;
      }

      result.push_back( s.substr( pstart, pend - pstart ) );
      pstart = pend + 1;                                      // Continue just after the delimiter
   }

   return result;
}

//====================================================================== 


Full data (for checking):
        PWER	1507785734000	412	796519	kWm	1
    PWER_SUB	1507785730000	108	796726	null	5
    PWER_SUB	1507785734000	107	795702	null	1
    PWER_GAC	1507785730000	2647	731610	null	5

Required form:
"sid 796519" = 412
"sid 796726" = 108
"sid 795702" = 107
"sid 731610" = 2647

Last edited on
Topic archived. No new replies allowed.