Write data after the search keyword

Hi,

I have a 1 GB file and I'm searching it for the term "topics". After finding the search term, I want to write that particular sentence (the one in which "topics" is present) into another file.
So what's the problem?

- Can you open a file using ifstream?

- Can you read lines from the file using fstream::getline?

- Can you search a string using string::find ?

Yes, I am able to do all of the above.

My problem is this:
for example, I'm searching for "topics"; after finding it, I want to write the text that follows "topics", such as "QC Physics","QD Chemistry","QH301 Biology", to another file.

It looks like the following in my JSON file:
"topics":["QC Physics","QD Chemistry","QH301 Biology"]
Using the regular expressions library:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
#include <iostream>
#include <string>
#include <regex>
#include <vector>

// Extracts the raw text between the square brackets of a single-line
// JSON member of the form:   "topics" : [ ... ]
//
// line:    one line of text; it must consist of the "topics" member only,
//          optionally surrounded by white space and optionally followed by
//          the comma that separates it from the next member.
// returns: the text between '[' and ']' with leading and trailing white space
//          removed, or an empty string if the line does not have this form
//          (an empty array "[]" also yields an empty string).
std::string extract_topics( const std::string& line )
{
    // note: \s* between components allows optional white space characters
    //       (.+?) captures the text between []; it is lazy so that white space
    //             just before the closing ']' is left out of the capture
    //       ,?    tolerates a trailing comma after the closing ']'
    static const std::regex topics_re( R"****(^\s*"topics"\s*:\s*\[\s*(.+?)\s*\]\s*,?\s*$)****" ) ;

    std::smatch match_results ;
    if( std::regex_match( line, match_results, topics_re ) ) return match_results[1];

    return {} ; // no match, return empty string
}

// Splits the text captured from between the [] of a "topics" array into the
// individual topics.
//
// topics:  text such as  "QC Physics","QD Chemistry"  (quotes included)
// returns: the tokens found between the separators, in order of appearance.
//          Empty tokens and tokens consisting only of white space (for
//          instance the blank after the comma in  "a", "b" ) are not topics
//          and are dropped; all other tokens are returned unmodified.
// caveat:  a comma inside a quoted topic still splits that topic in two.
std::vector<std::string> split_topics( const std::string& topics )
{
    static const std::regex sep_re( "[\",]" ) ; // separators are quote or comma

    std::vector<std::string> result ;

    // -1 as the last constructor arg: get the parts that are not matched (ie the tokens between separators).
    std::sregex_token_iterator iter( topics.begin(), topics.end(), sep_re, -1 ), end ;
    for( ; iter != end ; ++iter )
    {
        const std::string token = *iter ;

        // keep the token only if it has at least one non-white-space character
        if( token.find_first_not_of( " \t\r\n" ) != std::string::npos ) result.push_back(token) ;
    }

    return result ;
}

int main() // very simple test driver
{
    // one hard-coded sample line in the format of interest
    const std::string sample = R"****("topics" : [ "QC Physics","QD Chemistry","QH301 Biology"])****" ;

    // step 1: pull out the text between the square brackets
    const std::string bracket_text = extract_topics(sample) ;

    std::cout << "text: " << sample << '\n' ;
    std::cout << "\nextracted: " << bracket_text << '\n' ;
    std::cout << "\nsplit:\n-------\n" ;

    // step 2: break that text up and print one topic per line, each preceded by a tab
    const std::vector<std::string> pieces = split_topics(bracket_text) ;
    for( std::size_t i = 0 ; i < pieces.size() ; ++i )
    {
        std::cout << '\t' << pieces[i] << '\n' ;
    }
}

https://rextester.com/XHRBF24348
May I know what R"****( ... )****" is? I'm getting an error (expected a ')').
Raw string literals:
http://www.stroustrup.com/C++11FAQ.html#raw-strings

Requires C++11 or later; if there is no typo, the error is because C++11 support is lacking.
Thank you @JLBorges
// Json_Parsing.cpp : Defines the entry point for the console application.
//

#include "stdafx.h"
#include "fstream"
#include <string>
#include "iostream"
#include "regex"

using namespace std;

// NOTE(review): p is never assigned anywhere in this listing, so it stays empty;
// the text that is read from the file goes into `line`, not into p.
string p;
string line;
// Reads 107.json line by line and tries to copy the "topics" data into two
// output files. The review notes below mark the reasons it does not produce
// the expected output.
int _tmain(int argc, _TCHAR* argv[])
{
int count = 0;
ifstream file;
file.open("F://project//DataSet//107.json",ios::in);
ofstream outfile,outfileTopics;
outfile.open("F://project//DataSet//107_New.json");
outfileTopics.open("F://project//DataSet//107_Topics.txt");

// NOTE(review): this message is printed before is_open() is checked and before
// anything has been read, so it does not show that reading succeeded.
cout << "Reading successful"<<endl;
if(file.is_open())
{
// NOTE(review): eof() only becomes true after a read has already failed, so the
// body can run once more with a failed getline; testing the result of
// getline(file,line) in the loop condition avoids that.
while(!file.eof())
{
getline(file,line);
count++;
//cout << "no.of lines" << count <<endl;
// NOTE(review): string::find returns a position, not a bool. It returns
// string::npos (non-zero, i.e. "true") when the text is NOT found and 0
// ("false") when it is found at the very start. As p is empty, both tests
// below are always true. Presumably `line.find(...) != string::npos` was meant.
if(p.find("fullText"))
{
if (p.find("topics"))
{
smatch m;
// NOTE(review): this is an ordinary (non-raw) string literal, and \s \[ \w \] \,
// are not C++ escape sequences; compilers typically warn and drop the
// backslash, so the regex library does not receive the intended pattern
// (each backslash would have to be written as \\). Also, the whole pattern is
// wrapped in ( ... )*, which can match the empty string, so regex_search can
// succeed with an empty match -- the likely reason m[i] holds no text.
regex e("(\s(topics)*:*\[*(\w)*\]*\,)*");
if(regex_search(line,m,e))
{
// NOTE(review): the bound is the number of lines read so far, not m.size();
// m[i] for i >= m.size() is an unmatched (empty) sub-match. The current line
// is also written to outfile once per iteration, i.e. count+1 times.
for(int i=0;i<=count;i++)
{
cout << m[i].str();
outfileTopics << m[i].str() << endl;
outfile << line << endl;
}
cout << "writing sucessfull" <<endl;
}
}
}
}
}
}

I'm using the above code, but I'm not getting any string in m[i]. Sorry to bother you; I'm new to C++ and I have many doubts.
Something like this, perhaps:
(without using raw string literals, untested code)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
#include <iostream>
#include <string>
#include <regex>
#include <vector>
#include <fstream>

// Extracts the raw text between the square brackets of a single-line
// JSON member of the form:   "topics" : [ ... ]
//
// line:    one line of text; it must consist of the "topics" member only,
//          optionally surrounded by white space and optionally followed by
//          the comma that separates it from the next member.
// returns: the text between '[' and ']' with leading and trailing white space
//          removed, or an empty string if the line does not have this form
//          (an empty array "[]" also yields an empty string).
std::string extract_topics( const std::string& line )
{
    // note: \s* between components allows optional white space characters
    //       (.+?) captures the text between []; it is lazy so that white space
    //             just before the closing ']' is left out of the capture
    //       ,?    tolerates a trailing comma after the closing ']'
    //       (ordinary string literal: every regex backslash is written as \\)
    static const std::regex topics_re( "^\\s*\"topics\"\\s*:\\s*\\[\\s*(.+?)\\s*\\]\\s*,?\\s*$" ) ;

    std::smatch match_results ;
    if( std::regex_match( line, match_results, topics_re ) ) return match_results[1];

    return {} ; // no match, return empty string
}

// Splits the text captured from between the [] of a "topics" array into the
// individual topics.
//
// topics:  text such as  "QC Physics","QD Chemistry"  (quotes included)
// returns: the tokens found between the separators, in order of appearance.
//          Empty tokens and tokens consisting only of white space (for
//          instance the blank after the comma in  "a", "b" ) are not topics
//          and are dropped; all other tokens are returned unmodified.
// caveat:  a comma inside a quoted topic still splits that topic in two.
std::vector<std::string> split_topics( const std::string& topics )
{
    static const std::regex sep_re( "[\",]" ) ; // separators are quote or comma

    std::vector<std::string> result ;

    // https://en.cppreference.com/w/cpp/regex/regex_token_iterator
    // -1 as the last constructor arg: get the parts that are not matched (ie the tokens between separators).
    std::sregex_token_iterator iter( topics.begin(), topics.end(), sep_re, -1 ), end ;
    for( ; iter != end ; ++iter )
    {
        const std::string token = *iter ;

        // keep the token only if it has at least one non-white-space character
        if( token.find_first_not_of( " \t\r\n" ) != std::string::npos ) result.push_back(token) ;
    }

    return result ;
}

// Copies every line of the input file that holds a "topics" array to
// out_file, writes each individual topic on its own line to topics_file, and
// finally reports how many topics were extracted from how many lines.
// Returns 0 on success and 1 if a file could not be opened or written.
int main()
{
    const std::string in_file_name = "F://project//DataSet//107.json" ;
    const std::string out_file_name = "F://project//DataSet//107_New.json" ;
    const std::string topics_file_name = "F://project//DataSet//107_Topics.txt" ;

    std::ifstream in_file(in_file_name) ;
    if( !in_file.is_open() )
    {
        std::cerr << "failed to open input file " << in_file_name << '\n' ;
        return 1 ;
    }

    std::ofstream out_file(out_file_name) ;
    if( !out_file.is_open() )
    {
        std::cerr << "failed to open output file " << out_file_name << '\n' ;
        return 1 ;
    }

    std::ofstream topics_file(topics_file_name) ;
    if( !topics_file.is_open() )
    {
        std::cerr << "failed to open output file " << topics_file_name << '\n' ;
        return 1 ;
    }

    std::string line ;
    std::size_t count_lines = 0 ;  // number of lines that held a topics array
    std::size_t count_topics = 0 ; // total number of topics written

    while( std::getline( in_file, line ) ) // for each line read from the input file
    {
        const std::string topics_str = extract_topics(line) ;

        if( !topics_str.empty() ) // if we could extract the topics segment
        {
            ++count_lines ;
            out_file << line << '\n' ; // write this line to out_file

            // split the topics segment into individual topics
            const std::vector<std::string> topics_vector = split_topics(topics_str) ;
            count_topics += topics_vector.size() ;

            // and write each topic to the topics_file
            for( const std::string& topic : topics_vector ) topics_file << topic << '\n' ;
        }
    }

    // flush explicitly so that a write failure (disk full, etc.) is reported
    // instead of being silently lost when the streams are destroyed
    out_file.flush() ;
    topics_file.flush() ;
    if( !out_file || !topics_file )
    {
        std::cerr << "error while writing the output files\n" ;
        return 1 ;
    }

    std::cout << count_topics << " topics were extracted from " << count_lines << " lines\n" ;
}
Thank you once again
Topic archived. No new replies allowed.