Interpreting a string using boost::regex

Hi everyone. Awesome forum ...

How could I make the following code more general and more powerful?
It is meant to interpret the string of characters passed in argv[1].

The purpose of the program is to read the string and work out the date the user meant to type, using boost::regex with Perl syntax.

For instance, the string could be !QWETGsdfg21::-!jun-.?1992 and I want to print 21 - June - 1992, or the string could be asfgw|21.6!!||dfslk1992 and I want to print 21 - June - 1992 as well. I've done some work so far, and you can see the result below. But I feel the following code is not very general, and that I'm not considering many cases in which the string contains valid date information but the program finds it too difficult and fails to evaluate it. So please give me some hints to make it better. Thank you in advance.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
#include <iostream>
#include <string>
#include <vector>

#include <boost/regex.hpp>


int
main (int argc, char *argv[])
{

  if (argc<2)
   {
    std::cerr << "Usage: " << argv[0] << " <dd/mm/yy>" << std::endl;
    return 1;
   }

  std::string req_date = argv[1];

  std::vector<boost::regex> month_regexp;

  month_regexp.push_back(boost::regex("\\A(jan)|[^\\d\\w](jan)|[^\\d\\w]1[^\\d\\w]|[^\\d\\w]01[^\\d\\w]",boost::regex_constants::icase));
  month_regexp.push_back(boost::regex("\\A(feb)|[^\\d\\w](feb)|[^\\d\\w]2[^\\d\\w]|[^\\d\\w]02[^\\d\\w]",boost::regex_constants::icase));
  month_regexp.push_back(boost::regex("\\A(mar)|[^\\d\\w](mar)|[^\\d\\w]3[^\\d\\w]|[^\\d\\w]03[^\\d\\w]",boost::regex_constants::icase));
  month_regexp.push_back(boost::regex("\\A(ap)|[^\\d\\w](ap)|[^\\d\\w]4[^\\d\\w]|[^\\d\\w]04[^\\d\\w]",boost::regex_constants::icase));
  month_regexp.push_back(boost::regex("\\A(may)|[^\\d\\w](may)|[^\\d\\w]5[^\\d\\w]|[^\\d\\w]05[^\\d\\w]",boost::regex_constants::icase));
  month_regexp.push_back(boost::regex("\\A(jun)|[^\\d\\w](jun)|[^\\d\\w]6[^\\d\\w]|[^\\d\\w]06[^\\d\\w]",boost::regex_constants::icase));
  month_regexp.push_back(boost::regex("\\A(jul)|[^\\d\\w](jul)|[^\\d\\w]7[^\\d\\w]|[^\\d\\w]07[^\\d\\w]",boost::regex_constants::icase));
  month_regexp.push_back(boost::regex("\\A(au)|[^\\d\\w](au)|[^\\d\\w]8[^\\d\\w]|[^\\d\\w]08[^\\d\\w]",boost::regex_constants::icase));
  month_regexp.push_back(boost::regex("\\A(se)|[^\\d\\w](se)|[^\\d\\w]9[^\\d\\w]|[^\\d\\w]09[^\\d\\w]",boost::regex_constants::icase));
  month_regexp.push_back(boost::regex("\\A(o)|[^\\d\\w](o)|[^\\d\\w]10[^\\d\\w]",boost::regex_constants::icase));
  month_regexp.push_back(boost::regex("\\A(n)|[^\\d\\w](n)|[^\\d\\w]11[^\\d\\w]",boost::regex_constants::icase));
  month_regexp.push_back(boost::regex("\\A(d)|[^\\d\\w](d)|[^\\d\\w]12[^\\d\\w]",boost::regex_constants::icase));

  unsigned int month_index;

  bool valid_day    = false;
  bool valid_month  = false;
  bool valid_year   = false;

  for (unsigned int i = 0; i < month_regexp.size(); ++i)
  {
   if (boost::regex_search(req_date, month_regexp[i],
                          boost::regex_constants::format_perl))
   {
    month_index = i +1;
    #ifdef DEBUG
    std::cerr << "DEBUG : month_index == " << month_index << std::endl;
    #endif
    valid_month = true; 
   }
  }

  // We're using it for printing the regex matched.
  boost::smatch m;

  std::string tmp_string;

  std::string day_str;

  // In prose: if the number related to the day is at the beginning
  // of the string, if it's among NON-digits or if it's at the end of the string
  // -with NON-digits on the left.
  if (boost::regex_search(req_date, m,
                         boost::regex("\\A(\\d{1,2})\\D|\\D(\\d{1,2})\\D|\\D(\\d{1,2})\\Z", boost::regex_constants::icase),
                         boost::regex_constants::format_perl))
  {
   #ifdef DEBUG
   std::cerr << "DEBUG : m.str() == " << m.str() << std::endl;
   #endif
   tmp_string = m.str();
   boost::regex_search(tmp_string, m,
                      boost::regex("\\d{1,2}"),
                      boost::regex_constants::format_perl);
   day_str = m.str();
   #ifdef DEBUG
   std::cout << "DEBUG : day_str == " << day_str << std::endl;
   #endif
   valid_day = true;
  }
  else
  {
   std::cerr << "Sorry, but I couldn't extract "
             << "a valid day from the starting string. \n";
   valid_day = false;
  }

  std::string year_str;
  if (boost::regex_search(req_date, m,
                         boost::regex("\\A(\\d{4})\\D|\\D(\\d{4})\\D|\\D(\\d{4})\\Z", boost::regex_constants::icase),
                         boost::regex_constants::format_perl))
  {
   tmp_string = m.str();
   boost::regex_search(tmp_string, m,
                      boost::regex("\\d{4}", boost::regex_constants::icase),
                      boost::regex_constants::format_perl);
   year_str = m.str();
   #ifdef DEBUG
   std::cout << "DEBUG : year_str == " << year_str << std::endl;
   #endif
   valid_year = true;
  }
  else
  {
   std::cerr << "Sorry, but I couldn't extract "
             << "a valid year from the starting string. \n";
   valid_year = false;
  }

  if(valid_day && valid_month && valid_year)
  {
   std::cout << "Date typed is : "
             << day_str     << " - "
             << month_index << " - "
             << year_str    << std::endl;
  }
  else
  {
   std::cout << "Cannot understand data typed." << std::endl;
  }

return 0;

}

//
//
// EXAMPLE:
// Input  => ./ex1.x 21-jun-1992
// Output => Date typed is : 21 - 6 - 1992
//
// 
Topic archived. No new replies allowed.