1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227
|
#include<iostream>
#include<string>
#include<vector>
#include<map>
#include<array>
#include<algorithm>
#include "CharacterMatch.h"
using namespace std;
#define USE_SORT_ALGORITHM
#define USE_FIND_ALGORITHM
// Demo driver; its definition appears later in this file.
void TestParser();

// Program entry point: runs the parser demonstration once and exits
// successfully.
int main()
{
    TestParser();
    // Reaching the end of main() is equivalent to `return 0;`.
}
class Orthography;

// Stripped-down Parser: converts an input string according to the character
// matches held by an Orthography.
class Parser
{
public:
    Parser() = default;
    ~Parser() = default;

    // Converts wstrInput using the character matches of `orthography` and
    // returns the result vector.
    std::vector<std::wstring> OrthographyParser(const std::wstring& wstrInput, const Orthography& orthography);

private:
    // Number of entries kept in m_Results. Initialised here so the member
    // never holds an indeterminate value before the first parse.
    std::size_t m_iCount = 0;

    // Storage for the most recent conversion result(s).
    std::vector<std::wstring> m_Results;
};
// stripped down Orthography
class Orthography
{
private:
std::wstring m_Name;
std::vector<CharacterMatch> m_CharacterMatches;
public:
Orthography(const std::wstring& Name) : m_Name(Name) {}
~Orthography(){}
void AddCharacterMatch(const CharacterMatch& charMatch)
{
m_CharacterMatches.push_back(charMatch);
}
const vector<CharacterMatch>& GetCharacterMatches() const
{
return m_CharacterMatches;
}
void PrepareForUse();
};
#ifndef USE_SORT_ALGORITHM
// use function for predicate
bool CompareInputLenThenVal(CharacterMatch& lhs, CharacterMatch& rhs)
{
const wstring& strL = lhs.GetInput();
const wstring& strR = rhs.GetInput();
return ((strL.length() > strR.length()) || ((strL.length() == strR.length()) && (strL > strR)));
}
#endif
// Sorts the stored character matches so that entries with longer input
// strings come first; entries whose inputs have equal length are placed in
// descending lexical order of their input strings.
void Orthography::PrepareForUse()
{
#ifdef USE_SORT_ALGORITHM
    // Lambda predicate. It evaluates to true if the LHS CharacterMatch's input
    // string is longer than that of the RHS, or if the strings have equal
    // length and the LHS string is lexically greater than the RHS.
    // Arguments are const references: a sort comparison must not modify the
    // elements it inspects.
    // NOTE(review): other code in this file stores GetInput() in a
    // `const wchar_t*`, so these bindings presumably materialise a temporary
    // wstring per call - confirm against CharacterMatch.h.
    auto CompareInputLenThenVal = [](const CharacterMatch& lhs, const CharacterMatch& rhs) -> bool
    {
        const wstring& strL = lhs.GetInput();
        const wstring& strR = rhs.GetInput();
        if (strL.length() != strR.length())
        {
            return strL.length() > strR.length();
        }
        return strL > strR;
    };
#endif
    // When USE_SORT_ALGORITHM is not defined, the name below refers to the
    // free function of the same name instead of the lambda.
    sort(m_CharacterMatches.begin(), m_CharacterMatches.end(), CompareInputLenThenVal);
}
// Converts wstrInput using the character matches of `orthography`.
//
// The input is scanned left to right. At each position the first
// CharacterMatch (in the orthography's stored order) whose input string occurs
// at that position is applied: its output string is appended to the result and
// the scan advances past the matched input. When nothing matches, the current
// character is copied through unchanged and the scan advances by one.
//
// Because the first hit wins, longest-match behaviour requires the matches to
// be ordered longest input first before calling this function.
//
// Returns a copy of m_Results, which holds exactly one element: the converted
// text.
vector<wstring> Parser::OrthographyParser(const wstring& wstrInput, const Orthography& orthography)
{
    // TODO get rid of m_iCount and use m_Results.size() / resize() instead
    m_iCount = 1;
    // Reset the result to a single empty string.
    m_Results.assign(m_iCount, wstring());
    wstring& converted = m_Results[0];

    size_t index = 0;
    const vector<CharacterMatch>& charMatches = orthography.GetCharacterMatches();

    // True when charMatch's input string occurs in wstrInput at `index`.
    // Two cases are rejected up front:
    //  - an empty input, which would "match" everywhere without consuming any
    //    text and make the scan loop forever;
    //  - an input longer than the remaining text, which must not be compared
    //    because that would read past the end of wstrInput.
    // `index` is always < wstrInput.size() here, so the subtraction is safe.
    auto MatchesAtIndex = [&wstrInput, &index](const CharacterMatch& charMatch) -> bool
    {
        const wchar_t* input = charMatch.GetInput();
        const size_t length = wcslen(input);
        if (length == 0 || length > wstrInput.size() - index)
        {
            return false;
        }
        return wstrInput.compare(index, length, input, length) == 0;
    };

    while (index < wstrInput.size())
    {
#ifdef USE_FIND_ALGORITHM
        // using find_if algorithm
        auto matchIter_match = find_if(charMatches.begin(), charMatches.end(), MatchesAtIndex);
#else
        // long hand: walk the matches in stored order and stop at the first hit
        // (THIS ASSUMES VECTOR IS CORRECTLY SORTED)
        auto matchIter_match = charMatches.begin();
        while (matchIter_match != charMatches.end() && !MatchesAtIndex(*matchIter_match))
        {
            ++matchIter_match;
        }
#endif
        if (matchIter_match != charMatches.end())
        {
            // Match found: append its output and skip over the consumed input.
            const wchar_t* input = matchIter_match->GetInput();
            const wchar_t* output = matchIter_match->GetOutput();
            converted += output;
            index += wcslen(input);
        }
        else
        {
            // No match: copy the current character across and move on by one.
            converted += wstrInput[index];
            ++index;
        }
    }
    return m_Results;
}
void TestParser()
{
Orthography ortho(L"Test");
ortho.AddCharacterMatch(CharacterMatch(L"oo" , L"u"));
ortho.AddCharacterMatch(CharacterMatch(L"u" , L"w"));
ortho.AddCharacterMatch(CharacterMatch(L"i" , L"y"));
ortho.AddCharacterMatch(CharacterMatch(L"sc" , L"sh"));
ortho.AddCharacterMatch(CharacterMatch(L"rs" , L"rz"));
ortho.AddCharacterMatch(CharacterMatch(L"s" , L"zz"));
ortho.AddCharacterMatch(CharacterMatch(L"sch", L"sk"));
ortho.AddCharacterMatch(CharacterMatch(L"ith" , L"iz"));
{
const vector<CharacterMatch>& charMatches = ortho.GetCharacterMatches();
wcout << L"Test char matches" << endl;
vector<CharacterMatch>::const_iterator matchIter;
for (matchIter = charMatches.begin(); matchIter != charMatches.end(); ++matchIter)
{
const wchar_t* input = matchIter->GetInput();
const wchar_t* output = matchIter->GetOutput();
wcout << input << L" -> " << output << endl;
}
wcout << endl;
}
ortho.PrepareForUse();
{
const vector<CharacterMatch>& charMatches = ortho.GetCharacterMatches();
wcout << L"Sorted char matches" << endl;
vector<CharacterMatch>::const_iterator matchIter;
for (matchIter = charMatches.begin(); matchIter != charMatches.end(); ++matchIter)
{
const wchar_t* input = matchIter->GetInput();
const wchar_t* output = matchIter->GetOutput();
wcout << input << L" -> " << output << endl;
}
wcout << endl;
}
{
wstring text = L"school is out, let's scoot! down with teachers!";
Parser parser;
vector<wstring> results = parser.OrthographyParser(text, ortho);
wcout << L"input text:" << endl;
wcout << text << endl;
wcout << endl;
wcout << L"output text:" << endl;
const size_t count = results.size();
for(size_t index = 0; index < count; ++index)
{
wcout << results[index] << endl;
}
wcout << endl;
}
}
|