// Test harness for Parser(): expands each test input into its spelling
// variants and writes the results to the console and to a UTF-16 text file.
// Uses MSVC-specific runtime calls (_setmode, _wfopen).
#define _SCL_SECURE_NO_WARNINGS
#include <iostream>
#include <iomanip>
#include <fstream>
#include <vector>
#include <string>
#include <algorithm>
#include <cstdio> // for _fileno
#include <io.h> // for _setmode
#include <fcntl.h> // for _O_U16TEXT
#include "utils.h" // for miscellaneous utility routines
#include "tee.h" // for basic ostream tee
using namespace std;
vector<wstring> Parser(const wstring& Input);
void Test_Parser();
// Program entry point: prepares the console for wide output, runs the parser
// tests, and always reports success (exit code 0).
int main()
{
    // Switch stdout to UTF-16 text mode. The original author found this was
    // required for the VC++ runtime to display Unicode on the console (and
    // noted that the console glyphs did not visibly change, whereas the
    // characters written to the output file looked correct).
    const int stdout_fd = _fileno(stdout);
    _setmode(stdout_fd, _O_U16TEXT);

    Test_Parser();

    return 0;
}
// Expands Input into every spelling variant produced by optional nasalization.
//
// The input is consumed one character at a time. Whenever an 'n' directly
// follows a character for which utils::is_nasalizable() returns true, every
// variant built so far is split in two: in one copy the preceding character is
// replaced by utils::nasalize(<that character>), in the other it is left as
// is. The 'n' itself is then appended to both copies, exactly like any other
// character. (The original author's notes describe the nasalized forms as
// a/i/u with ogonek; that mapping lives in utils -- confirm there.)
//
// Returns a vector that always holds at least one string (a single empty
// string for empty input); its size doubles for each qualifying 'n'. The
// nasalized variants occupy the first half of each doubling.
vector<wstring> Parser(const wstring& Input)
{
    typedef vector<wstring>::iterator VariantIter;

    // Begin with a single, empty variant.
    vector<wstring> Results(1);

    // Stays null until the first character has been consumed.
    wchar_t previous = L'\0';

    for (wstring::const_iterator pos = Input.begin(); pos != Input.end(); ++pos)
    {
        const wchar_t current = *pos;

        if (current == L'n' && utils::is_nasalizable(previous))
        {
            // Duplicate the existing variants so that the set is
            // [originals..., copies...]. Reserving first guarantees that
            // the references read during push_back stay valid.
            const size_t original_count = Results.size();
            Results.reserve(2 * original_count);
            for (size_t k = 0; k != original_count; ++k)
            {
                Results.push_back(Results[k]);
            }

            // In the first half only, overwrite the trailing character
            // (which is `previous`) with its nasalized form.
            const wchar_t nasal = utils::nasalize(previous);
            const VariantIter first_half_end = Results.begin() + original_count;
            for (VariantIter variant = Results.begin(); variant != first_half_end; ++variant)
            {
                *variant->rbegin() = nasal;
            }
        }

        // Every variant, nasalized or not, receives the current character.
        for (VariantIter variant = Results.begin(); variant != Results.end(); ++variant)
        {
            variant->push_back(current);
        }

        previous = current;
    }

    return Results;
}
// One parser test: just the text that is handed to Parser().
struct TestCase
{
    const wchar_t* input; // null-terminated wide string to parse
};

// Inputs exercised by Test_Parser(), in the order they are reported.
const TestCase testCases[] =
{
    { L"n" },                               // lone 'n', nothing before it
    { L"na" },                              // 'n' first, then a vowel
    { L"an" },                              // a single "an"
    { L"sultan" },                          // one "an"
    { L"banana" },                          // two "an"s
    { L"animal" },                          // one "an"
    { L"Zoltan" },                          // one "an"
    { L"banana sultan" },                   // three "an"s across two words
    { L"animal banana" },                   // three "an"s across two words
    { L"Zoltan the animal banana sultan" }, // five "an"s in a longer phrase
    { L" " },                               // a single space
    { L"" }                                 // empty input
};

// Number of entries in testCases, derived from the array itself.
const size_t testCaseCount = sizeof testCases / sizeof *testCases;
// Runs Parser() over every entry in testCases and reports the outcome.
//
// Output is written simultaneously to std::wcout and to the UTF-16 text file
// "parser_test_results_msvc.txt" (opened relative to the current directory)
// through a wteestream. For each input the report shows:
//   - how many "an" substrings utils::count_substr() finds,
//   - utils::raise_to(2, <that count>) as the expected number of permutations,
//   - every string Parser() returned, with its count of U+0105 characters.
//
// If the results file cannot be opened, a message is written to std::wcerr
// and the function returns without running any test case.
void Test_Parser()
{
    const wchar_t filePath[] = L"parser_test_results_msvc.txt";
    const wchar_t UTF16BOM = L'\xFEFF'; // UTF-16 Byte Order Mark (BOM)

    // LATIN SMALL LETTER A WITH OGONEK. Written as an escape so the comparison
    // below does not depend on how the compiler decodes this source file.
    const wchar_t aWithOgonek = L'\x0105';

    // The file is opened through the C runtime and switched to UTF-16 text
    // mode so that unusual Unicode characters can be written to it. (The
    // original author noted there may be a better way to achieve this.)
    FILE* fp = _wfopen(filePath, L"w");
    if (fp == NULL)
    {
        // _wfopen reports failure with a null pointer; passing that on to
        // _fileno/_setmode would be invalid, so bail out instead.
        wcerr << L"Test_Parser: unable to open \"" << filePath
              << L"\" for writing" << endl;
        return;
    }
    _setmode(_fileno(fp), _O_U16TEXT);

    // Inner scope: the streams wrapping fp must be destroyed before fclose(),
    // so nothing can touch the FILE* after it has been closed.
    {
        wofstream ofs(fp);
        ofs << UTF16BOM;

        // Tee, so the output appears on the console while it is written to file.
        wteestream os(std::wcout, ofs);

        os << L"Test_Parser begin" << endl;
        os << endl;

        for (size_t caseIndex = 0; caseIndex < testCaseCount; ++caseIndex)
        {
            const TestCase& thisTestCase = testCases[caseIndex];
            const wstring input = thisTestCase.input;
            const size_t an_count = utils::count_substr(input, L"an");

            os << L"input : \"" << input << L"\"" << endl;
            os << L" " << an_count << L" \"an\"(s)" << endl;
            os << L" " << utils::raise_to(2, an_count) << L" permutation(s) expected" << endl;
            os << endl;

            vector<wstring> Results = Parser(input);

            os << L"results : " << Results.size() << L" permutation(s) returned" << endl;
            for (size_t resultIndex = 0; resultIndex < Results.size(); ++resultIndex)
            {
                const wstring& result = Results[resultIndex];
                const size_t a_with_ogonek_count =
                    count(result.begin(), result.end(), aWithOgonek);
                os << L" Results[" << setw(2) << resultIndex << L"] = \"" << result << L"\""
                   << L" [a with ogonek count : " << a_with_ogonek_count << L"]" << endl;
            }
            os << endl;
        }

        os << L"Test_Parser end" << endl;
        os << endl;
    }

    fclose(fp);
}
// End of parser test harness.