1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130
|
/*
* Implement suffix array to print the maximum suffix substring in a string
* Algorithm:
* Build a suffix tree and find the lowest node with multiple children
*
*/
#include <iostream>
#include <vector>
#include <string>
#include <algorithm>
#include <ctype.h>
using namespace std;
struct node{
char ch;
node* next[26];
int treeHeight;
int childCount;
int end = 0;
};
bool checkEmptyVector(node * leaf){
for (int i = 0; i < 26; i++){
if (leaf->next[i] != NULL){
return false;
}
}
return true;
}
int countChildren(node * leaf){
int count = 0;
for (int i = 0; i < 26; i++){
if (leaf->next[i] != NULL){
count++;
}
}
if (leaf->end) count++;
return count;
}
void findMaxRepeatedString(node* root, std::string& result, int& maxChild,
int& lowestHeight, std::string str){
if (checkEmptyVector(root)){
return;
}
for (int i = 0; i<26; i++){
if (root->next[i]){
str += root->next[i]->ch;
if (root->next[i]->childCount >= maxChild &&
root->next[i]->treeHeight > lowestHeight){
maxChild = root->next[i]->childCount;
lowestHeight = root->next[i]->treeHeight;
result = str;
}
findMaxRepeatedString(root->next[i],result,maxChild,lowestHeight,str);
str = "";
}
}
}
void insertSuffixIntoTree(node* root, std::string suffix){
if (suffix.size() == 0){
root->childCount = countChildren(root);
return;
}
char c = suffix[0];
int index = tolower(c) - 'a';
if (root->next[index] == NULL){
root->next[index] = new node();
for (int k = 0; k < 26; k++){
root->next[index]->next[k] = NULL;
}
root->next[index]->ch = tolower(c);
root->next[index]->treeHeight = root->treeHeight + 1;
if (suffix.size() == 1){
root->next[index]->end = 1;
}
}
suffix.erase(suffix.begin());
root->childCount = countChildren(root);
insertSuffixIntoTree(root->next[index], suffix);
}
void buildSuffixTree(node* root, std::string str){
for (int i = str.size() - 1; i >= 0; i--){
std::string suffix = str.substr(i);
std::cout << "suffix is " << suffix << std::endl;
insertSuffixIntoTree(root, suffix);
}
}
void printSuffixTree(node* root, std::string str){
if (root->end){
std::cout << str <<": "<<root->childCount<<" : "<<root->treeHeight<<std::endl;
}
if (checkEmptyVector(root)){
return;
}
for (int i = 0; i<26; i++){
//cout << "inside for loop, i is " << i << endl;
while (root->next[i]){
str += root->next[i]->ch;
printSuffixTree(root->next[i],str);
str = "";
break;
}
}
}
int main() {
std::string str;
node* suffixRoot = new node();
suffixRoot->ch = ' ';
suffixRoot->treeHeight = 0;
for (int i = 0; i < 26; i++){
suffixRoot->next[i] = NULL;
}
std::cout << "enter the string" << std::endl;
std::cin >> str;
buildSuffixTree(suffixRoot, str);
std::string result="", str2="";
int zero = 0,zero2 = 0;
findMaxRepeatedString(suffixRoot,result,zero,zero2,str2);
std::cout << "max repeated string is " << result << std::endl;
str = "";
printSuffixTree(suffixRoot,str2);
getchar();
return 0;
}
|