Reading in strings & merging

I have a final project in my intro C++ class that I'm having trouble with. I have made two classes (a list class and a strand class), and I have to read in user input for the strands, then find a match within a user-defined threshold, and lastly merge the two strands and their labels together.

I have included the two header and implementation files, as well as the main that will be running the program. The makefile is included as well.

My professor has taught the basics of this stuff, but then threw me in the deep end with this project.

Thanks for any responses/help!!!!!

main
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
  /*DNAmain.cpp

*/

#include "DNAList.h"
#include "DNAtype.h"
#include <iostream>
#include <string>

using namespace std;

int main (){

   //Variable declaration
   ListItemType DNAList;
   int numStrands;
   DNAitem s1;
   DNAitem s2;
   DNAitem strand;
   int size;
   string matchThresh;

   //Get input
   cout << "How many DNA strands do you want to enter?" << endl;
   cin >>  numStrands;
   for (int i = 0; i < numStrands; i++){
      size = 0;
      if ((size < numStrands)||(size = numStrands)){
         cout << "Enter the label and strand you want to insert." << endl;
         cin >> s1 >> s2;
         DNAType strand;
         strand.set (s1, s2);
         DNAList.insert (strand);
         size ++;
         }
   }  
   
   //print everything in strands/DNAlist
   cout << (DNAList.retrieve(i)).getLabel() <<endl;
   cout << (DNAList.retrieve(i)).getBases() <<endl;

   //get threshhold
   cout << " " << endl;
   cout << "Matching Threshold: " << matchThresh << endl;
   
   
   //Function to check for and merge matches




return 0;
}



DNAList.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
/*DNAList.h

*/

#include <iostream>
#include <string>
#include "DNAtype.h"

using namespace std;

typedef DNAitem ListItemType;

//Purpose: list of ListItemType values kept in a fixed-size array of
//         MAX_SIZE cells; actualSize counts the cells currently in use.
class ListType {
   public:
      //Construction
      ListType ();

      //Queries (do not modify the list)
      int getLength () const;
      ListItemType retrieve (int index) const;
      int search (ListItemType item) const;

      //Modifiers
      void insert(ListItemType item);
      void remove (ListItemType item);
      void sort ();

      //Output
      void print ();

   private:
      static const int MAX_SIZE= 100;   //capacity of the backing array
      int actualSize;                   //number of items currently stored
      ListItemType array[MAX_SIZE];     //storage; cells 0..actualSize-1 are in use

};


DNAList implementation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
DNAList.cpp
#include "DNAList.h"

//Purpose: Constructor; a new list starts out with zero stored items

ListType::ListType () : actualSize(0) {
}

//Purpose: Get length of list

int ListType::getLength() const{
   return actualSize;
}

//Purpose: Append a new item after the last stored item
//Note:    when the list already holds MAX_SIZE items the call does nothing

void ListType::insert(ListItemType item){
   if (actualSize >= MAX_SIZE){
      return;   //no room left
   }
   array[actualSize++] = item;
}


//Purpose: Remove the first stored item equal to the given one
//Note:    the list is left unchanged when search() does not find the item

void ListType::remove(ListItemType item){
   const int found = search(item);
   if (found == -1){
      return;   //nothing to remove
   }
   //Close the gap: every item behind the removed one moves one cell left
   for (int src = found + 1; src < actualSize; src++){
      array[src - 1] = array[src];
   }
   actualSize--;
}


//Purpose: retrieve a specific element from the list
//Params:  index - zero-based position of the wanted item
//Returns: a copy of the item at that position, or a default-constructed
//         ListItemType when index is not in the range 0..actualSize-1

ListItemType ListType::retrieve (int index) const{
   //Guard against reading a cell that holds no stored item (or lies
   //outside the array altogether).
   if (index < 0 || index >= actualSize){
      return ListItemType();
   }
   return array [index];
}


//Purpose: Sequential search for an item
//Returns: index of the first stored item equal to the argument, or -1 when
//         no stored item matches

int ListType::search (ListItemType item) const {
   for (int i = 0; i < actualSize; i++){
      if (item == array [i]){
         return i;   //first match wins
      }
   }
   return -1;
}


//Purpose: sort the stored items into ascending order (by operator>)
//Method:  bubble sort; after pass number p the last p+1 items are in their
//         final position, so each pass can stop one cell earlier.

void ListType::sort (){
   for (int pass = 0; pass < actualSize - 1; pass++){
      //The inner bound is actualSize - 1 - pass so that array[j+1] never
      //goes beyond the last stored item. A bound of actualSize - pass would
      //compare the last item with the unused cell behind it and could swap
      //that cell into the list.
      for (int j = 0; j < actualSize - 1 - pass; j++){
         if (array [j] > array [j+1]){
            ListItemType temp = array [j];
            array [j] = array [j+1];
            array [j+1] = temp;
         }
      }
   }
}

//Purpose: Write the stored items to cout, from the last item to the first,
//         with nothing printed between them

void ListType::print (){

   int remaining = actualSize;
   while (remaining > 0){
      remaining--;
      cout << array [remaining];
   }

}


DNAtype header
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
DNAtype.h
/* DNAtype.h

*/

//Include guard: DNAmain.cpp pulls this header in twice (directly and through
//DNAList.h); without the guard the class below would be defined twice.
#ifndef DNATYPE_H
#define DNATYPE_H

#include <iostream>
#include <string>

using namespace std;

typedef string DNAitem;

//Purpose: one DNA strand, made up of a label and its sequence of bases
class DNAType {
   public:
      DNAType ();
      void set (string, string); //stores the label (first) and the bases (second)
      string getLabel (); //returns value of label (s1)
      string getBases (); //actg char that are inputted (s2)
      //bool operator ==;
      //bool operator >;

   private:
      DNAitem DNAstrand;   //not used by any member function in DNAtype.cpp
      DNAitem DNAlabel;    //label given to set()
      DNAitem DNAbase;     //bases given to set()
      int matchThresh;     //set to 0 by the constructor



};

#endif


DNAtype implementation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
DNAtype.cpp
/*DNAtype.cpp

*/

#include "DNAtype.h"

//Purpose: Constructor; the matching threshold starts at 0 and the string
//         members start out empty

DNAType::DNAType () : matchThresh(0) {
}

//Purpose: store the label and the bases of this strand
//Params:  s1 - label, s2 - bases
void DNAType::set (string s1, string s2){
   this->DNAbase = s2;
   this->DNAlabel = s1;
}

//Purpose: hand back a copy of the bases stored by set()
string DNAType::getBases (){
   return this->DNAbase;
}

//Purpose: hand back a copy of the label stored by set()

string DNAType::getLabel (){
   return this->DNAlabel;
}



makefile
1
2
3
4
5
6
7
8
# Link the three object files into the DNAmain executable.
# Every recipe line below starts with a TAB character, as make requires.
DNAmain: DNAmain.o DNAList.o DNAtype.o
	g++ DNAmain.o DNAList.o DNAtype.o -o DNAmain
# DNAList.h includes DNAtype.h, so DNAList.o depends on both headers.
DNAList.o: DNAList.cpp DNAList.h DNAtype.h
	g++ -c DNAList.cpp
DNAtype.o: DNAtype.cpp DNAtype.h
	g++ -c DNAtype.cpp
DNAmain.o: DNAList.h DNAtype.h DNAmain.cpp
	g++ -c DNAmain.cpp
It would probably make it easier if you supplied some examples of strands and what the output would look like for different thresholds. Is the threshold a minimum/maximum value?
Sorry about that! Haven't slept much. Here are examples of the different scenarios that I have to check and merge:

case 1:
Threshold= 5
Strands: X= acggtcacgg, Y= gtcacatta
Output:
XY
acggtcacatta

case 2:
Threshold= Doesn't apply as Y is contained completely in X
Strands: X= acggtcac, Y= cggt
Output:
XY
acggtcac

case 3:
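Either there are no matches at all, or the only matches are shorter than the threshold (size < threshold). In both situations the strands are not merged.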

Expected Input format:
6 //matching threshold
7 //number of strands processed

B0
tgaaaattcctttctattttaggccc

C0
tgaaaattcctttctattttaggcccatgcaat

C1
ggcattagggcggttaa

..so on and so forth

Expected Output:
If all strands can combine:

Success
C0B1: tgaaaattcctttctattttaggcccatgcaatggcattagggcggttaa

If only some can be combined:
(different example from above)
Failure:
X1= acct
X2= cggtca
X3Y4= gtttta

What the assignment seems to be requiring is an algorithm which will extract a substring of a length specified by the threshold from one string and then search for a match in another. Once a match is found, you probably have to do a comparison between the two strands to see how many bases actually match between the two strands. Then you can chop the end of one and append the end of the other strand to it (the merge). Tell me if this sounds right to you.

Take your example 1:
Strands: X= acggtcacgg, Y= gtcacatta
I guess you can start with strand Y since it is shorter. Threshold is 5 so you take a substring of Y which is the first five characters: gtcac
Now you find the substring in X, and you see that there is a match starting at index 4 of string X. Now you advance 5 indices from the match position in X and the start of Y and start comparing. Immediately you find there are no more matches. So you substring X to get acggtcac and you substring the rest of Y to get atta and then you put them together to get the output.

I don't understand case 3. It seems the first and second parts contradict themselves.
Topic archived. No new replies allowed.