Efficient way to covert a file to DAG

Hello everyone

I have to read a file which describes a circuit and convert it to Directed Acyclic Graph.

The files formats looks like:

1
2
3
4
5
6
7
8
9
10
11
# of nodes 11
# of edges 12

INPUT(G1gat)
INPUT(G2gat)
INPUT(G3gat)
OUTPUT(G10gat)
OUTPUT(G11gat)
G10gat = NAND2(G1gat, G2gat)
G11gat = NAND2(G2gat, G3gat)
...


For Graph construction i've decided to use the following approach:
1
2
3
4
5
6
7
Read file line by line
if(line.find(INPUT) or line.find(OUTPUT))
assign what inside the "()" as Graph node
if(line.find("="))
assign left-side of "=" as child node 
assign each word inside "()" as parents of the child //the Gates arguments
add-edge from each parent to the child node 

In graph construction the type of gates is not important. just connect the inputs of them to output by edges no matter what gate is mentioned (nand, or xor, not, ...)

I've done the following:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
int main ()
{
	
	vector<string> node(11);

	Graph G(11, node);
	int** edge = new int*[12];

	ifstream fin("bench/c17.bench");
	string line , word, token , gate , parents, child;

	vector<string> PIs, POs, GATEs, FANINs;
	int nodes =0; int edges =0; int pi=0, po=0;
	int node_counter = 0;
	
	map <vector<string>, string> cells;
	while (getline(fin, line))
	{
		int child_indx = 0, parent_indx = 0;
		
		//-----------------------------------------------------------
		if (line.find("INPUT") != string::npos)
		{
			string Pin = line.substr(line.find("(") + 1, line.find(")") - line.find("(") - 1);
			PIs.push_back(Pin);
			//---------------------------------------------------
			if (verify(Pin, node, node_counter) == -1)
			{
				node[node_counter] = Pin;
				G.nodes[node_counter] = node[node_counter];
				node_counter++;
			}
			//---------------------------------------------------
		}
		if (line.find("OUTPUT") != string::npos)
		{
			string Pout = line.substr(line.find("(") + 1, line.find(")") - line.find("(") - 1);
			POs.push_back(Pout);
			//---------------------------------------------------
			if (verify(Pout, node, node_counter) == -1)
			{
				node[node_counter] = Pout;
				G.nodes[node_counter] = node[node_counter];
				node_counter++;
			}
			//---------------------------------------------------
		}
		if (line.find("=") != string::npos)
		{
			cout << edges++ << endl;
			child = line.substr(0, line.find("=") - 1);
			child.erase(remove_if(child.begin(), child.end(), isspace), child.end());
			
			//---------------------------------------------------
			if (verify(child, node, node_counter) == -1)
			{
				node[node_counter] = child;
				G.nodes[node_counter] = node[node_counter];
				node_counter++;
			}
			//---------------------------------------------------
			gate = line.substr(line.find("=") + 1, line.find("(") - line.find("=") - 1);
			//---------------------------------------------------
			string Ginputs = line.substr(line.find("(") + 1, line.find(")") - line.find("(") - 1); // gate inputs
			//---------------------------------------------------
			int inputs = (int)(count(begin(Ginputs), end(Ginputs), ',' ) + 1);
			vector<string> parent(inputs);
			//--------------------------------------------------
			child_indx = verify(child, node, node_counter);
			edge[child_indx] = new int[inputs];
			//---------------------------------------------------
			if (inputs == 1)
			{
				parent[0] = Ginputs;
				parent[0].erase(remove_if(parent[0].begin(), parent[0].end(), isspace), parent[0].end());
				cells.insert(make_pair(parent, child));
				//-------------------------------------------
				for (int i = 0; i < parent.size(); i++)
				{
					if (verify(parent[i], node, node_counter) == -1)
					{
						node[node_counter] = parent[i];
						G.nodes[node_counter] = node[node_counter];
						node_counter++;
					}
					inputs--;
					edge[child_indx][inputs] = verify(parent[i], node, node_counter + 1);
					G.Add_Edge(edge[child_indx][inputs], child_indx, 1.5);
				}
				//-------------------------------------------
				
				
			}
			if (inputs == 2)
			{
				parent[0] = Ginputs.substr(0, Ginputs.find(","));
				parent[0].erase(remove_if(parent[0].begin(), parent[0].end(), isspace), parent[0].end());
				Ginputs = Ginputs.substr(Ginputs.find(",") + 2, Ginputs.length());
				parent[1] = Ginputs.substr(0, Ginputs.length());
				parent[1].erase(remove_if(parent[1].begin(), parent[1].end(), isspace), parent[1].end());
				cells.insert(make_pair(parent, child));
				//-------------------------------------------
				for (int i = 0; i < parent.size(); i++) {
					if (verify(parent[i], node, node_counter) == -1)
					{
						node[node_counter] = parent[i];
						G.nodes[node_counter] = node[node_counter];
						node_counter++;
					}
					inputs--;
					edge[child_indx][inputs] = verify(parent[i], node, node_counter + 1);
					G.Add_Edge(edge[child_indx][inputs], child_indx, 1.5);
				}
				//-------------------------------------------
			}
			
			
		
		}
	}
	G.Draw_Graph();

	char z;
	cin >> z;
	return 0;
}

//--------------------------------------------------------------------
int verify(string node_name, vector<string> arr, int num)
{
	for (int i = 0; i < num; i++)
	{
		if (arr[i] == node_name)
			return i;
	}
	return -1;
}
//-------------------------------------------------------------------- 


It works fine. But the problem is it takes a really long time for large circuits(file) (i.e. with 20k gates or higher). I tested the largest file with release mode in VS2015 in several hours!!

So could you please suggest any ways to reduce the runtime?

Any comments will be appreciated. Thank you
Last edited on
The way to find performance problems is to profile your code. To get help here, post a program that can compile and some sample input.

I'm going to bet that the problem is verify(), which does a linear search in a possibly large vector AND it passes the vector and search string by value which requires copying them.

To fix this, I'd add a map<string, NodeId> to the graph. This lets you quickly lookup a node by name. The "NodeId" here is something that lets you find a node very quickly (a pointer to the node, index into a node array, or whatever). This map replaces your verify() function.
Thank you for your fast reply. I will add the link to input files and the code.

smallest file:
https://drive.google.com/open?id=1ITbsf93MkTNfRlp9AwLQ1jdfY2VbC_wC

largest file:
https://drive.google.com/open?id=1GrkC7DBb5pUYA6BaQOQEOIOeQd688qnW

main.cpp:
https://drive.google.com/open?id=1mz86uvdcRy4ERtJHUsl0gDByu4E0LFqE

yes i agree. i think it is the verify(). Can we just add nodes and edges to construct the graph without checking (without verify())? as long as the nodes has the same name.
I'm not sure of all the details you need, but here's a version that constructs and prints the graph for b18.bench in about 650ms on my computer.

The nodes are stored in vector Graph::nodes. Similarly, edges are stored in vector Graph::edges.

Each node has a name, a vector of edges in and a vector of edges out. These vectors contain indexes into Graph::edges.

Each edge contains node it comes from, the node it goes to and the weight. The "from" and "to" nodes are stored as the indexes into Graph::nodes.

Graph also contains a map<string,int> that maps node names to their indexes.

With this structure, you can quickly find a node from it's name, and once you have a node you can find its edges. Also if you have an edge, you can find the nodes that it connects.

I suspect that much of the data you're storing, like fanIn and fanOut, isn't needed. Those are just the edges in and edges out of the graph.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
#include <iostream>
#include <fstream>
#include <sstream>
#include <string>
#include <vector>
#include <list>
#include <map>
#include <algorithm>
#include <cctype>
using namespace std;
struct Edge
{
    unsigned from, to;		// index of nodes in this edge
    double weight;
    Edge(unsigned f, unsigned t, double w): from(f), to(t), weight(w)
    {}
};


// Class to represent a graph
class Graph {
public:
    struct Node {
	std::string name;
	std::vector < unsigned >in, out;	// Index of edges leading in and out of this node
    };

    // A vector to store the nodes. Nodes are refered to by their index into this array
    std::vector < Node > nodes;

    // Map node name to index
    std::map < std::string, int >node_names;

    // Find a node by name. Add it if not already present
    int findOrAdd(const string & name);

    // A vector to store the edges. Edges are referred to by their index into this array
    std::vector < Edge > edges;
    void Add_Edge(unsigned u, unsigned v, double weight);

    void Draw();
};

// Add an edge from u to v with weight
void
Graph::Add_Edge(unsigned u, unsigned v, double weight)
{
    if (u >= nodes.size() || v >= nodes.size()) {
	cout << "ERROR: attempted to add edge " << u << " -> " << v << " but only " << nodes.size() << " nodes.\n";
	return;
    }
    Edge e(u, v, weight);
    unsigned idx = edges.size();
    edges.push_back(e);
    nodes[u].out.push_back(idx);
    nodes[v].in.push_back(idx);
    // cout << "Added edge from " << nodes[u].name << " to " << nodes[v].name << '\n';
}

void
Graph::Draw()
{
    cout << " \n\nCircuit Graph:\n\n";


    for (unsigned i = 0; i < nodes.size(); i++) { // for each node
	cout << "\tV(" << nodes[i].name << ") -> {  ";
	for (unsigned edgeIdx : nodes[i].out) { // for each edge
	    cout << nodes[edges[edgeIdx].to].name << ' ';
	}
	cout << " }\n";
    }
}

// Like isspace, but takes a char. Needed by replace_if below
static bool
myIsSpace(char ch)
{
    return std::isspace(ch);
}

int
main()
{
    Graph G;
    ifstream fin("bench/c17.bench");
    string line, word, token, gate, parents, child;
    vector < string > PIs, POs, GATEs, FANINs;
    int nodes;
    int pi, po;
    int edges;
    while (getline(fin, line)) {

	//-----------------------------------------------------------------------
	if (line.find("nodes") != string::npos)
	    nodes = stoi(line.substr(line.find("s") + 1, line.length()));

	//-----------------------------------------------------------------------
	if (line.find("edges") != string::npos)
	    edges = stoi(line.substr(line.find("s") + 1, line.length()));

	//-----------------------------------------------------------------------
	if (line.find("input") != string::npos)
	    pi = stoi(line.substr(line.find_last_of(".") + 1, line.length()));

	//-----------------------------------------------------------------------
	if (line.find("output") != string::npos)
	    po = stoi(line.substr(line.find_last_of(".") + 1, line.length()));

	//-----------------------------------------------------------------------
	if (line.find("INPUT") != string::npos) {
	    string Pin = line.substr(line.find("(") + 1, line.find(")") - line.find("(") - 1);
	    PIs.push_back(Pin);
	    G.findOrAdd(Pin);
	}
	if (line.find("OUTPUT") != string::npos) {
	    string Pout = line.substr(line.find("(") + 1, line.find(")") - line.find("(") - 1);
	    POs.push_back(Pout);
	    G.findOrAdd(Pout);
	}
	if (line.find("=") != string::npos) {
	    // cout << edges++ << endl;
	    child = line.substr(0, line.find("=") - 1);
	    child.erase(remove_if(child.begin(), child.end(), myIsSpace), child.end());
	    unsigned childIdx = G.findOrAdd(child);
	    
	    //-----------------------------------------------------------------------
	    //-----------------------------------------------------------------------
	    gate = line.substr(line.find("=") + 1, line.find("(") - line.find("=") - 1);

	    //-----------------------------------------------------------------------
	    string Ginputs = line.substr(line.find("(") + 1, line.find(")") - line.find("(") - 1);	// gate inputs
	    //-----------------------------------------------------------------------
	    while (Ginputs.size()) {
		string parent;
		size_t pos = Ginputs.find(',');
		if (pos == string::npos) {
		    parent = Ginputs;
		    Ginputs.clear();
		} else {
		    parent = Ginputs.substr(0, pos);
		    Ginputs.erase(0,pos+1);
		}
		parent.erase(remove_if(parent.begin(), parent.end(), myIsSpace), parent.end());
		unsigned parentIdx = G.findOrAdd(parent);
		G.Add_Edge(parentIdx, childIdx, 1.5);
	    }
	}
    }

    G.Draw();
    return 0;
}

//--------------------------------------------------------------------
int
Graph::findOrAdd(const string & name)
{
    auto pos = node_names.find(name);
    if (pos == node_names.end()) {
	Node n;
	n.name = name;
	node_names[name] = nodes.size();
	nodes.push_back(n);
	return nodes.size() - 1;
    } else {
	return pos->second;
    }
}

//-------------------------------------------------------------------- 

It works really well...
Thank you dear dhayden.

I need a function inside the graph class to return the weight of specific edges, how can i do that? (i.e. double Weight(unsigned from, unsigned to) return Edge(from, to).weight;)

Could you please guide me with one other problem? what if we want to find all common parents of two or more nodes inside the graph -namely the gates inputs- if they share any nodes in prior levels of the graph? how can we use this graph structure to find these nodes?
Last edited on
I need a function inside the graph class to return the weight of specific edges, how can i do that? (i.e. double Weight(unsigned from, unsigned to) return Edge(from, to).weight;)
Create a function that returns a pointer to an edge. Once you have that, getting the weight is trivial. This is untested but I think it will work:
1
2
3
4
5
6
7
8
9
10
Edge *
Graph::findEdge(unsigned from, unsigned to)
{
    for (unsigned idx : nodes[from].out) {  // For each edge leading OUT of node "from"
        if (edges[idx].to == to) {    // Does edge lead TO "to"?
            return &edges[idx];
        }
    }
    return nullptr;
}


what if we want to find all common parents of two or more nodes inside the graph -namely the gates inputs- if they share any nodes in prior levels of the graph? how can we use this graph structure to find these nodes?

- Add some flags to class Node. This could be a bitset<> or just an unsigned if you're comfortable doing the bit manipulations yourself.
- Add a clearFlags() method to clear the flags of all nodes.
- Add a recursive method to set a flag on all ancestors of a given node:
void Graph::flagAncestors(Node &node; unsigned bit);

Now to find the common ancestors of two nodes:
1
2
3
4
5
6
clearFlags();
flagAncestors(node1, bit1);
flagAncestors(node2, bit2);
for each node in the graph {
    if (bit1 is set and bit2 is set) then it's a common ancestor
} 

Thank you again.

1
2
    nodes[u].out.push_back(idx);
    nodes[v].in.push_back(idx);

in the code above instead of passing "idx" we should passing the other nodes? right? (why idx?)
1
2
    nodes[u].out.push_back(v);
    nodes[v].in.push_back(u);

because i did a topological sorting on the graph and it gives the correct answer using latter statements.


Add some flags to class Node. This could be a bitset<> or just an unsigned if you're comfortable doing the bit manipulations yourself.
- Add a clearFlags() method to clear the flags of all nodes.
- Add a recursive method to set a flag on all ancestors of a given node:
void Graph::flagAncestors(Node &node; unsigned bit);

Thank you for your comments...although i did not completely understand the flow...I'll try to implement it. could you please post a related link if there is?
Last edited on
in the code above instead of passing "idx" we should passing the other nodes? right? (why idx?)

See struct Node:
1
2
3
4
    struct Node {
        std::string name;
        std::vector < unsigned >in, out;        // Index of edges leading in and out of this node
    };


So I'm storing the index of the edges themselves, not the nodes. This way you can easily get any edge info, like the weight.

Really, it would be better to have the nodes store a vector of pointers to the edges and the edges store pointers to the nodes. The only tricky part there is that the nodes and edges vectors, which own the data within them, would have to store pointers to heap-allocated nodes and edges. Otherwise the nodes and edges would move around when the vectors resized. Since nodes and edges would own these pointers, they could be unique_ptr<>'s if you don't want to manage them yourself.

The advantage of storing pointers is that it's easier to access them since you don't have to go through the nodes or edges vectors. Perhaps more importantly, it would be much clearer what those values are. As you've seen, having them be indexes means it's easy to mistake what that index is.

I can't convert the code to use pointers if you want to see what I mean.
Just one last thing.

For topological sorting my code is:
1
2
3
4
5
6
7
8
9
10
11
12
void Graph::TopoSort(unsigned v, bool visited[], std::stack<int>& Stack)
{

	visited[v] = true;
	//-----------------------------------------------------
	for (unsigned edgeIdx : nodes[v].out) {
		if (!visited[edgeIdx])
			TopoSort(edgeIdx, visited, Stack);
	}
	//------------------------------------------------------
	Stack.push(v);
}


output for c17:
9 10 3 2 1 4 7 8 6 5 0


How can i make it compatible with with your code?
You've mixed up edges and nodes again.
1
2
3
4
5
6
7
8
9
10
11
12
13
void Graph::TopoSort(unsigned v, bool visited[], std::stack<int>& Stack)
{

	visited[v] = true;
	//-----------------------------------------------------
	for (unsigned edgeIdx : nodes[v].out) {
		unsigned nodeIdx = edges[edgeIdx].to;
		if (!visited[nodes[nodeIdx]])
			TopoSort(nodeIdx, visited, Stack);
	}
	//------------------------------------------------------
	Stack.push(v);
}
Thank you so much.
There was a little spelling error below.
if (!visited[nodeIdx])

Thanks for your help.
Good catch. Sorry about that.
Topic archived. No new replies allowed.