Skip to content

Commit c54c36e

Browse files
authored
Bug Fixes
Now works fine for almost all test cases. More testing to be done.
1 parent d81184d commit c54c36e

File tree

1 file changed

+97
-84
lines changed

1 file changed

+97
-84
lines changed

dc3.cpp

Lines changed: 97 additions & 84 deletions
Original file line numberDiff line numberDiff line change
@@ -1,110 +1,126 @@
11
#include <bits/stdc++.h>
22
#include <algorithm>
33
using namespace std;
4-
string str;
5-
bool isSame(int a1, int b1)
4+
5+
bool isSame(const string &str, int a1, int b1)
66
{
77
if ((str[a1]==str[b1]) && (str[a1+1]==str[b1+1]) && (str[a1+2]==str[b1+2]))
88
return true;
99
else
1010
return false;
1111
}
12-
vector<int> DC3(string str, bool isAlpha=0)
12+
vector<int> DC3(string str, bool isAlpha=false)
1313
{
1414
int l = str.size();
1515
int token = isAlpha?'$':'0';
1616
int offset = isAlpha?'a':'0';
1717

1818
// Modify string to append $
19-
if((l-1) % 3 == 2)
20-
str.append(2, token);
21-
else if( ((l-1) % 3 == 1) || ((l-1) % 3 == 0))
22-
str.append(3, token);
19+
str.append(3, token);
2320

2421
cout <<"Modified String is : "<< str<<"\n";
2522
// Step 1:
2623
// Generate SA12
2724

28-
// Get 1,2 mod 3 position
29-
vector<int> pos;
25+
// Get 1,2 mod 3 position Suffix Array
26+
vector<int> SA12;
3027
for (int i=0 ; i<str.size()-2; i++)
3128
if(i%3)
32-
pos.push_back(i);
29+
SA12.push_back(i);
3330

31+
//Radix sort
3432
for (int i = 1; i<= 3; i++)
3533
{
3634
vector<queue<int>> bucket(27); // '$' occupy 0th index
37-
for (int j=0; j< pos.size(); j++)
38-
if(str[pos[j] + 3 -i]==token)
39-
bucket[0].push(pos[j]);
35+
for (int j=0; j< SA12.size(); j++)
36+
if(str[SA12[j] + 3 -i]==token)
37+
bucket[0].push(SA12[j]);
4038
else
41-
bucket[str[pos[j] + 3 -i]-offset + 1 ].push(pos[j]); // +1 is done to accomodate '$' on index 0 bucket
39+
bucket[str[SA12[j] + 3 -i]-offset + 1 ].push(SA12[j]); // +1 is done to accomodate '$' on index 0 bucket
4240

43-
pos.clear();
41+
SA12.clear();
4442
for (int i =0; i< 27; i++)
4543
if(bucket[i].size())
4644
while(!bucket[i].empty())
4745
{
48-
pos.push_back(bucket[i].front());
46+
SA12.push_back(bucket[i].front());
4947
bucket[i].pop();
5048
}
5149
}
5250
//Calculate Rank
53-
vector<int> rank(pos.size(), 1);
51+
vector<int> rank(SA12.size(), 1);
5452
int next_rank =1 ;
55-
vector<int> i2r(str.size()-2, -1); // Help in egenarting reverse mapping index i is which rank suffix
56-
i2r[pos[0]] = next_rank;
57-
for (int i=1; i< pos.size(); i++)
53+
map<int, int> i2r; // Help in regenarting reverse mapping index i is which rank suffix
54+
i2r[SA12[0]] = next_rank;
55+
for (int i=1; i< SA12.size(); i++)
5856
{
59-
if(!isSame(pos[i],pos[i-1]))
57+
if(!isSame(str, SA12[i],SA12[i-1]))
6058
++next_rank;
61-
i2r[pos[i]] = next_rank;
59+
i2r[SA12[i]] = next_rank;
6260
rank[i] = next_rank;
6361
}
6462

65-
// pos is SA12
66-
if(next_rank!=pos.size())
63+
// Are all ranks unique? If not, we have to recursively sort the rank string
64+
if(next_rank!=SA12.size())
6765
{
68-
string q(pos.size(), '0');
66+
string q(SA12.size(), '0');
6967
// Create a string which has rank of [1 mod 3 || 2 mod 3]
7068
// For example [1,4,7,10]||[2, 5, 8, 11] , rank of these indices, form a string
71-
for (int i=0; i< pos.size(); i++)
69+
for (int i=0; i< SA12.size(); i++)
7270
{
7371
int j =0;
74-
if((pos[i] % 3 )==2)// if its 2 mod 3 , it falls in 2nd half hence offset ;
75-
j = pos.size()/2;
76-
int index = j + (pos[i]/3);
72+
if((SA12[i] % 3 )==2)// if its 2 mod 3 , it falls in 2nd half hence offset ;
73+
{
74+
j = SA12.size()/2;
75+
if(SA12.size()&1)//147||25 case
76+
++j;
77+
}
78+
int index = j + (SA12[i]/3);
7779
q[index] = '0' + rank[i];
7880
}
79-
vector<int> SA12 = DC3(q);
80-
int t[pos.size()];
81-
for (int i=0 ; i < pos.size()/2; i++)
81+
vector<int> t_SA12 = DC3(q);//Recursively find SA
82+
int t[SA12.size()];
83+
int i;
84+
int other_half = (SA12.size()/2);
85+
if(SA12.size()&1)//147||25 case
86+
++other_half;
87+
88+
for (i=0 ; i < SA12.size()/2; i++)
8289
{
83-
t[i] = 1 +(3*i);
84-
t[i+(pos.size()/2)] = 2 +(3*i);
90+
t[i] = 1 +(3*i); // 1 mod n multiple
91+
t[i+other_half] = 2 +(3*i); // 2 mod n multiple
8592
}
86-
pos.clear();
87-
for (int i =1; i < SA12.size(); i++)
88-
pos.push_back(t[SA12[i]]);
89-
93+
if(SA12.size()&1)//147||25 case
94+
t[i] = 1 +(3*i);
95+
SA12.clear();
96+
for (int i =0; i < t_SA12.size(); i++)
97+
if(t_SA12[i]<q.size())
98+
SA12.push_back(t[t_SA12[i]]);
9099
}
100+
// i2r gets updated after ranks are resolved.
101+
for (int i =0; i<SA12.size(); i++)
102+
i2r[SA12[i]] = i+1;
103+
// For sentinel data (i.e. $ or 0), make their rank the lowest
104+
for (int i= l ; i< str.size(); i++)
105+
i2r[i] = 0;
106+
91107

92108
// Step 2:
93109
// Generate SA0 using SA12
94110
vector<string> s0;
95111
map<string, int> m;
96-
for (int i=0; i< str.size()-2 ; i+=3)
112+
for (int i=0; i< l; i+=3)
97113
{
98114
string t; t += str[i];
99115
s0.push_back(t);
100116
s0.back() += i2r[i+1]+'0';
101117
m[s0.back()] = i;
102118
}
103-
vector<int> newPos;
119+
vector<int> SA0;
104120
//Radix sort s0
105121
for (int i = 0; i < 2; ++i)
106122
{
107-
newPos.clear();
123+
SA0.clear();
108124
vector<queue<int>> bucket(27);
109125
//bucket.assign(bucket.capacity(), -1);
110126

@@ -124,86 +140,83 @@ vector<int> DC3(string str, bool isAlpha=0)
124140
if(bucket[i].size())
125141
while(!bucket[i].empty())
126142
{
127-
newPos.push_back(bucket[i].front());
143+
SA0.push_back(bucket[i].front());
128144
bucket[i].pop();
129145
}
130146
vector<string> t;
131-
for (int i =0; i<newPos.size(); i++)
132-
t.push_back(s0[newPos[i]]);
147+
for (int i =0; i<SA0.size(); i++)
148+
t.push_back(s0[SA0[i]]);
133149
s0 = t;
134150
}
135-
//newPos is SA0
136-
if(isAlpha)
137-
for(int i =0 ; i< newPos.size(); ++i)
138-
newPos[i] = m[s0[i]];
139-
else
140-
// newPos stores index in SA0 string, to get index in actual string jyst multiply by 3
141-
for(int i =0 ; i< newPos.size(); ++i)
142-
newPos[i] *= 3;
143-
144-
151+
for(int i =0 ; i< SA0.size(); ++i)
152+
SA0[i] = m[s0[i]];
153+
145154
// Step 3:
146155
// Merge SA12 & SA0 using below rules
147-
// Rule 1:
148156
vector<int> SA;
149-
int posIndex =0, newPosIndex =0;
157+
int t_SA12Index =0, t_SA0Index =0;
150158
while(1)
151159
{
152-
int t_SA12Index = posIndex;
153-
int t_SA0Index = newPosIndex;
154160
{
155-
if(str[pos[t_SA12Index]] < str[newPos[t_SA0Index]])
156-
{SA.push_back(pos[t_SA12Index]);++posIndex;}
157-
else if(str[pos[t_SA12Index]] > str[newPos[t_SA0Index]])
158-
{SA.push_back(newPos[t_SA0Index]);++newPosIndex;}
161+
if(str[SA12[t_SA12Index]] < str[SA0[t_SA0Index]])
162+
{SA.push_back(SA12[t_SA12Index]);++t_SA12Index;}
163+
else if(str[SA12[t_SA12Index]] > str[SA0[t_SA0Index]])
164+
{SA.push_back(SA0[t_SA0Index]);++t_SA0Index;}
159165
else // they are equal
160166
{
161-
if((pos[posIndex]%3) == 1) //B0 vs B1 case
167+
if((SA12[t_SA12Index]%3) == 1) //B0 vs B1 case
162168
{
163-
// check the rank at b0+1 b1+1
164-
if(i2r [newPos[newPosIndex]+1] < i2r[pos[posIndex]+1])
165-
{SA.push_back(newPos[t_SA0Index]);++newPosIndex;}
169+
// check the rank at b0+1 and b1+1, because both indices b0+1 and b1+1 fall in [1,2 mod 3]
170+
if(i2r [SA0[t_SA0Index]+1] < i2r[SA12[t_SA12Index]+1])
171+
{SA.push_back(SA0[t_SA0Index]);++t_SA0Index;}
166172
else
167-
{SA.push_back(pos[t_SA12Index]);++posIndex;}
173+
{SA.push_back(SA12[t_SA12Index]);++t_SA12Index;}
168174
}
169-
else if((pos[posIndex]%3) == 2) //B0 vs B2 case
175+
else if((SA12[t_SA12Index]%3) == 2) //B0 vs B2 case
170176
{
171-
// First check the letter at b0+1 b2+1
172-
if(str[pos[t_SA12Index]+1] < str[newPos[t_SA0Index]+1])
173-
{SA.push_back(pos[t_SA12Index]);++posIndex;}
174-
else if(str[pos[t_SA12Index]+1] > str[newPos[t_SA0Index]+1])
175-
{SA.push_back(newPos[t_SA0Index]);++newPosIndex;}
177+
// b0+1 falls in [1,2 mod 3] but b2+1 falls in 0 mod 3, hence first compare the letters at offset +1
178+
if(str[SA12[t_SA12Index]+1] < str[SA0[t_SA0Index]+1])
179+
{SA.push_back(SA12[t_SA12Index]);++t_SA12Index;}
180+
else if(str[SA12[t_SA12Index]+1] > str[SA0[t_SA0Index]+1])
181+
{SA.push_back(SA0[t_SA0Index]);++t_SA0Index;}
176182
else
177183
{
178-
if(i2r [newPos[newPosIndex]+2] < i2r[pos[posIndex]+2])
179-
{SA.push_back(newPos[t_SA0Index]);++newPosIndex;}
184+
// If they are still equal,
185+
// check the rank at b0+2 and b2+2, because both indices fall in [1,2 mod 3]
186+
if(i2r [SA0[t_SA0Index]+2] < i2r[SA12[t_SA12Index]+2])
187+
{SA.push_back(SA0[t_SA0Index]);++t_SA0Index;}
180188
else
181-
{SA.push_back(pos[t_SA12Index]);++posIndex;}
189+
{SA.push_back(SA12[t_SA12Index]);++t_SA12Index;}
182190
}
183191
}
184192
}
185193
}
186-
if(newPosIndex==newPos.size())
194+
// One of the arrays will be exhausted first; then copy all of the remaining entries of the other
195+
if(t_SA0Index==SA0.size())
187196
{
188197
//Copy SA12 left over directly
189-
for(int i = posIndex; i< pos.size(); i++)
190-
SA.push_back(pos[i]);
198+
for(int i = t_SA12Index; i< SA12.size(); i++)
199+
SA.push_back(SA12[i]);
191200
break;
192201
}
193-
else if(posIndex==pos.size())
202+
else if(t_SA12Index==SA12.size())
194203
{
195204
//Copy SA0 left over directly
196-
for(int i = newPosIndex; i< newPos.size(); i++)
197-
SA.push_back(newPos[i]);
205+
for(int i = t_SA0Index; i< SA0.size(); i++)
206+
SA.push_back(SA0[i]);
198207
break;
199208
}
200209
}
201210
return SA;
202211
}
203212
int main(int argc, char*argv[])
204213
{
214+
string str;
205215
cin >> str;
206-
str.append("$");
207-
DC3(str, 1);
216+
vector<int> SA = DC3(str, true);
217+
for(int i=0; i< SA.size(); i++)
218+
if(SA[i]< str.size())
219+
cout << SA[i]<<" : "<< str.substr(SA[i])<<"\n";
220+
//cout << SA[i]<<"\n";
208221
return 0;
209222
}

0 commit comments

Comments
 (0)