1
1
#include < bits/stdc++.h>
2
2
#include < algorithm>
3
3
using namespace std ;
4
- string str;
5
/**
 * @brief Tells whether the three-character windows of @p str that start at
 *        positions @p a1 and @p b1 are identical.
 *
 * Used while ranking the sorted sample suffixes: two neighbouring entries get
 * the same rank exactly when their leading triples match.
 *
 * @param str Text to inspect; it is only read, never modified.
 * @param a1  Start index of the first triple.
 * @param b1  Start index of the second triple.
 * @return true when both triples are equal character for character.
 * @throws std::out_of_range if either start index lies beyond str.size()
 *         (a negative index wraps to a huge value and is rejected the same
 *         way). A triple that runs off the end of the string is clamped to
 *         the characters that exist instead of reading past the buffer.
 */
bool isSame(const std::string &str, int a1, int b1)
{
    constexpr std::size_t kTripleLen = 3;
    // compare() performs the bounds handling that three raw operator[]
    // look-ups would silently skip.
    return str.compare(static_cast<std::size_t>(a1), kTripleLen,
                       str, static_cast<std::size_t>(b1), kTripleLen) == 0;
}
12
- vector<int > DC3 (string str, bool isAlpha=0 )
12
+ vector<int > DC3 (string str, bool isAlpha=false )
13
13
{
14
14
int l = str.size ();
15
15
int token = isAlpha?' $' :' 0' ;
16
16
int offset = isAlpha?' a' :' 0' ;
17
17
18
18
// Modify string to append $
19
- if ((l-1 ) % 3 == 2 )
20
- str.append (2 , token);
21
- else if ( ((l-1 ) % 3 == 1 ) || ((l-1 ) % 3 == 0 ))
22
- str.append (3 , token);
19
+ str.append (3 , token);
23
20
24
21
cout <<" Modified String is : " << str<<" \n " ;
25
22
// Step 1:
26
23
// Generate SA12
27
24
28
- // Get 1,2 mod 3 position
29
- vector<int > pos ;
25
+ // Get 1,2 mod 3 position Suffix Array
26
+ vector<int > SA12 ;
30
27
for (int i=0 ; i<str.size ()-2 ; i++)
31
28
if (i%3 )
32
- pos .push_back (i);
29
+ SA12 .push_back (i);
33
30
31
+ // Radix sort
34
32
for (int i = 1 ; i<= 3 ; i++)
35
33
{
36
34
vector<queue<int >> bucket (27 ); // '$' occupy 0th index
37
- for (int j=0 ; j< pos .size (); j++)
38
- if (str[pos [j] + 3 -i]==token)
39
- bucket[0 ].push (pos [j]);
35
+ for (int j=0 ; j< SA12 .size (); j++)
36
+ if (str[SA12 [j] + 3 -i]==token)
37
+ bucket[0 ].push (SA12 [j]);
40
38
else
41
- bucket[str[pos [j] + 3 -i]-offset + 1 ].push (pos [j]); // +1 is done to accomodate '$' on index 0 bucket
39
+ bucket[str[SA12 [j] + 3 -i]-offset + 1 ].push (SA12 [j]); // +1 is done to accomodate '$' on index 0 bucket
42
40
43
- pos .clear ();
41
+ SA12 .clear ();
44
42
for (int i =0 ; i< 27 ; i++)
45
43
if (bucket[i].size ())
46
44
while (!bucket[i].empty ())
47
45
{
48
- pos .push_back (bucket[i].front ());
46
+ SA12 .push_back (bucket[i].front ());
49
47
bucket[i].pop ();
50
48
}
51
49
}
52
50
// Calculate Rank
53
- vector<int > rank (pos .size (), 1 );
51
+ vector<int > rank (SA12 .size (), 1 );
54
52
int next_rank =1 ;
55
- vector <int > i2r (str. size ()- 2 , - 1 ) ; // Help in egenarting reverse mapping index i is which rank suffix
56
- i2r[pos [0 ]] = next_rank;
57
- for (int i=1 ; i< pos .size (); i++)
53
+ map <int , int > i2r; // Help in regenarting reverse mapping index i is which rank suffix
54
+ i2r[SA12 [0 ]] = next_rank;
55
+ for (int i=1 ; i< SA12 .size (); i++)
58
56
{
59
- if (!isSame (pos [i],pos [i-1 ]))
57
+ if (!isSame (str, SA12 [i],SA12 [i-1 ]))
60
58
++next_rank;
61
- i2r[pos [i]] = next_rank;
59
+ i2r[SA12 [i]] = next_rank;
62
60
rank[i] = next_rank;
63
61
}
64
62
65
- // pos is SA12
66
- if (next_rank!=pos .size ())
63
+ // Are all ranks unique? If not, we have to recursively sort the rank string
64
+ if (next_rank!=SA12 .size ())
67
65
{
68
- string q (pos .size (), ' 0' );
66
+ string q (SA12 .size (), ' 0' );
69
67
// Create a string which has rank of [1 mod 3 || 2 mod 3]
70
68
// For example [1,4,7,10]||[2, 5, 8, 11] , rank of these indices, form a string
71
- for (int i=0 ; i< pos .size (); i++)
69
+ for (int i=0 ; i< SA12 .size (); i++)
72
70
{
73
71
int j =0 ;
74
- if ((pos[i] % 3 )==2 )// if its 2 mod 3 , it falls in 2nd half hence offset ;
75
- j = pos.size ()/2 ;
76
- int index = j + (pos[i]/3 );
72
+ if ((SA12[i] % 3 )==2 )// if its 2 mod 3 , it falls in 2nd half hence offset ;
73
+ {
74
+ j = SA12.size ()/2 ;
75
+ if (SA12.size ()&1 )// 147||25 case
76
+ ++j;
77
+ }
78
+ int index = j + (SA12[i]/3 );
77
79
q[index] = ' 0' + rank[i];
78
80
}
79
- vector<int > SA12 = DC3 (q);
80
- int t[pos.size ()];
81
- for (int i=0 ; i < pos.size ()/2 ; i++)
81
+ vector<int > t_SA12 = DC3 (q);// Recursively find SA
82
+ int t[SA12.size ()];
83
+ int i;
84
+ int other_half = (SA12.size ()/2 );
85
+ if (SA12.size ()&1 )// 147||25 case
86
+ ++other_half;
87
+
88
+ for (i=0 ; i < SA12.size ()/2 ; i++)
82
89
{
83
- t[i] = 1 +(3 *i);
84
- t[i+(pos. size ()/ 2 ) ] = 2 +(3 *i);
90
+ t[i] = 1 +(3 *i); // 1 mod n multiple
91
+ t[i+other_half ] = 2 +(3 *i); // 2 mod n multiple
85
92
}
86
- pos.clear ();
87
- for (int i =1 ; i < SA12.size (); i++)
88
- pos.push_back (t[SA12[i]]);
89
-
93
+ if (SA12.size ()&1 )// 147||25 case
94
+ t[i] = 1 +(3 *i);
95
+ SA12.clear ();
96
+ for (int i =0 ; i < t_SA12.size (); i++)
97
+ if (t_SA12[i]<q.size ())
98
+ SA12.push_back (t[t_SA12[i]]);
90
99
}
100
+ // i2r get updated after ranks are resolved.
101
+ for (int i =0 ; i<SA12.size (); i++)
102
+ i2r[SA12[i]] = i+1 ;
103
+ // For sentinel data (i.e. $ or 0), make their rank the lowest
104
+ for (int i= l ; i< str.size (); i++)
105
+ i2r[i] = 0 ;
106
+
91
107
92
108
// Step 2:
93
109
// Generate SA0 using SA12
94
110
vector<string> s0;
95
111
map<string, int > m;
96
- for (int i=0 ; i< str. size ()- 2 ; i+=3 )
112
+ for (int i=0 ; i< l ; i+=3 )
97
113
{
98
114
string t; t += str[i];
99
115
s0.push_back (t);
100
116
s0.back () += i2r[i+1 ]+' 0' ;
101
117
m[s0.back ()] = i;
102
118
}
103
- vector<int > newPos ;
119
+ vector<int > SA0 ;
104
120
// Radix sort s0
105
121
for (int i = 0 ; i < 2 ; ++i)
106
122
{
107
- newPos .clear ();
123
+ SA0 .clear ();
108
124
vector<queue<int >> bucket (27 );
109
125
// bucket.assign(bucket.capacity(), -1);
110
126
@@ -124,86 +140,83 @@ vector<int> DC3(string str, bool isAlpha=0)
124
140
if (bucket[i].size ())
125
141
while (!bucket[i].empty ())
126
142
{
127
- newPos .push_back (bucket[i].front ());
143
+ SA0 .push_back (bucket[i].front ());
128
144
bucket[i].pop ();
129
145
}
130
146
vector<string> t;
131
- for (int i =0 ; i<newPos .size (); i++)
132
- t.push_back (s0[newPos [i]]);
147
+ for (int i =0 ; i<SA0 .size (); i++)
148
+ t.push_back (s0[SA0 [i]]);
133
149
s0 = t;
134
150
}
135
- // newPos is SA0
136
- if (isAlpha)
137
- for (int i =0 ; i< newPos.size (); ++i)
138
- newPos[i] = m[s0[i]];
139
- else
140
- // newPos stores index in SA0 string, to get index in actual string jyst multiply by 3
141
- for (int i =0 ; i< newPos.size (); ++i)
142
- newPos[i] *= 3 ;
143
-
144
-
151
+ for (int i =0 ; i< SA0.size (); ++i)
152
+ SA0[i] = m[s0[i]];
153
+
145
154
// Step 3:
146
155
// Merge SA12 & SA0 using below rules
147
- // Rule 1:
148
156
vector<int > SA;
149
- int posIndex =0 , newPosIndex =0 ;
157
+ int t_SA12Index =0 , t_SA0Index =0 ;
150
158
while (1 )
151
159
{
152
- int t_SA12Index = posIndex;
153
- int t_SA0Index = newPosIndex;
154
160
{
155
- if (str[pos [t_SA12Index]] < str[newPos [t_SA0Index]])
156
- {SA.push_back (pos [t_SA12Index]);++posIndex ;}
157
- else if (str[pos [t_SA12Index]] > str[newPos [t_SA0Index]])
158
- {SA.push_back (newPos [t_SA0Index]);++newPosIndex ;}
161
+ if (str[SA12 [t_SA12Index]] < str[SA0 [t_SA0Index]])
162
+ {SA.push_back (SA12 [t_SA12Index]);++t_SA12Index ;}
163
+ else if (str[SA12 [t_SA12Index]] > str[SA0 [t_SA0Index]])
164
+ {SA.push_back (SA0 [t_SA0Index]);++t_SA0Index ;}
159
165
else // they are equal
160
166
{
161
- if ((pos[posIndex ]%3 ) == 1 ) // B0 vs B1 case
167
+ if ((SA12[t_SA12Index ]%3 ) == 1 ) // B0 vs B1 case
162
168
{
163
- // check the rank at b0+1 b1+1
164
- if (i2r [newPos[newPosIndex ]+1 ] < i2r[pos[posIndex ]+1 ])
165
- {SA.push_back (newPos [t_SA0Index]);++newPosIndex ;}
169
+ // check the ranks at b0+1 and b1+1, because indices b0+1 and b1+1 both fall in [1,2 mod 3]
170
+ if (i2r [SA0[t_SA0Index ]+1 ] < i2r[SA12[t_SA12Index ]+1 ])
171
+ {SA.push_back (SA0 [t_SA0Index]);++t_SA0Index ;}
166
172
else
167
- {SA.push_back (pos [t_SA12Index]);++posIndex ;}
173
+ {SA.push_back (SA12 [t_SA12Index]);++t_SA12Index ;}
168
174
}
169
- else if ((pos[posIndex ]%3 ) == 2 ) // B0 vs B2 case
175
+ else if ((SA12[t_SA12Index ]%3 ) == 2 ) // B0 vs B2 case
170
176
{
171
- // First check the letter at b0+1 b2+1
172
- if (str[pos [t_SA12Index]+1 ] < str[newPos [t_SA0Index]+1 ])
173
- {SA.push_back (pos [t_SA12Index]);++posIndex ;}
174
- else if (str[pos [t_SA12Index]+1 ] > str[newPos [t_SA0Index]+1 ])
175
- {SA.push_back (newPos [t_SA0Index]);++newPosIndex ;}
177
+ // b0+1 falls in [1,2 mod 3] but b2+1 falls in 0 mod 3, hence first compare the letters
178
+ if (str[SA12 [t_SA12Index]+1 ] < str[SA0 [t_SA0Index]+1 ])
179
+ {SA.push_back (SA12 [t_SA12Index]);++t_SA12Index ;}
180
+ else if (str[SA12 [t_SA12Index]+1 ] > str[SA0 [t_SA0Index]+1 ])
181
+ {SA.push_back (SA0 [t_SA0Index]);++t_SA0Index ;}
176
182
else
177
183
{
178
- if (i2r [newPos[newPosIndex]+2 ] < i2r[pos[posIndex]+2 ])
179
- {SA.push_back (newPos[t_SA0Index]);++newPosIndex;}
184
+ // If still they are equal
185
+ // check the ranks at b0+2 and b2+2, because b0+2 (2 mod 3) and b2+2 (1 mod 3) both fall in [1,2 mod 3]
186
+ if (i2r [SA0[t_SA0Index]+2 ] < i2r[SA12[t_SA12Index]+2 ])
187
+ {SA.push_back (SA0[t_SA0Index]);++t_SA0Index;}
180
188
else
181
- {SA.push_back (pos [t_SA12Index]);++posIndex ;}
189
+ {SA.push_back (SA12 [t_SA12Index]);++t_SA12Index ;}
182
190
}
183
191
}
184
192
}
185
193
}
186
- if (newPosIndex==newPos.size ())
194
+ // One of the arrays will be exhausted first; then copy all remaining entries of the other
195
+ if (t_SA0Index==SA0.size ())
187
196
{
188
197
// Copy SA12 left over directly
189
- for (int i = posIndex ; i< pos .size (); i++)
190
- SA.push_back (pos [i]);
198
+ for (int i = t_SA12Index ; i< SA12 .size (); i++)
199
+ SA.push_back (SA12 [i]);
191
200
break ;
192
201
}
193
- else if (posIndex==pos .size ())
202
+ else if (t_SA12Index==SA12 .size ())
194
203
{
195
204
// Copy SA0 left over directly
196
- for (int i = newPosIndex ; i< newPos .size (); i++)
197
- SA.push_back (newPos [i]);
205
+ for (int i = t_SA0Index ; i< SA0 .size (); i++)
206
+ SA.push_back (SA0 [i]);
198
207
break ;
199
208
}
200
209
}
201
210
return SA;
202
211
}
203
212
int main (int argc, char *argv[])
204
213
{
214
+ string str;
205
215
cin >> str;
206
- str.append (" $" );
207
- DC3 (str, 1 );
216
+ vector<int > SA = DC3 (str, true );
217
+ for (int i=0 ; i< SA.size (); i++)
218
+ if (SA[i]< str.size ())
219
+ cout << SA[i]<<" : " << str.substr (SA[i])<<" \n " ;
220
+ // cout << SA[i]<<"\n";
208
221
return 0 ;
209
222
}
0 commit comments