Skip to content

Commit 59aa74d

Browse files
committed
Merge branch 'master' of https://github.com/ZhangYou0122/algorithms into ZhangYou0122-master
Conflicts: Makefile
2 parents 09db9c7 + 5ce8ce9 commit 59aa74d

File tree

3 files changed

+649
-2
lines changed

3 files changed

+649
-2
lines changed

Makefile

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -68,8 +68,9 @@ PROGRAMS = m_based_demo \
6868
bubble_sort_demo \
6969
selection_sort_demo \
7070
8queue_demo \
71-
palindrome_demo \
72-
suffix_array_demo
71+
palindrome_demo \
72+
suffix_array_demo \
73+
suffix_tree_demo
7374

7475
all: $(PROGRAMS)
7576

include/suffix_tree.h

Lines changed: 310 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,310 @@
1+
#include <string>
2+
//#include <tr1/unordered_map>
3+
#include <limits>
4+
#include <map>
5+
#include <vector>
6+
#include <iostream>
7+
#include <stdexcept>
8+
9+
using std::vector;
10+
using std::string;
11+
using std::map;
12+
using std::make_pair;
13+
using std::cout;
14+
using std::endl;
15+
using std::out_of_range;
16+
using std::ostream;
17+
//typedef tr1::unordered_map map;
18+
19+
// TODO: upgrade it to process trace. Rule: char-->elem string-->elem_list
20+
class SuffixTree
21+
{
22+
public:
23+
// active point is initialized as (root, None, 0), remainder initialized as 1
24+
SuffixTree(string str):test_str(str), pos(0), root(test_str), active_point(&root, 0, 0), remainder(0), ls() {}
25+
int construct(void);
26+
27+
// return -1 if no such sub exist, return the beginning postion of this substring in thr original string if it exist
28+
int search(string sub);
29+
30+
// return the length of the longest prefix of sub which can be matched in suffix tree
31+
template <class Iterator>
32+
Iterator inc_search(Iterator sub)
33+
{
34+
typedef typename Iterator::value_type T; // extract real type
35+
36+
Iterator result = sub;
37+
Node* node = &root;
38+
Edge* edge = NULL;
39+
int pos = 0; // the iter's pos at edge
40+
int edge_len = -1;
41+
bool flag = true;
42+
43+
44+
while (flag) {
45+
if (edge == NULL) {
46+
edge = node->find_edge(*result);
47+
if (edge == NULL) {
48+
flag = false;
49+
}
50+
else {
51+
result++;
52+
pos = 1; // the second element of the edge
53+
edge_len = edge->length();
54+
}
55+
}
56+
else {
57+
if (pos >= edge_len) {
58+
node = edge->endpoint;
59+
edge = NULL;
60+
edge_len = 0;
61+
}
62+
else {
63+
if (*result == (*edge)[pos]) {
64+
result++;
65+
pos++;
66+
}
67+
else
68+
flag = false;
69+
}
70+
}
71+
}
72+
73+
return result;
74+
}
75+
76+
int print_tree(void);
77+
private:
78+
string test_str;
79+
80+
struct Node;
81+
typedef struct Node Node;
82+
83+
struct Edge{
84+
// the begin and end pos of this edge, note that INT_MAX stands for #(the changing end pos of this entire string)
85+
int begin, end;
86+
// Is there a better way to find test_str?
87+
string& test_node_str;
88+
89+
Node * endpoint;
90+
91+
Edge(int b, int e, string& str):
92+
test_node_str(str)
93+
{
94+
begin = b;
95+
end = e;
96+
endpoint = NULL;
97+
//std::cout << "Edge initialized" << std::endl;
98+
}
99+
100+
void change_edge(int b, int e)
101+
{
102+
begin = b;
103+
end = e;
104+
}
105+
106+
int length(void)
107+
{
108+
109+
if (end > test_node_str.size())
110+
return test_node_str.size() - begin;
111+
else
112+
return end - begin + 1;
113+
}
114+
115+
// needed by map
116+
friend bool operator<(const Edge& me, const Edge& other)
117+
{
118+
return me.begin < other.begin;
119+
}
120+
121+
char operator[](int i)
122+
{
123+
i += begin;
124+
if (i > end)
125+
throw out_of_range("Edge [] out of range.");
126+
127+
return test_node_str[i];
128+
}
129+
130+
friend ostream& operator<<(ostream& os, Edge& edge)
131+
{
132+
int end = edge.test_node_str.size()-1;
133+
if (end >= edge.end)
134+
end = edge.end;
135+
136+
char c;
137+
for (int i=edge.begin; i<=end; i++) {
138+
c = edge.test_node_str[i];
139+
os << c;
140+
}
141+
if (end != edge.end)
142+
os << '#';
143+
144+
return os;
145+
}
146+
147+
bool is_none(void) { return begin == 0 && end == 0; }
148+
};
149+
typedef struct Edge Edge;
150+
151+
struct Node{
152+
string& test_node_str;
153+
map<int, int> testmap;
154+
map<Edge*, bool> edges;
155+
// find the edge quicky by storing the leading char of this edge
156+
map<char, Edge*> findedges;
157+
Node* suffix_link;
158+
159+
friend class LinkState;
160+
161+
Node(string& str) :
162+
test_node_str(str), suffix_link(NULL) { edges.clear(); findedges.clear(); }
163+
164+
void add_edge(Edge* edge) {
165+
if (edge->endpoint == NULL)
166+
edge->endpoint = new Node(test_node_str);
167+
make_pair(edge, true);
168+
edges.insert(make_pair(edge, true));
169+
findedges.insert(make_pair(test_node_str[edge->begin], edge));
170+
//cout << "edge added. Now we have " << edges.size() << "edges." << endl;
171+
}
172+
173+
void del_edge(Edge* edge) {
174+
map<Edge*, bool>::iterator iter = edges.find(edge);
175+
176+
if (iter == edges.end())
177+
throw out_of_range("edge don't exit");
178+
else {
179+
// note we should erase the findedges too
180+
edges.erase(edge);
181+
//cout << "delete" << (*edge)[0] << endl;
182+
findedges.erase((*edge)[0]);
183+
//cout << "edge deleted. Now we have " << edges.size() << "edges." << endl;
184+
}
185+
186+
}
187+
188+
// find edge by the first char
189+
Edge* find_edge(char c)
190+
{
191+
//cout << "finding edge";
192+
map<char, Edge*>::iterator iter = findedges.find(c);
193+
//cout << "founded?" << endl;
194+
if (iter != findedges.end())
195+
return iter->second;
196+
else
197+
return NULL;
198+
}
199+
200+
bool isleaf() { return edges.empty(); }
201+
202+
bool operator==(Node& other)
203+
{
204+
return (this) == (&other);
205+
}
206+
207+
friend ostream& operator<<(ostream& os, Node& node)
208+
{
209+
map<Edge*, bool>::iterator iter;
210+
map<char, Edge*>::iterator iter_f;
211+
212+
for (iter=node.edges.begin(); iter!=node.edges.end(); ++iter)
213+
os << iter->first << '\t';
214+
os << endl;
215+
216+
for (iter_f=node.findedges.begin(); iter_f!=node.findedges.end(); ++iter_f)
217+
os << iter_f->first << "-->" << iter_f->second << endl;
218+
219+
return os;
220+
}
221+
};
222+
//typedef struct Node Node;
223+
224+
class ActivePoint{
225+
public:
226+
Node* active_node;
227+
char active_edge;
228+
int active_length;
229+
230+
ActivePoint(Node* node, char edge, int length):
231+
active_node(node), active_edge(edge), active_length(length) { std::cout << "ActivePoint initialized" << std::endl; }
232+
};
233+
234+
Node root;
235+
ActivePoint active_point;
236+
237+
Node* get_active_node(void) { return active_point.active_node; }
238+
void set_active_node(Node* node) { active_point.active_node = node; cout << "Active node set as " << node << endl; }
239+
char get_active_edge(void) { return active_point.active_edge; }
240+
void set_active_edge(char edge) { active_point.active_edge = edge; }
241+
int get_active_length(void) { return active_point.active_length; }
242+
void set_active_length(int len) { active_point.active_length = len; }
243+
void inc_active_len() { active_point.active_length++; }
244+
void dec_active_len() { active_point.active_length--; }
245+
246+
// how many suffixes is to be inserted?
247+
int remainder;
248+
// how many characters inserted?
249+
int pos;
250+
char get_ele(int i) { return test_str[i]; }
251+
// insert a char from pos to suffix tree
252+
int insert();
253+
int insert_rule1();
254+
int insert_rule3();
255+
int print_node(Node* node, int level);
256+
257+
258+
Node* seperate_edge(Node * node, Edge* edge, int rule);
259+
260+
// check if we can change active node
261+
void check_an(void)
262+
{
263+
Node* node = get_active_node();
264+
Edge* edge = node->find_edge(get_active_edge());
265+
266+
if (edge == NULL)
267+
return;
268+
269+
int edge_size = edge->end - edge->begin + 1;
270+
271+
// update
272+
if (edge_size == get_active_length()) {
273+
set_active_node(edge->endpoint);
274+
set_active_edge(0);
275+
set_active_length(0);
276+
}
277+
}
278+
279+
// this class indicate when shall we insert a suffix link
280+
// ls should be a singleton
281+
class LinkState
282+
{
283+
bool first;
284+
285+
Node* prev, *curr;
286+
287+
public:
288+
LinkState() : first(true), prev(NULL), curr(NULL) {}
289+
290+
void ins_link(Node* node)
291+
{
292+
prev = curr;
293+
curr = node;
294+
295+
if (!first) {
296+
prev->suffix_link = curr;
297+
cout << "Suffix link added from prev " << prev << " to curr " << curr << endl;
298+
}
299+
300+
first = false;
301+
}
302+
303+
void clear(void)
304+
{
305+
first = true;
306+
prev = curr = NULL;
307+
}
308+
};
309+
LinkState ls;
310+
};

0 commit comments

Comments
 (0)