Skip to content

Commit e23c921

Browse files
committed
add kmp algorithms
1 parent 383dcc6 commit e23c921

File tree

3 files changed

+127
-1
lines changed

3 files changed

+127
-1
lines changed

Makefile

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,8 @@ PROGRAMS = m_based \
5151
simhash_demo \
5252
imath_demo \
5353
random_demo \
54-
k-means_demo
54+
k-means_demo \
55+
kmp_demo
5556

5657
all: $(PROGRAMS)
5758

include/kmp.h

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
/*******************************************************************************
2+
* ALGORITHM IMPLEMENTATIONS
3+
*
4+
* /\ | _ _ ._ o _|_ |_ ._ _ _
5+
* /--\ | (_| (_) | | |_ | | | | | _>
6+
* _|
7+
*
8+
* KNUTH-MORRIS-PRATT ALGORITHMS
9+
*
10+
* Features:
11+
* Complexity is O(n + k), where n is the target string length,
12+
* and k is the pattern length
13+
*
14+
* http://en.wikipedia.org/wiki/Knuth%E2%80%93Morris%E2%80%93Pratt_algorithm
15+
*
16+
******************************************************************************/
17+
18+
#ifndef __KMP_H__
19+
#define __KMP_H__
20+
#include <string.h>
21+
22+
namespace alg {
	static void kmp_table(const char *W, int * T, int len);

	/**
	 * Knuth-Morris-Pratt substring search.
	 *
	 * S -> the text to be searched (NUL-terminated)
	 * W -> the word to search for (NUL-terminated)
	 *
	 * return the zero-based position of the first occurrence of W in S,
	 * or -1 when W does not occur in S. A NULL argument or an empty W
	 * also yields -1.
	 */
	static int kmp_search(const char * S, const char * W) {
		if (S == NULL || W == NULL) {
			return -1;
		}

		const int LEN_S = (int)strlen(S);
		const int LEN_W = (int)strlen(W);

		// Nothing to look for, or the word cannot fit in the text:
		// answer right away without allocating the table.
		if (LEN_W == 0 || LEN_W > LEN_S) {
			return -1;
		}

		int * T = new int[LEN_W];
		kmp_table(W, T, LEN_W);

		int found = -1;
		int m = 0;	// start of the current candidate match in S
		int i = 0;	// index of the character being compared in W

		while (m+i < LEN_S) {
			if (W[i] == S[m+i]) {
				if (i == LEN_W-1) {
					found = m;
					break;
				}
				i++;
			} else {
				// Shift the candidate start using the table entry;
				// T[0] == -1 moves m forward by exactly one character.
				m = m+i-T[i];
				i = (T[i] > -1) ? T[i] : 0;
			}
		}

		// Single exit point so the table is released on every path.
		delete [] T;
		return found;
	}

	/**
	 * build the partial-match table for the word to be searched
	 *
	 * W   -> the word
	 * T   -> output table, must have room for len entries
	 * len -> number of characters of W to process
	 *
	 * T[0] is the sentinel -1; for i > 0, T[i] is the length of the longest
	 * proper prefix of W that is also a suffix of W[0..i-1].
	 * Nothing is written when W or T is NULL or len <= 0.
	 *
	 * eg:
	 * i    0  1  2  3  4  5  6
	 * W[i] A  B  C  D  A  B  D
	 * T[i] -1 0  0  0  0  1  2
	 */
	static void kmp_table(const char *W, int * T, int len) {
		if (W == NULL || T == NULL || len <= 0) {
			return;
		}

		T[0] = -1;
		if (len == 1) {
			return;		// a one-entry table has no T[1] slot
		}
		T[1] = 0;

		int pos = 2;	// the current position we are computing in T
		int cnd = 0;	// the next character of the current candidate substring

		while (pos < len) {
			if (W[pos-1] == W[cnd]) {
				// first case: the substring continues
				cnd++;
				T[pos] = cnd;
				pos++;
			} else if (cnd > 0) {
				// second case: it doesn't, but we can fall back
				cnd = T[cnd];
			} else {
				// third case: we have run out of candidates. Note cnd = 0
				T[pos] = 0;
				pos++;
			}
		}
	}
}
85+
86+
#endif //

src/kmp_demo.cpp

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
#include <stdio.h>
2+
#include <stdlib.h>
3+
#include <string.h>
4+
#include <time.h>
5+
6+
#include "kmp.h"
7+
using namespace alg;
8+
int main(void)
9+
{
10+
srand(time(NULL));
11+
char * S = (char*)malloc(10000);
12+
char * W = (char*)malloc(6);
13+
14+
memset(S,0, 10000);
15+
memset(W,0, 10);
16+
17+
// random genrate a pattern for A, G, C,T
18+
const char P[] = {'A', 'G','C','T'};
19+
20+
for (int i=0;i<10000;i++) {
21+
int k = rand()%4;
22+
S[i] = P[k];
23+
}
24+
25+
for (int i=0;i<6;i++) {
26+
int k = rand()%4;
27+
W[i] = P[k];
28+
}
29+
30+
// do a search for W from S
31+
int pos = kmp_search(S, W);
32+
33+
printf("text:\n%s\n", S);
34+
if (pos > 0) {
35+
printf("found %s from text, pos %d\n", W, pos);
36+
} else {
37+
printf("cannot found %s from text\n", W);
38+
}
39+
}

0 commit comments

Comments
 (0)