Skip to content

Commit d9e1b9e

Browse files
author
Andy Kipp
committed
added unit tests for libgrok
1 parent 797da12 commit d9e1b9e

File tree

4 files changed

+309
-30
lines changed

4 files changed

+309
-30
lines changed

libgrok/__init__.py

Lines changed: 95 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,80 @@
1+
import ctypes as CTYPES
12
from _libgrok import *
23

3-
print dir()
4+
class GrokError(Exception):
    """Exception raised when a libgrok call reports a failure.

    Either an explicit ``message`` or a numeric ``err`` status code may be
    supplied.  When ``message`` is empty or missing, the exception text is
    derived from ``err`` through :meth:`error_to_message`.

    The numeric code is kept on the instance as ``err`` so callers can branch
    on it instead of parsing the message text.
    """

    # Message text for each status code this class knows about.
    _MESSAGES = {
        1: "File not found",
        2: "Pattern not found",
        3: "Unexpected read size",
        4: "Compile failed",
        5: "Uninitialized",
        6: "PCRE Error",
        7: "No match",
    }

    def __init__(self, message=None, err=0):
        """Build the exception.

        :param message: explicit message; takes precedence when truthy.
        :param err: numeric status code, used for the message when no
            explicit message is given.
        """
        # Preserve the raw code; previously it was lost once the message
        # had been formatted.
        self.err = err
        if message:
            Exception.__init__(self, message)
        else:
            Exception.__init__(self, self.error_to_message(err))

    def error_to_message(self, err):
        """Return the message for status code ``err``.

        Codes without a table entry produce ``"Unknown Error: <code>"``.
        """
        message = self._MESSAGES.get(err)
        if message is None:
            message = "Unknown Error: %d" % (err)
        return message
28+
29+
class GrokMatch(object):
    """Python-side handle for a ``_grok_match`` ctypes structure.

    ``subject``, ``start`` and ``end`` expose the corresponding struct
    fields.  ``captures`` is a dict built once from :meth:`walk` and then
    cached.  ``match[name]`` looks up one named substring and returns
    ``None`` when the lookup does not report ``GROK_OK``.
    """

    def __init__(self):
        self._grok_match = _libgrok._grok_match()
        # Cache for the ``captures`` property; None means "not built yet".
        self._captures = None

    @property
    def subject(self):
        """The ``subject`` field of the underlying match struct."""
        return self._grok_match.subject

    @property
    def start(self):
        """The ``start`` field of the underlying match struct."""
        return self._grok_match.start

    @property
    def end(self):
        """The ``end`` field of the underlying match struct."""
        return self._grok_match.end

    @property
    def captures(self):
        """Dict mapping each capture name to its data, built on first use.

        If several captures share a name, the last one yielded by
        :meth:`walk` wins.
        """
        if self._captures is None:
            # Assign only once the walk has finished, so a failure part-way
            # through does not leave a half-filled dict cached.
            self._captures = dict(self.walk())
        return self._captures

    def walk(self):
        """Yield ``(name, data)`` for every capture in the match.

        Iteration continues while ``_grok_match_walk_next`` returns
        ``GROK_OK``.  ``_grok_match_walk_end`` is always called, including
        when the consumer abandons the generator early or an exception is
        raised while iterating.
        """
        _libgrok._grok_match_walk_init(self._grok_match)
        try:
            # NOTE(review): these buffers seed the pointers handed to the
            # library; whether libgrok writes into them or re-points the
            # pointers is not visible from this file -- confirm.
            name = CTYPES.create_string_buffer('\00' * 100)
            name_ptr = CTYPES.c_char_p(CTYPES.addressof(name))
            name_len = CTYPES.c_int(0)
            data = CTYPES.create_string_buffer('\00' * 255)
            data_ptr = CTYPES.c_char_p(CTYPES.addressof(data))
            data_len = CTYPES.c_int(0)
            while _libgrok._grok_match_walk_next(
                    self._grok_match,
                    CTYPES.byref(name_ptr),
                    CTYPES.byref(name_len),
                    CTYPES.byref(data_ptr),
                    CTYPES.byref(data_len)) == _libgrok.GROK_OK:
                # Trim to the lengths reported by the library.
                yield (name_ptr.value[:name_len.value],
                       data_ptr.value[:data_len.value])
        finally:
            # Runs on normal exhaustion, on generator close and on error.
            _libgrok._grok_match_walk_end(self._grok_match)

    def __getitem__(self, k):
        """Return the substring captured under name ``k``, or ``None``.

        ``None`` (rather than ``KeyError``) is returned when the library
        call does not report ``GROK_OK``.
        """
        substring = CTYPES.create_string_buffer('\00' * 100)
        substring_ptr = CTYPES.c_char_p(CTYPES.addressof(substring))
        substring_len = CTYPES.c_int(0)
        ret = _libgrok._grok_match_get_named_substring(
            self._grok_match, k,
            CTYPES.byref(substring_ptr),
            CTYPES.byref(substring_len))
        if ret != _libgrok.GROK_OK:
            return None
        return substring_ptr.value[:substring_len.value]
478

579
class Grok(object):
680

@@ -11,13 +85,29 @@ def __del__(self):
1185
_libgrok._grok_free(self._grok)
1286

1387
def add_pattern(self, name, pattern):
    """Register one named pattern on this grok instance.

    :param name: pattern name; its ``len()`` is passed alongside it.
    :param pattern: pattern text; its ``len()`` is passed alongside it.
    :raises GrokError: when the library call does not return ``GROK_OK``.
    """
    status = _libgrok._grok_pattern_add(
        self._grok,
        name, len(name),
        pattern, len(pattern))
    if status == _libgrok.GROK_OK:
        return
    raise GrokError(err=status)
1591

1692
def add_patterns_from_file(self, filename):
    """Import pattern definitions from ``filename`` into this instance.

    :param filename: path handed to the library's import routine.
    :raises GrokError: when the library call does not return ``GROK_OK``.
    """
    status = _libgrok._grok_patterns_import_from_file(self._grok, filename)
    if status == _libgrok.GROK_OK:
        return
    raise GrokError(err=status)
1896

1997
def compile(self, pattern):
    """Compile ``pattern`` on this grok instance.

    :param pattern: grok expression to compile.
    :raises GrokError: when the library call does not return ``GROK_OK``.
    """
    status = _libgrok._grok_compile(self._grok, pattern)
    if status == _libgrok.GROK_OK:
        return
    raise GrokError(err=status)
101+
102+
def execute(self, text, match=None):
    """Run the compiled pattern against ``text``.

    :param text: subject string handed to ``_grok_exec``.
    :param match: optional match object whose ``_grok_match`` struct is
        passed by pointer to be filled in; when ``None`` a NULL pointer
        is passed instead.
    :returns: ``True`` if ``_grok_exec`` returned ``GROK_OK``, else ``False``.
    """
    if match is None:
        grok_match_p = None
    else:
        grok_match_p = CTYPES.pointer(match._grok_match)
        # The match struct carries a raw ``char *`` subject, and ctypes does
        # not keep Python objects alive on behalf of pointers held by C.
        # Pin ``text`` on the match so it lives as long as the match does.
        # NOTE(review): presumes grok_exec stores a pointer into ``text``
        # rather than copying it -- confirm against the libgrok sources.
        match._subject_ref = text
        # A reused match must not serve captures cached from an earlier run.
        match._captures = None
    ret = _libgrok._grok_exec(self._grok, text, grok_match_p)
    return ret == _libgrok.GROK_OK
21106

22107
def __call__(self, text):
    """Match ``text`` and return the result object.

    :param text: subject string to match.
    :returns: a freshly created ``GrokMatch`` when :meth:`execute` reports
        success, otherwise ``None``.
    """
    result = GrokMatch()
    matched = self.execute(text, result)
    return result if matched else None
113+

libgrok/_libgrok.py

Lines changed: 51 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1,30 +1,56 @@
1-
import ctypes
1+
import ctypes as CTYPES
22

3-
_libgrok = ctypes.cdll.LoadLibrary('libgrok.dylib')
3+
_libgrok_so = CTYPES.cdll.LoadLibrary('libgrok.dylib')
44

5-
_grok_new = _libgrok.grok_new
6-
_grok_new.argtypes = []
7-
_grok_new.restype = ctypes.c_void_p
8-
9-
_grok_free = _libgrok.grok_free
10-
_grok_free.argtypes = [ctypes.c_void_p]
11-
12-
_grok_compile = _libgrok.grok_compile
13-
_grok_compile.argtypes = [ctypes.c_void_p, ctypes.c_char_p]
14-
_grok_compile.restype = ctypes.c_int
5+
GROK_OK = 0
156

16-
_grok_exec = _libgrok.grok_exec
17-
_grok_exec.argtypes = [ctypes.c_void_p, ctypes.c_char_p, ctypes.c_void_p]
18-
_grok_exec.restype = ctypes.c_int
19-
20-
_grok_pattern_add = _libgrok.grok_pattern_add
21-
_grok_pattern_add.argtypes = [ctypes.c_void_p,
22-
ctypes.c_char_p, ctypes.c_size_t,
23-
ctypes.c_char_p, ctypes.c_size_t]
24-
_grok_pattern_add.restype = ctypes.c_int
25-
26-
_grok_patterns_import_from_file = _libgrok.grok_patterns_import_from_file
27-
_grok_patterns_import_from_file.argtypes = [ctypes.c_void_p, ctypes.c_char_p]
28-
_grok_patterns_import_from_file.restype = ctypes.c_int
7+
class _grok_match(CTYPES.Structure):
8+
_fields_ = [("grok_t", CTYPES.c_void_p),
9+
("subject", CTYPES.c_char_p),
10+
("start", CTYPES.c_int),
11+
("end", CTYPES.c_int)]
2912

13+
_grok_match_p = CTYPES.POINTER(_grok_match)
3014

15+
_grok_new = _libgrok_so.grok_new
16+
_grok_new.argtypes = []
17+
_grok_new.restype = CTYPES.c_void_p
18+
19+
_grok_free = _libgrok_so.grok_free
20+
_grok_free.argtypes = [CTYPES.c_void_p]
21+
22+
_grok_compile = _libgrok_so.grok_compile
23+
_grok_compile.argtypes = [CTYPES.c_void_p, CTYPES.c_char_p]
24+
_grok_compile.restype = CTYPES.c_int
25+
26+
_grok_exec = _libgrok_so.grok_exec
27+
_grok_exec.argtypes = [CTYPES.c_void_p, CTYPES.c_char_p, _grok_match_p]
28+
_grok_exec.restype = CTYPES.c_int
29+
30+
_grok_pattern_add = _libgrok_so.grok_pattern_add
31+
_grok_pattern_add.argtypes = [CTYPES.c_void_p,
32+
CTYPES.c_char_p, CTYPES.c_size_t,
33+
CTYPES.c_char_p, CTYPES.c_size_t]
34+
_grok_pattern_add.restype = CTYPES.c_int
35+
36+
_grok_patterns_import_from_file = _libgrok_so.grok_patterns_import_from_file
37+
_grok_patterns_import_from_file.argtypes = [CTYPES.c_void_p, CTYPES.c_char_p]
38+
_grok_patterns_import_from_file.restype = CTYPES.c_int
39+
40+
_grok_match_get_named_substring = _libgrok_so.grok_match_get_named_substring
41+
_grok_match_get_named_substring.argtypes = [_grok_match_p, CTYPES.c_char_p, CTYPES.POINTER(CTYPES.c_char_p), CTYPES.POINTER(CTYPES.c_int)]
42+
_grok_match_get_named_substring.restype = CTYPES.c_int
43+
44+
_grok_match_walk_init = _libgrok_so.grok_match_walk_init
45+
_grok_match_walk_init.argtypes = [_grok_match_p]
46+
47+
_grok_match_walk_next = _libgrok_so.grok_match_walk_next
48+
_grok_match_walk_next.argtypes = [_grok_match_p,
49+
CTYPES.POINTER(CTYPES.c_char_p),
50+
CTYPES.POINTER(CTYPES.c_int),
51+
CTYPES.POINTER(CTYPES.c_char_p),
52+
CTYPES.POINTER(CTYPES.c_int)]
53+
_grok_match_walk_next.restype = CTYPES.c_int
54+
55+
_grok_match_walk_end = _libgrok_so.grok_match_walk_end
56+
_grok_match_walk_end.argtypes = [_grok_match_p]

test/patterns/base

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
USERNAME [a-zA-Z0-9_-]+
2+
USER %{USERNAME}
3+
INT (?:[+-]?(?:[0-9]+))
4+
BASE10NUM (?<![0-9.+-])(?>[+-]?(?:(?:[0-9]+(?:\.[0-9]+)?)|(?:\.[0-9]+)))
5+
NUMBER (?:%{BASE10NUM})
6+
BASE16NUM (?<![0-9A-Fa-f])(?:[+-]?(?:0x)?(?:[0-9A-Fa-f]+))
7+
BASE16FLOAT \b(?<![0-9A-Fa-f.])(?:[+-]?(?:0x)?(?:(?:[0-9A-Fa-f]+(?:\.[0-9A-Fa-f]*)?)|(?:\.[0-9A-Fa-f]+)))\b
8+
9+
POSINT \b(?:[1-9][0-9]*)\b
10+
NONNEGINT \b(?:[0-9]+)\b
11+
WORD \b\w+\b
12+
NOTSPACE \S+
13+
SPACE \s*
14+
DATA .*?
15+
GREEDYDATA .*
16+
#QUOTEDSTRING (?:(?<!\\)(?:"(?:\\.|[^\\"])*"|(?:'(?:\\.|[^\\'])*')|(?:`(?:\\.|[^\\`])*`)))
17+
QUOTEDSTRING (?>(?<!\\)(?>"(?>\\.|[^\\"]+)+"|""|(?>'(?>\\.|[^\\']+)+')|''|(?>`(?>\\.|[^\\`]+)+`)|``))
18+
UUID [A-Fa-f0-9]{8}-(?:[A-Fa-f0-9]{4}-){3}[A-Fa-f0-9]{12}
19+
20+
# Networking
21+
MAC (?:%{CISCOMAC}|%{WINDOWSMAC}|%{COMMONMAC})
22+
CISCOMAC (?:(?:[A-Fa-f0-9]{4}\.){2}[A-Fa-f0-9]{4})
23+
WINDOWSMAC (?:(?:[A-Fa-f0-9]{2}-){5}[A-Fa-f0-9]{2})
24+
COMMONMAC (?:(?:[A-Fa-f0-9]{2}:){5}[A-Fa-f0-9]{2})
25+
IP (?<![0-9])(?:(?:25[0-5]|2[0-4][0-9]|[0-1]?[0-9]{1,2})[.](?:25[0-5]|2[0-4][0-9]|[0-1]?[0-9]{1,2})[.](?:25[0-5]|2[0-4][0-9]|[0-1]?[0-9]{1,2})[.](?:25[0-5]|2[0-4][0-9]|[0-1]?[0-9]{1,2}))(?![0-9])
26+
HOSTNAME \b(?:[0-9A-Za-z][0-9A-Za-z-]{0,62})(?:\.(?:[0-9A-Za-z][0-9A-Za-z-]{0,62}))*(\.?|\b)
27+
HOST %{HOSTNAME}
28+
IPORHOST (?:%{HOSTNAME}|%{IP})
29+
HOSTPORT (?:%{IPORHOST=~/\./}:%{POSINT})
30+
31+
# paths
32+
PATH (?:%{UNIXPATH}|%{WINPATH})
33+
UNIXPATH (?>/(?>[\w_%!$@:.,-]+|\\.)*)+
34+
#UNIXPATH (?<![\w\/])(?:/[^\/\s?*]*)+
35+
LINUXTTY (?>/dev/pts/%{NONNEGINT})
36+
BSDTTY (?>/dev/tty[pq][a-z0-9])
37+
TTY (?:%{BSDTTY}|%{LINUXTTY})
38+
WINPATH (?>[A-Za-z]+:|\\)(?:\\[^\\?*]*)+
39+
URIPROTO [A-Za-z]+(\+[A-Za-z+]+)?
40+
URIHOST %{IPORHOST}(?::%{POSINT:port})?
41+
# uripath comes loosely from RFC1738, but mostly from what Firefox
42+
# doesn't turn into %XX
43+
URIPATH (?:/[A-Za-z0-9$.+!*'(){},~:;=#%_-]*)+
44+
#URIPARAM \?(?:[A-Za-z0-9]+(?:=(?:[^&]*))?(?:&(?:[A-Za-z0-9]+(?:=(?:[^&]*))?)?)*)?
45+
URIPARAM \?[A-Za-z0-9$.+!*'|(){},~#%&/=:;_?-\[\]]*
46+
URIPATHPARAM %{URIPATH}(?:%{URIPARAM})?
47+
URI %{URIPROTO}://(?:%{USER}(?::[^@]*)?@)?(?:%{URIHOST})?(?:%{URIPATHPARAM})?
48+
49+
# Months: January, Feb, 3, 03, 12, December
50+
MONTH \b(?:Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|Jun(?:e)?|Jul(?:y)?|Aug(?:ust)?|Sep(?:tember)?|Oct(?:ober)?|Nov(?:ember)?|Dec(?:ember)?)\b
51+
MONTHNUM (?:0?[1-9]|1[0-2])
52+
MONTHDAY (?:(?:0[1-9])|(?:[12][0-9])|(?:3[01])|[1-9])
53+
54+
# Days: Monday, Tue, Thu, etc...
55+
DAY (?:Mon(?:day)?|Tue(?:sday)?|Wed(?:nesday)?|Thu(?:rsday)?|Fri(?:day)?|Sat(?:urday)?|Sun(?:day)?)
56+
57+
# Years?
58+
YEAR (?>\d\d){1,2}
59+
# Time: HH:MM:SS
60+
#TIME \d{2}:\d{2}(?::\d{2}(?:\.\d+)?)?
61+
# I'm still on the fence about using grok to perform the time match,
62+
# since it's probably slower.
63+
# TIME %{POSINT<24}:%{POSINT<60}(?::%{POSINT<60}(?:\.%{POSINT})?)?
64+
HOUR (?:2[0123]|[01][0-9])
65+
MINUTE (?:[0-5][0-9])
66+
# '60' is a leap second in most time standards and thus is valid.
67+
SECOND (?:(?:[0-5][0-9]|60)(?:[.,][0-9]+)?)
68+
TIME (?!<[0-9])%{HOUR}:%{MINUTE}(?::%{SECOND})(?![0-9])
69+
# datestamp is YYYY/MM/DD-HH:MM:SS.UUUU (or something like it)
70+
DATE_US %{MONTHNUM}[/-]%{MONTHDAY}[/-]%{YEAR}
71+
DATE_EU %{YEAR}[/-]%{MONTHNUM}[/-]%{MONTHDAY}
72+
ISO8601_TIMEZONE (?:Z|[+-]%{HOUR}(?::?%{MINUTE}))
73+
ISO8601_SECOND (?:%{SECOND}|60)
74+
TIMESTAMP_ISO8601 %{YEAR}-%{MONTHNUM}-%{MONTHDAY}[T ]%{HOUR}:?%{MINUTE}(?::?%{SECOND})?%{ISO8601_TIMEZONE}?
75+
DATE %{DATE_US}|%{DATE_EU}
76+
DATESTAMP %{DATE}[- ]%{TIME}
77+
TZ (?:[PMCE][SD]T)
78+
DATESTAMP_RFC822 %{DAY} %{MONTH} %{MONTHDAY} %{YEAR} %{TIME} %{TZ}
79+
DATESTAMP_OTHER %{DAY} %{MONTH} %{MONTHDAY} %{TIME} %{TZ} %{YEAR}
80+
81+
# Syslog Dates: Month Day HH:MM:SS
82+
SYSLOGTIMESTAMP %{MONTH} +%{MONTHDAY} %{TIME}
83+
PROG (?:[\w._/%-]+)
84+
SYSLOGPROG %{PROG:program}(?:\[%{POSINT:pid}\])?
85+
SYSLOGHOST %{IPORHOST}
86+
SYSLOGFACILITY <%{NONNEGINT:facility}.%{NONNEGINT:priority}>
87+
HTTPDATE %{MONTHDAY}/%{MONTH}/%{YEAR}:%{TIME} %{INT}
88+
89+
# Shortcuts
90+
QS %{QUOTEDSTRING}
91+
92+
# Log formats
93+
SYSLOGBASE %{SYSLOGTIMESTAMP:timestamp} (?:%{SYSLOGFACILITY} )?%{SYSLOGHOST:logsource} %{SYSLOGPROG}:
94+
COMBINEDAPACHELOG %{IPORHOST:clientip} %{USER:ident} %{USER:auth} \[%{HTTPDATE:timestamp}\] "(?:%{WORD:verb} %{NOTSPACE:request}(?: HTTP/%{NUMBER:httpversion})?|-)" %{NUMBER:response} (?:%{NUMBER:bytes}|-) %{QS:referrer} %{QS:agent}
95+
96+
# Log Levels
97+
LOGLEVEL ([T|t]race|TRACE|[D|d]ebug|DEBUG|[N|n]otice|NOTICE|[I|i]nfo|INFO|[W|w]arn?(?:ing)?|WARN?(?:ING)?|[E|e]rr?(?:or)?|ERR?(?:OR)?|[C|c]rit?(?:ical)?|CRIT?(?:ICAL)?|[F|f]atal|FATAL|[S|s]evere|SEVERE)

test/testlibgrok.py

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
import os
2+
import sys
3+
import unittest
4+
5+
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
6+
7+
from libgrok import *
8+
9+
class GrokTestCase(unittest.TestCase):
    """Unit tests for the ``Grok`` / ``GrokMatch`` wrappers.

    Every test starts with a fresh ``Grok`` instance that has the bundled
    ``patterns/base`` file loaded.
    """

    # Resolved relative to this file (like the sys.path setup at the top of
    # the module) so the suite does not depend on the working directory.
    PATTERNS_FILE = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), 'patterns', 'base')

    def setUp(self):
        self.grok = Grok()
        self.grok.add_patterns_from_file(self.PATTERNS_FILE)

    def tearDown(self):
        del self.grok

    def test_grok_add_patterns_file(self):
        # A missing pattern file must surface as GrokError.
        self.assertRaises(
            GrokError, self.grok.add_patterns_from_file, 'nosuchfile')

    def test_grok_compile(self):
        # Malformed expression: '%{URI' is closed with ')' instead of '}'.
        self.assertRaises(GrokError, self.grok.compile, '%{URI)')

    def test_grok_substring(self):
        self.grok.compile("%{URI}")
        match = self.grok('https://example.com/test/')
        self.assertEqual(match['URIPROTO'], 'https')
        self.assertEqual(match['URIPATH'], '/test/')
        # Unknown names yield None rather than raising.
        self.assertEqual(match['foo'], None)

    def test_grok_substring_named(self):
        self.grok.compile("%{URI:foo}")
        match = self.grok('https://example.com/test/')
        self.assertEqual(match['URIPROTO'], 'https')
        self.assertEqual(match['URIPATH'], '/test/')
        self.assertEqual(match['foo'], 'https://example.com/test/')
        self.assertEqual(match['bar'], None)

    def test_grok_captures(self):
        self.grok.compile("%{URI}")
        match = self.grok('https://example.com/test/')
        self.assertTrue('HOSTNAME' in match.captures)
        self.assertEqual(match.captures['HOSTNAME'], 'example.com')
        self.assertTrue('URIPROTO' in match.captures)
        self.assertEqual(match.captures['URIPROTO'], 'https')
        self.assertTrue('URIPATH' in match.captures)
        self.assertEqual(match.captures['URIPATH'], '/test/')

    def test_grok_captures_named(self):
        self.grok.compile("%{URI:foo}")
        match = self.grok('https://example.com/test/')
        self.assertTrue('URI:foo' in match.captures)
        self.assertEqual(
            match.captures['URI:foo'], 'https://example.com/test/')

    def test_grok_execute(self):
        self.grok.compile("%{URI}")
        self.assertTrue(self.grok.execute('https://example.com/test/'))
        self.assertFalse(self.grok.execute('thisisnotauri.com'))

    def test_grok_call(self):
        self.grok.compile("%{URI}")
        self.assertTrue(self.grok('https://example.com/test/') is not None)
        self.assertTrue(self.grok('thisisnotauri.com') is None)
64+
65+
if __name__ == "__main__":
66+
unittest.main()

0 commit comments

Comments
 (0)