From 797da12ba56e9291383645d0a4845b3f1eb7c8ff Mon Sep 17 00:00:00 2001 From: Andy Kipp Date: Tue, 29 Jan 2013 15:37:15 -0500 Subject: [PATCH 01/11] move into module and added setuptools --- libgrok/__init__.py | 23 +++++++++++++++++++++++ libgrok.py => libgrok/_libgrok.py | 22 +--------------------- setup.py | 12 ++++++++++++ 3 files changed, 36 insertions(+), 21 deletions(-) create mode 100644 libgrok/__init__.py rename libgrok.py => libgrok/_libgrok.py (61%) create mode 100644 setup.py diff --git a/libgrok/__init__.py b/libgrok/__init__.py new file mode 100644 index 0000000..7df4e51 --- /dev/null +++ b/libgrok/__init__.py @@ -0,0 +1,23 @@ +from _libgrok import * + +print dir() + +class Grok(object): + + def __init__(self): + self._grok = _libgrok._grok_new() + + def __del__(self): + _libgrok._grok_free(self._grok) + + def add_pattern(self, name, pattern): + _libgrok._grok_pattern_add(self._grok, name, len(name), pattern, len(pattern)) + + def add_patterns_from_file(self, filename): + _libgrok._grok_patterns_import_from_file(self._grok, filename) + + def compile(self, pattern): + _libgrok._grok_compile(self._grok, pattern) + + def __call__(self, text): + return _libgrok._grok_exec(self._grok, text, None) diff --git a/libgrok.py b/libgrok/_libgrok.py similarity index 61% rename from libgrok.py rename to libgrok/_libgrok.py index 5479e6f..2d8ce59 100644 --- a/libgrok.py +++ b/libgrok/_libgrok.py @@ -1,7 +1,6 @@ import ctypes - -_libgrok = ctypes.cdll.LoadLibrary('libgrok.so') +_libgrok = ctypes.cdll.LoadLibrary('libgrok.dylib') _grok_new = _libgrok.grok_new _grok_new.argtypes = [] @@ -29,22 +28,3 @@ _grok_patterns_import_from_file.restype = ctypes.c_int -class Grok(object): - - def __init__(self): - self._grok = _grok_new() - - def __del__(self): - _grok_free(self._grok) - - def add_pattern(self, name, pattern): - _grok_pattern_add(self._grok, name, len(name), pattern, len(pattern)) - - def add_patterns_from_file(self, filename): - _grok_patterns_import_from_file(self._grok, filename) - - def compile(self, pattern): - _grok_compile(self._grok, pattern) - - def __call__(self, text): - return _grok_exec(self._grok, text, None) diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..d432a88 --- /dev/null +++ b/setup.py @@ -0,0 +1,12 @@ +from setuptools import setup + +setup( + name = "libgrok", + version = "0.0.1", + author = "Matt Goodall", + author_email = "matt.goodall@gmail.com", + description = ("Python wrapper for semicomplete's Grok"), + license = "BSD", + url = "https://github.com/emgee/libgrok-py", + packages=['libgrok'], +) From d9e1b9e6b0fa584157f3424d0fd357bce5477c86 Mon Sep 17 00:00:00 2001 From: Andy Kipp Date: Wed, 30 Jan 2013 15:16:09 -0500 Subject: [PATCH 02/11] added unitests for libgrok --- libgrok/__init__.py | 100 +++++++++++++++++++++++++++++++++++++++++--- libgrok/_libgrok.py | 76 ++++++++++++++++++++++----------- test/patterns/base | 97 ++++++++++++++++++++++++++++++++++++++++++ test/testlibgrok.py | 66 +++++++++++++++++++++++++++++ 4 files changed, 309 insertions(+), 30 deletions(-) create mode 100755 test/patterns/base create mode 100644 test/testlibgrok.py diff --git a/libgrok/__init__.py b/libgrok/__init__.py index 7df4e51..38aa8d8 100644 --- a/libgrok/__init__.py +++ b/libgrok/__init__.py @@ -1,6 +1,80 @@ +import ctypes as CTYPES from _libgrok import * -print dir() +class GrokError(Exception): + def __init__(self, message=None, err=0): + if message: + Exception.__init__(self, message) + else: + Exception.__init__(self, self.error_to_message(err)) + + def error_to_message(self, err): + if err == 1: + return "File not found" + if err == 2: + return "Pattern not found" + if err == 3: + return "Unexpected read size" + if err == 4: + return "Compile failed" + if err == 5: + return "Uninitialized" + if err == 6: + return "PCRE Error" + if err == 7: + return "No match" + else: + return "Unknown Error: %d" % (err) + +class GrokMatch(object): + def __init__(self): + self._grok_match = _libgrok._grok_match() + self._captures = None + + @property + def subject(self): + return self._grok_match.subject + + @property + def start(self): + return self._grok_match.start + + @property + def end(self): + return self._grok_match.end + + @property + def captures(self): + if self._captures is None: + self._captures = dict() + for name, data in self.walk(): + self._captures[name] = data + return self._captures + + def walk(self): + _libgrok._grok_match_walk_init(self._grok_match) + name = CTYPES.create_string_buffer('\00' * 100) + name_ptr = CTYPES.c_char_p(CTYPES.addressof(name)) + name_len = CTYPES.c_int(0) + data = CTYPES.create_string_buffer('\00' * 255) + data_ptr = CTYPES.c_char_p(CTYPES.addressof(data)) + data_len = CTYPES.c_int(0) + while _libgrok._grok_match_walk_next(self._grok_match, + CTYPES.byref(name_ptr), + CTYPES.byref(name_len), + CTYPES.byref(data_ptr), + CTYPES.byref(data_len)) == _libgrok.GROK_OK: + yield name_ptr.value[:name_len.value], data_ptr.value[:data_len.value] + _libgrok._grok_match_walk_end(self._grok_match) + + def __getitem__(self, k): + substring = CTYPES.create_string_buffer('\00' * 100) + substring_ptr = CTYPES.c_char_p(CTYPES.addressof(substring)) + substring_len = CTYPES.c_int(0) + ret = _libgrok._grok_match_get_named_substring(self._grok_match, k, CTYPES.byref(substring_ptr), CTYPES.byref(substring_len)) + if ret != _libgrok.GROK_OK: + return None + return substring_ptr.value[:substring_len.value] class Grok(object): @@ -11,13 +85,29 @@ def __del__(self): _libgrok._grok_free(self._grok) def add_pattern(self, name, pattern): - _libgrok._grok_pattern_add(self._grok, name, len(name), pattern, len(pattern)) + ret = _libgrok._grok_pattern_add(self._grok, name, len(name), pattern, len(pattern)) + if ret != _libgrok.GROK_OK: + raise GrokError(err=ret) def add_patterns_from_file(self, filename): - _libgrok._grok_patterns_import_from_file(self._grok, filename) + ret = _libgrok._grok_patterns_import_from_file(self._grok, filename) + if ret != _libgrok.GROK_OK: + raise GrokError(err=ret) def compile(self, pattern): - _libgrok._grok_compile(self._grok, pattern) + ret = _libgrok._grok_compile(self._grok, pattern) + if ret != _libgrok.GROK_OK: + raise GrokError(err=ret) + + def execute(self, text, match=None): + grok_match_p = None if match is None else CTYPES.pointer(match._grok_match) + ret = _libgrok._grok_exec(self._grok, text, grok_match_p) + return ret == _libgrok.GROK_OK def __call__(self, text): - return _libgrok._grok_exec(self._grok, text, None) + match = GrokMatch() + if self.execute(text, match): + return match + else: + return None + diff --git a/libgrok/_libgrok.py b/libgrok/_libgrok.py index 2d8ce59..f848a06 100644 --- a/libgrok/_libgrok.py +++ b/libgrok/_libgrok.py @@ -1,30 +1,56 @@ -import ctypes +import ctypes as CTYPES -_libgrok = ctypes.cdll.LoadLibrary('libgrok.dylib') +_libgrok_so = CTYPES.cdll.LoadLibrary('libgrok.dylib') -_grok_new = _libgrok.grok_new -_grok_new.argtypes = [] -_grok_new.restype = ctypes.c_void_p - -_grok_free = _libgrok.grok_free -_grok_free.argtypes = [ctypes.c_void_p] - -_grok_compile = _libgrok.grok_compile -_grok_compile.argtypes = [ctypes.c_void_p, ctypes.c_char_p] -_grok_compile.restype = ctypes.c_int +GROK_OK = 0 -_grok_exec = _libgrok.grok_exec -_grok_exec.argtypes = [ctypes.c_void_p, ctypes.c_char_p, ctypes.c_void_p] -_grok_exec.restype = ctypes.c_int - -_grok_pattern_add = _libgrok.grok_pattern_add -_grok_pattern_add.argtypes = [ctypes.c_void_p, - ctypes.c_char_p, ctypes.c_size_t, - ctypes.c_char_p, ctypes.c_size_t] -_grok_pattern_add.restype = ctypes.c_int - -_grok_patterns_import_from_file = _libgrok.grok_patterns_import_from_file -_grok_patterns_import_from_file.argtypes = [ctypes.c_void_p, ctypes.c_char_p] -_grok_patterns_import_from_file.restype = ctypes.c_int +class _grok_match(CTYPES.Structure): + _fields_ = [("grok_t", CTYPES.c_void_p), + ("subject", CTYPES.c_char_p), + ("start", CTYPES.c_int), + ("end", CTYPES.c_int)] +_grok_match_p = CTYPES.POINTER(_grok_match) +_grok_new = _libgrok_so.grok_new +_grok_new.argtypes = [] +_grok_new.restype = CTYPES.c_void_p + +_grok_free = _libgrok_so.grok_free +_grok_free.argtypes = [CTYPES.c_void_p] + +_grok_compile = _libgrok_so.grok_compile +_grok_compile.argtypes = [CTYPES.c_void_p, CTYPES.c_char_p] +_grok_compile.restype = CTYPES.c_int + +_grok_exec = _libgrok_so.grok_exec +_grok_exec.argtypes = [CTYPES.c_void_p, CTYPES.c_char_p, _grok_match_p] +_grok_exec.restype = CTYPES.c_int + +_grok_pattern_add = _libgrok_so.grok_pattern_add +_grok_pattern_add.argtypes = [CTYPES.c_void_p, + CTYPES.c_char_p, CTYPES.c_size_t, + CTYPES.c_char_p, CTYPES.c_size_t] +_grok_pattern_add.restype = CTYPES.c_int + +_grok_patterns_import_from_file = _libgrok_so.grok_patterns_import_from_file +_grok_patterns_import_from_file.argtypes = [CTYPES.c_void_p, CTYPES.c_char_p] +_grok_patterns_import_from_file.restype = CTYPES.c_int + +_grok_match_get_named_substring = _libgrok_so.grok_match_get_named_substring +_grok_match_get_named_substring.argtypes = [_grok_match_p, CTYPES.c_char_p, CTYPES.POINTER(CTYPES.c_char_p), CTYPES.POINTER(CTYPES.c_int)] +_grok_match_get_named_substring.restype = CTYPES.c_int + +_grok_match_walk_init = _libgrok_so.grok_match_walk_init +_grok_match_walk_init.argtypes = [_grok_match_p] + +_grok_match_walk_next = _libgrok_so.grok_match_walk_next +_grok_match_walk_next.argtypes = [_grok_match_p, + CTYPES.POINTER(CTYPES.c_char_p), + CTYPES.POINTER(CTYPES.c_int), + CTYPES.POINTER(CTYPES.c_char_p), + CTYPES.POINTER(CTYPES.c_int)] +_grok_match_walk_next.restype = CTYPES.c_int + +_grok_match_walk_end = _libgrok_so.grok_match_walk_end +_grok_match_walk_end.argtypes = [_grok_match_p] diff --git a/test/patterns/base b/test/patterns/base new file mode 100755 index 0000000..95a9c4d --- /dev/null +++ b/test/patterns/base @@ -0,0 +1,97 @@ +USERNAME [a-zA-Z0-9_-]+ +USER %{USERNAME} +INT (?:[+-]?(?:[0-9]+)) +BASE10NUM (?[+-]?(?:(?:[0-9]+(?:\.[0-9]+)?)|(?:\.[0-9]+))) +NUMBER (?:%{BASE10NUM}) +BASE16NUM (?(?"(?>\\.|[^\\"]+)+"|""|(?>'(?>\\.|[^\\']+)+')|''|(?>`(?>\\.|[^\\`]+)+`)|``)) +UUID [A-Fa-f0-9]{8}-(?:[A-Fa-f0-9]{4}-){3}[A-Fa-f0-9]{12} + +# Networking +MAC (?:%{CISCOMAC}|%{WINDOWSMAC}|%{COMMONMAC}) +CISCOMAC (?:(?:[A-Fa-f0-9]{4}\.){2}[A-Fa-f0-9]{4}) +WINDOWSMAC (?:(?:[A-Fa-f0-9]{2}-){5}[A-Fa-f0-9]{2}) +COMMONMAC (?:(?:[A-Fa-f0-9]{2}:){5}[A-Fa-f0-9]{2}) +IP (?/(?>[\w_%!$@:.,-]+|\\.)*)+ +#UNIXPATH (?/dev/pts/%{NONNEGINT}) +BSDTTY (?>/dev/tty[pq][a-z0-9]) +TTY (?:%{BSDTTY}|%{LINUXTTY}) +WINPATH (?>[A-Za-z]+:|\\)(?:\\[^\\?*]*)+ +URIPROTO [A-Za-z]+(\+[A-Za-z+]+)? +URIHOST %{IPORHOST}(?::%{POSINT:port})? +# uripath comes loosely from RFC1738, but mostly from what Firefox +# doesn't turn into %XX +URIPATH (?:/[A-Za-z0-9$.+!*'(){},~:;=#%_-]*)+ +#URIPARAM \?(?:[A-Za-z0-9]+(?:=(?:[^&]*))?(?:&(?:[A-Za-z0-9]+(?:=(?:[^&]*))?)?)*)? +URIPARAM \?[A-Za-z0-9$.+!*'|(){},~#%&/=:;_?-\[\]]* +URIPATHPARAM %{URIPATH}(?:%{URIPARAM})? +URI %{URIPROTO}://(?:%{USER}(?::[^@]*)?@)?(?:%{URIHOST})?(?:%{URIPATHPARAM})? + +# Months: January, Feb, 3, 03, 12, December +MONTH \b(?:Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|Jun(?:e)?|Jul(?:y)?|Aug(?:ust)?|Sep(?:tember)?|Oct(?:ober)?|Nov(?:ember)?|Dec(?:ember)?)\b +MONTHNUM (?:0?[1-9]|1[0-2]) +MONTHDAY (?:(?:0[1-9])|(?:[12][0-9])|(?:3[01])|[1-9]) + +# Days: Monday, Tue, Thu, etc... +DAY (?:Mon(?:day)?|Tue(?:sday)?|Wed(?:nesday)?|Thu(?:rsday)?|Fri(?:day)?|Sat(?:urday)?|Sun(?:day)?) + +# Years? +YEAR (?>\d\d){1,2} +# Time: HH:MM:SS +#TIME \d{2}:\d{2}(?::\d{2}(?:\.\d+)?)? +# I'm still on the fence about using grok to perform the time match, +# since it's probably slower. +# TIME %{POSINT<24}:%{POSINT<60}(?::%{POSINT<60}(?:\.%{POSINT})?)? +HOUR (?:2[0123]|[01][0-9]) +MINUTE (?:[0-5][0-9]) +# '60' is a leap second in most time standards and thus is valid. +SECOND (?:(?:[0-5][0-9]|60)(?:[.,][0-9]+)?) +TIME (?!<[0-9])%{HOUR}:%{MINUTE}(?::%{SECOND})(?![0-9]) +# datestamp is YYYY/MM/DD-HH:MM:SS.UUUU (or something like it) +DATE_US %{MONTHNUM}[/-]%{MONTHDAY}[/-]%{YEAR} +DATE_EU %{YEAR}[/-]%{MONTHNUM}[/-]%{MONTHDAY} +ISO8601_TIMEZONE (?:Z|[+-]%{HOUR}(?::?%{MINUTE})) +ISO8601_SECOND (?:%{SECOND}|60) +TIMESTAMP_ISO8601 %{YEAR}-%{MONTHNUM}-%{MONTHDAY}[T ]%{HOUR}:?%{MINUTE}(?::?%{SECOND})?%{ISO8601_TIMEZONE}? +DATE %{DATE_US}|%{DATE_EU} +DATESTAMP %{DATE}[- ]%{TIME} +TZ (?:[PMCE][SD]T) +DATESTAMP_RFC822 %{DAY} %{MONTH} %{MONTHDAY} %{YEAR} %{TIME} %{TZ} +DATESTAMP_OTHER %{DAY} %{MONTH} %{MONTHDAY} %{TIME} %{TZ} %{YEAR} + +# Syslog Dates: Month Day HH:MM:SS +SYSLOGTIMESTAMP %{MONTH} +%{MONTHDAY} %{TIME} +PROG (?:[\w._/%-]+) +SYSLOGPROG %{PROG:program}(?:\[%{POSINT:pid}\])? +SYSLOGHOST %{IPORHOST} +SYSLOGFACILITY <%{NONNEGINT:facility}.%{NONNEGINT:priority}> +HTTPDATE %{MONTHDAY}/%{MONTH}/%{YEAR}:%{TIME} %{INT} + +# Shortcuts +QS %{QUOTEDSTRING} + +# Log formats +SYSLOGBASE %{SYSLOGTIMESTAMP:timestamp} (?:%{SYSLOGFACILITY} )?%{SYSLOGHOST:logsource} %{SYSLOGPROG}: +COMBINEDAPACHELOG %{IPORHOST:clientip} %{USER:ident} %{USER:auth} \[%{HTTPDATE:timestamp}\] "(?:%{WORD:verb} %{NOTSPACE:request}(?: HTTP/%{NUMBER:httpversion})?|-)" %{NUMBER:response} (?:%{NUMBER:bytes}|-) %{QS:referrer} %{QS:agent} + +# Log Levels +LOGLEVEL ([T|t]race|TRACE|[D|d]ebug|DEBUG|[N|n]otice|NOTICE|[I|i]nfo|INFO|[W|w]arn?(?:ing)?|WARN?(?:ING)?|[E|e]rr?(?:or)?|ERR?(?:OR)?|[C|c]rit?(?:ical)?|CRIT?(?:ICAL)?|[F|f]atal|FATAL|[S|s]evere|SEVERE) diff --git a/test/testlibgrok.py b/test/testlibgrok.py new file mode 100644 index 0000000..741a3a3 --- /dev/null +++ b/test/testlibgrok.py @@ -0,0 +1,66 @@ +import os +import sys +import unittest + +sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) + +from libgrok import * + +class GrokTestCase(unittest.TestCase): + + def setUp(self): + self.grok = Grok() + self.grok.add_patterns_from_file('test/patterns/base') + + def tearDown(self): + del self.grok + + def test_grok_add_patterns_file(self): + self.assertRaises(GrokError, self.grok.add_patterns_from_file, 'nosuchfile') + + def test_grok_compile(self): + self.assertRaises(GrokError, self.grok.compile, '%{URI)') + + def test_grok_substring(self): + self.grok.compile("%{URI}") + match = self.grok('https://example.com/test/') + self.assertEquals(match['URIPROTO'], 'https') + self.assertEquals(match['URIPATH'], '/test/') + self.assertEquals(match['foo'], None) + + def test_grok_substring_named(self): + self.grok.compile("%{URI:foo}") + match = self.grok('https://example.com/test/') + self.assertEquals(match['URIPROTO'], 'https') + self.assertEquals(match['URIPATH'], '/test/') + self.assertEquals(match['foo'], 'https://example.com/test/') + self.assertEquals(match['bar'], None) + + def test_grok_captures(self): + self.grok.compile("%{URI}") + match = self.grok('https://example.com/test/') + self.assertTrue('HOSTNAME' in match.captures) + self.assertEquals(match.captures['HOSTNAME'], 'example.com') + self.assertTrue('URIPROTO' in match.captures) + self.assertEquals(match.captures['URIPROTO'], 'https') + self.assertTrue('URIPATH' in match.captures) + self.assertEquals(match.captures['URIPATH'], '/test/') + + def test_grok_captures_named(self): + self.grok.compile("%{URI:foo}") + match = self.grok('https://example.com/test/') + self.assertTrue('URI:foo' in match.captures) + self.assertEquals(match.captures['URI:foo'], 'https://example.com/test/') + + def test_grok_execute(self): + self.grok.compile("%{URI}") + self.assertTrue(self.grok.execute('https://example.com/test/')) + self.assertFalse(self.grok.execute('thisisnotauri.com')) + + def test_grok_call(self): + self.grok.compile("%{URI}") + self.assertTrue(self.grok('https://example.com/test/') != None) + self.assertTrue(self.grok('thisisnotauri.com') == None) + +if __name__ == "__main__": + unittest.main() From 0bef9e186d97a2a6a7fa25d37c2172963cfc58ee Mon Sep 17 00:00:00 2001 From: Andy Kipp Date: Wed, 30 Jan 2013 15:20:37 -0500 Subject: [PATCH 03/11] deleted demo --- demo.py | 11 ----------- 1 file changed, 11 deletions(-) delete mode 100644 demo.py diff --git a/demo.py b/demo.py deleted file mode 100644 index aaac7bd..0000000 --- a/demo.py +++ /dev/null @@ -1,11 +0,0 @@ -import libgrok -import sys - -g = libgrok.Grok() - -for filename in sys.argv[1:]: - g.add_patterns_from_file(filename) - -g.compile(r"^%{NUMBER}$") -print g("200") -print g("404") From b74a0a5f3c0719d167f807d84b1c05c3dd87346c Mon Sep 17 00:00:00 2001 From: Perry Stoll Date: Thu, 31 Jan 2013 17:17:55 -0500 Subject: [PATCH 04/11] fix call to create_string_buffer, use variable --- libgrok/__init__.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/libgrok/__init__.py b/libgrok/__init__.py index 38aa8d8..e93ba99 100644 --- a/libgrok/__init__.py +++ b/libgrok/__init__.py @@ -1,6 +1,8 @@ import ctypes as CTYPES from _libgrok import * +_fixed_buffer_size = 4096 + class GrokError(Exception): def __init__(self, message=None, err=0): if message: @@ -53,10 +55,10 @@ def captures(self): def walk(self): _libgrok._grok_match_walk_init(self._grok_match) - name = CTYPES.create_string_buffer('\00' * 100) + name = CTYPES.create_string_buffer( _fixed_buffer_size) name_ptr = CTYPES.c_char_p(CTYPES.addressof(name)) name_len = CTYPES.c_int(0) - data = CTYPES.create_string_buffer('\00' * 255) + data = CTYPES.create_string_buffer( _fixed_buffer_size) data_ptr = CTYPES.c_char_p(CTYPES.addressof(data)) data_len = CTYPES.c_int(0) while _libgrok._grok_match_walk_next(self._grok_match, @@ -68,7 +70,7 @@ def walk(self): _libgrok._grok_match_walk_end(self._grok_match) def __getitem__(self, k): - substring = CTYPES.create_string_buffer('\00' * 100) + substring = CTYPES.create_string_buffer( _fixed_buffer_size) substring_ptr = CTYPES.c_char_p(CTYPES.addressof(substring)) substring_len = CTYPES.c_int(0) ret = _libgrok._grok_match_get_named_substring(self._grok_match, k, CTYPES.byref(substring_ptr), CTYPES.byref(substring_len)) From eb70b2fd64b53a82e276d4861ddd3ed9fab0435d Mon Sep 17 00:00:00 2001 From: Andy Kipp Date: Fri, 1 Feb 2013 14:02:34 -0500 Subject: [PATCH 05/11] reorged libgrok code since it annoyed perry --- libgrok/__init__.py | 116 +------------------------------------------- libgrok/grok.py | 115 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 116 insertions(+), 115 deletions(-) create mode 100644 libgrok/grok.py diff --git a/libgrok/__init__.py b/libgrok/__init__.py index e93ba99..3bfe365 100644 --- a/libgrok/__init__.py +++ b/libgrok/__init__.py @@ -1,115 +1 @@ -import ctypes as CTYPES -from _libgrok import * - -_fixed_buffer_size = 4096 - -class GrokError(Exception): - def __init__(self, message=None, err=0): - if message: - Exception.__init__(self, message) - else: - Exception.__init__(self, self.error_to_message(err)) - - def error_to_message(self, err): - if err == 1: - return "File not found" - if err == 2: - return "Pattern not found" - if err == 3: - return "Unexpected read size" - if err == 4: - return "Compile failed" - if err == 5: - return "Uninitialized" - if err == 6: - return "PCRE Error" - if err == 7: - return "No match" - else: - return "Unknown Error: %d" % (err) - -class GrokMatch(object): - def __init__(self): - self._grok_match = _libgrok._grok_match() - self._captures = None - - @property - def subject(self): - return self._grok_match.subject - - @property - def start(self): - return self._grok_match.start - - @property - def end(self): - return self._grok_match.end - - @property - def captures(self): - if self._captures is None: - self._captures = dict() - for name, data in self.walk(): - self._captures[name] = data - return self._captures - - def walk(self): - _libgrok._grok_match_walk_init(self._grok_match) - name = CTYPES.create_string_buffer( _fixed_buffer_size) - name_ptr = CTYPES.c_char_p(CTYPES.addressof(name)) - name_len = CTYPES.c_int(0) - data = CTYPES.create_string_buffer( _fixed_buffer_size) - data_ptr = CTYPES.c_char_p(CTYPES.addressof(data)) - data_len = CTYPES.c_int(0) - while _libgrok._grok_match_walk_next(self._grok_match, - CTYPES.byref(name_ptr), - CTYPES.byref(name_len), - CTYPES.byref(data_ptr), - CTYPES.byref(data_len)) == _libgrok.GROK_OK: - yield name_ptr.value[:name_len.value], data_ptr.value[:data_len.value] - _libgrok._grok_match_walk_end(self._grok_match) - - def __getitem__(self, k): - substring = CTYPES.create_string_buffer( _fixed_buffer_size) - substring_ptr = CTYPES.c_char_p(CTYPES.addressof(substring)) - substring_len = CTYPES.c_int(0) - ret = _libgrok._grok_match_get_named_substring(self._grok_match, k, CTYPES.byref(substring_ptr), CTYPES.byref(substring_len)) - if ret != _libgrok.GROK_OK: - return None - return substring_ptr.value[:substring_len.value] - -class Grok(object): - - def __init__(self): - self._grok = _libgrok._grok_new() - - def __del__(self): - _libgrok._grok_free(self._grok) - - def add_pattern(self, name, pattern): - ret = _libgrok._grok_pattern_add(self._grok, name, len(name), pattern, len(pattern)) - if ret != _libgrok.GROK_OK: - raise GrokError(err=ret) - - def add_patterns_from_file(self, filename): - ret = _libgrok._grok_patterns_import_from_file(self._grok, filename) - if ret != _libgrok.GROK_OK: - raise GrokError(err=ret) - - def compile(self, pattern): - ret = _libgrok._grok_compile(self._grok, pattern) - if ret != _libgrok.GROK_OK: - raise GrokError(err=ret) - - def execute(self, text, match=None): - grok_match_p = None if match is None else CTYPES.pointer(match._grok_match) - ret = _libgrok._grok_exec(self._grok, text, grok_match_p) - return ret == _libgrok.GROK_OK - - def __call__(self, text): - match = GrokMatch() - if self.execute(text, match): - return match - else: - return None - +from grok import Grok, GrokError, GrokMatch diff --git a/libgrok/grok.py b/libgrok/grok.py new file mode 100644 index 0000000..9e6442a --- /dev/null +++ b/libgrok/grok.py @@ -0,0 +1,115 @@ +import ctypes as CTYPES +import _libgrok + +_fixed_buffer_size = 4096 + +class GrokError(Exception): + def __init__(self, message=None, err=0): + if message: + Exception.__init__(self, message) + else: + Exception.__init__(self, self.error_to_message(err)) + + def error_to_message(self, err): + if err == 1: + return "File not found" + if err == 2: + return "Pattern not found" + if err == 3: + return "Unexpected read size" + if err == 4: + return "Compile failed" + if err == 5: + return "Uninitialized" + if err == 6: + return "PCRE Error" + if err == 7: + return "No match" + else: + return "Unknown Error: %d" % (err) + +class GrokMatch(object): + def __init__(self): + self._grok_match = _libgrok._grok_match() + self._captures = None + + @property + def subject(self): + return self._grok_match.subject + + @property + def start(self): + return self._grok_match.start + + @property + def end(self): + return self._grok_match.end + + @property + def captures(self): + if self._captures is None: + self._captures = dict() + for name, data in self.walk(): + self._captures[name] = data + return self._captures + + def walk(self): + _libgrok._grok_match_walk_init(self._grok_match) + name = CTYPES.create_string_buffer( _fixed_buffer_size) + name_ptr = CTYPES.c_char_p(CTYPES.addressof(name)) + name_len = CTYPES.c_int(0) + data = CTYPES.create_string_buffer( _fixed_buffer_size) + data_ptr = CTYPES.c_char_p(CTYPES.addressof(data)) + data_len = CTYPES.c_int(0) + while _libgrok._grok_match_walk_next(self._grok_match, + CTYPES.byref(name_ptr), + CTYPES.byref(name_len), + CTYPES.byref(data_ptr), + CTYPES.byref(data_len)) == _libgrok.GROK_OK: + yield name_ptr.value[:name_len.value], data_ptr.value[:data_len.value] + _libgrok._grok_match_walk_end(self._grok_match) + + def __getitem__(self, k): + substring = CTYPES.create_string_buffer( _fixed_buffer_size) + substring_ptr = CTYPES.c_char_p(CTYPES.addressof(substring)) + substring_len = CTYPES.c_int(0) + ret = _libgrok._grok_match_get_named_substring(self._grok_match, k, CTYPES.byref(substring_ptr), CTYPES.byref(substring_len)) + if ret != _libgrok.GROK_OK: + return None + return substring_ptr.value[:substring_len.value] + +class Grok(object): + + def __init__(self): + self._grok = _libgrok._grok_new() + + def __del__(self): + _libgrok._grok_free(self._grok) + + def add_pattern(self, name, pattern): + ret = _libgrok._grok_pattern_add(self._grok, name, len(name), pattern, len(pattern)) + if ret != _libgrok.GROK_OK: + raise GrokError(err=ret) + + def add_patterns_from_file(self, filename): + ret = _libgrok._grok_patterns_import_from_file(self._grok, filename) + if ret != _libgrok.GROK_OK: + raise GrokError(err=ret) + + def compile(self, pattern): + ret = _libgrok._grok_compile(self._grok, pattern) + if ret != _libgrok.GROK_OK: + raise GrokError(err=ret) + + def execute(self, text, match=None): + grok_match_p = None if match is None else CTYPES.pointer(match._grok_match) + ret = _libgrok._grok_exec(self._grok, text, grok_match_p) + return ret == _libgrok.GROK_OK + + def __call__(self, text): + match = GrokMatch() + if self.execute(text, match): + return match + else: + return None + From 4d587ee60a0c4642c61f3ea6f32a2dd49b6054eb Mon Sep 17 00:00:00 2001 From: Andy Kipp Date: Mon, 4 Feb 2013 13:14:36 -0500 Subject: [PATCH 06/11] changed loading of dynamic library to use find_library --- libgrok/_libgrok.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/libgrok/_libgrok.py b/libgrok/_libgrok.py index f848a06..98bc54a 100644 --- a/libgrok/_libgrok.py +++ b/libgrok/_libgrok.py @@ -1,6 +1,10 @@ +import sys import ctypes as CTYPES +from ctypes.util import find_library -_libgrok_so = CTYPES.cdll.LoadLibrary('libgrok.dylib') +_libgrok_lib_name = find_library('grok') + +_libgrok_so = CTYPES.cdll.LoadLibrary(_libgrok_lib_name) GROK_OK = 0 From 4f97b9dec72c93acc781e82d1e4801e3a63ce2aa Mon Sep 17 00:00:00 2001 From: Andy Kipp Date: Mon, 4 Feb 2013 13:38:21 -0500 Subject: [PATCH 07/11] updated readme --- README | 4 ---- README.md | 31 +++++++++++++++++++++++++++++++ 2 files changed, 31 insertions(+), 4 deletions(-) delete mode 100644 README create mode 100644 README.md diff --git a/README b/README deleted file mode 100644 index 06be63b..0000000 --- a/README +++ /dev/null @@ -1,4 +0,0 @@ -Python wrapper for semicomplete's Grok library. - -Grok allows you to easily parse logs and other files and turns the unstructured -log and event data into structured data. diff --git a/README.md b/README.md new file mode 100644 index 0000000..8f78203 --- /dev/null +++ b/README.md @@ -0,0 +1,31 @@ +Python wrapper for semicomplete's Grok library. + +About +----- +Grok allows you to easily parse logs and other files and turns the unstructured +log and event data into structured data. + +Installing +---------- + +You will need libgrok installed in other to use libgrok-py. On MacOSX this is available via Homebrew: + +Install Grok Dependencies + brew install tokyo-cabinet pcre libevent + +Install Grok + brew install grok + +You can also compile from source. + + +Usage +----- + + >>> import libgrok + >>> grok = libgrok.Grok() + >>> grok.add_patterns_from_file('test/patterns/base') + >>> grok.compile('%{URI:foo}') + >>> match = grok("http://www.example.com/test/") + >>> match.captures.items() + [('USERNAME', ''), ('HOSTNAME', 'www.example.com'), ('URIPATH', '/test/'), ('IPORHOST', 'www.example.com'), ('POSINT:port', ''), ('URIPROTO', 'http'), ('IP', ''), ('URIHOST', 'www.example.com'), ('URIPATHPARAM', '/test/'), ('URI:foo', 'http://www.example.com/test/'), ('URIPARAM', ''), ('USER', '')] From e60ab22bac6a126c2456d375c515f751d5b669f2 Mon Sep 17 00:00:00 2001 From: Andy Kipp Date: Mon, 4 Feb 2013 13:44:35 -0500 Subject: [PATCH 08/11] more readme updates --- README.md | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 8f78203..ae27149 100644 --- a/README.md +++ b/README.md @@ -2,22 +2,20 @@ Python wrapper for semicomplete's Grok library. About ----- -Grok allows you to easily parse logs and other files and turns the unstructured -log and event data into structured data. +Grok allows you to easily parse logs and other files and turns the unstructured log and event data into structured data. Installing ---------- -You will need libgrok installed in other to use libgrok-py. On MacOSX this is available via Homebrew: +You will need libgrok installed in other to use libgrok-py. On MacOSX, Grok is available via Homebrew: -Install Grok Dependencies - brew install tokyo-cabinet pcre libevent +_Install Grok Dependencies_ -Install Grok - brew install grok + brew install tokyo-cabinet pcre libevent -You can also compile from source. +_Install Grok_ + brew install grok Usage ----- @@ -29,3 +27,5 @@ Usage >>> match = grok("http://www.example.com/test/") >>> match.captures.items() [('USERNAME', ''), ('HOSTNAME', 'www.example.com'), ('URIPATH', '/test/'), ('IPORHOST', 'www.example.com'), ('POSINT:port', ''), ('URIPROTO', 'http'), ('IP', ''), ('URIHOST', 'www.example.com'), ('URIPATHPARAM', '/test/'), ('URI:foo', 'http://www.example.com/test/'), ('URIPARAM', ''), ('USER', '')] + >>> match["foo"] + 'http://www.example.com/test/' From 39f618dabf4bf62ddd97fac38578b1d8a8d8197b Mon Sep 17 00:00:00 2001 From: Andy Kipp Date: Mon, 4 Feb 2013 13:45:44 -0500 Subject: [PATCH 09/11] readme updates --- README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README.md b/README.md index ae27149..6bab069 100644 --- a/README.md +++ b/README.md @@ -17,6 +17,10 @@ _Install Grok_ brew install grok +_Install Libgrok-Py_ + python test/testlibgrok.py + python setup.py install + Usage ----- From 705c92664ec5a146113f28de4ab0a06bd7a06dc8 Mon Sep 17 00:00:00 2001 From: Andy Kipp Date: Mon, 4 Feb 2013 13:46:32 -0500 Subject: [PATCH 10/11] readme updates --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 6bab069..9e491f7 100644 --- a/README.md +++ b/README.md @@ -17,7 +17,8 @@ _Install Grok_ brew install grok -_Install Libgrok-Py_ +_Install libgrok-py_ + python test/testlibgrok.py python setup.py install From aec11b1602b46965b933f390a8991864eb738360 Mon Sep 17 00:00:00 2001 From: Andy Kipp Date: Mon, 4 Feb 2013 16:35:45 -0500 Subject: [PATCH 11/11] minor cleanup --- libgrok/grok.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/libgrok/grok.py b/libgrok/grok.py index 9e6442a..4d2b1b8 100644 --- a/libgrok/grok.py +++ b/libgrok/grok.py @@ -31,6 +31,7 @@ def error_to_message(self, err): class GrokMatch(object): def __init__(self): self._grok_match = _libgrok._grok_match() + self._grok_match_ptr = CTYPES.pointer(self._grok_match) self._captures = None @property @@ -54,29 +55,31 @@ def captures(self): return self._captures def walk(self): - _libgrok._grok_match_walk_init(self._grok_match) + _libgrok._grok_match_walk_init(self._grok_match_ptr) + # Create Buffers name = CTYPES.create_string_buffer( _fixed_buffer_size) name_ptr = CTYPES.c_char_p(CTYPES.addressof(name)) name_len = CTYPES.c_int(0) data = CTYPES.create_string_buffer( _fixed_buffer_size) data_ptr = CTYPES.c_char_p(CTYPES.addressof(data)) data_len = CTYPES.c_int(0) - while _libgrok._grok_match_walk_next(self._grok_match, + while _libgrok._grok_match_walk_next(self._grok_match_ptr, CTYPES.byref(name_ptr), CTYPES.byref(name_len), CTYPES.byref(data_ptr), CTYPES.byref(data_len)) == _libgrok.GROK_OK: - yield name_ptr.value[:name_len.value], data_ptr.value[:data_len.value] - _libgrok._grok_match_walk_end(self._grok_match) + yield CTYPES.string_at(name_ptr, name_len.value), CTYPES.string_at(data_ptr, data_len.value) + _libgrok._grok_match_walk_end(self._grok_match_ptr) def __getitem__(self, k): + # Create Buffer substring = CTYPES.create_string_buffer( _fixed_buffer_size) substring_ptr = CTYPES.c_char_p(CTYPES.addressof(substring)) substring_len = CTYPES.c_int(0) - ret = _libgrok._grok_match_get_named_substring(self._grok_match, k, CTYPES.byref(substring_ptr), CTYPES.byref(substring_len)) + ret = _libgrok._grok_match_get_named_substring(self._grok_match_ptr, k, CTYPES.byref(substring_ptr), CTYPES.byref(substring_len)) if ret != _libgrok.GROK_OK: return None - return substring_ptr.value[:substring_len.value] + return CTYPES.string_at(substring_ptr, substring_len.value) class Grok(object): @@ -102,7 +105,7 @@ def compile(self, pattern): raise GrokError(err=ret) def execute(self, text, match=None): - grok_match_p = None if match is None else CTYPES.pointer(match._grok_match) + grok_match_p = None if match is None else match._grok_match_ptr ret = _libgrok._grok_exec(self._grok, text, grok_match_p) return ret == _libgrok.GROK_OK