1
+ from pymongo import MongoClient
2
+ import argparse
3
+ from pprint import PrettyPrinter
4
+ import logging
5
+
6
# Shared pretty-printer instance for ad-hoc inspection of documents.
pp = PrettyPrinter()

# Configure the root logger once at import time: INFO and above, each record
# prefixed with a human-readable timestamp and its level name.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s %(levelname)s %(message)s',
    datefmt='%a, %d %b %Y %H:%M:%S',
)

# Module-level logger used by the rest of this script.
logger = logging.getLogger(__name__)
13
+
14
+
15
# Document fields whose text is scanned for keywords.
_SEARCHED_FIELDS = ("title", "user", "content")


def _load_keywords(path):
    """Return the distinct, non-empty keywords listed one per line in *path*.

    Trailing whitespace (including the newline) is stripped from every line.
    Blank lines are skipped because an empty string is not a valid collection
    name, and duplicates are dropped while keeping the file order.
    """
    keywords = []
    seen = set()
    with open(path, "r") as keyword_file:
        for line in keyword_file:
            keyword = line.rstrip()
            if keyword and keyword not in seen:
                seen.add(keyword)
                keywords.append(keyword)
    return keywords


def _matched_keywords(document, keywords):
    """Return the entries of *keywords* that occur as a whole token in *document*.

    Only the fields named in ``_SEARCHED_FIELDS`` are inspected; fields that
    are missing or ``None`` are ignored. The result keeps the order of
    *keywords*.
    """
    tokens = set()
    for field in _SEARCHED_FIELDS:
        value = document.get(field)
        if value is not None:
            # Split on any whitespace, not just single spaces, so a keyword
            # next to a line break or tab is still recognised.
            tokens.update(value.split())
    return [keyword for keyword in keywords if keyword in tokens]


def main():
    """Copy every stored pastebin that mentions a keyword into that keyword's collection.

    Settings are read from the module-level ``args`` dict:

    * ``args['f']`` -- path of the keyword file (one keyword per line)
    * ``args['mongodbhost']`` / ``args['mongodbport']`` -- MongoDB server address

    For each keyword a collection of the same name is created in the
    ``scrape`` database if it does not exist yet. Every document of
    ``scrape.pastebins`` whose ``title``, ``user`` or ``content`` contains a
    keyword as a whitespace-delimited token is inserted into that keyword's
    collection, unless a document with the same ``key`` is already there.
    """
    # Local import: only needed here, and keeps the file's import block as is.
    from pymongo.errors import CollectionInvalid

    # Read the keyword file first so a missing file fails before connecting.
    keywords = _load_keywords(args['f'])

    client = MongoClient(str(args['mongodbhost']), int(args['mongodbport']))
    try:
        db = client.scrape
        logger.info("MongoDB Connection created")

        # One collection per keyword. create_collection raises
        # CollectionInvalid when the collection already exists, which avoids
        # listing all collection names once per keyword.
        for keyword in keywords:
            try:
                db.create_collection(keyword)
            except CollectionInvalid:
                continue
            logger.info("MongoDB Collection new: %s", keyword)

        for document in db.pastebins.find({}):
            for keyword in _matched_keywords(document, keywords):
                collection = db[keyword]

                # Skip pastebins that were already copied on an earlier run.
                if collection.find_one({"key": document['key']}) is None:
                    logger.info("Entry found for key: %s", document['key'])
                    collection.insert_one(document)
    finally:
        # Release the connection pool even if processing fails part-way.
        client.close()
64
+
65
+
66
if __name__ == "__main__":
    # Command-line entry point: parse the options, publish them as the
    # module-level ``args`` dict that main() reads, then run the analysis.
    parser = argparse.ArgumentParser(description="Pastebin Analyzer - Offline")

    # Every option has a default, so none is marked required; combining
    # required=True with default= made the defaults unreachable.
    parser.add_argument('-f',
                        help="Config file containing all keywords to search for. Only matching pastebins will be saved.",
                        default="keywords.txt")

    parser.add_argument('-mongodbhost',
                        help="A string with the URL to your MongoDB Server.",
                        default="localhost")

    # type=int lets argparse reject a non-numeric port with a usage error.
    parser.add_argument('-mongodbport',
                        help="THe port to which your MongoDB listens.",
                        default=27017,
                        type=int)

    args = vars(parser.parse_args())

    main()
0 commit comments