Skip to content

Commit 1cae778

Browse files
committed
Added a feature that analyzes the content of stored pastebins and sorts them into categories based on the keywords they contain.
1 parent 25e0401 commit 1cae778

File tree

1 file changed

+86
-0
lines changed

1 file changed

+86
-0
lines changed

pastebin_analyze.py

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
from pymongo import MongoClient
2+
import argparse
3+
from pprint import PrettyPrinter
4+
import logging
5+
6+
# Module-wide logger, named after this module.
logger = logging.getLogger(__name__)

# Root logging setup: INFO and above, every record prefixed with a
# timestamp (e.g. "Mon, 01 Jan 2024 12:00:00") and its level name.
logging.basicConfig(
    level=logging.INFO,
    datefmt='%a, %d %b %Y %H:%M:%S',
    format='%(asctime)s %(levelname)s %(message)s',
)

# Shared pretty-printer instance; not referenced by the code visible here.
pp = PrettyPrinter()
13+
14+
15+
def main():
    """Sort stored pastebins into one MongoDB collection per keyword.

    Reads its options from the module-level ``args`` mapping (keys
    ``mongodbhost``, ``mongodbport`` and ``f``), which the ``__main__``
    guard fills in before calling this function.

    Every non-blank line of the keyword file ``args['f']`` is one keyword.
    Each keyword gets a collection of the same name in the ``scrape``
    database.  Every document of ``scrape.pastebins`` whose ``title``,
    ``user`` or ``content`` field contains a keyword as a
    whitespace-separated token is copied into that keyword's collection,
    unless a document with the same ``key`` is already stored there.

    Raises:
        OSError: if the keyword file cannot be opened.
        KeyError: if a matching pastebin document has no ``key`` field.
    """
    client = MongoClient(str(args['mongodbhost']), int(args['mongodbport']))
    db = client.scrape
    logger.info("MongoDB Connection created")

    # One keyword per line.  The context manager guarantees the file is
    # closed.  Blank lines are skipped because an empty string is not a
    # valid collection name; dict.fromkeys drops duplicates while keeping
    # the order of the file.
    with open(args['f'], "r") as keyword_file:
        stripped_lines = (line.strip() for line in keyword_file)
        keywords = list(dict.fromkeys(kw for kw in stripped_lines if kw))

    # Create the missing per-keyword collections.  The existing names are
    # fetched once instead of once per keyword; list_collection_names()
    # replaces collection_names(), which PyMongo 4 removed.
    existing_collections = set(db.list_collection_names())
    for keyword in keywords:
        if keyword not in existing_collections:
            db.create_collection(keyword)
            logger.info("MongoDB Collection new: %s", keyword)

    keyword_set = frozenset(keywords)
    searched_fields = ("title", "user", "content")

    for document in db.pastebins.find({}):
        # Collect every keyword that occurs as a token in a searched field.
        matched = set()
        for field in searched_fields:
            value = document.get(field)
            if not isinstance(value, str):
                # Missing, None or non-text field: nothing to search.
                continue
            # split() without an argument breaks on any whitespace, so a
            # keyword next to a newline or tab in multi-line content is
            # still found (split(' ') only broke on single spaces).
            matched.update(keyword_set.intersection(value.split()))

        for keyword in sorted(matched):
            collection = db[keyword]

            # Only store the pastebin if this collection does not have it yet.
            if collection.find_one({"key": document['key']}) is None:
                logger.info("Entry found for key: %s", document['key'])
                collection.insert_one(document)
64+
65+
66+
if __name__ == "__main__":
    # Command-line entry point: parse the options into the module-level
    # ``args`` dict that main() reads, then run the analysis.
    parser = argparse.ArgumentParser(description="Pastebin Analyzer - Offline")

    # No option is marked required: each one has a default, and argparse
    # never applies a default to an option declared with required=True.
    parser.add_argument('-f',
                        help="Config file containing all keywords to search for. Only matching pastebins will be saved.",
                        default="keywords.txt")

    parser.add_argument('-mongodbhost',
                        help="A string with the URL to your MongoDB Server.",
                        default="localhost")

    # type=int makes argparse reject a non-numeric port with a usage error
    # instead of letting it fail later inside main().
    parser.add_argument('-mongodbport',
                        help="The port to which your MongoDB listens.",
                        type=int,
                        default=27017)

    args = vars(parser.parse_args())

    main()

0 commit comments

Comments
 (0)