From 57b33c50d51cfa631bc211f6ebce0f61f71d3738 Mon Sep 17 00:00:00 2001
From: Andre Fritsche <andre.fritsche@outlook.de>
Date: Sat, 25 Aug 2018 00:16:36 +0200
Subject: [PATCH 01/10] added the function to scrape the most recent pastebins
 and created the first step. basic scraping. still missing the duplicate check

---
 README.md                             |  54 ++++++++++
 pastebin_python/__init__.py           |   2 +-
 pastebin_python/pastebin.py           |  28 +++++-
 pastebin_python/pastebin_constants.py |   3 +-
 pastebin_scrape.py                    | 137 ++++++++++++++++++++++++++
 requirements.txt                      |   0
 6 files changed, 217 insertions(+), 7 deletions(-)
 create mode 100644 README.md
 create mode 100644 pastebin_scrape.py
 create mode 100644 requirements.txt
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..a27f789
--- /dev/null
+++ b/README.md
@@ -0,0 +1,54 @@
+# Pastebin Scraper
+
+The scraper here is based on the one from [six519](https://github.com/six519/PastebinPython). Thank you very much for giving us this present :-)
+
+## Installation
+
+Simply clone it:
+
+```
+cd /your/desired/directory
+git clone https://github.com/six519/PastebinPython.git
+pip install -r requirements.txt
+```
+
+## Usage
+
+This fork of the PastebinPython project downloads all pastebin entries ... or well at least it starts downloading as much as it can.
+
+The results will be saved within a MongoDB collection. Another script will be triggered to identify keywords, which have to be provided in advance.
+
+The second step results in new collections where each keyword gets one new collection and all found pastebin entries will be copied there.
+
+The third step might be the accumulator. It identifies specific words (similiar to step 2) but also specials like eMail addresses, Bitcoin addresses, URLs, IP Adresses etc. 
+
+### 1. pastebin_scrape.py
+
+For this one to work good you need an API key. I bought a lifetime access to the pastebin API a while ago for 29,99 USD. It doesn't make you poor.
+
+You will also need to update your Scraping IP, in order to make it work: [Change Scraping IP](https://pastebin.com/doc_scraping_api)
+
+        python pastebin_scrape.py -v 1 -db 1 -api <YOUR_PASTE_BIN_API_KEY>
+
+### 2. pastebin_analyze.py
+
+### 3. pastebin_accumulate.py
+
+### Access Data via Flask API
+
+Finally you can either write yourself a clean data retriever or you can use this Flask API implementation here:
+
+```
+# start it in debug and verbose mode first!
+python pastebin_api.py -d -v
+```
+
+Well there is only one API method. Grab yourself a browser or use curl:
+
+```
+http://localhost:5000/api/getpastebins/<keyword>
+```
+
+The result should be a nice JSON document collection. It may be too large for a browser to handle. Anyway, this is just intended for demonstration purposes.
+
+If you want to use that data somehow, you might find the JSON format handy and start to parse it for your own purpose.
\ No newline at end of file
diff --git a/pastebin_python/__init__.py b/pastebin_python/__init__.py
index b2f7c81..1412008 100644
--- a/pastebin_python/__init__.py
+++ b/pastebin_python/__init__.py
@@ -3,7 +3,7 @@
 .. moduleauthor:: Ferdinand Silva <ferdinandsilva@ferdinandsilva.com>
 
 """
-from pastebin import PastebinPython
+from pastebin_python.pastebin import PastebinPython
 
 __version__ = "1.2"
 __app_name__ = "pastebin_python"
diff --git a/pastebin_python/pastebin.py b/pastebin_python/pastebin.py
index a95d33d..44e609d 100644
--- a/pastebin_python/pastebin.py
+++ b/pastebin_python/pastebin.py
@@ -8,9 +8,9 @@
 import re
 import requests
 from xml.dom.minidom import parseString
-from pastebin_options import OPTION_PASTE, OPTION_LIST, OPTION_TRENDS, OPTION_DELETE, OPTION_USER_DETAILS
-from pastebin_constants import PASTEBIN_API_POST_URL, PASTEBIN_API_LOGIN_URL, PASTEBIN_RAW_URL
-from pastebin_exceptions import PastebinBadRequestException, PastebinNoPastesException, PastebinFileException, PastebinHTTPErrorException
+from pastebin_python.pastebin_options import OPTION_PASTE, OPTION_LIST, OPTION_TRENDS, OPTION_DELETE, OPTION_USER_DETAILS
+from pastebin_python.pastebin_constants import PASTEBIN_API_POST_URL, PASTEBIN_API_LOGIN_URL, PASTEBIN_RAW_URL, PASTEBIN_URL_SCRAPE
+from pastebin_python.pastebin_exceptions import PastebinBadRequestException, PastebinNoPastesException, PastebinFileException, PastebinHTTPErrorException
 
 
 class PastebinPython(object):
@@ -150,9 +150,9 @@ def __processRequest(self, method, url, data):
             req = self.api_session.request(method, url, data=data)
 
         response = req.content
-        if re.search('^Bad API request', response):
+        if re.search('^Bad API request', response.decode('utf-8')):
             raise PastebinBadRequestException(response)
-        elif re.search('^No pastes found', response):
+        elif re.search('^No pastes found', response.decode('utf-8')):
             raise PastebinNoPastesException
 
         return response
@@ -394,3 +394,21 @@ def getPasteRawOutput(self, api_paste_key):
             retMsg = str(e)
 
         return retMsg.decode('utf-8')
+
+    def scrapeMostRecent(self):
+        """
+        Returns the most recent Pastebin posts. You will need to have an API Key and a whitelisted IP
+        configured on pastebin.com (https://pastebin.com/api_scraping_faq)
+
+        :return: str
+        """
+        try:
+            print("Scraping ... on: " + PASTEBIN_URL_SCRAPE + "/api_scraping.php")
+            data = self.__processRequest('GET',
+                                         PASTEBIN_URL_SCRAPE + "/api_scraping.php",
+                                         None)
+            return data
+        except PastebinBadRequestException as e:
+            retMsg = str(e)
+            print("PastebinBadRequest")
+            return None
diff --git a/pastebin_python/pastebin_constants.py b/pastebin_python/pastebin_constants.py
index afdc34d..0e1a3e2 100644
--- a/pastebin_python/pastebin_constants.py
+++ b/pastebin_python/pastebin_constants.py
@@ -5,7 +5,8 @@
 .. moduleauthor:: Ferdinand Silva <ferdinandsilva@ferdinandsilva.com>
 
 """
-PASTEBIN_URL = "http://pastebin.com/" #: The pastebin.com base url
+PASTEBIN_URL_SCRAPE = "https://scrape.pastebin.com/"
+PASTEBIN_URL = "https://pastebin.com/" #: The pastebin.com base url
 PASTEBIN_RAW_URL = "%s%s" % (PASTEBIN_URL, "raw.php?i=%s")
 PASTEBIN_API_URL = "%s%s" % (PASTEBIN_URL, "api/") #: The pastebin.com API base URL
 PASTEBIN_API_POST_URL = "%s%s" % (PASTEBIN_API_URL, "api_post.php") #: The pastebin.com API POST URL
diff --git a/pastebin_scrape.py b/pastebin_scrape.py
new file mode 100644
index 0000000..3e07171
--- /dev/null
+++ b/pastebin_scrape.py
@@ -0,0 +1,137 @@
+from pastebin_python.pastebin import PastebinPython
+from pastebin_python.pastebin_exceptions import PastebinBadRequestException
+from pymongo import MongoClient
+from copy import deepcopy
+
+import json
+import urllib.request
+import argparse
+import logging
+import time
+
+logging.basicConfig(level=logging.INFO,
+                    format='%(asctime)s %(levelname)s %(message)s',
+                    datefmt='%a, %d %b %Y %H:%M:%S')
+
+logger = logging.getLogger(__name__)
+
+
+def call_scrape_url(http://github.com/url):
+    """
+    Method is doing all the URL calling stuff.
+
+    :param url: The URL which should be called
+    :return: The content of the previously requested pastebin
+    """
+
+    try:
+        request = urllib.request.Request(url)
+        result = urllib.request.urlopen(request)
+        result_text = result.read()
+        text_encoded = result_text.decode(encoding='utf-8', errors='ignore')
+
+        return text_encoded
+
+    except json.decoder.JSONDecodeError as json_e:
+        logger.error("JSON Decoding Error ... Jumping to next element.")
+        return None
+
+
+def main(args):
+    """
+    Regular main method starts the entire process and interprets the
+    arguments.
+
+    :param args: arguments from argparse
+    :return: None
+    """
+
+    if args['v'] == 0:
+        logger.propagate = False
+    elif args['v'] == 1:
+        logger.propagate = True
+
+    logger.info("Start Pastebin Analyzer")
+
+    api_key = args['api']
+    pbin = PastebinPython(api_dev_key=api_key)
+
+    client = MongoClient(str(args['mongodbhost']), int(args['mongodbport']))
+    db = client.scrape
+    logger.info("MongoDB Connection created")
+
+    while True:
+        try:
+            data = pbin.scrapeMostRecent()
+
+            if data:
+                json_data = data.decode('utf8')  # .replace("'", '"')
+                final_data = json.loads(json_data)
+
+                # Iterate through list
+                for x in final_data:
+
+                    # Pre-create the content key-value pair
+                    x['content'] = 0
+
+                    copy_of_x = deepcopy(x)
+                    for key, value in copy_of_x.items():
+
+                        if key == "scrape_url":
+
+                            # value = scrape_url
+                            text_encoded = call_scrape_url(http://github.com/value)
+                            time.sleep(1)
+
+                            logger.info("Downloading content of " + value)
+
+                            # Add content
+                            x['content'] = text_encoded
+
+                            ## TODO: Add some identity check
+
+                            # DB Save mode args['db'] == 2
+                            if args['db'] == "1":
+                                db.pastebins.insert_one(x)
+            else:
+                logger.debug("No data arrived.")
+
+        except PastebinBadRequestException as e:
+            logger.debug("Pastebin Bad Request - You're doing it wrong")
+
+        except json.decoder.JSONDecodeError as e:
+            logger.debug("JSON Decoding Error ... 'You can't always get what you want!'")
+            continue
+        else:
+            logger.debug("No exception")
+        finally:
+            logger.info("End of Session!")
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="Pastebin Analyzer")
+
+    parser.add_argument('-db',
+                        help="If this is set entries are being added into the DB.\n"
+                             "0 = no DB entries"
+                             "1 = all Pastebin entries are written to DB",
+                        default=0)
+
+    parser.add_argument('-v',
+                        help="Verbose mode.",
+                        default=0)
+
+    parser.add_argument('-api',
+                        help="Pastebin API Key for Scraping.",
+                        required=True)
+
+    parser.add_argument('-mongodbhost',
+                        help="A string with the URL to your MongoDB Server.")
+
+    parser.add_argument('-mongodbport',
+                        help="THe port to which your MongoDB listens.",
+                        default=27017)
+
+    args = vars(parser.parse_args())
+
+    main(args)
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..e69de29

From 25e04011440112ff6ca5267a660ce8c0b8deb9dd Mon Sep 17 00:00:00 2001
From: Andre Fritsche <andre.fritsche@outlook.de>
Date: Sat, 25 Aug 2018 22:38:44 +0200
Subject: [PATCH 02/10] added the identityhash in order to make it possible to
 identify items which have already been scraped

---
 pastebin_scrape.py | 24 +++++++++++++++++-------
 1 file changed, 17 insertions(+), 7 deletions(-)

diff --git a/pastebin_scrape.py b/pastebin_scrape.py
index 3e07171..8e1c920 100644
--- a/pastebin_scrape.py
+++ b/pastebin_scrape.py
@@ -8,6 +8,7 @@
 import argparse
 import logging
 import time
+import hashlib
 
 logging.basicConfig(level=logging.INFO,
                     format='%(asctime)s %(levelname)s %(message)s',
@@ -68,12 +69,17 @@ def main(args):
                 json_data = data.decode('utf8')  # .replace("'", '"')
                 final_data = json.loads(json_data)
 
-                # Iterate through list
+                # Iterate through list (standard: 50 latest pastebins)
                 for x in final_data:
 
                     # Pre-create the content key-value pair
                     x['content'] = 0
 
+                    tohash = str(x['date']) + str(x['expire']) + str(x['full_url'] + str(x['key']) + str(x['scrape_url']) + str(x['size']) + str(x['syntax']) + str(x['title']) + str(x['user']))
+                    hobject = hashlib.sha256(tohash.encode())
+                    hash_string = str(hobject.hexdigest())
+                    x['identityhash'] = hash_string
+
                     copy_of_x = deepcopy(x)
                     for key, value in copy_of_x.items():
 
@@ -81,18 +87,22 @@ def main(args):
 
                             # value = scrape_url
                             text_encoded = call_scrape_url(http://github.com/value)
-                            time.sleep(1)
-
-                            logger.info("Downloading content of " + value)
 
                             # Add content
                             x['content'] = text_encoded
 
-                            ## TODO: Add some identity check
+                            logger.info("Downloading content of " + value)
+                            time.sleep(1)
 
-                            # DB Save mode args['db'] == 2
                             if args['db'] == "1":
-                                db.pastebins.insert_one(x)
+
+                                # save only if the hash is not found in the db
+                                if db.pastebins.find_one({ "identityhash": x['identityhash'] },{ "identityhash": 1}):
+                                    logger.info("Iteam already scraped: " + x['scrape_url'])
+                                else:
+                                    logger.info("Item added to db: " + x['scrape_url'])
+                                    db.pastebins.insert_one(x)
+
             else:
                 logger.debug("No data arrived.")
 

From 1cae778301052ee2dee2bad688c0ae7a3b0917ac Mon Sep 17 00:00:00 2001
From: Andre Fritsche <andre.fritsche@outlook.de>
Date: Sat, 25 Aug 2018 23:51:35 +0200
Subject: [PATCH 03/10] feature added where you can analyze the content of a
 pastebin and categorize based on this.

---
 pastebin_analyze.py | 86 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 86 insertions(+)
 create mode 100644 pastebin_analyze.py

diff --git a/pastebin_analyze.py b/pastebin_analyze.py
new file mode 100644
index 0000000..b7c2fda
--- /dev/null
+++ b/pastebin_analyze.py
@@ -0,0 +1,86 @@
+from pymongo import MongoClient
+import argparse
+from pprint import PrettyPrinter
+import logging
+
+pp = PrettyPrinter()
+
+logging.basicConfig(level=logging.INFO,
+                    format='%(asctime)s %(levelname)s %(message)s',
+                    datefmt='%a, %d %b %Y %H:%M:%S')
+
+logger = logging.getLogger(__name__)
+
+
+def main():
+    client = MongoClient(str(args['mongodbhost']), int(args['mongodbport']))
+    db = client.scrape
+    logger.info("MongoDB Connection created")
+
+    dbcursor = db.pastebins.find({})
+
+    # open keyword file
+    file = open(args['f'], "r")
+    search_exp = file.readlines()
+
+    # per expression -> one collection in DB
+    # Clear search_exp (whitespaces + \n)
+    for exp in search_exp:
+        clear_exp = exp.rstrip()
+
+        # is the collection not already there?
+        if clear_exp not in db.collection_names():
+            db.create_collection(str(clear_exp))
+            logger.info("MongoDB Collection new: " + str(clear_exp))
+
+    # Iterate through documents
+    for document in dbcursor:
+
+        # Iterate through dictionary
+        for key, value in document.items():
+
+            # Iterate through keywords
+            for exp in search_exp:
+                clear_exp = exp.rstrip()
+
+                if key == "title" or key == "user" or key == "content":
+
+                    if value is not None:
+                        splitted_string = value.split(' ')
+
+                        # Compare every exp keyword with every other
+                        # splitted string
+                        for string in splitted_string:
+                            if string == clear_exp:
+
+                                # Check whether the pastebin has been added already
+                                possible_pastebin = db[string].find_one({"key": document['key']})
+
+                                if possible_pastebin is None:
+                                    logger.info("Entry found for key: " + str(document['key']))
+
+                                    # Insert Data into collection
+                                    db[string].insert_one(document)
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="Pastebin Analyzer - Offline")
+
+    parser.add_argument('-f',
+                        help="Config file containing all keywords to search for. Only matching pastebins will be saved.",
+                        default="keywords.txt",
+                        required=True)
+
+    parser.add_argument('-mongodbhost',
+                        help="A string with the URL to your MongoDB Server.",
+                        default="localhost",
+                        required=True)
+
+    parser.add_argument('-mongodbport',
+                        help="THe port to which your MongoDB listens.",
+                        default=27017,
+                        required=True)
+
+    args = vars(parser.parse_args())
+
+    main()
\ No newline at end of file

From 7eb6b9342b68754ef3c9477aa13a29dac58b10ef Mon Sep 17 00:00:00 2001
From: Andre Fritsche <andre.fritsche@outlook.de>
Date: Sat, 25 Aug 2018 23:51:58 +0200
Subject: [PATCH 04/10] added some documentation for new features

---
 README.md | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/README.md b/README.md
index a27f789..d5a3d30 100644
--- a/README.md
+++ b/README.md
@@ -32,6 +32,16 @@ You will also need to update your Scraping IP, in order to make it work: [Change
 
 ### 2. pastebin_analyze.py
 
+Once step 1 has finished a cycle you want to analyze whatever there is which is of interest to you. So start writing a keywordlist. One row, one keyword.
+
+When you have finished, start the analyze module:
+
+        python pastebin_analyze.py -f <path_to_keyword_file> \
+                                   -mongodbhost <mongo_db_hostname> \   # default: localhost
+                                   -mongodbport <mongo_db_port<         # default: 27017
+                                   
+Finally it will create collections for all of the keywords it found and copy the pastebin into that collection. There might also be empty collections. Sometimes you
+just can't find anything you are searching for :-()       
 ### 3. pastebin_accumulate.py
 
 ### Access Data via Flask API

From d27a40e57dc11a88c3a374c74c31e8a78fbab7ca Mon Sep 17 00:00:00 2001
From: Andre Fritsche <andre.fritsche@outlook.de>
Date: Sun, 26 Aug 2018 17:29:50 +0200
Subject: [PATCH 05/10] added some documentation for the pastebin_scrape script

---
 README.md | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index d5a3d30..c478d48 100644
--- a/README.md
+++ b/README.md
@@ -28,7 +28,11 @@ For this one to work good you need an API key. I bought a lifetime access to the
 
 You will also need to update your Scraping IP, in order to make it work: [Change Scraping IP](https://pastebin.com/doc_scraping_api)
 
-        python pastebin_scrape.py -v 1 -db 1 -api <YOUR_PASTE_BIN_API_KEY>
+        python pastebin_scrape.py -v 1 \                                # verbose mode
+                                  -db 1 \                               # save to DB (without this, nothing will be saved)
+                                  -api <YOUR_PASTE_BIN_API_KEY> \
+                                  -mongodbhost <mongo_db_hostname> \    # default: localhost
+                                  -mongodbport <mongo_db_port> \        # default: 27017
 
 ### 2. pastebin_analyze.py
 

From d2c076655e097bdbb1773eae92ccd3557ab93d59 Mon Sep 17 00:00:00 2001
From: Andre Fritsche <andre.fritsche@outlook.de>
Date: Mon, 27 Aug 2018 12:41:25 +0200
Subject: [PATCH 06/10] implemented the flask api module and fixed some errors

---
 README.md            | 15 ++++----
 keywords_example.txt |  7 ++++
 pastebin_analyze.py  |  7 ++++
 pastebin_api.py      | 88 ++++++++++++++++++++++++++++++++++++++++++++
 requirements.txt     | 10 +++++
 5 files changed, 119 insertions(+), 8 deletions(-)
 create mode 100644 keywords_example.txt
 create mode 100644 pastebin_api.py

diff --git a/README.md b/README.md
index c478d48..2a1bc35 100644
--- a/README.md
+++ b/README.md
@@ -20,8 +20,6 @@ The results will be saved within a MongoDB collection. Another script will be tr
 
 The second step results in new collections where each keyword gets one new collection and all found pastebin entries will be copied there.
 
-The third step might be the accumulator. It identifies specific words (similiar to step 2) but also specials like eMail addresses, Bitcoin addresses, URLs, IP Adresses etc. 
-
 ### 1. pastebin_scrape.py
 
 For this one to work good you need an API key. I bought a lifetime access to the pastebin API a while ago for 29,99 USD. It doesn't make you poor.
@@ -32,7 +30,7 @@ You will also need to update your Scraping IP, in order to make it work: [Change
                                   -db 1 \                               # save to DB (without this, nothing will be saved)
                                   -api <YOUR_PASTE_BIN_API_KEY> \
                                   -mongodbhost <mongo_db_hostname> \    # default: localhost
-                                  -mongodbport <mongo_db_port> \        # default: 27017
+                                  -mongodbport <mongo_db_port>          # default: 27017
 
 ### 2. pastebin_analyze.py
 
@@ -42,19 +40,20 @@ When you have finished, start the analze module:
 
         python pastebin_analyze.py -f <path_to_keyword_file> \
                                    -mongodbhost <mongo_db_hostname> \   # default: localhost
-                                   -mongodbport <mongo_db_port<         # default: 27017
+                                   -mongodbport <mongo_db_port>         # default: 27017
                                    
 Finally it will create collections for all of the keywords it found and copy the pastebin into that collection. There might also be empty collections. Sometimes you
-just can't find anything you are searching for :-()       
-### 3. pastebin_accumulate.py
+just can't find anything you are searching for.
 
 ### Access Data via Flask API
 
 Finally you can either write yourself a clean data retriever or you can use this Flask API implementation here:
 
 ```
-# start it in debug and verbose mode first!
-python pastebin_api.py -d -v
+# start it in debug mode first!
+python pastebin_api.py -d \
+                       -mongodbhost <mongo_db_hostname> \
+                       -mongodbport <mongo_db_port> 
 ```
 
 Well there is only one API method. Grab yourself a browser or use curl:
diff --git a/keywords_example.txt b/keywords_example.txt
new file mode 100644
index 0000000..edae0b2
--- /dev/null
+++ b/keywords_example.txt
@@ -0,0 +1,7 @@
+ip
+malware
+glock
+android
+ios
+lenovo
+
diff --git a/pastebin_analyze.py b/pastebin_analyze.py
index b7c2fda..34d13e6 100644
--- a/pastebin_analyze.py
+++ b/pastebin_analyze.py
@@ -13,6 +13,13 @@
 
 
 def main():
+    """
+    starts the entire process of analyzation by creating new collections and appending new documents into existing
+    collections based on keywords.
+
+    :return:
+    """
+
     client = MongoClient(str(args['mongodbhost']), int(args['mongodbport']))
     db = client.scrape
     logger.info("MongoDB Connection created")
diff --git a/pastebin_api.py b/pastebin_api.py
new file mode 100644
index 0000000..5acfb62
--- /dev/null
+++ b/pastebin_api.py
@@ -0,0 +1,88 @@
+from flask import Flask, jsonify, make_response
+from pymongo import MongoClient
+from bson import json_util
+
+import argparse
+import json
+
+app = Flask(__name__)
+
+api_version = "1.0"
+
+
+@app.errorhandler(404)
+def not_found(error):
+    """
+    some standard error handling for unknown pages.
+
+    :param error:
+    :return:
+    """
+    return make_response(jsonify({'error': 'Notfound'}), 404)
+
+
+@app.route('/')
+def get_index():
+    """
+    standard output when nothing is set
+    :return:
+    """
+
+    basic_info = [
+        {
+            'api': '1.0',
+            'name': 'PastebinPython Flask Accessing API',
+            'author': 'Andre Fritsche / ihgalis'
+        }
+    ]
+
+    return jsonify({'basic_info': basic_info})
+
+
+@app.route('/api/getpastebins/<string:keyword>', methods=['GET'])
+def get_pastebins(keyword):
+    """
+    method gets all documents related to the specified keyword. It accesses the corresponding collections so you will
+    always get only the documents that have been identified by the pastebin_analyze.py script.
+
+    :param keyword: string
+    :return: JSON based dictionary
+    """
+
+    client = MongoClient(str(args['mongodbhost']), int(args['mongodbport']))
+    db = client.scrape
+
+    tlist = list()
+
+    dbcursor = db[keyword].find({})
+    for document in dbcursor:
+        sanitized = json.loads(json_util.dumps(document))
+        tlist.append(sanitized)
+
+    return jsonify(tlist)
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="PastebinPython Flask Accessing API")
+
+    parser.add_argument('-mongodbhost',
+                        help="A string with the URL to your MongoDB Server.",
+                        default="localhost",
+                        required=True)
+
+    parser.add_argument('-mongodbport',
+                        help="THe port to which your MongoDB listens.",
+                        default=27017,
+                        required=True)
+
+    parser.add_argument('-d',
+                        action="http://github.com/store_true",
+                        help="Debug in Flask active or not.",
+                        default=0)
+
+    args = vars(parser.parse_args())
+
+    if args['d']:
+        app.run(debug=True)
+    else:
+        app.run(debug=False)
diff --git a/requirements.txt b/requirements.txt
index e69de29..7f7754a 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -0,0 +1,10 @@
+pymongo
+argparse
+logging
+re
+time
+sys
+pprint
+flask
+bson
+requests
\ No newline at end of file

From 699179df2a1a1ae81c5db89fb2452d7c0455a436 Mon Sep 17 00:00:00 2001
From: Andre Fritsche <andre.fritsche@outlook.de>
Date: Tue, 28 Aug 2018 18:40:29 +0200
Subject: [PATCH 07/10] rearranged some files and changed to be able to package
 it for pypi

---
 pastebin_python/__init__.py                   | 18 +++++------
 pastebin_python_scraper/__init__.py           |  0
 .../pastebin_analyze.py                       |  0
 .../pastebin_api.py                           |  0
 .../pastebin_scrape.py                        |  0
 setup.py                                      | 32 +++++++++++++------
 6 files changed, 32 insertions(+), 18 deletions(-)
 create mode 100644 pastebin_python_scraper/__init__.py
 rename pastebin_analyze.py => pastebin_python_scraper/pastebin_analyze.py (100%)
 rename pastebin_api.py => pastebin_python_scraper/pastebin_api.py (100%)
 rename pastebin_scrape.py => pastebin_python_scraper/pastebin_scrape.py (100%)

diff --git a/pastebin_python/__init__.py b/pastebin_python/__init__.py
index 1412008..300b6cd 100644
--- a/pastebin_python/__init__.py
+++ b/pastebin_python/__init__.py
@@ -1,14 +1,14 @@
-"""A complete pastebin.com API wrapper for Python
-
-.. moduleauthor:: Ferdinand Silva <ferdinandsilva@ferdinandsilva.com>
+"""
+A complete pastebin.com API wrapper for Python.
+Fork from Ferdinand Silva.
 
 """
 from pastebin_python.pastebin import PastebinPython
 
-__version__ = "1.2"
-__app_name__ = "pastebin_python"
+__version__ = "1.2.1"
+__app_name__ = "pastebin_python_scraper"
 __description__ = "A complete pastebin.com API wrapper for Python"
-__author__ = "Ferdinand Silva"
-__author_email__ = "ferdinandsilva@ferdinandsilva.com"
-__app_url__ = "http://ferdinandsilva.com/"
-__download_url__ = "https://github.com/six519/PastebinPython"
\ No newline at end of file
+__author__ = "André Fritsche"
+__author_email__ = "github@andresilaghi.com"
+__app_url__ = "https://www.andresilaghi.com/"
+__download_url__ = "https://github.com/ihgalis/PastebinPython"
diff --git a/pastebin_python_scraper/__init__.py b/pastebin_python_scraper/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/pastebin_analyze.py b/pastebin_python_scraper/pastebin_analyze.py
similarity index 100%
rename from pastebin_analyze.py
rename to pastebin_python_scraper/pastebin_analyze.py
diff --git a/pastebin_api.py b/pastebin_python_scraper/pastebin_api.py
similarity index 100%
rename from pastebin_api.py
rename to pastebin_python_scraper/pastebin_api.py
diff --git a/pastebin_scrape.py b/pastebin_python_scraper/pastebin_scrape.py
similarity index 100%
rename from pastebin_scrape.py
rename to pastebin_python_scraper/pastebin_scrape.py
diff --git a/setup.py b/setup.py
index 41a9806..33b2870 100644
--- a/setup.py
+++ b/setup.py
@@ -1,24 +1,38 @@
 import pastebin_python
+import setuptools
 
-try:
-    from setuptools import setup
-except ImportError:
-    from distutils.core import setup
+with open("README.md", "r") as f:
+    long_description = f.read()
 
-setup(
+setuptools.setup(
     name=pastebin_python.__app_name__,
     version=pastebin_python.__version__,
     description=pastebin_python.__description__,
+    long_description=long_description,
+    long_description_content_type="text/markdown",
     author=pastebin_python.__author__,
     author_email=pastebin_python.__author_email__,
-    packages=['pastebin_python'],
+    packages=['pastebin_python', 'pastebin_python_scraper'],
     url=pastebin_python.__app_url__,
+    install_requires=[
+        'pymongo',
+        'argparse',
+        'logging',
+        're',
+        'time',
+        'sys',
+        'pprint',
+        'flask',
+        'bson',
+        'requests'
+    ],
+    python_requires='>=3.6',
     classifiers=(
-        'Development Status :: 4 - Beta',
+        'Development Status :: 5 - Production/Stable',
         'Intended Audience :: Developers',
         'Natural Language :: English',
         'Programming Language :: Python',
-        'License :: Freeware',
+        'License :: OSI Approved :: MIT License',
     ),
     download_url=pastebin_python.__download_url__,
-)
\ No newline at end of file
+)

From 80286f57152ea9f2f87e678fbe3983b25ad57cc0 Mon Sep 17 00:00:00 2001
From: Andre Fritsche <andre.fritsche@outlook.de>
Date: Tue, 28 Aug 2018 18:43:43 +0200
Subject: [PATCH 08/10] added some documentation for pip installation

---
 README.md | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 2a1bc35..5181ee0 100644
--- a/README.md
+++ b/README.md
@@ -4,7 +4,7 @@ The scraper here is based on the one from [six519](https://github.com/six519/Pas
 
 ## Installation
 
-Simply clone it:
+### Via Github
 
 ```
 cd /your/desired/directory
@@ -12,6 +12,14 @@ clone https://github.com/six519/PastebinPython.git
 pip install -r requirements.txt
 ```
 
+### Via PyPI
+
+In this case you have to import the corresponding classes and methods yourself in order to use them. I will try to make the package more usable this way; for now I just wanted to play around with PyPI.
+
+```
+pip install pastebin-python-scraper
+```
+
 ## Usage
 
 This fork of the PastebinPython project downloads all pastebin entries ... or well at least it starts downloading as much as it can.

From 21b379858a0e7c283cb3ee7c119a39af56aa195b Mon Sep 17 00:00:00 2001
From: Andre Fritsche <andre.fritsche@outlook.de>
Date: Sun, 23 Sep 2018 12:32:22 +0200
Subject: [PATCH 09/10] sonarqube integration

---
 sonar-project.properties | 2 ++
 1 file changed, 2 insertions(+)
 create mode 100644 sonar-project.properties

diff --git a/sonar-project.properties b/sonar-project.properties
new file mode 100644
index 0000000..a916374
--- /dev/null
+++ b/sonar-project.properties
@@ -0,0 +1,2 @@
+sonar.exclusions=doc/*
+sonar.exclusions=doc/**
\ No newline at end of file

From 5169217af378512a3699caf3beab2b2b073e74b4 Mon Sep 17 00:00:00 2001
From: Andre Fritsche <andre.fritsche@outlook.de>
Date: Sun, 20 Aug 2023 22:56:41 +0200
Subject: [PATCH 10/10] Create LICENSE

---
 LICENSE | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)
 create mode 100644 LICENSE

diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..aaffc10
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2023 Andre Fritsche
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.