From 57b33c50d51cfa631bc211f6ebce0f61f71d3738 Mon Sep 17 00:00:00 2001 From: Andre Fritsche Date: Sat, 25 Aug 2018 00:16:36 +0200 Subject: [PATCH 01/10] added the function to scrape the most recent pastebins and created the first step. basic scraping. still missing the duplicate check --- README.md | 54 ++++++++++ pastebin_python/__init__.py | 2 +- pastebin_python/pastebin.py | 28 +++++- pastebin_python/pastebin_constants.py | 3 +- pastebin_scrape.py | 137 ++++++++++++++++++++++++++ requirements.txt | 0 6 files changed, 217 insertions(+), 7 deletions(-) create mode 100644 README.md create mode 100644 pastebin_scrape.py create mode 100644 requirements.txt diff --git a/README.md b/README.md new file mode 100644 index 0000000..a27f789 --- /dev/null +++ b/README.md @@ -0,0 +1,54 @@ +# Pastebin Scraper + +The scraper here is based on the one from [six519](https://github.com/six519/PastebinPython). Thank you very much for giving us this present :-) + +## Installation + +Simply clone it: + +``` +cd /your/desired/directory +clone https://github.com/six519/PastebinPython.git +pip install -r requirements.txt +``` + +## Usage + +This fork of the PastebinPython project downloads all pastebin entries ... or well at least it starts downloading as much as it can. + +The results will be saved within a MongoDB collection. Another script will be triggered to identify keywords, which have to be provided in front. + +The second step results in new collections where each keyword gets one new collection and all found pastebin entries will be copied there. + +The third step might be the accumulator. It identifies specific words (similiar to step 2) but also specials like eMail addresses, Bitcoin addresses, URLs, IP Adresses etc. + +### 1. pastebin_scrape.py + +For this one to work good you need an API key. I bought a lifetime access to the pastebin API a while ago for 29,99 USD. It doesn't make you poor. + +You will also need to update your Scraping IP, in order to make it work: [Change Scraping IP](https://pastebin.com/doc_scraping_api) + + python pastebin_scrape.py -v 1 -db 1 -api + +### 2. pastebin_analyze.py + +### 3. pastebin_accumulate.py + +### Access Data via Flask API + +Finally you can either write yourself a clean data retriever or you can use this Flask API implementation here: + +``` +# start it in debug and verbose mode first! +python pastebin_api.py -d -v +``` + +Well there is only one API method. Grab yourself a browser or use curl: + +``` +http://localhost:5000/api/getpastebins/ +``` + +The result should be a nice JSON document collection. Maybe too large to handle for a browser. Anyway this is just intended for demonstration reasons. + +If you want to use that data somehow, you might find the JSON format handy and start to parse it for your own purpose. \ No newline at end of file diff --git a/pastebin_python/__init__.py b/pastebin_python/__init__.py index b2f7c81..1412008 100644 --- a/pastebin_python/__init__.py +++ b/pastebin_python/__init__.py @@ -3,7 +3,7 @@ .. moduleauthor:: Ferdinand Silva """ -from pastebin import PastebinPython +from pastebin_python.pastebin import PastebinPython __version__ = "1.2" __app_name__ = "pastebin_python" diff --git a/pastebin_python/pastebin.py b/pastebin_python/pastebin.py index a95d33d..44e609d 100644 --- a/pastebin_python/pastebin.py +++ b/pastebin_python/pastebin.py @@ -8,9 +8,9 @@ import re import requests from xml.dom.minidom import parseString -from pastebin_options import OPTION_PASTE, OPTION_LIST, OPTION_TRENDS, OPTION_DELETE, OPTION_USER_DETAILS -from pastebin_constants import PASTEBIN_API_POST_URL, PASTEBIN_API_LOGIN_URL, PASTEBIN_RAW_URL -from pastebin_exceptions import PastebinBadRequestException, PastebinNoPastesException, PastebinFileException, PastebinHTTPErrorException +from pastebin_python.pastebin_options import OPTION_PASTE, OPTION_LIST, OPTION_TRENDS, OPTION_DELETE, OPTION_USER_DETAILS +from pastebin_python.pastebin_constants import PASTEBIN_API_POST_URL, PASTEBIN_API_LOGIN_URL, PASTEBIN_RAW_URL, PASTEBIN_URL_SCRAPE +from pastebin_python.pastebin_exceptions import PastebinBadRequestException, PastebinNoPastesException, PastebinFileException, PastebinHTTPErrorException class PastebinPython(object): @@ -150,9 +150,9 @@ def __processRequest(self, method, url, data): req = self.api_session.request(method, url, data=data) response = req.content - if re.search('^Bad API request', response): + if re.search('^Bad API request', response.decode('utf-8')): raise PastebinBadRequestException(response) - elif re.search('^No pastes found', response): + elif re.search('^No pastes found', response.decode('utf-8')): raise PastebinNoPastesException return response @@ -394,3 +394,21 @@ def getPasteRawOutput(self, api_paste_key): retMsg = str(e) return retMsg.decode('utf-8') + + def scrapeMostRecent(self): + """ + Returns the most recent Pastebin posts. You will need to have an API Key and a whitelisted IP + configured on pastebin.com (https://pastebin.com/api_scraping_faq) + + :return: str + """ + try: + print("Scraping ... on: " + PASTEBIN_URL_SCRAPE + "/api_scraping.php") + data = self.__processRequest('GET', + PASTEBIN_URL_SCRAPE + "/api_scraping.php", + None) + return data + except PastebinBadRequestException as e: + retMsg = str(e) + print("PastebinBadRequest") + return None diff --git a/pastebin_python/pastebin_constants.py b/pastebin_python/pastebin_constants.py index afdc34d..0e1a3e2 100644 --- a/pastebin_python/pastebin_constants.py +++ b/pastebin_python/pastebin_constants.py @@ -5,7 +5,8 @@ .. moduleauthor:: Ferdinand Silva """ -PASTEBIN_URL = "http://pastebin.com/" #: The pastebin.com base url +PASTEBIN_URL_SCRAPE = "https://scrape.pastebin.com/" +PASTEBIN_URL = "https://pastebin.com/" #: The pastebin.com base url PASTEBIN_RAW_URL = "%s%s" % (PASTEBIN_URL, "raw.php?i=%s") PASTEBIN_API_URL = "%s%s" % (PASTEBIN_URL, "api/") #: The pastebin.com API base URL PASTEBIN_API_POST_URL = "%s%s" % (PASTEBIN_API_URL, "api_post.php") #: The pastebin.com API POST URL diff --git a/pastebin_scrape.py b/pastebin_scrape.py new file mode 100644 index 0000000..3e07171 --- /dev/null +++ b/pastebin_scrape.py @@ -0,0 +1,137 @@ +from pastebin_python.pastebin import PastebinPython +from pastebin_python.pastebin_exceptions import PastebinBadRequestException +from pymongo import MongoClient +from copy import deepcopy + +import json +import urllib.request +import argparse +import logging +import time + +logging.basicConfig(level=logging.INFO, + format='%(asctime)s %(levelname)s %(message)s', + datefmt='%a, %d %b %Y %H:%M:%S') + +logger = logging.getLogger(__name__) + + +def call_scrape_http://github.com/url(url): + """ + Method is doing all the URL calling stuff. + + :param url: The URL which should be called + :return: The content of the previously requested pastebin + """ + + try: + request = urllib.request.Request(url) + result = urllib.request.urlopen(request) + result_text = result.read() + text_encoded = result_text.decode(encoding='utf-8', errors='ignore') + + return text_encoded + + except json.decoder.JSONDecodeError as json_e: + logger.error("JSON Decoding Error ... Jumping to next element.") + return None + + +def main(args): + """ + Regular main method starts the entire process and interprets the + arguments. + + :param args: arguments from argparse + :return: None + """ + + if args['v'] == 0: + logger.propagate = False + elif args['v'] == 1: + logger.propagate = True + + logger.info("Start Pastebin Analyzer") + + api_key = args['api'] + pbin = PastebinPython(api_dev_key=api_key) + + client = MongoClient(str(args['mongodbhost']), int(args['mongodbport'])) + db = client.scrape + logger.info("MongoDB Connection created") + + while True: + try: + data = pbin.scrapeMostRecent() + + if data: + json_data = data.decode('utf8') # .replace("'", '"') + final_data = json.loads(json_data) + + # Iterate through list + for x in final_data: + + # Pre-create the content key-value pair + x['content'] = 0 + + copy_of_x = deepcopy(x) + for key, value in copy_of_x.items(): + + if key == "scrape_url": + + # value = scrape_url + text_encoded = call_scrape_url(http://github.com/value) + time.sleep(1) + + logger.info("Downloading content of " + value) + + # Add content + x['content'] = text_encoded + + ## TODO: Add some identity check + + # DB Save mode args['db'] == 2 + if args['db'] == "1": + db.pastebins.insert_one(x) + else: + logger.debug("No data arrived.") + + except PastebinBadRequestException as e: + logger.debug("Pastebin Bad Request - You're doing it wrong") + + except json.decoder.JSONDecodeError as e: + logger.debug("JSON Decoding Error ... 'You can't always get what you want!'") + continue + else: + logger.debug("No exception") + finally: + logger.info("End of Session!") + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Pastebin Analyzer") + + parser.add_argument('-db', + help="If this is set entries are being added into the DB.\n" + "0 = no DB entries" + "1 = all Pastebin entries are written to DB", + default=0) + + parser.add_argument('-v', + help="Verbose mode.", + default=0) + + parser.add_argument('-api', + help="Pastebin API Key for Scraping.", + required=True) + + parser.add_argument('-mongodbhost', + help="A string with the URL to your MongoDB Server.") + + parser.add_argument('-mongodbport', + help="THe port to which your MongoDB listens.", + default=27017) + + args = vars(parser.parse_args()) + + main(args) diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..e69de29 From 25e04011440112ff6ca5267a660ce8c0b8deb9dd Mon Sep 17 00:00:00 2001 From: Andre Fritsche Date: Sat, 25 Aug 2018 22:38:44 +0200 Subject: [PATCH 02/10] added the identitiyhash in order to make it possible to identify items which bave been already scraped --- pastebin_scrape.py | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/pastebin_scrape.py b/pastebin_scrape.py index 3e07171..8e1c920 100644 --- a/pastebin_scrape.py +++ b/pastebin_scrape.py @@ -8,6 +8,7 @@ import argparse import logging import time +import hashlib logging.basicConfig(level=logging.INFO, format='%(asctime)s %(levelname)s %(message)s', @@ -68,12 +69,17 @@ def main(args): json_data = data.decode('utf8') # .replace("'", '"') final_data = json.loads(json_data) - # Iterate through list + # Iterate through list (standard: 50 latest pastebins) for x in final_data: # Pre-create the content key-value pair x['content'] = 0 + tohash = str(x['date']) + str(x['expire']) + str(x['full_url'] + str(x['key']) + str(x['scrape_url']) + str(x['size']) + str(x['syntax']) + str(x['title']) + str(x['user'])) + hobject = hashlib.sha256(tohash.encode()) + hash_string = str(hobject.hexdigest()) + x['identityhash'] = hash_string + copy_of_x = deepcopy(x) for key, value in copy_of_x.items(): @@ -81,18 +87,22 @@ def main(args): # value = scrape_url text_encoded = call_scrape_url(http://github.com/value) - time.sleep(1) - - logger.info("Downloading content of " + value) # Add content x['content'] = text_encoded - ## TODO: Add some identity check + logger.info("Downloading content of " + value) + time.sleep(1) - # DB Save mode args['db'] == 2 if args['db'] == "1": - db.pastebins.insert_one(x) + + # save only if the hash is not found in the db + if db.pastebins.find_one({ "identityhash": x['identityhash'] },{ "identityhash": 1}): + logger.info("Iteam already scraped: " + x['scrape_url']) + else: + logger.info("Item added to db: " + x['scrape_url']) + db.pastebins.insert_one(x) + else: logger.debug("No data arrived.") From 1cae778301052ee2dee2bad688c0ae7a3b0917ac Mon Sep 17 00:00:00 2001 From: Andre Fritsche Date: Sat, 25 Aug 2018 23:51:35 +0200 Subject: [PATCH 03/10] feature added where you can analyze the content of a pastebin and categorize based on this. --- pastebin_analyze.py | 86 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 86 insertions(+) create mode 100644 pastebin_analyze.py diff --git a/pastebin_analyze.py b/pastebin_analyze.py new file mode 100644 index 0000000..b7c2fda --- /dev/null +++ b/pastebin_analyze.py @@ -0,0 +1,86 @@ +from pymongo import MongoClient +import argparse +from pprint import PrettyPrinter +import logging + +pp = PrettyPrinter() + +logging.basicConfig(level=logging.INFO, + format='%(asctime)s %(levelname)s %(message)s', + datefmt='%a, %d %b %Y %H:%M:%S') + +logger = logging.getLogger(__name__) + + +def main(): + client = MongoClient(str(args['mongodbhost']), int(args['mongodbport'])) + db = client.scrape + logger.info("MongoDB Connection created") + + dbcursor = db.pastebins.find({}) + + # open keyword file + file = open(args['f'], "r") + search_exp = file.readlines() + + # per expression -> one collection in DB + # Clear search_exp (whitespaces + \n) + for exp in search_exp: + clear_exp = exp.rstrip() + + # is the collection not already there? + if clear_exp not in db.collection_names(): + db.create_collection(str(clear_exp)) + logger.info("MongoDB Collection new: " + str(clear_exp)) + + # Iterate through documents + for document in dbcursor: + + # Iterate through dictionary + for key, value in document.items(): + + # Iterate through keywords + for exp in search_exp: + clear_exp = exp.rstrip() + + if key == "title" or key == "user" or key == "content": + + if value is not None: + splitted_string = value.split(' ') + + # Compare every exp keyword with every other + # splitted string + for string in splitted_string: + if string == clear_exp: + + # Check whether the pastebin has been added already + possible_pastebin = db[string].find_one({"key": document['key']}) + + if possible_pastebin is None: + logger.info("Entry found for key: " + str(document['key'])) + + # Insert Data into collection + db[string].insert_one(document) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Pastebin Analyzer - Offline") + + parser.add_argument('-f', + help="Config file containing all keywords to search for. Only matching pastebins will be saved.", + default="keywords.txt", + required=True) + + parser.add_argument('-mongodbhost', + help="A string with the URL to your MongoDB Server.", + default="localhost", + required=True) + + parser.add_argument('-mongodbport', + help="THe port to which your MongoDB listens.", + default=27017, + required=True) + + args = vars(parser.parse_args()) + + main() \ No newline at end of file From 7eb6b9342b68754ef3c9477aa13a29dac58b10ef Mon Sep 17 00:00:00 2001 From: Andre Fritsche Date: Sat, 25 Aug 2018 23:51:58 +0200 Subject: [PATCH 04/10] added some documentation for new features --- README.md | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/README.md b/README.md index a27f789..d5a3d30 100644 --- a/README.md +++ b/README.md @@ -32,6 +32,16 @@ You will also need to update your Scraping IP, in order to make it work: [Change ### 2. pastebin_analyze.py +Once step 1 has finished a cycle you want to analyze whatever there is which is of interest to you. So start writing a keywordlist. One row, one keyword. + +When you have finished, start the analze module: + + python pastebin_analyze.py -f \ + -mongodbhost \ # default: localhost + -mongodbport Date: Sun, 26 Aug 2018 17:29:50 +0200 Subject: [PATCH 05/10] added some documentation for the pastebin_scrape script --- README.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index d5a3d30..c478d48 100644 --- a/README.md +++ b/README.md @@ -28,7 +28,11 @@ For this one to work good you need an API key. I bought a lifetime access to the You will also need to update your Scraping IP, in order to make it work: [Change Scraping IP](https://pastebin.com/doc_scraping_api) - python pastebin_scrape.py -v 1 -db 1 -api + python pastebin_scrape.py -v 1 \ # verbose mode + -db 1 \ # save to DB (without this, nothing will be saved) + -api \ + -mongodbhost \ # default: localhost + -mongodbport \ # default: 27017 ### 2. pastebin_analyze.py From d2c076655e097bdbb1773eae92ccd3557ab93d59 Mon Sep 17 00:00:00 2001 From: Andre Fritsche Date: Mon, 27 Aug 2018 12:41:25 +0200 Subject: [PATCH 06/10] implemented the flask api module and fixed some errors --- README.md | 15 ++++---- keywords_example.txt | 7 ++++ pastebin_analyze.py | 7 ++++ pastebin_api.py | 88 ++++++++++++++++++++++++++++++++++++++++++++ requirements.txt | 10 +++++ 5 files changed, 119 insertions(+), 8 deletions(-) create mode 100644 keywords_example.txt create mode 100644 pastebin_api.py diff --git a/README.md b/README.md index c478d48..2a1bc35 100644 --- a/README.md +++ b/README.md @@ -20,8 +20,6 @@ The results will be saved within a MongoDB collection. Another script will be tr The second step results in new collections where each keyword gets one new collection and all found pastebin entries will be copied there. -The third step might be the accumulator. It identifies specific words (similiar to step 2) but also specials like eMail addresses, Bitcoin addresses, URLs, IP Adresses etc. - ### 1. pastebin_scrape.py For this one to work good you need an API key. I bought a lifetime access to the pastebin API a while ago for 29,99 USD. It doesn't make you poor. @@ -32,7 +30,7 @@ You will also need to update your Scraping IP, in order to make it work: [Change -db 1 \ # save to DB (without this, nothing will be saved) -api \ -mongodbhost \ # default: localhost - -mongodbport \ # default: 27017 + -mongodbport # default: 27017 ### 2. pastebin_analyze.py @@ -42,19 +40,20 @@ When you have finished, start the analze module: python pastebin_analyze.py -f \ -mongodbhost \ # default: localhost - -mongodbport # default: 27017 Finally it will create collections for all of the keywords it found and copy the pastebin into that collection. There might also be empty collections. Sometimes you -just can't find anything you are searching for :-() -### 3. pastebin_accumulate.py +just can't find anything you are searching for. ### Access Data via Flask API Finally you can either write yourself a clean data retriever or you can use this Flask API implementation here: ``` -# start it in debug and verbose mode first! -python pastebin_api.py -d -v +# start it in debug mode first! +python pastebin_api.py -d \ + -mongodbhost \ + -mongodbport ``` Well there is only one API method. Grab yourself a browser or use curl: diff --git a/keywords_example.txt b/keywords_example.txt new file mode 100644 index 0000000..edae0b2 --- /dev/null +++ b/keywords_example.txt @@ -0,0 +1,7 @@ +ip +malware +glock +android +ios +lenovo + diff --git a/pastebin_analyze.py b/pastebin_analyze.py index b7c2fda..34d13e6 100644 --- a/pastebin_analyze.py +++ b/pastebin_analyze.py @@ -13,6 +13,13 @@ def main(): + """ + starts the entire process of analyzation by creating new collections and appending new documents into existing + collections based on keywords. + + :return: + """ + client = MongoClient(str(args['mongodbhost']), int(args['mongodbport'])) db = client.scrape logger.info("MongoDB Connection created") diff --git a/pastebin_api.py b/pastebin_api.py new file mode 100644 index 0000000..5acfb62 --- /dev/null +++ b/pastebin_api.py @@ -0,0 +1,88 @@ +from flask import Flask, jsonify, make_response +from pymongo import MongoClient +from bson import json_util + +import argparse +import json + +app = Flask(__name__) + +api_version = "1.0" + + +@app.errorhandler(404) +def not_found(error): + """ + some standard error handling for unknown pages. + + :param error: + :return: + """ + return make_response(jsonify({'error': 'Notfound'}), 404) + + +@app.route('/') +def get_index(): + """ + standard output when nothing is set + :return: + """ + + basic_info = [ + { + 'api': '1.0', + 'name': 'PastebinPython Flask Accessing API', + 'author': 'Andre Fritsche / ihgalis' + } + ] + + return jsonify({'basic_info': basic_info}) + + +@app.route('/api/getpastebins/', methods=['GET']) +def get_pastebins(keyword): + """ + method gets all documents related to the specified keyword. It accesses the corresponding collections so you will + always get only the documents that have been identified by the pastebin_analyze.py script. + + :param keyword: string + :return: JSON based dictionary + """ + + client = MongoClient(str(args['mongodbhost']), int(args['mongodbport'])) + db = client.scrape + + tlist = list() + + dbcursor = db[keyword].find({}) + for document in dbcursor: + sanitized = json.loads(json_util.dumps(document)) + tlist.append(sanitized) + + return jsonify(tlist) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="PastebinPython Flask Accessing API") + + parser.add_argument('-mongodbhost', + help="A string with the URL to your MongoDB Server.", + default="localhost", + required=True) + + parser.add_argument('-mongodbport', + help="THe port to which your MongoDB listens.", + default=27017, + required=True) + + parser.add_argument('-d', + action="http://github.com/store_true", + help="Debug in Flask active or not.", + default=0) + + args = vars(parser.parse_args()) + + if args['d']: + app.run(debug=True) + else: + app.run(debug=False) diff --git a/requirements.txt b/requirements.txt index e69de29..7f7754a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -0,0 +1,10 @@ +pymongo +argparse +logging +re +time +sys +pprint +flask +bson +requests \ No newline at end of file From 699179df2a1a1ae81c5db89fb2452d7c0455a436 Mon Sep 17 00:00:00 2001 From: Andre Fritsche Date: Tue, 28 Aug 2018 18:40:29 +0200 Subject: [PATCH 07/10] rearranged some files and changed to be able to package it for pypi --- pastebin_python/__init__.py | 18 +++++------ pastebin_python_scraper/__init__.py | 0 .../pastebin_analyze.py | 0 .../pastebin_api.py | 0 .../pastebin_scrape.py | 0 setup.py | 32 +++++++++++++------ 6 files changed, 32 insertions(+), 18 deletions(-) create mode 100644 pastebin_python_scraper/__init__.py rename pastebin_analyze.py => pastebin_python_scraper/pastebin_analyze.py (100%) rename pastebin_api.py => pastebin_python_scraper/pastebin_api.py (100%) rename pastebin_scrape.py => pastebin_python_scraper/pastebin_scrape.py (100%) diff --git a/pastebin_python/__init__.py b/pastebin_python/__init__.py index 1412008..300b6cd 100644 --- a/pastebin_python/__init__.py +++ b/pastebin_python/__init__.py @@ -1,14 +1,14 @@ -"""A complete pastebin.com API wrapper for Python - -.. moduleauthor:: Ferdinand Silva +""" +A complete pastebin.com API wrapper for Python. +Fork from Ferdinand Silva. """ from pastebin_python.pastebin import PastebinPython -__version__ = "1.2" -__app_name__ = "pastebin_python" +__version__ = "1.2.1" +__app_name__ = "pastebin_python_scraper" __description__ = "A complete pastebin.com API wrapper for Python" -__author__ = "Ferdinand Silva" -__author_email__ = "ferdinandsilva@ferdinandsilva.com" -__app_url__ = "http://ferdinandsilva.com/" -__download_url__ = "https://github.com/six519/PastebinPython" \ No newline at end of file +__author__ = "André Fritsche" +__author_email__ = "github@andresilaghi.com" +__app_url__ = "https://www.andresilaghi.com/" +__download_url__ = "https://github.com/ihgalis/PastebinPython" diff --git a/pastebin_python_scraper/__init__.py b/pastebin_python_scraper/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/pastebin_analyze.py b/pastebin_python_scraper/pastebin_analyze.py similarity index 100% rename from pastebin_analyze.py rename to pastebin_python_scraper/pastebin_analyze.py diff --git a/pastebin_api.py b/pastebin_python_scraper/pastebin_api.py similarity index 100% rename from pastebin_api.py rename to pastebin_python_scraper/pastebin_api.py diff --git a/pastebin_scrape.py b/pastebin_python_scraper/pastebin_scrape.py similarity index 100% rename from pastebin_scrape.py rename to pastebin_python_scraper/pastebin_scrape.py diff --git a/setup.py b/setup.py index 41a9806..33b2870 100644 --- a/setup.py +++ b/setup.py @@ -1,24 +1,38 @@ import pastebin_python +import setuptools -try: - from setuptools import setup -except ImportError: - from distutils.core import setup +with open("README.md", "r") as f: + long_description = f.read() -setup( +setuptools.setup( name=pastebin_python.__app_name__, version=pastebin_python.__version__, description=pastebin_python.__description__, + long_description=long_description, + long_description_content_type="text/markdown", author=pastebin_python.__author__, author_email=pastebin_python.__author_email__, - packages=['pastebin_python'], + packages=['pastebin_python', 'pastebin_python_scraper'], url=pastebin_python.__app_url__, + install_requires=[ + 'pymongo', + 'argparse', + 'logging', + 're', + 'time', + 'sys', + 'pprint', + 'flask', + 'bson', + 'requests' + ], + python_requires='>=3.6', classifiers=( - 'Development Status :: 4 - Beta', + 'Development Status :: 5 - Production/Stable', 'Intended Audience :: Developers', 'Natural Language :: English', 'Programming Language :: Python', - 'License :: Freeware', + 'License :: OSI Approved :: MIT License', ), download_url=pastebin_python.__download_url__, -) \ No newline at end of file +) From 80286f57152ea9f2f87e678fbe3983b25ad57cc0 Mon Sep 17 00:00:00 2001 From: Andre Fritsche Date: Tue, 28 Aug 2018 18:43:43 +0200 Subject: [PATCH 08/10] added some documentation for pip installation --- README.md | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 2a1bc35..5181ee0 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ The scraper here is based on the one from [six519](https://github.com/six519/Pas ## Installation -Simply clone it: +### Via Github ``` cd /your/desired/directory @@ -12,6 +12,14 @@ clone https://github.com/six519/PastebinPython.git pip install -r requirements.txt ``` +### Via pypi + +In this case you have to import the corresponding classes and methods to use it. I will try and make it somehow more usable this way. Just wanted to play around with pypi at first. + +``` +pip install pastebin-python-scraper +``` + ## Usage This fork of the PastebinPython project downloads all pastebin entries ... or well at least it starts downloading as much as it can. From 21b379858a0e7c283cb3ee7c119a39af56aa195b Mon Sep 17 00:00:00 2001 From: Andre Fritsche Date: Sun, 23 Sep 2018 12:32:22 +0200 Subject: [PATCH 09/10] sonarqube integration --- sonar-project.properties | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 sonar-project.properties diff --git a/sonar-project.properties b/sonar-project.properties new file mode 100644 index 0000000..a916374 --- /dev/null +++ b/sonar-project.properties @@ -0,0 +1,2 @@ +sonar.exclusions=doc/* +sonar.exclusions=doc/** \ No newline at end of file From 5169217af378512a3699caf3beab2b2b073e74b4 Mon Sep 17 00:00:00 2001 From: Andre Fritsche Date: Sun, 20 Aug 2023 22:56:41 +0200 Subject: [PATCH 10/10] Create LICENSE --- LICENSE | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 LICENSE diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..aaffc10 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2023 Andre Fritsche + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE.