From a6db9675c2a4fed744c3df54c2dd9f475447dc6c Mon Sep 17 00:00:00 2001 From: Volcyy Date: Sun, 27 Aug 2017 20:27:56 +0200 Subject: [PATCH 01/51] Update to work with Python 3 --- duckduckgo.py | 77 +++++++++++++++++++++++++-------------------------- 1 file changed, 38 insertions(+), 39 deletions(-) diff --git a/duckduckgo.py b/duckduckgo.py index 4f81e06..f4e56a7 100755 --- a/duckduckgo.py +++ b/duckduckgo.py @@ -5,15 +5,17 @@ # # See LICENSE for terms of usage, modification and redistribution. -import urllib -import urllib2 +import urllib.error +import urllib.request +import urllib.parse import json as j import sys __version__ = 0.242 -def query(query, useragent='python-duckduckgo '+str(__version__), safesearch=True, html=False, meanings=True, **kwargs): +def query(query, useragent='python-duckduckgo ' + str(__version__), + safesearch=True, html=False, meanings=True, **kwargs): """ Query DuckDuckGo, returning a Results object. @@ -45,14 +47,14 @@ def query(query, useragent='python-duckduckgo '+str(__version__), safesearch=Tru 'no_redirect': '1', 'no_html': html, 'd': meanings, - } + } params.update(kwargs) - encparams = urllib.urlencode(params) + encparams = urllib.parse.urlencode(params) url = 'http://api.duckduckgo.com/?' + encparams - request = urllib2.Request(url, headers={'User-Agent': useragent}) - response = urllib2.urlopen(request) - json = j.loads(response.read()) + request = urllib.request.Request(url, headers={'User-Agent': useragent}) + response = urllib.request.urlopen(request) + json = j.loads(response.read().decode("utf-8")) response.close() return Results(json) @@ -61,25 +63,26 @@ def query(query, useragent='python-duckduckgo '+str(__version__), safesearch=Tru class Results(object): def __init__(self, json): + json_type = json.get('Type', '') self.type = {'A': 'answer', 'D': 'disambiguation', 'C': 'category', 'N': 'name', - 'E': 'exclusive', '': 'nothing'}.get(json.get('Type',''), '') + 'E': 'exclusive', '': 'nothing'}.get(json_type, '') self.json = json - self.api_version = None # compat + self.api_version = None # compat self.heading = json.get('Heading', '') - self.results = [Result(elem) for elem in json.get('Results',[])] + self.results = [Result(elem) for elem in json.get('Results', [])] self.related = [Result(elem) for elem in - json.get('RelatedTopics',[])] + json.get('RelatedTopics', [])] self.abstract = Abstract(json) self.redirect = Redirect(json) self.definition = Definition(json) self.answer = Answer(json) - self.image = Image({'Result':json.get('Image','')}) + self.image = Image({'Result': json.get('Image', '')}) class Abstract(object): @@ -90,11 +93,13 @@ def __init__(self, json): self.url = json.get('AbstractURL', '') self.source = json.get('AbstractSource') + class Redirect(object): def __init__(self, json): self.url = json.get('Redirect', '') + class Result(object): def __init__(self, json): @@ -127,21 +132,24 @@ def __init__(self, json): self.text = json.get('Answer') self.type = json.get('AnswerType', '') + class Definition(object): def __init__(self, json): - self.text = json.get('Definition','') + self.text = json.get('Definition', '') self.url = json.get('DefinitionURL') self.source = json.get('DefinitionSource') -def get_zci(q, web_fallback=True, priority=['answer', 'abstract', 'related.0', 'definition'], urls=True, **kwargs): +def get_zci(q, web_fallback=True, + priority=['answer', 'abstract', 'related.0', 'definition'], + urls=True, **kwargs): '''A helper method to get a single (and hopefully the best) ZCI result. priority=list can be used to set the order in which fields will be checked for answers. Use web_fallback=True to fall back to grabbing the first web result. - passed to query. This method will fall back to 'Sorry, no results.' + passed to query. This method will fall back to 'Sorry, no results.' if it cannot find anything.''' - ddg = query('\\'+q, **kwargs) + ddg = query('\\' + q, **kwargs) response = '' for p in priority: @@ -150,15 +158,20 @@ def get_zci(q, web_fallback=True, priority=['answer', 'abstract', 'related.0', ' index = int(ps[1]) if len(ps) > 1 else None result = getattr(ddg, type) - if index is not None: - if not hasattr(result, '__getitem__'): raise TypeError('%s field is not indexable' % type) + if index is not None: + if not hasattr(result, '__getitem__'): + raise TypeError('%s field is not indexable' % type) result = result[index] if len(result) > index else None - if not result: continue + if not result: + continue - if result.text: response = result.text - if result.text and hasattr(result,'url') and urls: - if result.url: response += ' (%s)' % result.url - if response: break + if result.text: + response = result.text + if result.text and hasattr(result, 'url') and urls: + if result.url: + response += ' (%s)' % result.url + if response: + break # if there still isn't anything, try to get the first web result if not response and web_fallback: @@ -166,21 +179,7 @@ def get_zci(q, web_fallback=True, priority=['answer', 'abstract', 'related.0', ' response = ddg.redirect.url # final fallback - if not response: + if not response: response = 'Sorry, no results.' return response - -def main(): - if len(sys.argv) > 1: - q = query(' '.join(sys.argv[1:])) - keys = q.json.keys() - keys.sort() - for key in keys: - sys.stdout.write(key) - if type(q.json[key]) in [str,unicode,int]: print(':', q.json[key]) - else: - sys.stdout.write('\n') - for i in q.json[key]: print('\t',i) - else: - print('Usage: %s [query]' % sys.argv[0]) From 7bc5f15740f03862376eca9e0c329a7e4602823b Mon Sep 17 00:00:00 2001 From: Volcyy Date: Sat, 9 Sep 2017 20:53:03 +0200 Subject: [PATCH 02/51] Move to asynchronous implementation --- duckduckgo.py | 114 ++++++++++++++++++++++---------------------------- setup.py | 8 ++-- 2 files changed, 56 insertions(+), 66 deletions(-) diff --git a/duckduckgo.py b/duckduckgo.py index f4e56a7..a9dbd10 100755 --- a/duckduckgo.py +++ b/duckduckgo.py @@ -5,31 +5,21 @@ # # See LICENSE for terms of usage, modification and redistribution. -import urllib.error -import urllib.request -import urllib.parse -import json as j -import sys +import urllib.parse + +import aiohttp __version__ = 0.242 -def query(query, useragent='python-duckduckgo ' + str(__version__), - safesearch=True, html=False, meanings=True, **kwargs): +async def query(query, useragent='python-duckduckgo ' + str(__version__), + safesearch=True, html=False, meanings=True, **kwargs): """ Query DuckDuckGo, returning a Results object. - Here's a query that's unlikely to change: - - >>> result = query('1 + 1') - >>> result.type - 'nothing' - >>> result.answer.text - '1 + 1 = 2' - >>> result.answer.type - 'calc' + The API is queried asynchronously. - Keword arguments: + Keyword arguments: useragent: UserAgent to use while querying. Default: "python-duckduckgo %d" (str) safesearch: True for on, False for off. Default: True (bool) html: True to allow HTML in output. Default: False (bool) @@ -51,67 +41,65 @@ def query(query, useragent='python-duckduckgo ' + str(__version__), params.update(kwargs) encparams = urllib.parse.urlencode(params) url = 'http://api.duckduckgo.com/?' + encparams - - request = urllib.request.Request(url, headers={'User-Agent': useragent}) - response = urllib.request.urlopen(request) - json = j.loads(response.read().decode("utf-8")) - response.close() - - return Results(json) + async with aiohttp.ClientSession() as cs: + async with cs.get('http://api.duckduckgo.com/?' + encparams, + headers={'User-Agent': useragent}) as r: + response_json = await r.json() + return Results(response_json) class Results(object): - def __init__(self, json): - json_type = json.get('Type', '') + def __init__(self, data): + json_type = data.get('Type', '') self.type = {'A': 'answer', 'D': 'disambiguation', 'C': 'category', 'N': 'name', 'E': 'exclusive', '': 'nothing'}.get(json_type, '') - self.json = json + self.json = data self.api_version = None # compat - self.heading = json.get('Heading', '') + self.heading = data.get('Heading', '') - self.results = [Result(elem) for elem in json.get('Results', [])] + self.results = [Result(elem) for elem in data.get('Results', [])] self.related = [Result(elem) for elem in - json.get('RelatedTopics', [])] + data.get('RelatedTopics', [])] - self.abstract = Abstract(json) - self.redirect = Redirect(json) - self.definition = Definition(json) - self.answer = Answer(json) + self.abstract = Abstract(data) + self.redirect = Redirect(data) + self.definition = Definition(data) + self.answer = Answer(data) - self.image = Image({'Result': json.get('Image', '')}) + self.image = Image({'Result': data.get('Image', '')}) class Abstract(object): - def __init__(self, json): - self.html = json.get('Abstract', '') - self.text = json.get('AbstractText', '') - self.url = json.get('AbstractURL', '') - self.source = json.get('AbstractSource') + def __init__(self, data): + self.html = data.get('Abstract', '') + self.text = data.get('AbstractText', '') + self.url = data.get('AbstractURL', '') + self.source = data.get('AbstractSource') class Redirect(object): - def __init__(self, json): - self.url = json.get('Redirect', '') + def __init__(self, data): + self.url = data.get('Redirect', '') class Result(object): - def __init__(self, json): - self.topics = json.get('Topics', []) + def __init__(self, data): + self.topics = data.get('Topics', []) if self.topics: self.topics = [Result(t) for t in self.topics] return - self.html = json.get('Result') - self.text = json.get('Text') - self.url = json.get('FirstURL') + self.html = data.get('Result') + self.text = data.get('Text') + self.url = data.get('FirstURL') - icon_json = json.get('Icon') + icon_json = data.get('Icon') if icon_json is not None: self.icon = Image(icon_json) else: @@ -120,36 +108,36 @@ def __init__(self, json): class Image(object): - def __init__(self, json): - self.url = json.get('Result') - self.height = json.get('Height', None) - self.width = json.get('Width', None) + def __init__(self, data): + self.url = data.get('Result') + self.height = data.get('Height', None) + self.width = data.get('Width', None) class Answer(object): - def __init__(self, json): - self.text = json.get('Answer') - self.type = json.get('AnswerType', '') + def __init__(self, data): + self.text = data.get('Answer') + self.type = data.get('AnswerType', '') class Definition(object): - def __init__(self, json): - self.text = json.get('Definition', '') - self.url = json.get('DefinitionURL') - self.source = json.get('DefinitionSource') + def __init__(self, data): + self.text = data.get('Definition', '') + self.url = data.get('DefinitionURL') + self.source = data.get('DefinitionSource') -def get_zci(q, web_fallback=True, - priority=['answer', 'abstract', 'related.0', 'definition'], - urls=True, **kwargs): +async def get_zci(q, web_fallback=True, + priority=['answer', 'abstract', 'related.0', 'definition'], + urls=True, **kwargs): '''A helper method to get a single (and hopefully the best) ZCI result. priority=list can be used to set the order in which fields will be checked for answers. Use web_fallback=True to fall back to grabbing the first web result. passed to query. This method will fall back to 'Sorry, no results.' if it cannot find anything.''' - ddg = query('\\' + q, **kwargs) + ddg = await query('\\' + q, **kwargs) response = '' for p in priority: diff --git a/setup.py b/setup.py index 31d578a..4cf20c8 100644 --- a/setup.py +++ b/setup.py @@ -1,7 +1,8 @@ from setuptools import setup from duckduckgo import __version__ -long_description = open('README.rst').read() +with open('README.rst') as f: + long_description = f.read() setup(name='duckduckgo2', version=__version__, @@ -20,5 +21,6 @@ "Programming Language :: Python", "Topic :: Internet :: WWW/HTTP :: Indexing/Search", ], - entry_points={'console_scripts':['ddg = duckduckgo:main']}, - ) + entry_points={'console_scripts': ['ddg = duckduckgo:main']}, + install_requires=['aiohttp'] +) From 9c4062d799ef1cb30ef7a263388ed296d1c2492e Mon Sep 17 00:00:00 2001 From: Volcyy Date: Sat, 9 Sep 2017 20:56:29 +0200 Subject: [PATCH 03/51] Print response for debugging purposes --- duckduckgo.py | 1 + 1 file changed, 1 insertion(+) diff --git a/duckduckgo.py b/duckduckgo.py index a9dbd10..b553a0c 100755 --- a/duckduckgo.py +++ b/duckduckgo.py @@ -44,6 +44,7 @@ async def query(query, useragent='python-duckduckgo ' + str(__version__), async with aiohttp.ClientSession() as cs: async with cs.get('http://api.duckduckgo.com/?' + encparams, headers={'User-Agent': useragent}) as r: + print(r) response_json = await r.json() return Results(response_json) From 838090c4fd152147a48036893a5859f2dafad88f Mon Sep 17 00:00:00 2001 From: Volcyy Date: Sat, 9 Sep 2017 21:03:08 +0200 Subject: [PATCH 04/51] Hopefully fixes Content-Type issues. --- duckduckgo.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/duckduckgo.py b/duckduckgo.py index b553a0c..bedd217 100755 --- a/duckduckgo.py +++ b/duckduckgo.py @@ -44,8 +44,7 @@ async def query(query, useragent='python-duckduckgo ' + str(__version__), async with aiohttp.ClientSession() as cs: async with cs.get('http://api.duckduckgo.com/?' + encparams, headers={'User-Agent': useragent}) as r: - print(r) - response_json = await r.json() + response_json = await r.json(content_type='application/x-javascript') return Results(response_json) From eacf09f810b2901ba876b424b087a08173d3513f Mon Sep 17 00:00:00 2001 From: Volcyy Date: Sat, 9 Sep 2017 21:11:34 +0200 Subject: [PATCH 05/51] Raise and print when JSON parsing failed --- duckduckgo.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/duckduckgo.py b/duckduckgo.py index bedd217..63a44ba 100755 --- a/duckduckgo.py +++ b/duckduckgo.py @@ -45,6 +45,11 @@ async def query(query, useragent='python-duckduckgo ' + str(__version__), async with cs.get('http://api.duckduckgo.com/?' + encparams, headers={'User-Agent': useragent}) as r: response_json = await r.json(content_type='application/x-javascript') + + if response_json is None: + print(r) + print(await r.read()) + raise ValueError("Failed to decode JSON response") return Results(response_json) From b51ce799a1b65d0231738b883964ab5db3329014 Mon Sep 17 00:00:00 2001 From: Volcyy Date: Sat, 9 Sep 2017 21:25:19 +0200 Subject: [PATCH 06/51] Follow redirects --- duckduckgo.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/duckduckgo.py b/duckduckgo.py index 63a44ba..cb13aba 100755 --- a/duckduckgo.py +++ b/duckduckgo.py @@ -17,7 +17,7 @@ async def query(query, useragent='python-duckduckgo ' + str(__version__), """ Query DuckDuckGo, returning a Results object. - The API is queried asynchronously. + The API is queried asynchronously, and redirects are followed. Keyword arguments: useragent: UserAgent to use while querying. Default: "python-duckduckgo %d" (str) @@ -34,7 +34,7 @@ async def query(query, useragent='python-duckduckgo ' + str(__version__), 'q': query, 'o': 'json', 'kp': safesearch, - 'no_redirect': '1', + 'no_redirect': '0', 'no_html': html, 'd': meanings, } From 8f3bd6f27585c07d6fbc84da180620a231695d43 Mon Sep 17 00:00:00 2001 From: Volcyy Date: Sat, 9 Sep 2017 22:31:01 +0200 Subject: [PATCH 07/51] Add a 1 second ratelimit to query --- duckduckgo.py | 2 ++ setup.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/duckduckgo.py b/duckduckgo.py index cb13aba..903a749 100755 --- a/duckduckgo.py +++ b/duckduckgo.py @@ -8,10 +8,12 @@ import urllib.parse import aiohttp +from ratelimit import rate_limited __version__ = 0.242 +@rate_limited(1) async def query(query, useragent='python-duckduckgo ' + str(__version__), safesearch=True, html=False, meanings=True, **kwargs): """ diff --git a/setup.py b/setup.py index 4cf20c8..86f5868 100644 --- a/setup.py +++ b/setup.py @@ -22,5 +22,5 @@ "Topic :: Internet :: WWW/HTTP :: Indexing/Search", ], entry_points={'console_scripts': ['ddg = duckduckgo:main']}, - install_requires=['aiohttp'] + install_requires=['aiohttp', 'ratelimit'] ) From 863491160e5658bef2538af0058d49bc88851742 Mon Sep 17 00:00:00 2001 From: Volcyy Date: Sat, 9 Sep 2017 22:37:06 +0200 Subject: [PATCH 08/51] Do not check content-type. --- duckduckgo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/duckduckgo.py b/duckduckgo.py index 903a749..554d9b1 100755 --- a/duckduckgo.py +++ b/duckduckgo.py @@ -46,7 +46,7 @@ async def query(query, useragent='python-duckduckgo ' + str(__version__), async with aiohttp.ClientSession() as cs: async with cs.get('http://api.duckduckgo.com/?' + encparams, headers={'User-Agent': useragent}) as r: - response_json = await r.json(content_type='application/x-javascript') + response_json = await r.json(content_type=None) if response_json is None: print(r) From 7d3c2b77cd38707793a80cbd7fb75560acd591d7 Mon Sep 17 00:00:00 2001 From: Volcyy Date: Sat, 9 Sep 2017 22:39:56 +0200 Subject: [PATCH 09/51] Print the response contents. --- duckduckgo.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/duckduckgo.py b/duckduckgo.py index 554d9b1..cd07a4e 100755 --- a/duckduckgo.py +++ b/duckduckgo.py @@ -46,7 +46,8 @@ async def query(query, useragent='python-duckduckgo ' + str(__version__), async with aiohttp.ClientSession() as cs: async with cs.get('http://api.duckduckgo.com/?' + encparams, headers={'User-Agent': useragent}) as r: - response_json = await r.json(content_type=None) + print(await r.read()) + response_json = await r.json(content_type='application/x-javascript') if response_json is None: print(r) From 431f92d3e3e401bdb8080c6973dad938599f42f6 Mon Sep 17 00:00:00 2001 From: Volcyy Date: Sat, 9 Sep 2017 22:42:06 +0200 Subject: [PATCH 10/51] Print repr() of response too. --- duckduckgo.py | 1 + 1 file changed, 1 insertion(+) diff --git a/duckduckgo.py b/duckduckgo.py index cd07a4e..f420156 100755 --- a/duckduckgo.py +++ b/duckduckgo.py @@ -46,6 +46,7 @@ async def query(query, useragent='python-duckduckgo ' + str(__version__), async with aiohttp.ClientSession() as cs: async with cs.get('http://api.duckduckgo.com/?' + encparams, headers={'User-Agent': useragent}) as r: + print(r) print(await r.read()) response_json = await r.json(content_type='application/x-javascript') From 7bbda3df66d7518a24b890ba2036e71ce0c4ef9f Mon Sep 17 00:00:00 2001 From: Volcyy Date: Sat, 9 Sep 2017 22:51:44 +0200 Subject: [PATCH 11/51] Fixed a redirect-related issue --- duckduckgo.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/duckduckgo.py b/duckduckgo.py index f420156..f37cd35 100755 --- a/duckduckgo.py +++ b/duckduckgo.py @@ -36,7 +36,7 @@ async def query(query, useragent='python-duckduckgo ' + str(__version__), 'q': query, 'o': 'json', 'kp': safesearch, - 'no_redirect': '0', + 'no_redirect': '1', 'no_html': html, 'd': meanings, } @@ -46,8 +46,6 @@ async def query(query, useragent='python-duckduckgo ' + str(__version__), async with aiohttp.ClientSession() as cs: async with cs.get('http://api.duckduckgo.com/?' + encparams, headers={'User-Agent': useragent}) as r: - print(r) - print(await r.read()) response_json = await r.json(content_type='application/x-javascript') if response_json is None: From e7f8abdbfee0555a428e8fc32cd4a103128ff5ad Mon Sep 17 00:00:00 2001 From: Ammon Smith Date: Mon, 9 Oct 2017 20:46:55 -0700 Subject: [PATCH 12/51] Add requirements.txt --- requirements.txt | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 requirements.txt diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..3f5fa2f --- /dev/null +++ b/requirements.txt @@ -0,0 +1,2 @@ +aiohttp>=2.0.0 +ratelimit>=1.4.0 From 0fb1fd4b48087fa799dd498f98c4575374750d2b Mon Sep 17 00:00:00 2001 From: Ammon Smith Date: Mon, 9 Oct 2017 20:47:06 -0700 Subject: [PATCH 13/51] Add gitignore. --- .gitignore | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..2ad3dfa --- /dev/null +++ b/.gitignore @@ -0,0 +1,12 @@ +# Binaries +__pycache__/ +*.py[cod] + +# Deployment +venv/ + +# Misc +*~ +*.bak +.*.swp + From 5dfb371b7ba79711a8c62b248fd8baf9705123a4 Mon Sep 17 00:00:00 2001 From: Ammon Smith Date: Mon, 9 Oct 2017 20:48:11 -0700 Subject: [PATCH 14/51] Replace % with f formatting. --- duckduckgo.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/duckduckgo.py b/duckduckgo.py index f37cd35..f4f88cb 100755 --- a/duckduckgo.py +++ b/duckduckgo.py @@ -2,6 +2,7 @@ # # Copyright (c) 2010 Michael Stephens # Copyright (c) 2012-2013 Michael Smith +# Copyright (c) 2017 Members of the Programming Server # # See LICENSE for terms of usage, modification and redistribution. @@ -12,7 +13,6 @@ __version__ = 0.242 - @rate_limited(1) async def query(query, useragent='python-duckduckgo ' + str(__version__), safesearch=True, html=False, meanings=True, **kwargs): @@ -155,7 +155,7 @@ async def get_zci(q, web_fallback=True, result = getattr(ddg, type) if index is not None: if not hasattr(result, '__getitem__'): - raise TypeError('%s field is not indexable' % type) + raise TypeError(f'{type} field is not indexable') result = result[index] if len(result) > index else None if not result: continue @@ -164,7 +164,7 @@ async def get_zci(q, web_fallback=True, response = result.text if result.text and hasattr(result, 'url') and urls: if result.url: - response += ' (%s)' % result.url + response += f' ({result.url})' if response: break From 4663a2104183a24cf5ec7d2f98e5ae2c74fa4276 Mon Sep 17 00:00:00 2001 From: Ammon Smith Date: Mon, 9 Oct 2017 21:42:49 -0700 Subject: [PATCH 15/51] Add pylintrc. --- pylintrc | 350 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 350 insertions(+) create mode 100644 pylintrc diff --git a/pylintrc b/pylintrc new file mode 100644 index 0000000..15f06de --- /dev/null +++ b/pylintrc @@ -0,0 +1,350 @@ +[MASTER] + +# Specify a configuration file. +#rcfile= + +# Python code to execute, usually for sys.path manipulation such as +# pygtk.require(). +#init-hook= + +# Add files or directories to the blacklist. They should be base names, not +# paths. +ignore= + +# Pickle collected data for later comparisons. +persistent=no + +# List of plugins (as comma separated values of python modules names) to load, +# usually to register additional checkers. +load-plugins= + +# Use multiple processes to speed up Pylint. +jobs=2 + +# Allow loading of arbitrary C extensions. Extensions are imported into the +# active Python interpreter and may run arbitrary code. +unsafe-load-any-extension=no + +# A comma-separated list of package or module names from where C extensions may +# be loaded. Extensions are loading into the active Python interpreter and may +# run arbitrary code +extension-pkg-whitelist= + +# Allow optimization of some AST trees. This will activate a peephole AST +# optimizer, which will apply various small optimizations. For instance, it can +# be used to obtain the result of joining multiple strings with the addition +# operator. Joining a lot of strings can lead to a maximum recursion error in +# Pylint and this flag can prevent that. It has one side effect, the resulting +# AST will be different than the one from reality. +optimize-ast=yes + + +[REPORTS] + +# Set the output format. Available formats are text, parseable, colorized, msvs +# (visual studio) and html. You can also give a reporter class, eg +# mypackage.mymodule.MyReporterClass. +output-format=colorized + +# Put messages in a separate file for each module / package specified on the +# command line instead of printing them on stdout. Reports (if any) will be +# written in a file name "pylint_global.[txt|html]". +files-output=no + +# Tells whether to display a full report or only the messages +reports=no + +# Python expression which should return a note less than 10 (10 is the highest +# note). You have access to the variables errors warning, statement which +# respectively contain the number of errors / warnings messages and the total +# number of statements analyzed. This is used by the global evaluation report +# (RP0004). +evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10) + +# Template used to display messages. This is a python new-style format string +# used to format the message information. See doc for all details +#msg-template={C}:{line:3d},{column:2d}: {msg} ({symbol}) + + +[MESSAGES CONTROL] + +# Only show warnings with the listed confidence levels. Leave empty to show +# all. Valid levels: HIGH, INFERENCE, INFERENCE_FAILURE, UNDEFINED +confidence= +enable= +disable=missing-docstring,invalid-name,bad-continuation,unused-argument,import-error,too-many-locals,redefined-builtin,too-many-branches,undefined-loop-variable,bare-except,protected-access,broad-except,no-self-use,too-few-public-methods,no-init,too-many-instance-attributes,too-many-public-methods,no-else-return,pointless-string-statement,len-as-condition + + +[TYPECHECK] + +# Tells whether missing members accessed in mixin class should be ignored. A +# mixin class is detected if its name ends with "mixin" (case insensitive). +ignore-mixin-members=yes + +# List of module names for which member attributes should not be checked +# (useful for modules/projects where namespaces are manipulated during runtime +# and thus existing member attributes cannot be deduced by static analysis +ignored-modules= + +# List of classes names for which member attributes should not be checked +# (useful for classes with attributes dynamically set). +ignored-classes= + +# List of members which are set dynamically and missed by pylint inference +# system, and so shouldn't trigger E0201 when accessed. Python regular +# expressions are accepted. +generated-members=__members__ + + +[LOGGING] + +# Logging modules to check that the string format arguments are in logging +# function parameter format +logging-modules=logging + + +[FORMAT] + +# Maximum number of characters on a single line. +max-line-length=125 + +# Regexp for a line that is allowed to be longer than the limit. +ignore-long-lines=(# )??$ + +# Allow the body of an if to be on the same line as the test if there is no +# else. +single-line-if-stmt=no + +# List of optional constructs for which whitespace checking is disabled +no-space-check=trailing-comma,dict-separator + +# Maximum number of lines in a module +max-module-lines=2000 + +# String used as indentation unit. This is usually " " (4 spaces) or "\t" (1 +# tab). +indent-string=' ' + +# Number of spaces of indent required inside a hanging or continued line. +indent-after-paren=4 + +# Expected format of line ending, e.g. empty (any line ending), LF or CRLF. +expected-line-ending-format= + + +[VARIABLES] + +# Tells whether we should check for unused import in __init__ files. +init-import=no + +# A regular expression matching the name of dummy variables (i.e. expectedly +# not used). +dummy-variables-rgx=_$|dummy + +# List of additional names supposed to be defined in builtins. Remember that +# you should avoid to define new builtins when possible. +additional-builtins= + +# List of strings which can identify a callback function by name. A callback +# name must start or end with one of those strings. +callbacks=cb_,_cb + + +[BASIC] + +# List of builtins function names that should not be used, separated by a comma +bad-functions= + +# Good variable names which should always be accepted, separated by a comma +good-names=i,j,k,ex,Run,_ + +# Bad variable names which should always be refused, separated by a comma +bad-names=foo,bar,baz,toto,tutu,tata + +# Colon-delimited sets of names that determine each other's naming style when +# the name regexes allow several styles. +name-group= + +# Include a hint for the correct naming format with invalid-name +include-naming-hint=no + +# Regular expression matching correct method names +method-rgx=[a-z_][a-z0-9_]{2,30}$ + +# Naming hint for method names +method-name-hint=[a-z_][a-z0-9_]{2,30}$ + +# Regular expression matching correct argument names +argument-rgx=[a-z_][a-z0-9_]{2,30}$ + +# Naming hint for argument names +argument-name-hint=[a-z_][a-z0-9_]{2,30}$ + +# Regular expression matching correct attribute names +attr-rgx=[a-z_][a-z0-9_]{2,30}$ + +# Naming hint for attribute names +attr-name-hint=[a-z_][a-z0-9_]{2,30}$ + +# Regular expression matching correct class attribute names +class-attribute-rgx=([A-Za-z_][A-Za-z0-9_]{2,30}|(__.*__))$ + +# Naming hint for class attribute names +class-attribute-name-hint=([A-Za-z_][A-Za-z0-9_]{2,30}|(__.*__))$ + +# Regular expression matching correct constant names +const-rgx=(([A-Z_][A-Z0-9_]*)|(__.*__))$ + +# Naming hint for constant names +const-name-hint=(([A-Z_][A-Z0-9_]*)|(__.*__))$ + +# Regular expression matching correct class names +class-rgx=[A-Z_][a-zA-Z0-9]+$ + +# Naming hint for class names +class-name-hint=[A-Z_][a-zA-Z0-9]+$ + +# Regular expression matching correct function names +function-rgx=[a-z_][a-z0-9_]{2,30}$ + +# Naming hint for function names +function-name-hint=[a-z_][a-z0-9_]{2,30}$ + +# Regular expression matching correct variable names +variable-rgx=[a-z_][a-z0-9_]{2,30}$ + +# Naming hint for variable names +variable-name-hint=[a-z_][a-z0-9_]{2,30}$ + +# Regular expression matching correct inline iteration names +inlinevar-rgx=[A-Za-z_][A-Za-z0-9_]*$ + +# Naming hint for inline iteration names +inlinevar-name-hint=[A-Za-z_][A-Za-z0-9_]*$ + +# Regular expression matching correct module names +module-rgx=(([a-z_][a-z0-9_]*)|([A-Z][a-zA-Z0-9]+))$ + +# Naming hint for module names +module-name-hint=(([a-z_][a-z0-9_]*)|([A-Z][a-zA-Z0-9]+))$ + +# Regular expression which should only match function or class names that do +# not require a docstring. +no-docstring-rgx=__.*__ + +# Minimum line length for functions/classes that require docstrings, shorter +# ones are exempt. +docstring-min-length=-1 + + +[SIMILARITIES] + +# Minimum lines number of a similarity. +min-similarity-lines=10 + +# Ignore comments when computing similarities. +ignore-comments=yes + +# Ignore docstrings when computing similarities. +ignore-docstrings=yes + +# Ignore imports when computing similarities. +ignore-imports=yes + + +[MISCELLANEOUS] + +# List of note tags to take in consideration, separated by a comma. +notes=FIXME,XXX,TODO + + +[SPELLING] + +# Spelling dictionary name. Available dictionaries: none. To make it working +# install python-enchant package. +spelling-dict= + +# List of comma separated words that should not be checked. +spelling-ignore-words= + +# A path to a file that contains private dictionary; one word per line. +spelling-private-dict-file= + +# Tells whether to store unknown words to indicated private dictionary in +# --spelling-private-dict-file option instead of raising a message. +spelling-store-unknown-words=no + + +[DESIGN] + +# Maximum number of arguments for function / method +max-args=10 + +# Argument names that match this expression will be ignored. Default to name +# with leading underscore +ignored-argument-names=_.* + +# Maximum number of locals for function / method body +max-locals=15 + +# Maximum number of return / yield for function / method body +max-returns=6 + +# Maximum number of branch for function / method body +max-branches=20 + +# Maximum number of statements in function / method body +max-statements=100 + +# Maximum number of parents for a class (see R0901). +max-parents=7 + +# Maximum number of attributes for a class (see R0902). +max-attributes=7 + +# Minimum number of public methods for a class (see R0903). +min-public-methods=2 + +# Maximum number of public methods for a class (see R0904). +max-public-methods=20 + + +[CLASSES] + +# List of method names used to declare (i.e. assign) instance attributes. +defining-attr-methods=__init__,__new__,setUp + +# List of valid names for the first argument in a class method. +valid-classmethod-first-arg=cls + +# List of valid names for the first argument in a metaclass class method. +valid-metaclass-classmethod-first-arg=mcs + +# List of member names, which should be excluded from the protected access +# warning. +exclude-protected=_asdict,_fields,_replace,_source,_make + + +[IMPORTS] + +# Deprecated modules which should not be used, separated by a comma +deprecated-modules=stringprep,optparse + +# Create a graph of every (i.e. internal and external) dependencies in the +# given file (report RP0402 must not be disabled) +import-graph= + +# Create a graph of external dependencies in the given file (report RP0402 must +# not be disabled) +ext-import-graph= + +# Create a graph of internal dependencies in the given file (report RP0402 must +# not be disabled) +int-import-graph= + + +[EXCEPTIONS] + +# Exceptions that will emit a warning when being caught. Defaults to +# "Exception" +overgeneral-exceptions=Exception From 6cc97ce39493afeca23a37d9229682a9f7aa5513 Mon Sep 17 00:00:00 2001 From: Ammon Smith Date: Mon, 9 Oct 2017 21:45:26 -0700 Subject: [PATCH 16/51] Move to module. --- duckduckgo.py | 180 ----------------------------------------- duckduckgo/__init__.py | 11 +++ duckduckgo/models.py | 131 ++++++++++++++++++++++++++++++ duckduckgo/query.py | 107 ++++++++++++++++++++++++ 4 files changed, 249 insertions(+), 180 deletions(-) delete mode 100755 duckduckgo.py create mode 100644 duckduckgo/__init__.py create mode 100644 duckduckgo/models.py create mode 100755 duckduckgo/query.py diff --git a/duckduckgo.py b/duckduckgo.py deleted file mode 100755 index f4f88cb..0000000 --- a/duckduckgo.py +++ /dev/null @@ -1,180 +0,0 @@ -# duckduckgo.py - Library for querying the DuckDuckGo API -# -# Copyright (c) 2010 Michael Stephens -# Copyright (c) 2012-2013 Michael Smith -# Copyright (c) 2017 Members of the Programming Server -# -# See LICENSE for terms of usage, modification and redistribution. - -import urllib.parse - -import aiohttp -from ratelimit import rate_limited - -__version__ = 0.242 - -@rate_limited(1) -async def query(query, useragent='python-duckduckgo ' + str(__version__), - safesearch=True, html=False, meanings=True, **kwargs): - """ - Query DuckDuckGo, returning a Results object. - - The API is queried asynchronously, and redirects are followed. - - Keyword arguments: - useragent: UserAgent to use while querying. Default: "python-duckduckgo %d" (str) - safesearch: True for on, False for off. Default: True (bool) - html: True to allow HTML in output. Default: False (bool) - meanings: True to include disambiguations in results (bool) - Any other keyword arguments are passed directly to DuckDuckGo as URL params. - """ % __version__ - - safesearch = '1' if safesearch else '-1' - html = '0' if html else '1' - meanings = '0' if meanings else '1' - params = { - 'q': query, - 'o': 'json', - 'kp': safesearch, - 'no_redirect': '1', - 'no_html': html, - 'd': meanings, - } - params.update(kwargs) - encparams = urllib.parse.urlencode(params) - url = 'http://api.duckduckgo.com/?' + encparams - async with aiohttp.ClientSession() as cs: - async with cs.get('http://api.duckduckgo.com/?' + encparams, - headers={'User-Agent': useragent}) as r: - response_json = await r.json(content_type='application/x-javascript') - - if response_json is None: - print(r) - print(await r.read()) - raise ValueError("Failed to decode JSON response") - return Results(response_json) - - -class Results(object): - - def __init__(self, data): - json_type = data.get('Type', '') - self.type = {'A': 'answer', 'D': 'disambiguation', - 'C': 'category', 'N': 'name', - 'E': 'exclusive', '': 'nothing'}.get(json_type, '') - - self.json = data - self.api_version = None # compat - - self.heading = data.get('Heading', '') - - self.results = [Result(elem) for elem in data.get('Results', [])] - self.related = [Result(elem) for elem in - data.get('RelatedTopics', [])] - - self.abstract = Abstract(data) - self.redirect = Redirect(data) - self.definition = Definition(data) - self.answer = Answer(data) - - self.image = Image({'Result': data.get('Image', '')}) - - -class Abstract(object): - - def __init__(self, data): - self.html = data.get('Abstract', '') - self.text = data.get('AbstractText', '') - self.url = data.get('AbstractURL', '') - self.source = data.get('AbstractSource') - - -class Redirect(object): - - def __init__(self, data): - self.url = data.get('Redirect', '') - - -class Result(object): - - def __init__(self, data): - self.topics = data.get('Topics', []) - if self.topics: - self.topics = [Result(t) for t in self.topics] - return - self.html = data.get('Result') - self.text = data.get('Text') - self.url = data.get('FirstURL') - - icon_json = data.get('Icon') - if icon_json is not None: - self.icon = Image(icon_json) - else: - self.icon = None - - -class Image(object): - - def __init__(self, data): - self.url = data.get('Result') - self.height = data.get('Height', None) - self.width = data.get('Width', None) - - -class Answer(object): - - def __init__(self, data): - self.text = data.get('Answer') - self.type = data.get('AnswerType', '') - - -class Definition(object): - def __init__(self, data): - self.text = data.get('Definition', '') - self.url = data.get('DefinitionURL') - self.source = data.get('DefinitionSource') - - -async def get_zci(q, web_fallback=True, - priority=['answer', 'abstract', 'related.0', 'definition'], - urls=True, **kwargs): - '''A helper method to get a single (and hopefully the best) ZCI result. - priority=list can be used to set the order in which fields will be checked for answers. - Use web_fallback=True to fall back to grabbing the first web result. - passed to query. This method will fall back to 'Sorry, no results.' - if it cannot find anything.''' - - ddg = await query('\\' + q, **kwargs) - response = '' - - for p in priority: - ps = p.split('.') - type = ps[0] - index = int(ps[1]) if len(ps) > 1 else None - - result = getattr(ddg, type) - if index is not None: - if not hasattr(result, '__getitem__'): - raise TypeError(f'{type} field is not indexable') - result = result[index] if len(result) > index else None - if not result: - continue - - if result.text: - response = result.text - if result.text and hasattr(result, 'url') and urls: - if result.url: - response += f' ({result.url})' - if response: - break - - # if there still isn't anything, try to get the first web result - if not response and web_fallback: - if ddg.redirect.url: - response = ddg.redirect.url - - # final fallback - if not response: - response = 'Sorry, no results.' - - return response diff --git a/duckduckgo/__init__.py b/duckduckgo/__init__.py new file mode 100644 index 0000000..b0e579d --- /dev/null +++ b/duckduckgo/__init__.py @@ -0,0 +1,11 @@ +# duckduckgo.py - Library for querying the DuckDuckGo API +# +# Copyright (c) 2010 Michael Stephens +# Copyright (c) 2012-2013 Michael Smith +# Copyright (c) 2017 Members of the Programming Server +# +# See LICENSE for terms of usage, modification and redistribution. + +__version__ = 0.300 + +from .query import query, get_zci diff --git a/duckduckgo/models.py b/duckduckgo/models.py new file mode 100644 index 0000000..7504bd5 --- /dev/null +++ b/duckduckgo/models.py @@ -0,0 +1,131 @@ +# duckduckgo.py - Library for querying the DuckDuckGo API +# +# Copyright (c) 2010 Michael Stephens +# Copyright (c) 2012-2013 Michael Smith +# Copyright (c) 2017 Members of the Programming Server +# +# See LICENSE for terms of usage, modification and redistribution. + +RESULT_TYPES = { + 'A': 'answer', + 'D': 'disambiguation', + 'C': 'category', + 'N': 'name', + 'E': 'exclusive', + '': 'nothing', +} + +class Results: + __slots__ = ( + 'type', + 'json', + 'api_version', + 'heading', + 'results', + 'related', + 'abstract', + 'redirect', + 'definition', + 'answer', + 'image', + ) + + def __init__(self, data): + json_type = data.get('Type', '') + self.type = RESULT_TYPES.get(json_type, '') + + self.json = data + self.api_version = None # compat + + self.heading = data.get('Heading', '') + + self.results = [Result(elem) for elem in data.get('Results', ())] + self.related = [Result(elem) for elem in data.get('RelatedTopics', ())] + + self.abstract = Abstract(data) + self.redirect = Redirect(data) + self.definition = Definition(data) + self.answer = Answer(data) + + self.image = Image({'Result': data.get('Image', '')}) + +class Abstract: + __slots__ = ( + 'html', + 'text', + 'url', + 'source', + ) + + def __init__(self, data): + self.html = data.get('Abstract', '') + self.text = data.get('AbstractText', '') + self.url = data.get('AbstractURL', '') + self.source = data.get('AbstractSource') + +class Redirect: + __slots__ = ( + 'url', + ) + + def __init__(self, data): + self.url = data.get('Redirect', '') + +class Result: + __slots__ = ( + 'topics', + 'html', + 'text', + 'url', + 'icon', + ) + + def __init__(self, data): + self.topics = data.get('Topics', []) + if self.topics: + self.topics = [Result(t) for t in self.topics] + return + self.html = data.get('Result') + self.text = data.get('Text') + self.url = data.get('FirstURL') + + icon_json = data.get('Icon') + if icon_json is not None: + self.icon = Image(icon_json) + else: + self.icon = None + + +class Image: + __slots__ = ( + 'url', + 'height', + 'width', + ) + + def __init__(self, data): + self.url = data.get('Result') + self.height = data.get('Height', None) + self.width = data.get('Width', None) + +class Answer: + __slots__ = ( + 'text', + 'type', + ) + + def __init__(self, data): + self.text = data.get('Answer') + self.type = data.get('AnswerType', '') + +class Definition: + __slots__ = ( + 'text', + 'url', + 'source', + ) + + def __init__(self, data): + self.text = data.get('Definition', '') + self.url = data.get('DefinitionURL') + self.source = data.get('DefinitionSource') diff --git a/duckduckgo/query.py b/duckduckgo/query.py new file mode 100755 index 0000000..8254e21 --- /dev/null +++ b/duckduckgo/query.py @@ -0,0 +1,107 @@ +# duckduckgo.py - Library for querying the DuckDuckGo API +# +# Copyright (c) 2010 Michael Stephens +# Copyright (c) 2012-2013 Michael Smith +# Copyright (c) 2017 Members of the Programming Server +# +# See LICENSE for terms of usage, modification and redistribution. + +import urllib.parse +from typing import Tuple + +import aiohttp +from ratelimit import rate_limited + +from . import __version__ +from .models import Results + +DEFAULT_USER_AGENT = f'python-duckduckgo {__version__}' +DEFAULT_PRIORITIES = ('answer', 'abstract', 'related.0', 'definition') + +@rate_limited(1) +async def query(q: str, + useragent: str = DEFAULT_USER_AGENT, + safesearch: bool = True, + html: bool = False, + meanings: bool = True, + **kwargs) -> Results: + """ + Query DuckDuckGo, returning a Results object. + + The API is queried asynchronously, and redirects are followed. + + Keyword arguments: + useragent: UserAgent to use while querying. Default: "python-duckduckgo " (str) + safesearch: True for on, False for off. Default: True (bool) + html: True to allow HTML in output. Default: False (bool) + meanings: True to include disambiguations in results (bool) + Any other keyword arguments are passed directly to DuckDuckGo as URL params. + """ + + safesearch = '1' if safesearch else '-1' + html = '0' if html else '1' + meanings = '0' if meanings else '1' + params = { + 'q': q, + 'o': 'json', + 'kp': safesearch, + 'no_redirect': '1', + 'no_html': html, + 'd': meanings, + } + params.update(kwargs) + encparams = urllib.parse.urlencode(params) + url = f'http://api.duckduckgo.com/?{encparams}' + + async with aiohttp.ClientSession() as cs: + async with cs.get(url, headers={'User-Agent': useragent}) as req: + response = await req.json(content_type='application/x-javascript') + if response is None: + raise ValueError("Failed to decode JSON response") + return Results(response) + +async def get_zci(q: str, + web_fallback: bool = True, + priority: Tuple[str] = DEFAULT_PRIORITIES, + urls: bool = True, + **kwargs) -> str: + '''A helper method to get a single (and hopefully the best) ZCI result. + priority=list can be used to set the order in which fields will be checked for answers. + Use web_fallback=True to fall back to grabbing the first web result. + passed to query. This method will fall back to 'Sorry, no results.' + if it cannot find anything.''' + + ddg = await query(f'\\{q}', **kwargs) + response = '' + + for p in priority: + ps = p.split('.') + type = ps[0] + index = int(ps[1]) if len(ps) > 1 else None + + result = getattr(ddg, type) + if index is not None: + if not hasattr(result, '__getitem__'): + raise TypeError(f'{type} field is not indexable') + result = result[index] if len(result) > index else None + if not result: + continue + + if result.text: + response = result.text + if result.text and hasattr(result, 'url') and urls: + if result.url: + response += f' ({result.url})' + if response: + break + + # If there still isn't anything, try to get the first web result + if not response and web_fallback: + if ddg.redirect.url: + response = ddg.redirect.url + + # Final fallback + if not response: + response = 'Sorry, no results.' + + return response From 2bb3fd3fb3cccbbf1b2e4061a2b7f1842ad67f24 Mon Sep 17 00:00:00 2001 From: Ammon Smith Date: Mon, 9 Oct 2017 21:53:04 -0700 Subject: [PATCH 17/51] Update README. --- README.rst | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/README.rst b/README.rst index 486aaaf..18d0f45 100644 --- a/README.rst +++ b/README.rst @@ -26,19 +26,19 @@ Usage ===== >>> import duckduckgo - >>> r = duckduckgo.query('DuckDuckGo') + >>> r = await duckduckgo.query('DuckDuckGo') >>> r.type - u'answer' + 'answer' >>> r.results[0].text - u'Official site' + 'Official site' >>> r.results[0].url - u'http://duckduckgo.com/' + 'http://duckduckgo.com/' >>> r.abstract.url - u'http://en.wikipedia.org/wiki/Duck_Duck_Go' + 'http://en.wikipedia.org/wiki/Duck_Duck_Go' >>> r.abstract.source - u'Wikipedia' - - >>> r = duckduckgo.query('Python') + 'Wikipedia' + + >>> r = await duckduckgo.query('Python') >>> r.type u'disambiguation' >>> r.related[1].text @@ -49,7 +49,7 @@ Usage u'Armstrong Siddeley Python, an early turboprop engine' - >>> r = duckduckgo.query('1 + 1') + >>> r = await duckduckgo.query('1 + 1') >>> r.type u'nothing' >>> r.answer.text @@ -57,9 +57,9 @@ Usage >>> r.answer.type u'calc' - >>> print duckduckgo.query('19301', kad='es_ES').answer.text + >>> print(await duckduckgo.query('19301', kad='es_ES').answer.text) 19301 es un código postal de Paoli, PA - >>> print duckduckgo.query('how to spell test', html=True).answer.text + >>> print(await duckduckgo.query('how to spell test', html=True).answer.text) Test appears to be spelled right!
Suggestions: test, testy, teat, tests, rest, yest. The easiest method of quickly grabbing the best (hopefully) API result is to use duckduckgo.get_zci:: From 8cadac6ea8fae03bdefe040290f79327c18dcf38 Mon Sep 17 00:00:00 2001 From: Ammon Smith Date: Mon, 9 Oct 2017 21:54:34 -0700 Subject: [PATCH 18/51] Update repo info in README. --- README.rst | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/README.rst b/README.rst index 18d0f45..47a7fa2 100644 --- a/README.rst +++ b/README.rst @@ -2,15 +2,17 @@ python-duckduckgo ================== -A Python library for querying the DuckDuckGo API. +A Python 3.6 library for querying the DuckDuckGo API. Copyright (c) 2010 Michael Stephens Copyright (c) 2012-2013 Michael Smith +Copyright (c) 2017 Members of the Programming Server Released under a 3-clause BSD license, see LICENSE for details. -Latest Source: http://github.com/crazedpsyc/python-duckduckgo -Original source: http://github.com/mikejs/python-duckduckgo (outdated) +This Source: https://github.com/strinking/python-duckduckgo +Original Source (1): http://github.com/crazedpsyc/python-duckduckgo +Original source (2): http://github.com/mikejs/python-duckduckgo This version has been forked from the original to handle some new features of the API, and switch from XML to JSON. From 29ac945386e19f5d032118646dbde5ee419d68bf Mon Sep 17 00:00:00 2001 From: Ammon Smith Date: Mon, 9 Oct 2017 22:35:32 -0700 Subject: [PATCH 19/51] Remove +x from query.py --- duckduckgo/query.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) mode change 100755 => 100644 duckduckgo/query.py diff --git a/duckduckgo/query.py b/duckduckgo/query.py old mode 100755 new mode 100644 From f7133ca62ab59badab80d9baf7a9d26672c3c45d Mon Sep 17 00:00:00 2001 From: Ammon Smith Date: Mon, 9 Oct 2017 22:42:54 -0700 Subject: [PATCH 20/51] Add logging. --- duckduckgo/query.py | 34 +++++++++++++++++++++++++++++----- 1 file changed, 29 insertions(+), 5 deletions(-) diff --git a/duckduckgo/query.py b/duckduckgo/query.py index 8254e21..80a8395 100644 --- a/duckduckgo/query.py +++ b/duckduckgo/query.py @@ -6,6 +6,7 @@ # # See LICENSE for terms of usage, modification and redistribution. +import logging import urllib.parse from typing import Tuple @@ -18,6 +19,8 @@ DEFAULT_USER_AGENT = f'python-duckduckgo {__version__}' DEFAULT_PRIORITIES = ('answer', 'abstract', 'related.0', 'definition') +logger = logging.getLogger('duckduckgo') + @rate_limited(1) async def query(q: str, useragent: str = DEFAULT_USER_AGENT, @@ -38,6 +41,11 @@ async def query(q: str, Any other keyword arguments are passed directly to DuckDuckGo as URL params. """ + logger.info(f"Performing DDG query: '{q}'") + logger.debug(f"Safesearch: {safesearch}") + logger.debug(f"HTML: {html}") + logger.debug(f"Meanings: {meanings}") + safesearch = '1' if safesearch else '-1' html = '0' if html else '1' meanings = '0' if meanings else '1' @@ -50,6 +58,8 @@ async def query(q: str, 'd': meanings, } params.update(kwargs) + logger.debug(f"Full parameters: {params}") + encparams = urllib.parse.urlencode(params) url = f'http://api.duckduckgo.com/?{encparams}' @@ -57,7 +67,10 @@ async def query(q: str, async with cs.get(url, headers={'User-Agent': useragent}) as req: response = await req.json(content_type='application/x-javascript') if response is None: + logger.error("Response is 'None'") raise ValueError("Failed to decode JSON response") + + logger.debug("Response is {response}") return Results(response) async def get_zci(q: str, @@ -71,6 +84,10 @@ async def get_zci(q: str, passed to query. This method will fall back to 'Sorry, no results.' if it cannot find anything.''' + logger.info(f"Performing DDG ZCI: '{q}'") + logger.debug(f"Web fallback: {web_fallback}") + logger.debug(f"Use URLs: {urls}") + ddg = await query(f'\\{q}', **kwargs) response = '' @@ -82,26 +99,33 @@ async def get_zci(q: str, result = getattr(ddg, type) if index is not None: if not hasattr(result, '__getitem__'): + logger.error("Result is not indexable!") raise TypeError(f'{type} field is not indexable') + result = result[index] if len(result) > index else None + if not result: continue - - if result.text: + elif result.text: + logger.debug(f"Result has text: {result.text}") response = result.text - if result.text and hasattr(result, 'url') and urls: - if result.url: + + if getattr(result, 'url', None) and urls: + logger.debug(f"Result has url: {result.url}") response += f' ({result.url})' - if response: + break # If there still isn't anything, try to get the first web result + logger.debug("Trying web fallback...") if not response and web_fallback: if ddg.redirect.url: response = ddg.redirect.url # Final fallback + logger.info("No results!") if not response: response = 'Sorry, no results.' + logger.debug(f"Final response: {response!r}") return response From ccf478333c9b7d93d4497892c268b2e26bba29ad Mon Sep 17 00:00:00 2001 From: Ammon Smith Date: Mon, 9 Oct 2017 23:51:23 -0700 Subject: [PATCH 21/51] Reorder imports. --- duckduckgo/query.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/duckduckgo/query.py b/duckduckgo/query.py index 80a8395..962741c 100644 --- a/duckduckgo/query.py +++ b/duckduckgo/query.py @@ -6,12 +6,12 @@ # # See LICENSE for terms of usage, modification and redistribution. +from typing import Tuple import logging import urllib.parse -from typing import Tuple -import aiohttp from ratelimit import rate_limited +import aiohttp from . import __version__ from .models import Results From 55a73eec8ed433ca05be7ca00c469dba37e02c95 Mon Sep 17 00:00:00 2001 From: Ammon Smith Date: Tue, 10 Oct 2017 00:52:29 -0700 Subject: [PATCH 22/51] Add Travis config. --- .travis.yml | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100644 .travis.yml diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000..0a4ffc0 --- /dev/null +++ b/.travis.yml @@ -0,0 +1,22 @@ +language: python +dist: trusty + +python: + - '3.6' + - 'nightly' + +sudo: false + +cache: pip + +install: + - pip install --requirement requirements.txt + - pip install pylint + +script: + - pylint duckduckgo + +notifications: + email: + on_success: change + on_failure: always From 69954f3ef6e4a1b3a2a65828bb442da1a7481fb4 Mon Sep 17 00:00:00 2001 From: Ammon Smith Date: Tue, 10 Oct 2017 00:58:31 -0700 Subject: [PATCH 23/51] Change package name to avoid collisions. --- setup.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/setup.py b/setup.py index 86f5868..a017907 100644 --- a/setup.py +++ b/setup.py @@ -4,14 +4,14 @@ with open('README.rst') as f: long_description = f.read() -setup(name='duckduckgo2', +setup(name='duckduckgo-async', version=__version__, py_modules=['duckduckgo'], description='Library for querying the DuckDuckGo API', - author='Michael Smith', - author_email='crazedpsyc@duckduckgo.com', + author='Ammon Smith', + author_email='ammon.i.smith@gmail.com', license='BSD', - url='http://github.com/crazedpsyc/python-duckduckgo/', + url='http://github.com/strinking/python-duckduckgo/', long_description=long_description, platforms=['any'], classifiers=["Development Status :: 4 - Beta", From 1853230fafb725bd5c4a0557ff8187ec18251227 Mon Sep 17 00:00:00 2001 From: Ammon Smith Date: Tue, 10 Oct 2017 01:53:04 -0700 Subject: [PATCH 24/51] Make rate_limited() optional. --- duckduckgo/query.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/duckduckgo/query.py b/duckduckgo/query.py index 962741c..3966a97 100644 --- a/duckduckgo/query.py +++ b/duckduckgo/query.py @@ -10,12 +10,16 @@ import logging import urllib.parse -from ratelimit import rate_limited import aiohttp from . import __version__ from .models import Results +try: + from ratelimit import rate_limited +except ImportError: + rate_limited = lambda x: x + DEFAULT_USER_AGENT = f'python-duckduckgo {__version__}' DEFAULT_PRIORITIES = ('answer', 'abstract', 'related.0', 'definition') From 7a92e4f693649719a1f6b994d148df2a5bddd411 Mon Sep 17 00:00:00 2001 From: Ammon Smith Date: Tue, 10 Oct 2017 01:58:10 -0700 Subject: [PATCH 25/51] Fix dummy wrapper. --- duckduckgo/query.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/duckduckgo/query.py b/duckduckgo/query.py index 3966a97..e169046 100644 --- a/duckduckgo/query.py +++ b/duckduckgo/query.py @@ -18,7 +18,8 @@ try: from ratelimit import rate_limited except ImportError: - rate_limited = lambda x: x + def rate_limited(func, freq=None, mult=None): + return func DEFAULT_USER_AGENT = f'python-duckduckgo {__version__}' DEFAULT_PRIORITIES = ('answer', 'abstract', 'related.0', 'definition') From 12249cc9746abe9461a3183ac508252de64953b0 Mon Sep 17 00:00:00 2001 From: Ammon Smith Date: Tue, 10 Oct 2017 01:59:21 -0700 Subject: [PATCH 26/51] Change to decorator factory. --- duckduckgo/query.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/duckduckgo/query.py b/duckduckgo/query.py index e169046..b50f9b1 100644 --- a/duckduckgo/query.py +++ b/duckduckgo/query.py @@ -18,8 +18,10 @@ try: from ratelimit import rate_limited except ImportError: - def rate_limited(func, freq=None, mult=None): - return func + def rate_limited(freq=None, mult=None): + def decorator(func): + return func + return decorator DEFAULT_USER_AGENT = f'python-duckduckgo {__version__}' DEFAULT_PRIORITIES = ('answer', 'abstract', 'related.0', 'definition') From ae91b0bc4942dffb6249f477f94090b364d31e09 Mon Sep 17 00:00:00 2001 From: Ammon Smith Date: Tue, 10 Oct 2017 02:16:39 -0700 Subject: [PATCH 27/51] Make ratelimit non-optional. --- duckduckgo/query.py | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/duckduckgo/query.py b/duckduckgo/query.py index b50f9b1..962741c 100644 --- a/duckduckgo/query.py +++ b/duckduckgo/query.py @@ -10,19 +10,12 @@ import logging import urllib.parse +from ratelimit import rate_limited import aiohttp from . import __version__ from .models import Results -try: - from ratelimit import rate_limited -except ImportError: - def rate_limited(freq=None, mult=None): - def decorator(func): - return func - return decorator - DEFAULT_USER_AGENT = f'python-duckduckgo {__version__}' DEFAULT_PRIORITIES = ('answer', 'abstract', 'related.0', 'definition') From e98bea33a7117f5577af11e0332d5b4a2563da9b Mon Sep 17 00:00:00 2001 From: Ammon Smith Date: Tue, 10 Oct 2017 17:09:28 -0700 Subject: [PATCH 28/51] Move version to a separate file. --- duckduckgo/__init__.py | 3 +-- duckduckgo/version.py | 9 +++++++++ setup.py | 2 +- 3 files changed, 11 insertions(+), 3 deletions(-) create mode 100644 duckduckgo/version.py diff --git a/duckduckgo/__init__.py b/duckduckgo/__init__.py index b0e579d..ed10c85 100644 --- a/duckduckgo/__init__.py +++ b/duckduckgo/__init__.py @@ -6,6 +6,5 @@ # # See LICENSE for terms of usage, modification and redistribution. -__version__ = 0.300 - from .query import query, get_zci +from .version import __version__ diff --git a/duckduckgo/version.py b/duckduckgo/version.py new file mode 100644 index 0000000..9852619 --- /dev/null +++ b/duckduckgo/version.py @@ -0,0 +1,9 @@ +# duckduckgo.py - Library for querying the DuckDuckGo API +# +# Copyright (c) 2010 Michael Stephens +# Copyright (c) 2012-2013 Michael Smith +# Copyright (c) 2017 Members of the Programming Server +# +# See LICENSE for terms of usage, modification and redistribution. + +__version__ = 0.300 diff --git a/setup.py b/setup.py index a017907..e3286a3 100644 --- a/setup.py +++ b/setup.py @@ -1,5 +1,5 @@ from setuptools import setup -from duckduckgo import __version__ +from duckduckgo.version import __version__ with open('README.rst') as f: long_description = f.read() From 34f4635ca383415c86c46893edc7d77c2d42af53 Mon Sep 17 00:00:00 2001 From: Ammon Smith Date: Tue, 10 Oct 2017 17:10:25 -0700 Subject: [PATCH 29/51] Fix . import for __version__. --- duckduckgo/query.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/duckduckgo/query.py b/duckduckgo/query.py index 962741c..42caab1 100644 --- a/duckduckgo/query.py +++ b/duckduckgo/query.py @@ -13,8 +13,8 @@ from ratelimit import rate_limited import aiohttp -from . import __version__ from .models import Results +from .version import __version__ DEFAULT_USER_AGENT = f'python-duckduckgo {__version__}' DEFAULT_PRIORITIES = ('answer', 'abstract', 'related.0', 'definition') From 0af6dff00c4eda38cc73c6614efe717b4c313651 Mon Sep 17 00:00:00 2001 From: Ammon Smith Date: Wed, 11 Oct 2017 20:07:36 -0700 Subject: [PATCH 30/51] Move version to a separate file. --- duckduckgo/VERSION | 1 + duckduckgo/__init__.py | 8 ++++++++ duckduckgo/version.py | 10 +++++++++- setup.py | 6 ++++-- 4 files changed, 22 insertions(+), 3 deletions(-) create mode 100644 duckduckgo/VERSION diff --git a/duckduckgo/VERSION b/duckduckgo/VERSION new file mode 100644 index 0000000..1629a9c --- /dev/null +++ b/duckduckgo/VERSION @@ -0,0 +1 @@ +0.300 diff --git a/duckduckgo/__init__.py b/duckduckgo/__init__.py index ed10c85..d1af529 100644 --- a/duckduckgo/__init__.py +++ b/duckduckgo/__init__.py @@ -8,3 +8,11 @@ from .query import query, get_zci from .version import __version__ + +__all__ = [ + 'query', + 'get_zci', + '__version__', +] + +del version diff --git a/duckduckgo/version.py b/duckduckgo/version.py index 9852619..225fb2a 100644 --- a/duckduckgo/version.py +++ b/duckduckgo/version.py @@ -6,4 +6,12 @@ # # See LICENSE for terms of usage, modification and redistribution. -__version__ = 0.300 +import os + +__all__ = [ + '__version__', +] + +path = os.path.join(os.path.dirname(__file__), 'VERSION') +with open(path) as fh: + __version__ = fh.read().strip() diff --git a/setup.py b/setup.py index e3286a3..473d012 100644 --- a/setup.py +++ b/setup.py @@ -1,11 +1,13 @@ from setuptools import setup -from duckduckgo.version import __version__ + +with open('duckduckgo/VERSION') as f: + version = f.read().strip() with open('README.rst') as f: long_description = f.read() setup(name='duckduckgo-async', - version=__version__, + version=version, py_modules=['duckduckgo'], description='Library for querying the DuckDuckGo API', author='Ammon Smith', From 7e3e19d9fcb4a04af320f170cbe8e5e1cb70aca1 Mon Sep 17 00:00:00 2001 From: Ammon Smith Date: Wed, 11 Oct 2017 20:22:23 -0700 Subject: [PATCH 31/51] Rename version.py to _version.py --- duckduckgo/__init__.py | 4 +--- duckduckgo/{version.py => _version.py} | 0 duckduckgo/query.py | 2 +- 3 files changed, 2 insertions(+), 4 deletions(-) rename duckduckgo/{version.py => _version.py} (100%) diff --git a/duckduckgo/__init__.py b/duckduckgo/__init__.py index d1af529..9c9f2b4 100644 --- a/duckduckgo/__init__.py +++ b/duckduckgo/__init__.py @@ -6,13 +6,11 @@ # # See LICENSE for terms of usage, modification and redistribution. +from ._version import __version__ from .query import query, get_zci -from .version import __version__ __all__ = [ 'query', 'get_zci', '__version__', ] - -del version diff --git a/duckduckgo/version.py b/duckduckgo/_version.py similarity index 100% rename from duckduckgo/version.py rename to duckduckgo/_version.py diff --git a/duckduckgo/query.py b/duckduckgo/query.py index 42caab1..645de98 100644 --- a/duckduckgo/query.py +++ b/duckduckgo/query.py @@ -13,8 +13,8 @@ from ratelimit import rate_limited import aiohttp +from ._version import __version__ from .models import Results -from .version import __version__ DEFAULT_USER_AGENT = f'python-duckduckgo {__version__}' DEFAULT_PRIORITIES = ('answer', 'abstract', 'related.0', 'definition') From b23587b7c474297dee1d6bd7b8781155cd35c36f Mon Sep 17 00:00:00 2001 From: Ammon Smith Date: Wed, 11 Oct 2017 20:25:29 -0700 Subject: [PATCH 32/51] Bump version. --- duckduckgo/VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/duckduckgo/VERSION b/duckduckgo/VERSION index 1629a9c..4e09d2c 100644 --- a/duckduckgo/VERSION +++ b/duckduckgo/VERSION @@ -1 +1 @@ -0.300 +0.310 From 45159da2f80c6b50bdeaad759637e01dbca2180a Mon Sep 17 00:00:00 2001 From: Ammon Smith Date: Wed, 18 Oct 2017 19:30:22 -0700 Subject: [PATCH 33/51] Replace ratelimit decorator with generic class. --- duckduckgo/__init__.py | 2 ++ duckduckgo/query.py | 2 -- duckduckgo/ratelimit.py | 59 +++++++++++++++++++++++++++++++++++++++++ requirements.txt | 1 - 4 files changed, 61 insertions(+), 3 deletions(-) create mode 100644 duckduckgo/ratelimit.py diff --git a/duckduckgo/__init__.py b/duckduckgo/__init__.py index 9c9f2b4..6eaeba7 100644 --- a/duckduckgo/__init__.py +++ b/duckduckgo/__init__.py @@ -8,9 +8,11 @@ from ._version import __version__ from .query import query, get_zci +from .ratelimit import Ratelimit __all__ = [ 'query', 'get_zci', + 'Ratelimit', '__version__', ] diff --git a/duckduckgo/query.py b/duckduckgo/query.py index 645de98..73997a6 100644 --- a/duckduckgo/query.py +++ b/duckduckgo/query.py @@ -10,7 +10,6 @@ import logging import urllib.parse -from ratelimit import rate_limited import aiohttp from ._version import __version__ @@ -21,7 +20,6 @@ logger = logging.getLogger('duckduckgo') -@rate_limited(1) async def query(q: str, useragent: str = DEFAULT_USER_AGENT, safesearch: bool = True, diff --git a/duckduckgo/ratelimit.py b/duckduckgo/ratelimit.py new file mode 100644 index 0000000..27ceb5d --- /dev/null +++ b/duckduckgo/ratelimit.py @@ -0,0 +1,59 @@ +# duckduckgo.py - Library for querying the DuckDuckGo API +# +# Copyright (c) 2010 Michael Stephens +# Copyright (c) 2012-2013 Michael Smith +# Copyright (c) 2017 Members of the Programming Server +# +# See LICENSE for terms of usage, modification and redistribution. + +import asyncio +import time + +class Ratelimit: + __slots__ = ( + 'frequency', + 'last_called', + ) + + def __init__(self, period=1, every=1.0): + self.frequency = abs(every) / float(period) + self.last_called = {} + + def left_to_wait(self, id): + last = self.last_called.get(id, 0.0) + elapsed = time.monotonic() - last + return self.frequency - elapsed + + def check(self, id): + return self.left_to_wait(id) <= 0 + + def update(self, id): + self.last_called[id] = time.monotonic() + + def call(self, id, func, *args, **kwargs): + duration = self.left_to_wait(id) + if duration > 0: + time.sleep(duration) + + return func(*args, **kwargs) + + async def async_call(self, id, coro, *args, **kwargs): + duration = self.left_to_wait(id) + if duration > 0: + await asyncio.sleep(duration) + + return await coro(*args, **kwargs) + + def try_call(self, id, func, *args, **kwargs): + if not self.check(id): + return (False, None) + + self.update(id) + return (True, func(*args, **kwargs)) + + async def try_async_call(self, id, coro, *args, **kwargs): + if not self.check(id): + return (False, None) + + self.update(id) + return (True, await coro(*args, **kwargs)) diff --git a/requirements.txt b/requirements.txt index 3f5fa2f..7e76aef 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1 @@ aiohttp>=2.0.0 -ratelimit>=1.4.0 From e673a13dbec224d429d447068d634f0de68bf48a Mon Sep 17 00:00:00 2001 From: Ammon Smith Date: Wed, 18 Oct 2017 19:42:50 -0700 Subject: [PATCH 34/51] Rename get_zci() to zci(). --- README.rst | 6 +++--- duckduckgo/__init__.py | 4 ++-- duckduckgo/query.py | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/README.rst b/README.rst index 47a7fa2..6a8806b 100644 --- a/README.rst +++ b/README.rst @@ -64,10 +64,10 @@ Usage >>> print(await duckduckgo.query('how to spell test', html=True).answer.text) Test appears to be spelled right!
Suggestions: test, testy, teat, tests, rest, yest. -The easiest method of quickly grabbing the best (hopefully) API result is to use duckduckgo.get_zci:: - >>> print duckduckgo.get_zci('foo') +The easiest method of quickly grabbing the best (hopefully) API result is to use duckduckgo.zci:: + >>> print duckduckgo.zci('foo') The terms foobar /ˈfʊːbɑːr/, fubar, or foo, bar, baz and qux are sometimes used as placeholder names in computer programming or computer-related documentation. (https://en.wikipedia.org/wiki/Foobar) - >>> print ddg.get_zci('foo fighters site') + >>> print ddg.zci('foo fighters site') http://www.foofighters.com/us/home Special keyword args for query(): diff --git a/duckduckgo/__init__.py b/duckduckgo/__init__.py index 6eaeba7..8c84c53 100644 --- a/duckduckgo/__init__.py +++ b/duckduckgo/__init__.py @@ -7,12 +7,12 @@ # See LICENSE for terms of usage, modification and redistribution. from ._version import __version__ -from .query import query, get_zci +from .query import query, zci from .ratelimit import Ratelimit __all__ = [ 'query', - 'get_zci', + 'zci', 'Ratelimit', '__version__', ] diff --git a/duckduckgo/query.py b/duckduckgo/query.py index 73997a6..ab88804 100644 --- a/duckduckgo/query.py +++ b/duckduckgo/query.py @@ -71,7 +71,7 @@ async def query(q: str, logger.debug("Response is {response}") return Results(response) -async def get_zci(q: str, +async def zci(q: str, web_fallback: bool = True, priority: Tuple[str] = DEFAULT_PRIORITIES, urls: bool = True, From 1298124c5668c2e210b2973638b22ed90abbfbcf Mon Sep 17 00:00:00 2001 From: Ammon Smith Date: Wed, 18 Oct 2017 19:43:15 -0700 Subject: [PATCH 35/51] Bump DDG version. --- duckduckgo/VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/duckduckgo/VERSION b/duckduckgo/VERSION index 4e09d2c..d7623be 100644 --- a/duckduckgo/VERSION +++ b/duckduckgo/VERSION @@ -1 +1 @@ -0.310 +0.400 From f3e7e50337824b1379ad40393ecbf1501ee4303c Mon Sep 17 00:00:00 2001 From: Ammon Smith Date: Wed, 18 Oct 2017 19:58:58 -0700 Subject: [PATCH 36/51] Replace call functions with "with" contexts. --- duckduckgo/ratelimit.py | 88 +++++++++++++++++++++++++++-------------- 1 file changed, 59 insertions(+), 29 deletions(-) diff --git a/duckduckgo/ratelimit.py b/duckduckgo/ratelimit.py index 27ceb5d..c68bfc7 100644 --- a/duckduckgo/ratelimit.py +++ b/duckduckgo/ratelimit.py @@ -9,6 +9,58 @@ import asyncio import time +class _Scope: + __slots__ = ( + 'parent', + 'id', + ) + + def __init__(self, parent, id): + self.parent = parent + self.id = id + + def __enter__(self): + duration = self.parent.left_to_wait(self.id) + if duration > 0: + time.sleep(duration) + + def __exit__(self, *exc): + self.parent.update(self.id) + + async def __aenter__(self): + duration = self.parent.left_to_wait(self.id) + await asyncio.sleep(duration) + + async def __aexit__(self, *exc): + self.__exit__(*exc) + +class _TryScope: + __slots__ = ( + 'parent', + 'id', + 'ok', + ) + + def __init__(self, parent, id): + self.parent = parent + self.id = id + self.ok = None + + def __enter__(self): + duration = self.parent.left_to_wait(self.id) + self.ok = (duration <= 0) + return self.ok + + def __exit__(self, *exc): + if self.ok: + self.parent.update(self.id) + + async def __aenter__(self): + return self.__enter__() + + async def __aexit__(self, *exc): + return self.__exit__(*exc) + class Ratelimit: __slots__ = ( 'frequency', @@ -19,41 +71,19 @@ def __init__(self, period=1, every=1.0): self.frequency = abs(every) / float(period) self.last_called = {} - def left_to_wait(self, id): + def left_to_wait(self, id=None): last = self.last_called.get(id, 0.0) elapsed = time.monotonic() - last return self.frequency - elapsed - def check(self, id): + def check(self, id=None): return self.left_to_wait(id) <= 0 - def update(self, id): + def update(self, id=None): self.last_called[id] = time.monotonic() - def call(self, id, func, *args, **kwargs): - duration = self.left_to_wait(id) - if duration > 0: - time.sleep(duration) - - return func(*args, **kwargs) - - async def async_call(self, id, coro, *args, **kwargs): - duration = self.left_to_wait(id) - if duration > 0: - await asyncio.sleep(duration) - - return await coro(*args, **kwargs) - - def try_call(self, id, func, *args, **kwargs): - if not self.check(id): - return (False, None) - - self.update(id) - return (True, func(*args, **kwargs)) - - async def try_async_call(self, id, coro, *args, **kwargs): - if not self.check(id): - return (False, None) + def run(self, id=None): + return _Scope(self, id) - self.update(id) - return (True, await coro(*args, **kwargs)) + def try_run(self, id=None): + return _TryScope(self, id) From efce60554d5679831b4a3f96baf28797677cdc6b Mon Sep 17 00:00:00 2001 From: Ammon Smith Date: Wed, 18 Oct 2017 20:16:59 -0700 Subject: [PATCH 37/51] Change ratelimit to be per period instead of average duration. --- duckduckgo/ratelimit.py | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/duckduckgo/ratelimit.py b/duckduckgo/ratelimit.py index c68bfc7..87a5489 100644 --- a/duckduckgo/ratelimit.py +++ b/duckduckgo/ratelimit.py @@ -63,24 +63,35 @@ async def __aexit__(self, *exc): class Ratelimit: __slots__ = ( - 'frequency', - 'last_called', + 'max_count', + 'duration', + 'limited', ) - def __init__(self, period=1, every=1.0): - self.frequency = abs(every) / float(period) - self.last_called = {} + def __init__(self, count=1, every=1.0): + self.max_count = count + self.duration = float(every) + self.limited = {} # { id : (count, last_time) } def left_to_wait(self, id=None): - last = self.last_called.get(id, 0.0) - elapsed = time.monotonic() - last - return self.frequency - elapsed + count, last_time = self.limited.get(id, (0, 0.0)) + if count < self.max_count - 1: + return 0.0 + + elapsed = time.monotonic() - last_time + return self.duration - elapsed def check(self, id=None): return self.left_to_wait(id) <= 0 def update(self, id=None): - self.last_called[id] = time.monotonic() + count, last_time = self.limited.get(id, (0, 0.0)) + now = time.monotonic() + elapsed = now - last_time + if elapsed > self.duration: + self.limited[id] = (0, now) + else: + self.limited[id] = (count + 1, last_time) def run(self, id=None): return _Scope(self, id) From a4cc79b0995fb7156f895c3f79d2bf47e1b0d088 Mon Sep 17 00:00:00 2001 From: Ammon Smith Date: Wed, 18 Oct 2017 21:03:04 -0700 Subject: [PATCH 38/51] Add missing duration check to __aenter__. --- duckduckgo/ratelimit.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/duckduckgo/ratelimit.py b/duckduckgo/ratelimit.py index 87a5489..c2f41a8 100644 --- a/duckduckgo/ratelimit.py +++ b/duckduckgo/ratelimit.py @@ -29,7 +29,8 @@ def __exit__(self, *exc): async def __aenter__(self): duration = self.parent.left_to_wait(self.id) - await asyncio.sleep(duration) + if duration > 0: + await asyncio.sleep(duration) async def __aexit__(self, *exc): self.__exit__(*exc) From 6742212f35695f650908d3e9c5686f4aaed98527 Mon Sep 17 00:00:00 2001 From: Ammon Smith Date: Wed, 7 Aug 2019 21:39:13 -0400 Subject: [PATCH 39/51] Change error message. --- duckduckgo/query.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/duckduckgo/query.py b/duckduckgo/query.py index ab88804..626ab7b 100644 --- a/duckduckgo/query.py +++ b/duckduckgo/query.py @@ -65,7 +65,7 @@ async def query(q: str, async with cs.get(url, headers={'User-Agent': useragent}) as req: response = await req.json(content_type='application/x-javascript') if response is None: - logger.error("Response is 'None'") + logger.error("Invalid response from JSON decoder") raise ValueError("Failed to decode JSON response") logger.debug("Response is {response}") From 9056d912e0ad195b01cadc8155633ff2f1e1a75b Mon Sep 17 00:00:00 2001 From: Ammon Smith Date: Wed, 7 Aug 2019 21:44:23 -0400 Subject: [PATCH 40/51] Add DuckDuckGoError. --- duckduckgo/__init__.py | 3 ++- duckduckgo/query.py | 6 ++++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/duckduckgo/__init__.py b/duckduckgo/__init__.py index 8c84c53..0429999 100644 --- a/duckduckgo/__init__.py +++ b/duckduckgo/__init__.py @@ -7,10 +7,11 @@ # See LICENSE for terms of usage, modification and redistribution. from ._version import __version__ -from .query import query, zci +from .query import DuckDuckGoError, query, zci from .ratelimit import Ratelimit __all__ = [ + 'DuckDuckGoError', 'query', 'zci', 'Ratelimit', diff --git a/duckduckgo/query.py b/duckduckgo/query.py index 626ab7b..e6b58f2 100644 --- a/duckduckgo/query.py +++ b/duckduckgo/query.py @@ -20,6 +20,9 @@ logger = logging.getLogger('duckduckgo') +class DuckDuckGoError(RuntimeError): + pass + async def query(q: str, useragent: str = DEFAULT_USER_AGENT, safesearch: bool = True, @@ -65,8 +68,7 @@ async def query(q: str, async with cs.get(url, headers={'User-Agent': useragent}) as req: response = await req.json(content_type='application/x-javascript') if response is None: - logger.error("Invalid response from JSON decoder") - raise ValueError("Failed to decode JSON response") + raise DuckDuckGoError("Invalid JSON response") logger.debug("Response is {response}") return Results(response) From 92241e4d94ebeb88d827ba5dcbb302411a42467b Mon Sep 17 00:00:00 2001 From: Ignas Maslinskas Date: Thu, 9 Apr 2020 20:49:17 +0300 Subject: [PATCH 41/51] Add method with extra return information --- duckduckgo/query.py | 64 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) diff --git a/duckduckgo/query.py b/duckduckgo/query.py index e6b58f2..7d86dbb 100644 --- a/duckduckgo/query.py +++ b/duckduckgo/query.py @@ -129,3 +129,67 @@ async def zci(q: str, logger.debug(f"Final response: {response!r}") return response + +async def zci_extra(q: str, + web_fallback: bool = True, + priority: Tuple[str] = DEFAULT_PRIORITIES, + urls: bool = True, + **kwargs) -> Tuple[str, bool, bool]: + '''A helper method to get a single (and hopefully the best) ZCI result. + priority=list can be used to set the order in which fields will be checked for answers. + Use web_fallback=True to fall back to grabbing the first web result. + passed to query. This method will fall back to 'Sorry, no results.' + if it cannot find anything. Returns a tuple with [result, result_found, is_fallback] which + allows to determine how the result was retrieved.''' + + logger.info(f"Performing DDG ZCI: '{q}'") + logger.debug(f"Web fallback: {web_fallback}") + logger.debug(f"Use URLs: {urls}") + + ddg = await query(f'\\{q}', **kwargs) + response = '' + found = True + is_fallback = False + + for p in priority: + ps = p.split('.') + type = ps[0] + index = int(ps[1]) if len(ps) > 1 else None + + result = getattr(ddg, type) + if index is not None: + if not hasattr(result, '__getitem__'): + logger.error("Result is not indexable!") + raise TypeError(f'{type} field is not indexable') + + result = result[index] if len(result) > index else None + + if not result: + continue + elif result.text: + logger.debug(f"Result has text: {result.text}") + response = result.text + + if getattr(result, 'url', None) and urls: + logger.debug(f"Result has url: {result.url}") + response += f' ({result.url})' + + break + + # If there still isn't anything, try to get the first web result + logger.debug("Trying web fallback...") + if not response and web_fallback: + if ddg.redirect.url: + is_fallback = True + found = True + response = ddg.redirect.url + + # Final fallback + logger.info("No results!") + if not response: + found = False + is_fallback = True + response = 'Sorry, no results.' + + logger.debug(f"Final response: {response!r}") + return (response, found, is_fallback) From a4d772d5958a1b1db17642bc8dc0cfc02d197a54 Mon Sep 17 00:00:00 2001 From: Ignas Maslinskas Date: Thu, 9 Apr 2020 20:56:35 +0300 Subject: [PATCH 42/51] Add to init --- duckduckgo/__init__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/duckduckgo/__init__.py b/duckduckgo/__init__.py index 0429999..0540f89 100644 --- a/duckduckgo/__init__.py +++ b/duckduckgo/__init__.py @@ -7,13 +7,14 @@ # See LICENSE for terms of usage, modification and redistribution. from ._version import __version__ -from .query import DuckDuckGoError, query, zci +from .query import DuckDuckGoError, query, zci, zci_extra from .ratelimit import Ratelimit __all__ = [ 'DuckDuckGoError', 'query', 'zci', + 'zci_extra', 'Ratelimit', '__version__', ] From a59c3cb54369885cec956afb8f132eb8c49cbc74 Mon Sep 17 00:00:00 2001 From: Ignas Maslinskas Date: Thu, 9 Apr 2020 21:04:18 +0300 Subject: [PATCH 43/51] Add format --- duckduckgo/query.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/duckduckgo/query.py b/duckduckgo/query.py index 7d86dbb..e0c364d 100644 --- a/duckduckgo/query.py +++ b/duckduckgo/query.py @@ -70,7 +70,7 @@ async def query(q: str, if response is None: raise DuckDuckGoError("Invalid JSON response") - logger.debug("Response is {response}") + logger.debug(f"Response is {response}") return Results(response) async def zci(q: str, From ddf1d920b6b5b4bc17a6acc624bb1fcc5e0624c2 Mon Sep 17 00:00:00 2001 From: Ignas Maslinskas Date: Thu, 9 Apr 2020 21:23:08 +0300 Subject: [PATCH 44/51] Cleanup logs and change ret type --- duckduckgo/query.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/duckduckgo/query.py b/duckduckgo/query.py index e0c364d..3e0bbc9 100644 --- a/duckduckgo/query.py +++ b/duckduckgo/query.py @@ -64,6 +64,7 @@ async def query(q: str, encparams = urllib.parse.urlencode(params) url = f'http://api.duckduckgo.com/?{encparams}' + logger.debug(f"Full request url: {url}") async with aiohttp.ClientSession() as cs: async with cs.get(url, headers={'User-Agent': useragent}) as req: response = await req.json(content_type='application/x-javascript') @@ -117,14 +118,14 @@ async def zci(q: str, break # If there still isn't anything, try to get the first web result - logger.debug("Trying web fallback...") if not response and web_fallback: + logger.debug("Trying web fallback...") if ddg.redirect.url: response = ddg.redirect.url # Final fallback - logger.info("No results!") if not response: + logger.info("No results!") response = 'Sorry, no results.' logger.debug(f"Final response: {response!r}") @@ -134,12 +135,12 @@ async def zci_extra(q: str, web_fallback: bool = True, priority: Tuple[str] = DEFAULT_PRIORITIES, urls: bool = True, - **kwargs) -> Tuple[str, bool, bool]: + **kwargs) -> Tuple[str, bool, str]: '''A helper method to get a single (and hopefully the best) ZCI result. priority=list can be used to set the order in which fields will be checked for answers. Use web_fallback=True to fall back to grabbing the first web result. passed to query. This method will fall back to 'Sorry, no results.' - if it cannot find anything. Returns a tuple with [result, result_found, is_fallback] which + if it cannot find anything. Returns a tuple with [result, result_found, result_type] which allows to determine how the result was retrieved.''' logger.info(f"Performing DDG ZCI: '{q}'") @@ -149,7 +150,7 @@ async def zci_extra(q: str, ddg = await query(f'\\{q}', **kwargs) response = '' found = True - is_fallback = False + result_type = '' for p in priority: ps = p.split('.') @@ -177,19 +178,18 @@ async def zci_extra(q: str, break # If there still isn't anything, try to get the first web result - logger.debug("Trying web fallback...") if not response and web_fallback: + logger.debug("Trying web fallback...") if ddg.redirect.url: - is_fallback = True found = True + result_type = ddg.type response = ddg.redirect.url # Final fallback - logger.info("No results!") if not response: + logger.info("No results!") found = False - is_fallback = True response = 'Sorry, no results.' logger.debug(f"Final response: {response!r}") - return (response, found, is_fallback) + return (response, found, result_type) From d55e2dfcb45f1e50bb544b9fb4a0ab14c0e1dc40 Mon Sep 17 00:00:00 2001 From: Ignas Maslinskas Date: Thu, 9 Apr 2020 21:36:08 +0300 Subject: [PATCH 45/51] Simplify return type --- duckduckgo/query.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/duckduckgo/query.py b/duckduckgo/query.py index 3e0bbc9..7f683e2 100644 --- a/duckduckgo/query.py +++ b/duckduckgo/query.py @@ -135,12 +135,12 @@ async def zci_extra(q: str, web_fallback: bool = True, priority: Tuple[str] = DEFAULT_PRIORITIES, urls: bool = True, - **kwargs) -> Tuple[str, bool, str]: + **kwargs) -> Tuple[str, str]: '''A helper method to get a single (and hopefully the best) ZCI result. priority=list can be used to set the order in which fields will be checked for answers. Use web_fallback=True to fall back to grabbing the first web result. passed to query. This method will fall back to 'Sorry, no results.' - if it cannot find anything. Returns a tuple with [result, result_found, result_type] which + if it cannot find anything. Returns a tuple with [result, result_type] which allows to determine how the result was retrieved.''' logger.info(f"Performing DDG ZCI: '{q}'") @@ -149,8 +149,7 @@ async def zci_extra(q: str, ddg = await query(f'\\{q}', **kwargs) response = '' - found = True - result_type = '' + result_type = getattr(ddg, 'type', '') for p in priority: ps = p.split('.') @@ -181,15 +180,12 @@ async def zci_extra(q: str, if not response and web_fallback: logger.debug("Trying web fallback...") if ddg.redirect.url: - found = True - result_type = ddg.type response = ddg.redirect.url # Final fallback if not response: logger.info("No results!") - found = False response = 'Sorry, no results.' logger.debug(f"Final response: {response!r}") - return (response, found, result_type) + return (response, result_type) From 1931f02ab7874d03c97794a4158bb4a3afbaff48 Mon Sep 17 00:00:00 2001 From: Ignas Maslinskas Date: Fri, 10 Apr 2020 07:59:51 +0300 Subject: [PATCH 46/51] Add more generic zci function --- duckduckgo/__init__.py | 5 +-- duckduckgo/query.py | 76 +++++++++++++----------------------------- 2 files changed, 27 insertions(+), 54 deletions(-) diff --git a/duckduckgo/__init__.py b/duckduckgo/__init__.py index 0540f89..5d9a806 100644 --- a/duckduckgo/__init__.py +++ b/duckduckgo/__init__.py @@ -7,14 +7,15 @@ # See LICENSE for terms of usage, modification and redistribution. from ._version import __version__ -from .query import DuckDuckGoError, query, zci, zci_extra +from .query import DuckDuckGoError, query, zci_with_result, zci_with_type, zci from .ratelimit import Ratelimit __all__ = [ 'DuckDuckGoError', 'query', + 'zci_with_result', + 'zci_with_type', 'zci', - 'zci_extra', 'Ratelimit', '__version__', ] diff --git a/duckduckgo/query.py b/duckduckgo/query.py index 7f683e2..0fee8fd 100644 --- a/duckduckgo/query.py +++ b/duckduckgo/query.py @@ -74,16 +74,18 @@ async def query(q: str, logger.debug(f"Response is {response}") return Results(response) -async def zci(q: str, + +async def zci_with_result(q: str, web_fallback: bool = True, priority: Tuple[str] = DEFAULT_PRIORITIES, urls: bool = True, - **kwargs) -> str: + **kwargs) -> [str, Results]: '''A helper method to get a single (and hopefully the best) ZCI result. priority=list can be used to set the order in which fields will be checked for answers. Use web_fallback=True to fall back to grabbing the first web result. passed to query. This method will fall back to 'Sorry, no results.' - if it cannot find anything.''' + if it cannot find anything. Retruns tuple with result string and original DDG + Results.''' logger.info(f"Performing DDG ZCI: '{q}'") logger.debug(f"Web fallback: {web_fallback}") @@ -129,63 +131,33 @@ async def zci(q: str, response = 'Sorry, no results.' logger.debug(f"Final response: {response!r}") - return response + return [response, ddg] -async def zci_extra(q: str, +async def zci(q: str, web_fallback: bool = True, priority: Tuple[str] = DEFAULT_PRIORITIES, urls: bool = True, - **kwargs) -> Tuple[str, str]: + **kwargs) -> str: '''A helper method to get a single (and hopefully the best) ZCI result. priority=list can be used to set the order in which fields will be checked for answers. Use web_fallback=True to fall back to grabbing the first web result. passed to query. This method will fall back to 'Sorry, no results.' - if it cannot find anything. Returns a tuple with [result, result_type] which - allows to determine how the result was retrieved.''' - - logger.info(f"Performing DDG ZCI: '{q}'") - logger.debug(f"Web fallback: {web_fallback}") - logger.debug(f"Use URLs: {urls}") - - ddg = await query(f'\\{q}', **kwargs) - response = '' - result_type = getattr(ddg, 'type', '') - - for p in priority: - ps = p.split('.') - type = ps[0] - index = int(ps[1]) if len(ps) > 1 else None - - result = getattr(ddg, type) - if index is not None: - if not hasattr(result, '__getitem__'): - logger.error("Result is not indexable!") - raise TypeError(f'{type} field is not indexable') - - result = result[index] if len(result) > index else None - - if not result: - continue - elif result.text: - logger.debug(f"Result has text: {result.text}") - response = result.text - - if getattr(result, 'url', None) and urls: - logger.debug(f"Result has url: {result.url}") - response += f' ({result.url})' + if it cannot find anything. Returns just result string.''' - break - - # If there still isn't anything, try to get the first web result - if not response and web_fallback: - logger.debug("Trying web fallback...") - if ddg.redirect.url: - response = ddg.redirect.url + (result, ddg) = await zci_with_result(q, web_fallback, priority, urls, **kwargs)[0] + return result - # Final fallback - if not response: - logger.info("No results!") - response = 'Sorry, no results.' +async def zci_with_type(q: str, + web_fallback: bool = True, + priority: Tuple[str] = DEFAULT_PRIORITIES, + urls: bool = True, + **kwargs) -> Tuple[str, str]: + '''A helper method to get a single (and hopefully the best) ZCI result. + priority=list can be used to set the order in which fields will be checked for answers. + Use web_fallback=True to fall back to grabbing the first web result. + passed to query. This method will fall back to 'Sorry, no results.' + if it cannot find anything. Returns a tuple with [result, result_type] which + allows to determine which type of result was returned.''' - logger.debug(f"Final response: {response!r}") - return (response, result_type) + (result, ddg) = await zci_with_result(q, web_fallback, priority, urls, **kwargs) + return (result, getattr(ddg, 'type', '')) From 7cff4fb5b2e6ed1c7229bb228b92ca69799bf242 Mon Sep 17 00:00:00 2001 From: Ignas Maslinskas <5737899+Hoffs@users.noreply.github.com> Date: Fri, 10 Apr 2020 14:20:50 +0300 Subject: [PATCH 47/51] Fix typos from code review Co-Authored-By: Ammon Smith --- duckduckgo/query.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/duckduckgo/query.py b/duckduckgo/query.py index 0fee8fd..383b325 100644 --- a/duckduckgo/query.py +++ b/duckduckgo/query.py @@ -84,8 +84,8 @@ async def zci_with_result(q: str, priority=list can be used to set the order in which fields will be checked for answers. Use web_fallback=True to fall back to grabbing the first web result. passed to query. This method will fall back to 'Sorry, no results.' - if it cannot find anything. Retruns tuple with result string and original DDG - Results.''' + if it cannot find anything. Returns tuple with result string and original DDG + results.''' logger.info(f"Performing DDG ZCI: '{q}'") logger.debug(f"Web fallback: {web_fallback}") @@ -142,7 +142,7 @@ async def zci(q: str, priority=list can be used to set the order in which fields will be checked for answers. Use web_fallback=True to fall back to grabbing the first web result. passed to query. This method will fall back to 'Sorry, no results.' - if it cannot find anything. Returns just result string.''' + if it cannot find anything. Only returns the result string.''' (result, ddg) = await zci_with_result(q, web_fallback, priority, urls, **kwargs)[0] return result From 31b4a30bd911f611645afd966ce3aa3fbcdfe1f3 Mon Sep 17 00:00:00 2001 From: Ignas Maslinskas Date: Thu, 23 Apr 2020 22:08:16 +0300 Subject: [PATCH 48/51] Cleanup linter warnings --- duckduckgo/query.py | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/duckduckgo/query.py b/duckduckgo/query.py index 383b325..2b54a6d 100644 --- a/duckduckgo/query.py +++ b/duckduckgo/query.py @@ -42,10 +42,10 @@ async def query(q: str, Any other keyword arguments are passed directly to DuckDuckGo as URL params. """ - logger.info(f"Performing DDG query: '{q}'") - logger.debug(f"Safesearch: {safesearch}") - logger.debug(f"HTML: {html}") - logger.debug(f"Meanings: {meanings}") + logger.info("Performing DDG query: '%s'", query) + logger.debug("Safesearch: %s", safesearch) + logger.debug("HTML: %s", html) + logger.debug("Meanings: %s", meanings) safesearch = '1' if safesearch else '-1' html = '0' if html else '1' @@ -59,19 +59,19 @@ async def query(q: str, 'd': meanings, } params.update(kwargs) - logger.debug(f"Full parameters: {params}") + logger.debug("Full parameters: %s", params) encparams = urllib.parse.urlencode(params) url = f'http://api.duckduckgo.com/?{encparams}' - logger.debug(f"Full request url: {url}") + logger.debug("Full request url: %s", url) async with aiohttp.ClientSession() as cs: async with cs.get(url, headers={'User-Agent': useragent}) as req: response = await req.json(content_type='application/x-javascript') if response is None: raise DuckDuckGoError("Invalid JSON response") - logger.debug(f"Response is {response}") + logger.debug("Response is %s", response) return Results(response) @@ -87,9 +87,9 @@ async def zci_with_result(q: str, if it cannot find anything. Returns tuple with result string and original DDG results.''' - logger.info(f"Performing DDG ZCI: '{q}'") - logger.debug(f"Web fallback: {web_fallback}") - logger.debug(f"Use URLs: {urls}") + logger.info("Performing DDG ZCI: '%s'", q) + logger.debug("Web fallback: %s", web_fallback) + logger.debug("Use URLs: %s", urls) ddg = await query(f'\\{q}', **kwargs) response = '' @@ -109,12 +109,13 @@ async def zci_with_result(q: str, if not result: continue - elif result.text: - logger.debug(f"Result has text: {result.text}") + + if result.text: + logger.debug("Result has text: %s", result.text) response = result.text if getattr(result, 'url', None) and urls: - logger.debug(f"Result has url: {result.url}") + logger.debug("Result has url: %s", result.url) response += f' ({result.url})' break @@ -130,7 +131,7 @@ async def zci_with_result(q: str, logger.info("No results!") response = 'Sorry, no results.' - logger.debug(f"Final response: {response!r}") + logger.debug("Final response: %s", response) return [response, ddg] async def zci(q: str, @@ -144,7 +145,7 @@ async def zci(q: str, passed to query. This method will fall back to 'Sorry, no results.' if it cannot find anything. Only returns the result string.''' - (result, ddg) = await zci_with_result(q, web_fallback, priority, urls, **kwargs)[0] + (result, _) = await zci_with_result(q, web_fallback, priority, urls, **kwargs)[0] return result async def zci_with_type(q: str, From 9c602347d1b226938b6d0ead3f05353953685054 Mon Sep 17 00:00:00 2001 From: Ammon Smith Date: Sun, 10 May 2020 03:35:12 -0400 Subject: [PATCH 49/51] Don't subscript from async directly. --- duckduckgo/query.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/duckduckgo/query.py b/duckduckgo/query.py index 2b54a6d..071279a 100644 --- a/duckduckgo/query.py +++ b/duckduckgo/query.py @@ -145,7 +145,7 @@ async def zci(q: str, passed to query. This method will fall back to 'Sorry, no results.' if it cannot find anything. Only returns the result string.''' - (result, _) = await zci_with_result(q, web_fallback, priority, urls, **kwargs)[0] + (result, _) = await zci_with_result(q, web_fallback, priority, urls, **kwargs) return result async def zci_with_type(q: str, From 28280c23ec713f0f51c49eb0ee5bc98138bb6b45 Mon Sep 17 00:00:00 2001 From: Ammon Smith Date: Sun, 10 May 2020 03:57:32 -0400 Subject: [PATCH 50/51] Update travis build. --- .travis.yml | 27 ++++++++++++++++++++++----- requirements-dev.txt | 2 ++ 2 files changed, 24 insertions(+), 5 deletions(-) create mode 100644 requirements-dev.txt diff --git a/.travis.yml b/.travis.yml index 0a4ffc0..8714e2b 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,20 +1,37 @@ language: python -dist: trusty +dist: xenial +os: linux + +stages: + - black + - test python: - '3.6' + - '3.7' + - '3.8' + - '3.8-dev' - 'nightly' -sudo: false +jobs: + include: + - stage: black + python: '3.8' + script: + - black --check duckduckgo || true + allow_failures: + - python: 'nightly' + fast_finish: true cache: pip install: - - pip install --requirement requirements.txt - - pip install pylint + - pip install -r requirements.txt + - pip install -r requirements-dev.txt script: - - pylint duckduckgo + # Display all lints and a report + - pylint --reports=yes duckduckgo notifications: email: diff --git a/requirements-dev.txt b/requirements-dev.txt new file mode 100644 index 0000000..3ab45ae --- /dev/null +++ b/requirements-dev.txt @@ -0,0 +1,2 @@ +git+https://github.com/ambv/black@stable +pylint>=2.5 From 73b6b870d6c73d20a0d2c972b71b4f1004efdca0 Mon Sep 17 00:00:00 2001 From: Black Formatter Date: Sun, 10 May 2020 03:58:14 -0400 Subject: [PATCH 51/51] Run black formatter. --- .travis.yml | 2 +- duckduckgo/__init__.py | 14 ++--- duckduckgo/_version.py | 4 +- duckduckgo/models.py | 128 +++++++++++++++++++++------------------- duckduckgo/query.py | 114 +++++++++++++++++++---------------- duckduckgo/ratelimit.py | 23 ++++---- 6 files changed, 152 insertions(+), 133 deletions(-) diff --git a/.travis.yml b/.travis.yml index 8714e2b..d4f6b17 100644 --- a/.travis.yml +++ b/.travis.yml @@ -18,7 +18,7 @@ jobs: - stage: black python: '3.8' script: - - black --check duckduckgo || true + - black --check duckduckgo allow_failures: - python: 'nightly' fast_finish: true diff --git a/duckduckgo/__init__.py b/duckduckgo/__init__.py index 5d9a806..f306d13 100644 --- a/duckduckgo/__init__.py +++ b/duckduckgo/__init__.py @@ -11,11 +11,11 @@ from .ratelimit import Ratelimit __all__ = [ - 'DuckDuckGoError', - 'query', - 'zci_with_result', - 'zci_with_type', - 'zci', - 'Ratelimit', - '__version__', + "DuckDuckGoError", + "query", + "zci_with_result", + "zci_with_type", + "zci", + "Ratelimit", + "__version__", ] diff --git a/duckduckgo/_version.py b/duckduckgo/_version.py index 225fb2a..c65ad6a 100644 --- a/duckduckgo/_version.py +++ b/duckduckgo/_version.py @@ -9,9 +9,9 @@ import os __all__ = [ - '__version__', + "__version__", ] -path = os.path.join(os.path.dirname(__file__), 'VERSION') +path = os.path.join(os.path.dirname(__file__), "VERSION") with open(path) as fh: __version__ = fh.read().strip() diff --git a/duckduckgo/models.py b/duckduckgo/models.py index 7504bd5..5e57fc3 100644 --- a/duckduckgo/models.py +++ b/duckduckgo/models.py @@ -7,89 +7,91 @@ # See LICENSE for terms of usage, modification and redistribution. RESULT_TYPES = { - 'A': 'answer', - 'D': 'disambiguation', - 'C': 'category', - 'N': 'name', - 'E': 'exclusive', - '': 'nothing', + "A": "answer", + "D": "disambiguation", + "C": "category", + "N": "name", + "E": "exclusive", + "": "nothing", } + class Results: __slots__ = ( - 'type', - 'json', - 'api_version', - 'heading', - 'results', - 'related', - 'abstract', - 'redirect', - 'definition', - 'answer', - 'image', + "type", + "json", + "api_version", + "heading", + "results", + "related", + "abstract", + "redirect", + "definition", + "answer", + "image", ) def __init__(self, data): - json_type = data.get('Type', '') - self.type = RESULT_TYPES.get(json_type, '') + json_type = data.get("Type", "") + self.type = RESULT_TYPES.get(json_type, "") self.json = data - self.api_version = None # compat + self.api_version = None # compat - self.heading = data.get('Heading', '') + self.heading = data.get("Heading", "") - self.results = [Result(elem) for elem in data.get('Results', ())] - self.related = [Result(elem) for elem in data.get('RelatedTopics', ())] + self.results = [Result(elem) for elem in data.get("Results", ())] + self.related = [Result(elem) for elem in data.get("RelatedTopics", ())] self.abstract = Abstract(data) self.redirect = Redirect(data) self.definition = Definition(data) self.answer = Answer(data) - self.image = Image({'Result': data.get('Image', '')}) + self.image = Image({"Result": data.get("Image", "")}) + class Abstract: __slots__ = ( - 'html', - 'text', - 'url', - 'source', + "html", + "text", + "url", + "source", ) def __init__(self, data): - self.html = data.get('Abstract', '') - self.text = data.get('AbstractText', '') - self.url = data.get('AbstractURL', '') - self.source = data.get('AbstractSource') + self.html = data.get("Abstract", "") + self.text = data.get("AbstractText", "") + self.url = data.get("AbstractURL", "") + self.source = data.get("AbstractSource") + class Redirect: - __slots__ = ( - 'url', - ) + __slots__ = ("url",) def __init__(self, data): - self.url = data.get('Redirect', '') + self.url = data.get("Redirect", "") + class Result: __slots__ = ( - 'topics', - 'html', - 'text', - 'url', - 'icon', + "topics", + "html", + "text", + "url", + "icon", ) def __init__(self, data): - self.topics = data.get('Topics', []) + self.topics = data.get("Topics", []) if self.topics: self.topics = [Result(t) for t in self.topics] return - self.html = data.get('Result') - self.text = data.get('Text') - self.url = data.get('FirstURL') + self.html = data.get("Result") + self.text = data.get("Text") + self.url = data.get("FirstURL") - icon_json = data.get('Icon') + icon_json = data.get("Icon") if icon_json is not None: self.icon = Image(icon_json) else: @@ -98,34 +100,36 @@ def __init__(self, data): class Image: __slots__ = ( - 'url', - 'height', - 'width', + "url", + "height", + "width", ) def __init__(self, data): - self.url = data.get('Result') - self.height = data.get('Height', None) - self.width = data.get('Width', None) + self.url = data.get("Result") + self.height = data.get("Height", None) + self.width = data.get("Width", None) + class Answer: __slots__ = ( - 'text', - 'type', + "text", + "type", ) def __init__(self, data): - self.text = data.get('Answer') - self.type = data.get('AnswerType', '') + self.text = data.get("Answer") + self.type = data.get("AnswerType", "") + class Definition: __slots__ = ( - 'text', - 'url', - 'source', + "text", + "url", + "source", ) def __init__(self, data): - self.text = data.get('Definition', '') - self.url = data.get('DefinitionURL') - self.source = data.get('DefinitionSource') + self.text = data.get("Definition", "") + self.url = data.get("DefinitionURL") + self.source = data.get("DefinitionSource") diff --git a/duckduckgo/query.py b/duckduckgo/query.py index 071279a..020c253 100644 --- a/duckduckgo/query.py +++ b/duckduckgo/query.py @@ -15,20 +15,24 @@ from ._version import __version__ from .models import Results -DEFAULT_USER_AGENT = f'python-duckduckgo {__version__}' -DEFAULT_PRIORITIES = ('answer', 'abstract', 'related.0', 'definition') +DEFAULT_USER_AGENT = f"python-duckduckgo {__version__}" +DEFAULT_PRIORITIES = ("answer", "abstract", "related.0", "definition") + +logger = logging.getLogger("duckduckgo") -logger = logging.getLogger('duckduckgo') class DuckDuckGoError(RuntimeError): pass -async def query(q: str, - useragent: str = DEFAULT_USER_AGENT, - safesearch: bool = True, - html: bool = False, - meanings: bool = True, - **kwargs) -> Results: + +async def query( + q: str, + useragent: str = DEFAULT_USER_AGENT, + safesearch: bool = True, + html: bool = False, + meanings: bool = True, + **kwargs, +) -> Results: """ Query DuckDuckGo, returning a Results object. @@ -47,27 +51,27 @@ async def query(q: str, logger.debug("HTML: %s", html) logger.debug("Meanings: %s", meanings) - safesearch = '1' if safesearch else '-1' - html = '0' if html else '1' - meanings = '0' if meanings else '1' + safesearch = "1" if safesearch else "-1" + html = "0" if html else "1" + meanings = "0" if meanings else "1" params = { - 'q': q, - 'o': 'json', - 'kp': safesearch, - 'no_redirect': '1', - 'no_html': html, - 'd': meanings, + "q": q, + "o": "json", + "kp": safesearch, + "no_redirect": "1", + "no_html": html, + "d": meanings, } params.update(kwargs) logger.debug("Full parameters: %s", params) encparams = urllib.parse.urlencode(params) - url = f'http://api.duckduckgo.com/?{encparams}' + url = f"http://api.duckduckgo.com/?{encparams}" logger.debug("Full request url: %s", url) async with aiohttp.ClientSession() as cs: - async with cs.get(url, headers={'User-Agent': useragent}) as req: - response = await req.json(content_type='application/x-javascript') + async with cs.get(url, headers={"User-Agent": useragent}) as req: + response = await req.json(content_type="application/x-javascript") if response is None: raise DuckDuckGoError("Invalid JSON response") @@ -75,35 +79,37 @@ async def query(q: str, return Results(response) -async def zci_with_result(q: str, - web_fallback: bool = True, - priority: Tuple[str] = DEFAULT_PRIORITIES, - urls: bool = True, - **kwargs) -> [str, Results]: - '''A helper method to get a single (and hopefully the best) ZCI result. +async def zci_with_result( + q: str, + web_fallback: bool = True, + priority: Tuple[str] = DEFAULT_PRIORITIES, + urls: bool = True, + **kwargs, +) -> [str, Results]: + """A helper method to get a single (and hopefully the best) ZCI result. priority=list can be used to set the order in which fields will be checked for answers. Use web_fallback=True to fall back to grabbing the first web result. passed to query. This method will fall back to 'Sorry, no results.' if it cannot find anything. Returns tuple with result string and original DDG - results.''' + results.""" logger.info("Performing DDG ZCI: '%s'", q) logger.debug("Web fallback: %s", web_fallback) logger.debug("Use URLs: %s", urls) - ddg = await query(f'\\{q}', **kwargs) - response = '' + ddg = await query(f"\\{q}", **kwargs) + response = "" for p in priority: - ps = p.split('.') + ps = p.split(".") type = ps[0] index = int(ps[1]) if len(ps) > 1 else None result = getattr(ddg, type) if index is not None: - if not hasattr(result, '__getitem__'): + if not hasattr(result, "__getitem__"): logger.error("Result is not indexable!") - raise TypeError(f'{type} field is not indexable') + raise TypeError(f"{type} field is not indexable") result = result[index] if len(result) > index else None @@ -114,9 +120,9 @@ async def zci_with_result(q: str, logger.debug("Result has text: %s", result.text) response = result.text - if getattr(result, 'url', None) and urls: + if getattr(result, "url", None) and urls: logger.debug("Result has url: %s", result.url) - response += f' ({result.url})' + response += f" ({result.url})" break @@ -129,36 +135,42 @@ async def zci_with_result(q: str, # Final fallback if not response: logger.info("No results!") - response = 'Sorry, no results.' + response = "Sorry, no results." logger.debug("Final response: %s", response) return [response, ddg] -async def zci(q: str, - web_fallback: bool = True, - priority: Tuple[str] = DEFAULT_PRIORITIES, - urls: bool = True, - **kwargs) -> str: - '''A helper method to get a single (and hopefully the best) ZCI result. + +async def zci( + q: str, + web_fallback: bool = True, + priority: Tuple[str] = DEFAULT_PRIORITIES, + urls: bool = True, + **kwargs, +) -> str: + """A helper method to get a single (and hopefully the best) ZCI result. priority=list can be used to set the order in which fields will be checked for answers. Use web_fallback=True to fall back to grabbing the first web result. passed to query. This method will fall back to 'Sorry, no results.' - if it cannot find anything. Only returns the result string.''' + if it cannot find anything. Only returns the result string.""" (result, _) = await zci_with_result(q, web_fallback, priority, urls, **kwargs) return result -async def zci_with_type(q: str, - web_fallback: bool = True, - priority: Tuple[str] = DEFAULT_PRIORITIES, - urls: bool = True, - **kwargs) -> Tuple[str, str]: - '''A helper method to get a single (and hopefully the best) ZCI result. + +async def zci_with_type( + q: str, + web_fallback: bool = True, + priority: Tuple[str] = DEFAULT_PRIORITIES, + urls: bool = True, + **kwargs, +) -> Tuple[str, str]: + """A helper method to get a single (and hopefully the best) ZCI result. priority=list can be used to set the order in which fields will be checked for answers. Use web_fallback=True to fall back to grabbing the first web result. passed to query. This method will fall back to 'Sorry, no results.' if it cannot find anything. Returns a tuple with [result, result_type] which - allows to determine which type of result was returned.''' + allows to determine which type of result was returned.""" (result, ddg) = await zci_with_result(q, web_fallback, priority, urls, **kwargs) - return (result, getattr(ddg, 'type', '')) + return (result, getattr(ddg, "type", "")) diff --git a/duckduckgo/ratelimit.py b/duckduckgo/ratelimit.py index c2f41a8..eda9c0f 100644 --- a/duckduckgo/ratelimit.py +++ b/duckduckgo/ratelimit.py @@ -9,10 +9,11 @@ import asyncio import time + class _Scope: __slots__ = ( - 'parent', - 'id', + "parent", + "id", ) def __init__(self, parent, id): @@ -35,11 +36,12 @@ async def __aenter__(self): async def __aexit__(self, *exc): self.__exit__(*exc) + class _TryScope: __slots__ = ( - 'parent', - 'id', - 'ok', + "parent", + "id", + "ok", ) def __init__(self, parent, id): @@ -49,7 +51,7 @@ def __init__(self, parent, id): def __enter__(self): duration = self.parent.left_to_wait(self.id) - self.ok = (duration <= 0) + self.ok = duration <= 0 return self.ok def __exit__(self, *exc): @@ -62,17 +64,18 @@ async def __aenter__(self): async def __aexit__(self, *exc): return self.__exit__(*exc) + class Ratelimit: __slots__ = ( - 'max_count', - 'duration', - 'limited', + "max_count", + "duration", + "limited", ) def __init__(self, count=1, every=1.0): self.max_count = count self.duration = float(every) - self.limited = {} # { id : (count, last_time) } + self.limited = {} # { id : (count, last_time) } def left_to_wait(self, id=None): count, last_time = self.limited.get(id, (0, 0.0))