
Commit 048044c

A couple of changes to fix scrapy#303:

* improved detection of inside-project environments
* made the list command faster (by instantiating only the spider manager)
* print a warning when extensions (middlewares, etc.) are disabled, using the message carried by their NotConfigured exception
* assert that the Scrapy configuration hasn't been loaded yet in scrapyd.runner
* simplified the IgnoreRequest exception, to avoid loading settings when importing scrapy.exceptions
* added a test to make sure certain modules don't cause the scrapy.conf module to be loaded, so the scrapyd runner bootstrapping performs properly
1 parent 48b30ba commit 048044c

9 files changed (+54, -22 lines)

scrapy/cmdline.py (2 additions, 1 deletion)

@@ -13,6 +13,7 @@
 from scrapy.command import ScrapyCommand
 from scrapy.exceptions import UsageError
 from scrapy.utils.misc import walk_modules
+from scrapy.utils.project import inside_project
 
 def _iter_command_classes(module_name):
     # TODO: add `name` attribute to commands and and merge this function with
@@ -106,7 +107,7 @@ def execute(argv=None):
         argv = sys.argv
     crawler = CrawlerProcess(settings)
     crawler.install()
-    inproject = bool(settings.settings_module)
+    inproject = inside_project()
     _check_deprecated_scrapy_ctl(argv, inproject) # TODO: remove for Scrapy 0.11
     cmds = _get_commands_dict(inproject)
     cmdname = _pop_command_name(argv)
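
The key change here is that `execute()` now delegates the project check to `scrapy.utils.project.inside_project()` instead of testing whether a settings module happens to be set. That helper's body is not part of this diff; a rough sketch of the kind of check it could perform (an assumption for illustration, not the committed code) looks like this:

```python
# Hedged sketch: one plausible inside_project() check, built on the same
# helper the diffs below already import (closest_scrapy_cfg).
import os

from scrapy.utils.conf import closest_scrapy_cfg

def inside_project():
    # An explicitly configured settings module counts as being inside a project.
    if os.environ.get('SCRAPY_SETTINGS_MODULE'):
        return True
    # Otherwise, look for a scrapy.cfg file up the directory tree.
    return bool(closest_scrapy_cfg())
```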

scrapy/commands/list.py (7 additions, 1 deletion)

@@ -1,4 +1,8 @@
+import os
+
 from scrapy.command import ScrapyCommand
+from scrapy.utils.misc import load_object
+from scrapy.conf import settings
 
 class Command(ScrapyCommand):
 
@@ -9,4 +13,6 @@ def short_desc(self):
         return "List available spiders"
 
     def run(self, args, opts):
-        print "\n".join(self.crawler.spiders.list())
+        spman_cls = load_object(settings['SPIDER_MANAGER_CLASS'])
+        spiders = spman_cls.from_settings(settings)
+        print os.linesep.join(spiders.list())
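
The command no longer goes through `self.crawler`, so listing spiders only pays for building the spider manager. The same pattern as a standalone Python 2 sketch (a hypothetical script, reusing the helpers the diff imports):

```python
# Sketch: load the configured spider manager class and build it straight
# from the settings, without creating a Crawler.
from scrapy.conf import settings
from scrapy.utils.misc import load_object

spman_cls = load_object(settings['SPIDER_MANAGER_CLASS'])
spiders = spman_cls.from_settings(settings)
for name in sorted(spiders.list()):
    print name
```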

scrapy/core/engine.py (1 addition, 3 deletions)

@@ -166,13 +166,11 @@ def _on_error(_failure):
             exc = _failure.value
             if isinstance(exc, IgnoreRequest):
                 errmsg = _failure.getErrorMessage()
-                level = exc.level
             else:
                 errmsg = str(_failure)
-                level = log.ERROR
             if errmsg:
                 log.msg("Error downloading <%s>: %s" % (request.url, errmsg), \
-                    level=level, spider=spider)
+                    level=log.ERROR, spider=spider)
             return Failure(IgnoreRequest(str(exc)))
 
         def _on_complete(_):

scrapy/exceptions.py (0 additions, 9 deletions)

@@ -5,8 +5,6 @@
 new exceptions here without documenting them there.
 """
 
-from scrapy import log
-
 # Internal
 
 class NotConfigured(Exception):
@@ -18,13 +16,6 @@ class NotConfigured(Exception):
 class IgnoreRequest(Exception):
     """Indicates a decision was made not to process a request"""
 
-    def __init__(self, msg='', level=log.ERROR):
-        self.msg = msg
-        self.level = level
-
-    def __str__(self):
-        return self.msg
-
 class DontCloseSpider(Exception):
     """Request the spider not to be closed yet"""
     pass
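
Dropping the custom `__init__` and `__str__` makes `IgnoreRequest` a plain `Exception`: the message lives in `args`, there is no per-exception log level any more (the `scrapy/core/engine.py` hunk above now always logs at `log.ERROR`), and importing `scrapy.exceptions` no longer pulls in `scrapy.log`, so it cannot trigger settings loading. For code that raises it, nothing changes:

```python
from scrapy.exceptions import IgnoreRequest

# The message is stored in e.args, as with any standard exception.
raise IgnoreRequest("dropped by a hypothetical duplicates filter")
```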

scrapy/middleware.py (2 additions, 1 deletion)

@@ -34,7 +34,8 @@ def from_settings(cls, settings):
                 middlewares.append(mw)
             except NotConfigured, e:
                 if e.args:
-                    log.msg(e)
+                    clsname = clspath.split('.')[-1]
+                    log.msg("Disabled %s: %s" % (clsname, e.args[0]), log.WARNING)
         enabled = [x.__class__.__name__ for x in middlewares]
         log.msg("Enabled %ss: %s" % (cls.component_name, ", ".join(enabled)), \
             level=log.DEBUG)
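
With this change, a middleware or extension that opts out by raising `NotConfigured` with a message gets a WARNING naming the disabled class, instead of the bare `log.msg(e)` call. A hypothetical middleware, shown only to illustrate what the manager would now log:

```python
from scrapy.conf import settings
from scrapy.exceptions import NotConfigured

class StatsPusherMiddleware(object):
    """Hypothetical middleware used only to illustrate the new warning."""

    def __init__(self):
        if not settings.getbool('STATSPUSHER_ENABLED'):
            # The middleware manager now logs, at WARNING level:
            #   Disabled StatsPusherMiddleware: STATSPUSHER_ENABLED is not set
            raise NotConfigured("STATSPUSHER_ENABLED is not set")
```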

scrapy/utils/project.py (15 additions, 6 deletions)

@@ -4,6 +4,7 @@
 
 from scrapy.utils.conf import closest_scrapy_cfg, get_config
 from scrapy.utils.python import is_writable
+from scrapy.exceptions import NotConfigured
 
 DATADIR_CFG_SECTION = 'datadir'
 
@@ -20,12 +21,16 @@ def inside_project():
 
 def project_data_dir(project='default'):
     """Return the current project data dir, creating it if it doesn't exist"""
-    assert inside_project(), "Not inside project"
-    scrapy_cfg = closest_scrapy_cfg()
-    d = abspath(join(dirname(scrapy_cfg), '.scrapy'))
+    if not inside_project():
+        raise NotConfigured("Not inside a project")
     cfg = get_config()
     if cfg.has_option(DATADIR_CFG_SECTION, project):
         d = cfg.get(DATADIR_CFG_SECTION, project)
+    else:
+        scrapy_cfg = closest_scrapy_cfg()
+        if not scrapy_cfg:
+            raise NotConfigured("Unable to find scrapy.cfg file to infer project data dir")
+        d = abspath(join(dirname(scrapy_cfg), '.scrapy'))
     if not exists(d):
         makedirs(d)
     return d
@@ -47,8 +52,12 @@ def sqlite_db(path, nonwritable_fallback=True):
     if not inside_project() or path == ':memory:':
         db = ':memory:'
     else:
-        db = data_path(path)
-        if not is_writable(db) and nonwritable_fallback:
-            warnings.warn("%r is not writable - using in-memory SQLite instead" % db)
+        try:
+            db = data_path(path)
+        except NotConfigured:
             db = ':memory:'
+        else:
+            if not is_writable(db) and nonwritable_fallback:
+                warnings.warn("%r is not writable - using in-memory SQLite instead" % db)
+                db = ':memory:'
     return db
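
`project_data_dir()` now raises `NotConfigured` instead of failing an assert, and `sqlite_db()` catches it, so asking for an on-disk database outside a project (or without a reachable scrapy.cfg) quietly falls back to an in-memory one. Roughly, with a hypothetical `'scheduler.db'` argument:

```python
from scrapy.utils.project import sqlite_db

# Inside a project with a writable data dir this resolves to a file under the
# project's .scrapy/ directory; outside a project, or when the resolved path
# is not writable, it falls back to ':memory:'.
db = sqlite_db('scheduler.db')
```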

scrapyd/runner.py (4 additions, 0 deletions)

@@ -1,3 +1,6 @@
+from __future__ import with_statement
+
+import sys
 import os
 import shutil
 import tempfile
@@ -22,6 +25,7 @@ def project_environment(project):
     else:
         eggpath = None
     try:
+        assert 'scrapy.conf' not in sys.modules, "Scrapy settings already loaded"
         yield
     finally:
         if eggpath:
if eggpath:
New test file (22 additions, 0 deletions)

@@ -0,0 +1,22 @@
+import sys
+import unittest
+
+class SettingsSafeModulesTest(unittest.TestCase):
+
+    # these modules must not load scrapy.conf
+    SETTINGS_SAFE_MODULES = [
+        'scrapy.utils.project',
+        'scrapy.utils.conf',
+        'scrapyd.interfaces',
+        'scrapyd.eggutils',
+    ]
+
+    def test_modules_that_shouldnt_load_settings(self):
+        sys.modules.pop('scrapy.conf', None)
+        for m in self.SETTINGS_SAFE_MODULES:
+            __import__(m)
+            assert 'scrapy.conf' not in sys.modules, \
+                "Module %r must not cause the scrapy.conf module to be loaded" % m
+
+if __name__ == "__main__":
+    unittest.main()

scrapyd/tests/test_utils.py (1 addition, 1 deletion)

@@ -25,7 +25,7 @@ def test_get_crawl_args(self):
 class GetSpiderListTest(unittest.TestCase):
 
     def test_get_spider_list(self):
-        path = self.mktemp()
+        path = os.path.abspath(self.mktemp())
         j = os.path.join
         eggs_dir = j(path, 'eggs')
         os.makedirs(eggs_dir)
