The original script was as follows:
# Inside the spider class (e.g. qingqing.py); requires: from selenium import webdriver
def __init__(self):
    self.browser = webdriver.Chrome()
    self.browser.set_page_load_timeout(30)
Error:
D:\app\python3\python.exe D:/demo/musics/__init__.py
2021-12-15 22:58:10 [scrapy.utils.log] INFO: Scrapy 2.5.1 started (bot: musics)
2021-12-15 22:58:10 [scrapy.utils.log] INFO: Versions: lxml 4.7.1.0, libxml2 2.9.12, cssselect 1.1.0, parsel 1.6.0, w3lib 1.22.0, Twisted 21.7.0, Python 3.9.7 (tags/v3.9.7:1016ef3, Aug 30 2021, 20:19:38) [MSC v.1929 64 bit (AMD64)], pyOpenSSL 21.0.0 (OpenSSL 1.1.1m 14 Dec 2021), cryptography 36.0.1, Platform Windows-10-10.0.19042-SP0
2021-12-15 22:58:10 [scrapy.utils.log] DEBUG: Using reactor: twisted.internet.selectreactor.SelectReactor
2021-12-15 22:58:10 [scrapy.crawler] INFO: Overridden settings:
{'BOT_NAME': 'musics',
'CONCURRENT_REQUESTS': 32,
'CONCURRENT_REQUESTS_PER_DOMAIN': 16,
'CONCURRENT_REQUESTS_PER_IP': 16,
'DOWNLOAD_DELAY': 5,
'HTTPCACHE_DIR': '../httpcache',
'HTTPCACHE_ENABLED': True,
'NEWSPIDER_MODULE': 'musics.spiders',
'SPIDER_MODULES': ['musics.spiders'],
'TELNETCONSOLE_ENABLED': False}
2021-12-15 22:58:10 [scrapy.middleware] INFO: Enabled extensions:
['scrapy.extensions.corestats.CoreStats', 'scrapy.extensions.logstats.LogStats']
2021-12-15 22:58:11 [selenium.webdriver.remote.remote_connection] DEBUG: POST http://localhost:58518/session {"capabilities": {"firstMatch": [{}], "alwaysMatch": {"browserName": "chrome", "pageLoadStrategy": "normal", "goog:chromeOptions": {"extensions": [], "args": []}}}, "desiredCapabilities": {"browserName": "chrome", "pageLoadStrategy": "normal", "goog:chromeOptions": {"extensions": [], "args": []}}}
2021-12-15 22:58:11 [urllib3.connectionpool] DEBUG: Starting new HTTP connection (1): localhost:58518
2021-12-15 22:58:11 [urllib3.connectionpool] DEBUG: http://localhost:58518 "POST /session HTTP/1.1" 500 898
2021-12-15 22:58:11 [selenium.webdriver.remote.remote_connection] DEBUG: Finished Request
Unhandled error in Deferred:
2021-12-15 22:58:13 [twisted] CRITICAL: Unhandled error in Deferred:
Traceback (most recent call last):
File "D:\app\python3\lib\site-packages\scrapy\crawler.py", line 192, in crawl
return self._crawl(crawler, *args, **kwargs)
File "D:\app\python3\lib\site-packages\scrapy\crawler.py", line 196, in _crawl
d = crawler.crawl(*args, **kwargs)
File "D:\app\python3\lib\site-packages\twisted\internet\defer.py", line 1909, in unwindGenerator
return _cancellableInlineCallbacks(gen) # type: ignore[unreachable]
File "D:\app\python3\lib\site-packages\twisted\internet\defer.py", line 1816, in _cancellableInlineCallbacks
_inlineCallbacks(None, gen, status)
--- <exception caught here> ---
File "D:\app\python3\lib\site-packages\twisted\internet\defer.py", line 1661, in _inlineCallbacks
result = current_context.run(gen.send, result)
File "D:\app\python3\lib\site-packages\scrapy\crawler.py", line 86, in crawl
self.spider = self._create_spider(*args, **kwargs)
File "D:\app\python3\lib\site-packages\scrapy\crawler.py", line 98, in _create_spider
return self.spidercls.from_crawler(self, *args, **kwargs)
File "D:\app\python3\lib\site-packages\scrapy\spiders\__init__.py", line 50, in from_crawler
spider = cls(*args, **kwargs)
File "D:\demo\musics\spiders\qingqing.py", line 20, in __init__
self.browser = webdriver.Chrome()
File "D:\app\python3\lib\site-packages\selenium\webdriver\chrome\webdriver.py", line 70, in __init__
super(WebDriver, self).__init__(DesiredCapabilities.CHROME['browserName'], "goog",
File "D:\app\python3\lib\site-packages\selenium\webdriver\chromium\webdriver.py", line 93, in __init__
RemoteWebDriver.__init__(
File "D:\app\python3\lib\site-packages\selenium\webdriver\remote\webdriver.py", line 268, in __init__
self.start_session(capabilities, browser_profile)
File "D:\app\python3\lib\site-packages\selenium\webdriver\remote\webdriver.py", line 359, in start_session
response = self.execute(Command.NEW_SESSION, parameters)
File "D:\app\python3\lib\site-packages\selenium\webdriver\remote\webdriver.py", line 424, in execute
self.error_handler.check_response(response)
File "D:\app\python3\lib\site-packages\selenium\webdriver\remote\errorhandler.py", line 247, in check_response
raise exception_class(message, screen, stacktrace)
selenium.common.exceptions.WebDriverException: Message: unknown error: Failed to create Chrome process.
Stacktrace:
Backtrace:
Ordinal0 [0x01136903+2517251]
Ordinal0 [0x010CF8E1+2095329]
Ordinal0 [0x00FD2848+1058888]
Ordinal0 [0x00FECC2F+1166383]
Ordinal0 [0x00FEA2CF+1155791]
Ordinal0 [0x0101AAAF+1354415]
Ordinal0 [0x0101A71A+1353498]
Ordinal0 [0x0101639B+1336219]
Ordinal0 [0x00FF27A7+1189799]
Ordinal0 [0x00FF3609+1193481]
GetHandleVerifier [0x012C5904+1577972]
GetHandleVerifier [0x01370B97+2279047]
GetHandleVerifier [0x011C6D09+534521]
GetHandleVerifier [0x011C5DB9+530601]
Ordinal0 [0x010D4FF9+2117625]
Ordinal0 [0x010D98A8+2136232]
Ordinal0 [0x010D99E2+2136546]
Ordinal0 [0x010E3541+2176321]
BaseThreadInitThunk [0x768CFA29+25]
RtlGetAppContainerNamedObjectPath [0x77B07A9E+286]
RtlGetAppContainerNamedObjectPath [0x77B07A6E+238]
2021-12-15 22:58:13 [twisted] CRITICAL:
(same WebDriverException traceback as above)
Process finished with exit code 1
The error "Failed to create Chrome process" typically means chromedriver could not find or start the Chrome binary at its default install location. Modifying the script as follows makes it run correctly, where chrome_options.binary_location is the full path to the Chrome executable. This code normally lives in the spider script itself, e.g. qingqing.py or taobao.py:
def __init__(self):
    chrome_options = webdriver.ChromeOptions()
    # Point chromedriver at the installed Chrome executable explicitly
    chrome_options.binary_location = r'C:\Program Files\Google\Chrome\Application\chrome.exe'
    self.browser = webdriver.Chrome(options=chrome_options)
    self.browser.set_page_load_timeout(30)
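For context, here is a minimal sketch of how the fixed __init__ might sit inside a complete spider such as qingqing.py. The spider name, start URL, and the closed() cleanup hook are illustrative assumptions, not taken from the original project:

# Hypothetical skeleton of qingqing.py showing where the fixed __init__ lives
import scrapy
from selenium import webdriver


class QingqingSpider(scrapy.Spider):
    name = 'qingqing'                      # assumed spider name
    start_urls = ['https://example.com/']  # placeholder, not the real target site

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        chrome_options = webdriver.ChromeOptions()
        # Tell chromedriver exactly where chrome.exe is installed
        chrome_options.binary_location = r'C:\Program Files\Google\Chrome\Application\chrome.exe'
        self.browser = webdriver.Chrome(options=chrome_options)
        self.browser.set_page_load_timeout(30)

    def parse(self, response):
        pass  # page handling with self.browser goes here

    def closed(self, reason):
        # Quit the shared browser when the spider finishes
        self.browser.quit()

Quitting the browser in closed() keeps chromedriver processes from lingering after each crawl, since the spider creates a single shared browser instance for its whole lifetime.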