Source code for inline_requests.generator

import logging
import warnings

from functools import partial
from types import GeneratorType

from scrapy.http import Request
from scrapy.utils.spider import iterate_spider_output


# Module-level logger, named after this module.
logger = logging.getLogger(__name__)


class RequestGenerator(object):
    """This is the core class that wraps the callback and outputs the requests
    one by one.

    Every bare request (no callback and no errback) yielded by the wrapped
    callback gets internal callback/errback handlers attached. Those handlers
    feed the response (or the failure) back into the callback's generator and
    then keep unwinding it.
    """

    def __init__(self, callback, **kwargs):
        """Initialize RequestGenerator.

        Parameters
        ----------
        callback : callable
            Callable callback (spider method).
        **kwargs :
            Extra callback keyword arguments.

        """
        self.callback = callback
        self.kwargs = kwargs

    def __call__(self, response):
        """Main response entry point.

        This method calls the callback and wraps the returned generator.

        Parameters
        ----------
        response :
            Response passed to the callback as the ``response`` keyword.

        Returns
        -------
        generator
            Generator yielding the callback output, with bare requests
            wrapped so that their result resumes the callback.

        Raises
        ------
        ValueError
            If the callback output is not a generator.

        """
        output = iterate_spider_output(
            self.callback(response=response, **self.kwargs))
        if not isinstance(output, GeneratorType):
            raise ValueError("Callback must return a generator type")
        return self._unwindGenerator(output)

    def _unwindGenerator(self, generator, _prev=None):
        """Unwind (resume) generator.

        Parameters
        ----------
        generator : generator
            The callback generator being driven.
        _prev :
            Value already obtained from the generator (the result of sending
            a response or throwing a failure into it) that must be processed
            before pulling the next value. ``None`` means nothing is pending.

        Yields
        ------
        object
            Each value produced by the generator. Unwinding stops right after
            a wrapped request is yielded; the request handlers resume it.

        """
        while True:
            # Compare against None rather than relying on truthiness: a falsy
            # value returned by the generator on resume (e.g. an empty
            # dict-like item) must be passed through exactly like a value
            # obtained via next(), not silently dropped.
            if _prev is not None:
                ret, _prev = _prev, None
            else:
                try:
                    ret = next(generator)
                except StopIteration:
                    break

            if isinstance(ret, Request):
                if ret.callback:
                    warnings.warn("Got a request with callback set, bypassing "
                                  "the generator wrapper. Generator may not "
                                  "be able to resume. %s" % ret)
                elif ret.errback:
                    # By Scrapy defaults, a request without callback defaults
                    # to self.parse spider method.
                    warnings.warn("Got a request with errback set, bypassing "
                                  "the generator wrapper. Generator may not "
                                  "be able to resume. %s" % ret)
                else:
                    # Stop here: the wrapped request's callback/errback will
                    # continue unwinding once its result is available.
                    yield self._wrapRequest(ret, generator)
                    return

            # A request with callbacks, item or None object.
            yield ret

    def _wrapRequest(self, request, generator):
        """Attach the resume handlers to ``request`` and return it.

        Raises
        ------
        ValueError
            If the request already has a callback or an errback.

        """
        # Allowing existing callback or errbacks could lead to undesired
        # results. To ensure the generator is **always** properly exhausted we
        # must handle both callback and errback in order to send back the
        # result to the generator.
        if request.callback is not None:
            raise ValueError("Request with existing callback is not supported")
        if request.errback is not None:
            raise ValueError("Request with existing errback is not supported")

        request.callback = partial(self._handleSuccess, generator=generator)
        request.errback = partial(self._handleFailure, generator=generator)
        return request

    def _cleanRequest(self, request):
        """Remove the callback and errback from ``request``."""
        request.callback = None
        request.errback = None

    def _handleSuccess(self, response, generator):
        """Send ``response`` into the generator and continue unwinding it.

        Returns ``None`` when the generator finishes on resume, otherwise a
        generator over the remaining output.
        """
        if response.request:
            self._cleanRequest(response.request)

        try:
            ret = generator.send(response)
        except StopIteration:
            return
        return self._unwindGenerator(generator, ret)

    def _handleFailure(self, failure, generator):
        """Throw ``failure`` into the generator and continue unwinding it.

        Returns ``None`` when the generator finishes on resume, otherwise a
        generator over the remaining output.
        """
        # Look for the request instance in the exception value.
        exc_value = failure.value
        if hasattr(exc_value, 'request'):
            self._cleanRequest(exc_value.request)
        elif hasattr(exc_value, 'response'):
            if hasattr(exc_value.response, 'request'):
                self._cleanRequest(exc_value.response.request)

        try:
            ret = failure.throwExceptionIntoGenerator(generator)
        except StopIteration:
            return
        return self._unwindGenerator(generator, ret)