Module scrapfly.scrapy.request
Source code
from copy import deepcopy
from typing import Dict, Optional

from scrapy import Request

from .. import ScrapeConfig


class ScrapflyScrapyRequest(Request):

    scrape_config: ScrapeConfig

    # See the request_from_dict function in scrapy.utils.request:
    # "scrape_config" replaces the attributes it fully determines
    # (url, method, body, headers, cookies) in the serialized form.
    attributes = tuple(
        attr for attr in Request.attributes
        if attr not in ["body", "cookies", "headers", "method", "url"]
    ) + ("scrape_config",)

    # url:str inherited
    # method:str inherited
    # body:bytes inherited
    # headers:Dict inherited
    # encoding:str inherited

    def __init__(self, scrape_config: ScrapeConfig, meta: Optional[Dict] = None, *args, **kwargs):
        self.scrape_config = scrape_config

        # Copy the caller's meta instead of mutating it (and avoid the
        # shared-state pitfall of a mutable default argument).
        meta = dict(meta) if meta else {}
        meta['scrapfly_scrape_config'] = self.scrape_config

        super().__init__(
            *args,
            url=self.scrape_config.url,
            headers=self.scrape_config.headers,
            cookies=self.scrape_config.cookies,
            body=self.scrape_config.body,
            meta=meta,
            **kwargs
        )

    def to_dict(self, *, spider: Optional["scrapy.Spider"] = None) -> dict:
        if spider is None:
            raise ValueError("The 'spider' argument is required to serialize the request.")
        d = super().to_dict(spider=spider)
        d['scrape_config'] = self.scrape_config
        return d

    @classmethod
    def from_dict(cls, data):
        scrape_config_data = data['meta']['scrapfly_scrape_config'].to_dict()
        scrape_config = ScrapeConfig.from_dict(scrape_config_data)
        request = cls(scrape_config=scrape_config)
        return request

    def replace(self, *args, **kwargs):
        # Carry over the attributes that are not derived from the
        # ScrapeConfig, then hand the copy a config of its own.
        for x in [
            'meta',
            'flags',
            'encoding',
            'priority',
            'dont_filter',
            'callback',
            'errback',
            'cb_kwargs',
        ]:
            kwargs.setdefault(x, getattr(self, x))
        kwargs['scrape_config'] = deepcopy(self.scrape_config)
        cls = kwargs.pop('cls', self.__class__)
        return cls(*args, **kwargs)
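The constructor derives every HTTP field from the ScrapeConfig and mirrors the config into meta, which is what the to_dict/from_dict round trip relies on. A minimal sketch of that behavior, assuming the class is re-exported from scrapfly.scrapy (the URL is hypothetical):

from scrapfly import ScrapeConfig
from scrapfly.scrapy import ScrapflyScrapyRequest

cfg = ScrapeConfig(url="https://example.com")
req = ScrapflyScrapyRequest(scrape_config=cfg)

assert req.url == "https://example.com"
assert req.meta['scrapfly_scrape_config'] is cfg
assert "scrape_config" in ScrapflyScrapyRequest.attributes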
Classes
class ScrapflyScrapyRequest (scrape_config: ScrapeConfig, meta: Optional[Dict] = None, *args, **kwargs)
Represents an HTTP request, which is usually generated in a Spider and executed by the Downloader, thus generating a :class:`Response`.
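In a Scrapfly-backed project the request is typically yielded from a spider with a ScrapeConfig instead of a plain URL. A hedged sketch, assuming a project already configured with the Scrapfly Scrapy integration (spider name, URL, and the render_js option are illustrative):

import scrapy
from scrapfly import ScrapeConfig
from scrapfly.scrapy import ScrapflyScrapyRequest

class ExampleSpider(scrapy.Spider):
    name = "example"

    def start_requests(self):
        yield ScrapflyScrapyRequest(
            scrape_config=ScrapeConfig(url="https://example.com", render_js=True),
            callback=self.parse,
        )

    def parse(self, response):
        self.logger.info("parsed %d bytes", len(response.body))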
Ancestors
- scrapy.http.request.Request
- scrapy.utils.trackref.object_ref
Class variables
var attributes : Tuple[str, ...]
var scrape_config : ScrapeConfig
Class methods
def from_dict(data)
Reconstruct a ScrapflyScrapyRequest from a request dict, restoring the ScrapeConfig stored under meta['scrapfly_scrape_config'].
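A sketch of the round trip this enables, e.g. when Scrapy persists its scheduler queue to disk (request and spider are hypothetical live objects):

d = request.to_dict(spider=spider)            # includes 'meta' and 'scrape_config'
restored = ScrapflyScrapyRequest.from_dict(d)

assert restored.url == request.url
assert restored.scrape_config.url == request.scrape_config.url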
Methods
def replace(self, *args, **kwargs)
Create a new Request with the same attributes, except for those given new values.
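Because replace() deep-copies the ScrapeConfig, mutating the clone's config cannot leak into the original request. A short sketch (request is a hypothetical existing ScrapflyScrapyRequest):

clone = request.replace(priority=10)

assert clone.priority == 10
assert clone.scrape_config is not request.scrape_config
assert clone.scrape_config.url == request.scrape_config.url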
def to_dict(self, *, spider: Optional["scrapy.Spider"] = None) -> dict
Return a dictionary containing the Request's data.
Use :func:`~scrapy.utils.request.request_from_dict` to convert back into a :class:`~scrapy.Request` object.
If a spider is given, this method will try to find out the name of the spider methods used as callback and errback and include them in the output dict, raising an exception if they cannot be found.
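A sketch of both behaviors of this override, the mandatory spider argument and the extra 'scrape_config' key (request and spider are hypothetical live objects):

from scrapfly import ScrapeConfig

try:
    request.to_dict()  # spider omitted
except ValueError:
    pass               # "The 'spider' argument is required to serialize the request."

d = request.to_dict(spider=spider)
assert isinstance(d['scrape_config'], ScrapeConfig)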