Module `scrapfly.scrapy.response`

Classes

class ScrapflyScrapyResponse (request: ScrapflyScrapyRequest, scrape_api_response: ScrapeApiResponse)

Expand source code

class ScrapflyScrapyResponse(TextResponse):

    content:Union[str, BytesIO]
    scrape_api_response:ScrapeApiResponse

    context:Dict
    scrape_config:ScrapeConfig
    log_url:str
    status:str
    config:Dict
    success:bool
    duration:float
    format:str
    screenshots:Dict
    dns:Optional[Dict]
    ssl:Optional[Dict]
    iframes:Dict
    browser_data:Dict
    error:Optional[Dict]

    DEFAULT_ENCODING = 'utf-8'

    def __init__(self, request:ScrapflyScrapyRequest, scrape_api_response:ScrapeApiResponse):
        self.scrape_api_response = scrape_api_response
        self.content = self.scrape_api_response.scrape_result['content']

        self.context = self.scrape_api_response.context
        self.scrape_config = self.scrape_api_response.scrape_config
        self.log_url = self.scrape_api_response.scrape_result['log_url']
        self.status = self.scrape_api_response.scrape_result['status']
        self.success = self.scrape_api_response.scrape_result['success']
        self.duration = self.scrape_api_response.scrape_result['duration']
        self.format = self.scrape_api_response.scrape_result['format']
        self.screenshots = self.scrape_api_response.scrape_result['screenshots']
        self.dns = self.scrape_api_response.scrape_result['dns']
        self.ssl = self.scrape_api_response.scrape_result['ssl']
        self.iframes = self.scrape_api_response.scrape_result['iframes']
        self.browser_data = self.scrape_api_response.scrape_result['browser_data']
        self.error = self.scrape_api_response.scrape_result['error']
        self.ip_address = None

        if isinstance(self.content, str):
            content = self.content.encode('utf-8')
        elif isinstance(self.content, (BytesIO, TextIO)):
            content = self.content.read()
        else:
            raise RuntimeError('Unsupported body %s' % type(self.content))

        TextResponse.__init__(
            self,
            url=self.scrape_api_response.scrape_result['url'],
            status=self.scrape_api_response.scrape_result['status_code'],
            headers=self.scrape_api_response.scrape_result['response_headers'],
            body=content,
            request=request,
            ip_address=None
        )

    @property
    def __class__(self):
        response_headers = self.scrape_api_response.scrape_result['response_headers']

        if 'content-type' in response_headers and response_headers['content-type'].find('text/html') >= 0:
            return HtmlResponse
        elif 'content-type' in response_headers and response_headers['content-type'].find('application/xml') >= 0:
            return XmlResponse
        else:
            return TextResponse

    def sink(self, path: Optional[str] = None, name: Optional[str] = None, file: Optional[Union[TextIO, BytesIO]] = None):
        self.scrape_api_response.sink(path=path, name=name, file=file)

An object that represents an HTTP response, which is usually downloaded (by the Downloader) and fed to the Spiders for processing.

Ancestors

scrapy.http.response.text.TextResponse
scrapy.http.response.Response
scrapy.utils.trackref.object_ref

Class variables

var DEFAULT_ENCODING: The type of the None singleton.
var browser_data : Dict: The type of the None singleton.
var config : Dict: The type of the None singleton.
var content : str | _io.BytesIO: The type of the None singleton.
var context : Dict: The type of the None singleton.
var dns : Dict | None: The type of the None singleton.
var duration : float: The type of the None singleton.
var error : Dict | None: The type of the None singleton.
var format : str: The type of the None singleton.
var iframes : Dict: The type of the None singleton.
var log_url : str: The type of the None singleton.
var scrape_api_response : ScrapeApiResponse: The type of the None singleton.
var scrape_config : ScrapeConfig: The type of the None singleton.
var screenshots : Dict: The type of the None singleton.
var ssl : Dict | None: The type of the None singleton.
var status : str: The type of the None singleton.
var success : bool: The type of the None singleton.

Methods

def sink(self, path: str | None = None, name: str | None = None, file: | _io.BytesIO | None = None)

Expand source code

def sink(self, path: Optional[str] = None, name: Optional[str] = None, file: Optional[Union[TextIO, BytesIO]] = None):
    self.scrape_api_response.sink(path=path, name=name, file=file)