Module scrapfly.scrapy.response

Expand source code
from io import BytesIO
from typing import Union, Dict, Optional, TextIO

from scrapy.http import TextResponse, HtmlResponse, XmlResponse

from .. import ScrapeApiResponse, ScrapeConfig
from .request import ScrapflyScrapyRequest


class ScrapflyScrapyResponse(TextResponse):

    content:Union[str, BytesIO]
    scrape_api_response:ScrapeApiResponse

    context:Dict
    scrape_config:ScrapeConfig
    log_url:str
    status:str
    config:Dict
    success:bool
    duration:float
    format:str
    screenshots:Dict
    dns:Optional[Dict]
    ssl:Optional[Dict]
    iframes:Dict
    browser_data:Dict
    error:Optional[Dict]

    DEFAULT_ENCODING = 'utf-8'

    def __init__(self, request:ScrapflyScrapyRequest, scrape_api_response:ScrapeApiResponse):
        self.scrape_api_response = scrape_api_response
        self.content = self.scrape_api_response.scrape_result['content']

        self.context = self.scrape_api_response.context
        self.scrape_config = self.scrape_api_response.scrape_config
        self.log_url = self.scrape_api_response.scrape_result['log_url']
        self.status = self.scrape_api_response.scrape_result['status']
        self.success = self.scrape_api_response.scrape_result['success']
        self.duration = self.scrape_api_response.scrape_result['duration']
        self.format = self.scrape_api_response.scrape_result['format']
        self.screenshots = self.scrape_api_response.scrape_result['screenshots']
        self.dns = self.scrape_api_response.scrape_result['dns']
        self.ssl = self.scrape_api_response.scrape_result['ssl']
        self.iframes = self.scrape_api_response.scrape_result['iframes']
        self.browser_data = self.scrape_api_response.scrape_result['browser_data']
        self.error = self.scrape_api_response.scrape_result['error']
        self.ip_address = None

        if isinstance(self.content, str):
            content = self.content.encode('utf-8')
        elif isinstance(self.content, (BytesIO, TextIO)):
            content = self.content.read()
        else:
            raise RuntimeError('Unsupported body %s' % type(self.content))

        TextResponse.__init__(
            self,
            url=self.scrape_api_response.scrape_result['url'],
            status=self.scrape_api_response.scrape_result['status_code'],
            headers=self.scrape_api_response.scrape_result['response_headers'],
            body=content,
            request=request,
            ip_address=None
        )

    @property
    def __class__(self):
        response_headers = self.scrape_api_response.scrape_result['response_headers']

        if 'content-type' in response_headers and response_headers['content-type'].find('text/html') >= 0:
            return HtmlResponse
        elif 'content-type' in response_headers and response_headers['content-type'].find('application/xml') >= 0:
            return XmlResponse
        else:
            return TextResponse

    def sink(self, path: Optional[str] = None, name: Optional[str] = None, file: Optional[Union[TextIO, BytesIO]] = None):
        self.scrape_api_response.sink(path=path, name=name, file=file)

Classes

class ScrapflyScrapyResponse (request: ScrapflyScrapyRequest, scrape_api_response: ScrapeApiResponse)

An object that represents an HTTP response, which is usually downloaded (by the Downloader) and fed to the Spiders for processing.

Expand source code
class ScrapflyScrapyResponse(TextResponse):

    content:Union[str, BytesIO]
    scrape_api_response:ScrapeApiResponse

    context:Dict
    scrape_config:ScrapeConfig
    log_url:str
    status:str
    config:Dict
    success:bool
    duration:float
    format:str
    screenshots:Dict
    dns:Optional[Dict]
    ssl:Optional[Dict]
    iframes:Dict
    browser_data:Dict
    error:Optional[Dict]

    DEFAULT_ENCODING = 'utf-8'

    def __init__(self, request:ScrapflyScrapyRequest, scrape_api_response:ScrapeApiResponse):
        self.scrape_api_response = scrape_api_response
        self.content = self.scrape_api_response.scrape_result['content']

        self.context = self.scrape_api_response.context
        self.scrape_config = self.scrape_api_response.scrape_config
        self.log_url = self.scrape_api_response.scrape_result['log_url']
        self.status = self.scrape_api_response.scrape_result['status']
        self.success = self.scrape_api_response.scrape_result['success']
        self.duration = self.scrape_api_response.scrape_result['duration']
        self.format = self.scrape_api_response.scrape_result['format']
        self.screenshots = self.scrape_api_response.scrape_result['screenshots']
        self.dns = self.scrape_api_response.scrape_result['dns']
        self.ssl = self.scrape_api_response.scrape_result['ssl']
        self.iframes = self.scrape_api_response.scrape_result['iframes']
        self.browser_data = self.scrape_api_response.scrape_result['browser_data']
        self.error = self.scrape_api_response.scrape_result['error']
        self.ip_address = None

        if isinstance(self.content, str):
            content = self.content.encode('utf-8')
        elif isinstance(self.content, (BytesIO, TextIO)):
            content = self.content.read()
        else:
            raise RuntimeError('Unsupported body %s' % type(self.content))

        TextResponse.__init__(
            self,
            url=self.scrape_api_response.scrape_result['url'],
            status=self.scrape_api_response.scrape_result['status_code'],
            headers=self.scrape_api_response.scrape_result['response_headers'],
            body=content,
            request=request,
            ip_address=None
        )

    @property
    def __class__(self):
        response_headers = self.scrape_api_response.scrape_result['response_headers']

        if 'content-type' in response_headers and response_headers['content-type'].find('text/html') >= 0:
            return HtmlResponse
        elif 'content-type' in response_headers and response_headers['content-type'].find('application/xml') >= 0:
            return XmlResponse
        else:
            return TextResponse

    def sink(self, path: Optional[str] = None, name: Optional[str] = None, file: Optional[Union[TextIO, BytesIO]] = None):
        self.scrape_api_response.sink(path=path, name=name, file=file)

Ancestors

  • scrapy.http.response.text.TextResponse
  • scrapy.http.response.Response
  • scrapy.utils.trackref.object_ref

Class variables

var DEFAULT_ENCODING
var browser_data : Dict
var config : Dict
var content : Union[str, _io.BytesIO]
var context : Dict
var dns : Optional[Dict]
var duration : float
var error : Optional[Dict]
var format : str
var iframes : Dict
var log_url : str
var scrape_api_responseScrapeApiResponse
var scrape_configScrapeConfig
var screenshots : Dict
var ssl : Optional[Dict]
var status : str
var success : bool

Methods

def sink(self, path: Optional[str] = None, name: Optional[str] = None, file: Union[TextIO, _io.BytesIO, ForwardRef(None)] = None)
Expand source code
def sink(self, path: Optional[str] = None, name: Optional[str] = None, file: Optional[Union[TextIO, BytesIO]] = None):
    self.scrape_api_response.sink(path=path, name=name, file=file)