Module scrapfly.scrapy.response
Expand source code
from io import BytesIO
from typing import Union, Dict, Optional, TextIO
from scrapy.http import TextResponse, HtmlResponse, XmlResponse
from .. import ScrapeApiResponse, ScrapeConfig
from .request import ScrapflyScrapyRequest
class ScrapflyScrapyResponse(TextResponse):
    """Scrapy response backed by a Scrapfly API scrape result.

    Exposes Scrapfly-specific scrape metadata (log URL, DNS, SSL,
    screenshots, iframes, browser data, ...) alongside the regular Scrapy
    ``TextResponse`` interface so spiders can consume it transparently.
    """

    content: Union[str, BytesIO]            # raw body from the scrape result
    scrape_api_response: ScrapeApiResponse  # full API response this wraps
    context: Dict
    scrape_config: ScrapeConfig
    log_url: str                            # scrape log URL on scrapfly.io
    status: str                             # human-readable scrape status
    config: Dict
    success: bool
    duration: float
    format: str
    screenshots: Dict
    dns: Optional[Dict]
    ssl: Optional[Dict]
    iframes: Dict
    browser_data: Dict
    error: Optional[Dict]

    DEFAULT_ENCODING = 'utf-8'

    def __init__(self, request: ScrapflyScrapyRequest, scrape_api_response: ScrapeApiResponse):
        """Build a Scrapy response from a Scrapfly API response.

        :param request: originating ScrapflyScrapyRequest
        :param scrape_api_response: parsed Scrapfly API response to wrap
        :raises RuntimeError: if the scrape result content is neither a
            string nor a readable file-like object
        """
        self.scrape_api_response = scrape_api_response
        scrape_result = scrape_api_response.scrape_result

        self.content = scrape_result['content']
        self.context = scrape_api_response.context
        self.scrape_config = scrape_api_response.scrape_config
        self.log_url = scrape_result['log_url']
        self.status = scrape_result['status']
        self.success = scrape_result['success']
        self.duration = scrape_result['duration']
        self.format = scrape_result['format']
        self.screenshots = scrape_result['screenshots']
        self.dns = scrape_result['dns']
        self.ssl = scrape_result['ssl']
        self.iframes = scrape_result['iframes']
        self.browser_data = scrape_result['browser_data']
        self.error = scrape_result['error']
        self.ip_address = None

        # Scrapy requires a bytes body. Accept either a str or any readable
        # file-like object (BytesIO, text streams, ...).
        # FIX: the previous ``isinstance(..., (BytesIO, TextIO))`` check used
        # ``typing.TextIO``, which is not a base class of real file objects,
        # so file-like text bodies fell through to RuntimeError. Duck-typing
        # on ``read`` matches every file-like object, BytesIO included.
        if isinstance(self.content, str):
            content = self.content.encode('utf-8')
        elif hasattr(self.content, 'read'):
            content = self.content.read()
            if isinstance(content, str):
                # Text streams yield str; Scrapy's body must be bytes.
                content = content.encode('utf-8')
        else:
            raise RuntimeError('Unsupported body %s' % type(self.content))

        TextResponse.__init__(
            self,
            url=scrape_result['url'],
            status=scrape_result['status_code'],
            headers=scrape_result['response_headers'],
            body=content,
            request=request,
            ip_address=None
        )

    @property
    def __class__(self):
        # Report HtmlResponse/XmlResponse/TextResponse based on the actual
        # content-type so Scrapy isinstance checks dispatch correctly even
        # though every instance is constructed as a TextResponse subclass.
        response_headers = self.scrape_api_response.scrape_result['response_headers']
        content_type = response_headers.get('content-type', '')
        if 'text/html' in content_type:
            return HtmlResponse
        elif 'application/xml' in content_type:
            return XmlResponse
        else:
            return TextResponse

    def sink(self, path: Optional[str] = None, name: Optional[str] = None, file: Optional[Union[TextIO, BytesIO]] = None):
        """Persist the scrape result to *path*/*name* or into *file*; delegates to the API response."""
        self.scrape_api_response.sink(path=path, name=name, file=file)
Classes
class ScrapflyScrapyResponse (request: ScrapflyScrapyRequest, scrape_api_response: ScrapeApiResponse)
-
An object that represents an HTTP response, which is usually downloaded (by the Downloader) and fed to the Spiders for processing.
Expand source code
class ScrapflyScrapyResponse(TextResponse): content:Union[str, BytesIO] scrape_api_response:ScrapeApiResponse context:Dict scrape_config:ScrapeConfig log_url:str status:str config:Dict success:bool duration:float format:str screenshots:Dict dns:Optional[Dict] ssl:Optional[Dict] iframes:Dict browser_data:Dict error:Optional[Dict] DEFAULT_ENCODING = 'utf-8' def __init__(self, request:ScrapflyScrapyRequest, scrape_api_response:ScrapeApiResponse): self.scrape_api_response = scrape_api_response self.content = self.scrape_api_response.scrape_result['content'] self.context = self.scrape_api_response.context self.scrape_config = self.scrape_api_response.scrape_config self.log_url = self.scrape_api_response.scrape_result['log_url'] self.status = self.scrape_api_response.scrape_result['status'] self.success = self.scrape_api_response.scrape_result['success'] self.duration = self.scrape_api_response.scrape_result['duration'] self.format = self.scrape_api_response.scrape_result['format'] self.screenshots = self.scrape_api_response.scrape_result['screenshots'] self.dns = self.scrape_api_response.scrape_result['dns'] self.ssl = self.scrape_api_response.scrape_result['ssl'] self.iframes = self.scrape_api_response.scrape_result['iframes'] self.browser_data = self.scrape_api_response.scrape_result['browser_data'] self.error = self.scrape_api_response.scrape_result['error'] self.ip_address = None if isinstance(self.content, str): content = self.content.encode('utf-8') elif isinstance(self.content, (BytesIO, TextIO)): content = self.content.read() else: raise RuntimeError('Unsupported body %s' % type(self.content)) TextResponse.__init__( self, url=self.scrape_api_response.scrape_result['url'], status=self.scrape_api_response.scrape_result['status_code'], headers=self.scrape_api_response.scrape_result['response_headers'], body=content, request=request, ip_address=None ) @property def __class__(self): response_headers = self.scrape_api_response.scrape_result['response_headers'] if 
'content-type' in response_headers and response_headers['content-type'].find('text/html') >= 0: return HtmlResponse elif 'content-type' in response_headers and response_headers['content-type'].find('application/xml') >= 0: return XmlResponse else: return TextResponse def sink(self, path: Optional[str] = None, name: Optional[str] = None, file: Optional[Union[TextIO, BytesIO]] = None): self.scrape_api_response.sink(path=path, name=name, file=file)
Ancestors
- scrapy.http.response.text.TextResponse
- scrapy.http.response.Response
- scrapy.utils.trackref.object_ref
Class variables
var DEFAULT_ENCODING
var browser_data : Dict
var config : Dict
var content : Union[str, _io.BytesIO]
var context : Dict
var dns : Optional[Dict]
var duration : float
var error : Optional[Dict]
var format : str
var iframes : Dict
var log_url : str
var scrape_api_response : ScrapeApiResponse
var scrape_config : ScrapeConfig
var screenshots : Dict
var ssl : Optional[Dict]
var status : str
var success : bool
Methods
def sink(self, path: Optional[str] = None, name: Optional[str] = None, file: Optional[Union[TextIO, _io.BytesIO]] = None)
-
Expand source code
def sink(self, path: Optional[str] = None, name: Optional[str] = None, file: Optional[Union[TextIO, BytesIO]] = None): self.scrape_api_response.sink(path=path, name=name, file=file)