python爬虫万能代码 网络爬虫软件有哪些( 二 )

若字段有默认值或者自增,则默认注释掉,可按需打开 。大家可以看到我这张表的 id 字段在这里被注释了 。
若item字段过多,不想逐一赋值,可通过如下方式创建:

feapder create -i report 1
这时候生成的实体类是这样的:
class ReportItem(Item):
    """This class was generated by feapder.command: feapder create -i report 1."""

    __table_name__ = "report 1"

    def __init__(self, *args, **kwargs):
        self.count = kwargs.get('count')
        self.emRatingName = kwargs.get('emRatingName')  # 评级名称
        self.emRatingValue = kwargs.get('emRatingValue')  # 评级代码
        self.encodeUrl = kwargs.get('encodeUrl')  # 链接
        # self.id = kwargs.get('id')
        self.indvInduCode = kwargs.get('indvInduCode')  # 行业代码
        self.indvInduName = kwargs.get('indvInduName')  # 行业名称
        self.lastEmRatingName = kwargs.get('lastEmRatingName')  # 上次评级名称
        self.lastEmRatingValue = kwargs.get('lastEmRatingValue')  # 上次评级代码
        self.orgCode = kwargs.get('orgCode')  # 机构代码
        self.orgName = kwargs.get('orgName')  # 机构名称
        self.orgSName = kwargs.get('orgSName')  # 机构简称
        self.predictNextTwoYearEps = kwargs.get('predictNextTwoYearEps')
        self.predictNextTwoYearPe = kwargs.get('predictNextTwoYearPe')
        self.predictNextYearEps = kwargs.get('predictNextYearEps')
        self.predictNextYearPe = kwargs.get('predictNextYearPe')
        self.predictThisYearEps = kwargs.get('predictThisYearEps')
        self.predictThisYearPe = kwargs.get('predictThisYearPe')
        self.publishDate = kwargs.get('publishDate')  # 发表时间
        self.ratingChange = kwargs.get('ratingChange')  # 评级变动
        self.researcher = kwargs.get('researcher')  # 研究员
        self.stockCode = kwargs.get('stockCode')  # 股票代码
        self.stockName = kwargs.get('stockName')  # 股票简称
        self.title = kwargs.get('title')  # 报告名称

这样当我们请求回来的json数据时,可直接赋值,如:
response_data = {"title":" 测试"}  # 模拟请求回来的数据
item = ReportItem(**response_data)

想要数据自动入库也比较简单,在解析完数据之后,将数据赋值给 Item,然后 yield 就行了:
def parse(self, request, response):
    html = response.content.decode("utf-8")
    if len(html):
        content = html.replace('datatable1351846(', '')[:-1]
        content_json = json.loads(content)
        print(content_json)
        for obj in content_json['data']:
            result = ReportItem()
            result['orgName'] = obj['orgName']  # 机构名称
            result['orgSName'] = obj['orgSName']  # 机构简称
            result['publishDate'] = obj['publishDate']  # 发布日期
            result['predictNextTwoYearEps'] = obj['predictNextTwoYearEps']  # 后年每股盈利
            result['title'] = obj['title']  # 报告名称
            result['stockName'] = obj['stockName']  # 股票名称
            result['stockCode'] = obj['stockCode']  # 股票code
            result['orgCode'] = obj['orgCode']  # 机构code
            result['predictNextTwoYearPe'] = obj['predictNextTwoYearPe']  # 后年市盈率
            result['predictNextYearEps'] = obj['predictNextYearEps']  # 明年每股盈利
            result['predictNextYearPe'] = obj['predictNextYearPe']  # 明年市盈率
            result['predictThisYearEps'] = obj['predictThisYearEps']  # 今年每股盈利
            result['predictThisYearPe'] = obj['predictThisYearPe']  # 今年市盈率
            result['indvInduCode'] = obj['indvInduCode']  # 行业代码
            result['indvInduName'] = obj['indvInduName']  # 行业名称
            result['lastEmRatingName'] = obj['lastEmRatingName']  # 上次评级名称
            result['lastEmRatingValue'] = obj['lastEmRatingValue']  # 上次评级代码
            result['emRatingValue'] = obj['emRatingValue']  # 评级代码
            result['emRatingName'] = obj['emRatingName']  # 评级名称
            result['ratingChange'] = obj['ratingChange']  # 评级变动
            result['researcher'] = obj['researcher']  # 研究员
            result['encodeUrl'] = obj['encodeUrl']  # 链接
            result['count'] = int(obj['count'])  # 近一月个股研报数
            yield result

返回 item 后,item 会流进到框架的 ItemBuffer,ItemBuffer 每 0.05 秒或当 item 数量积攒到 5000 个,便会将这些 item 批量入库 。表名为类名去掉 Item 的小写,如 ReportItem 数据会落入到 report 表 。

推荐阅读