Creating a pipeline that saves items as JSON
```python
import json
import codecs


class JsonPipeline(object):
    def __init__(self):
        self.file = codecs.open('demo.json', 'w', encoding='utf-8')

    def process_item(self, item, spider):
        # Write each item as one JSON object per line, keeping non-ASCII characters readable.
        line = json.dumps(dict(item), ensure_ascii=False) + "\n"
        self.file.write(line)
        return item

    def close_spider(self, spider):
        # Scrapy calls close_spider() on a pipeline when the spider finishes;
        # the original method name spider_closed is a signal name and would not be called automatically.
        self.file.close()
```
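For reference, each call to process_item writes one JSON object followed by a newline, so demo.json ends up in JSON Lines form. A quick sketch of what a single line would look like, using a hypothetical item with title/url fields (those field names are assumptions, not from the original spider):

```python
import json

# Hypothetical item dict; the field names are assumptions for illustration only.
item = {"title": "演示", "url": "http://example.com"}

# This mirrors the line JsonPipeline writes for each item:
line = json.dumps(item, ensure_ascii=False) + "\n"
print(line, end="")  # {"title": "演示", "url": "http://example.com"}
```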
Scrapy's built-in JSON export
Exporter classes provided by the scrapy.exporters module:
```python
[
    'BaseItemExporter',
    'PprintItemExporter',
    'PickleItemExporter',
    'CsvItemExporter',
    'XmlItemExporter',
    'JsonLinesItemExporter',
    'JsonItemExporter',
    'MarshalItemExporter',
]
```
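All of these exporters share the same workflow: construct one with a binary file object, call start_exporting(), feed items through export_item(), and close with finish_exporting(). A minimal standalone sketch using CsvItemExporter (the file name and item dicts below are assumptions for illustration):

```python
from scrapy.exporters import CsvItemExporter

# Minimal sketch of the common exporter workflow; 'demo.csv' and the items are illustrative.
with open('demo.csv', 'wb') as f:
    exporter = CsvItemExporter(f)
    exporter.start_exporting()
    for item in [{'title': 'a', 'url': 'http://example.com/a'},
                 {'title': 'b', 'url': 'http://example.com/b'}]:
        exporter.export_item(item)
    exporter.finish_exporting()
```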
```python
from scrapy.exporters import JsonItemExporter


class JsonExporterPipeline(object):
    # The class name here is illustrative; the original snippet omitted it.
    # Internally, JsonItemExporter.start_exporting() writes b"[\n" and
    # finish_exporting() writes b"\n]", so all items end up in one JSON array.

    def __init__(self):
        self.file = open('demo.json', 'wb')
        self.exporter = JsonItemExporter(self.file, encoding="utf-8", ensure_ascii=False)
        self.exporter.start_exporting()

    def close_spider(self, spider):
        self.exporter.finish_exporting()
        self.file.close()

    def process_item(self, item, spider):
        self.exporter.export_item(item)
        return item
```
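If a single JSON array is not required, JsonLinesItemExporter from the same module writes one JSON object per line, so there is no bracket handling to worry about. A sketch of swapping it in (the class name and file name are assumptions):

```python
from scrapy.exporters import JsonLinesItemExporter


class JsonLinesExporterPipeline(object):
    """Illustrative variant: one JSON object per line, no surrounding [ ] needed."""

    def open_spider(self, spider):
        self.file = open('demo.jl', 'wb')  # file name is an assumption
        self.exporter = JsonLinesItemExporter(self.file, encoding="utf-8", ensure_ascii=False)
        self.exporter.start_exporting()

    def close_spider(self, spider):
        self.exporter.finish_exporting()
        self.file.close()

    def process_item(self, item, spider):
        self.exporter.export_item(item)
        return item
```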
Enable the pipeline by adding it to ITEM_PIPELINES in settings.py.
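A minimal sketch of that setting, assuming the project is named myproject and the pipeline class names used above (adjust the module path and priority to your project):

```python
# settings.py -- the module path and the priority value 300 are assumptions
ITEM_PIPELINES = {
    'myproject.pipelines.JsonPipeline': 300,
    # or the exporter-based variant:
    # 'myproject.pipelines.JsonExporterPipeline': 300,
}
```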
If you have any questions, contact Email: afacode@outlook.com or WeChat: afacode.