pyspider 常用模板

from pyspider.libs.base_handler import * import time import re class Handler(BaseHandler): crawl_config = { "User-Agent": "Mozilla/5.0 (iPhone; CPU iPhone OS 8_0_2 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) Version/8.0 Mobile/12A366 Safari/600.1.4" } @every(minutes=24 * 60) def on_start(self): self.crawl( 'http://www.yntv.cn/page_list_text/picindex.html?sectionid=425&page=1&title=%E7%B2%BE%E5%BD%A9%E8%A7%86%E9%A2%91', callback=self.index_page, fetch_type='js') self.crawl( 'http://www.yntv.cn/page_list_text/picindex.html?sectionid=429&page=1&title=%E7%83%AD%E6%92%AD%E5%89%A7%E5%9C%BA', callback=self.index_page, fetch_type='js') @config(age=23 * 60 * 60) def index_page(self, response): for x, y in zip(response.doc('.video_item_img').items(), response.doc('.item_img').items()): href = x('a').attr.href self.crawl(href, callback=self.detail_page, fetch_type='js', save={'p': y.attr.src}, js_script=''' function() { return video } ''') @config(priority=10) def detail_page(self, response): try: a = re. [Read More]