爬虫 pyquery / goquery CSS 匹配语法 傻瓜式使用模板

pyquery 用法模板，主要分为：
1. 类匹配，写法 doc(".class")
2. id 匹配，写法 doc("#id")
3. 标签匹配 A B="C"，写法 doc('A[B="C"]')
4. 值匹配 A="B"，写法 B = doc("xxx").attr("A") 或 B = doc("xxx").attr.A
5. 遍历相同类型：for x in doc("yyyy").items()

import requests
from pyquery import PyQuery as pq

r = requests.get("http://video.infolz.com/")
r.encoding="utf-8"
r1 = requests.get("http://www.infolz.com/2016/1213/303.shtml")
"""
<object class="cmstopVideo" width="650" height="519" data="https://imgcache.qq.com/tencentvideo_v1/playerv3/TPout.swf?max_age=86400&v=20161117&vid=v0354ryo54j&auto=0" type="application/x-shockwave-flash">
<param name="src" value="https://imgcache.qq.com/tencentvideo_v1/playerv3/TPout.swf?max_age=86400&v=20161117&vid=v0354ryo54j&auto=0"/>
<param name="allowfullscreen" value="true"/>
<param name="allowscriptaccess" value="true"/>
<param name="wmode" value="Transparent"/>
</object>
"""

1. 单层查找 a > b，横跨多层查找用空格 a b
2. 选择类 . [Read More]

pymysql 模板

import pymysql # MySQL 建立连接 class MySQLPipeline(object): def __init__(self, host, port, user, password, db): self.mysql_host = host self.mysql_port = port self.mysql_user = user self.mysql_password = password self.mysql_db = db # 创建MYSQL数据库链接对象 self.conn = pymysql.connect(host=self.mysql_host, user=self.mysql_user, password=self.mysql_password, db=self.mysql_db, charset="utf8") # 查询数据 def searching(self, sql): try: with self.conn as cur: cur.execute(sql) logger.info("sql查询成功") return cur except Exception as e: print(e) logger.error(e) return None #增删改 def processing(self, sql): try: with self.conn as cur: cur.execute(sql) logger. [Read More]

python redis utils

import json

import redis


class RedisPipline():
    """Small convenience wrapper around a pooled redis-py client.

    Exposes the handful of commands the surrounding code needs: plain
    string get/set, a JSON-encoding list push, a list pop and key deletion.
    """

    def __init__(self):
        """Create the connection pool and the client bound to it.

        NOTE(review): ``redis_host``, ``redis_port``, ``redis_password`` and
        ``redis_db`` are not defined in this class; they are presumably
        module-level settings provided elsewhere -- confirm before use.
        """
        self.pool = redis.ConnectionPool(
            host=redis_host,
            port=redis_port,
            password=redis_password,
            db=redis_db,
        )
        self.redis_conn = redis.Redis(connection_pool=self.pool)

    def set_data(self, key, data):
        """Store ``data`` under ``key`` (Redis ``SET``)."""
        self.redis_conn.set(key, data)

    def get_data(self, key):
        """Return whatever the client's ``get`` yields for ``key``."""
        return self.redis_conn.get(key)

    def rpush_data(self, key, data):
        """Append ``data`` to the tail of the list at ``key``.

        ``data`` is serialized with ``json.dumps`` first, so it must be
        JSON-serializable.
        """
        self.redis_conn.rpush(key, json.dumps(data))

    def lpop_data(self, key):
        """Pop and return the head of the list at ``key``.

        The value is returned exactly as the client hands it back; entries
        written by ``rpush_data`` are JSON text and are NOT decoded here, so
        callers should run ``json.loads`` on the result themselves.
        """
        # The popped element is removed from Redis, so it must be returned:
        # dropping it (as this method used to) loses the item for good.
        return self.redis_conn.lpop(key)

    def delete_key(self, key):
        """Delete ``key``; a key that does not exist is silently ignored."""
        # No prior ``exists`` check: DEL already ignores missing keys, and
        # the extra round-trip only opened a check-then-act race.
        self.redis_conn.delete(key)