|
本主声明,仅学习分享,不乱扣帽子,不服的评论区见!!!
scrapy 管道建设本主有自己的思考:
1, 自动化拼接sql语句
2, 量大时启用连接池(本主有封装一下,看起来舒服)
3, 多表时,通过 item["pop"] 字段识别目标表
啥也别说,上代码
# -*- coding: utf-8 -*-
# File: pipelines.py
import logging

from .YuanGongZi_Dom.mysqlpoll import MysqlPool
from .settings import (
    MYSQL_HOST,
    MYSQL_DATABASE,
    MYSQL_USER,
    MYSQL_PASSWORD,
    MYSQL_PORT,
    MYSQL_DBTABLE,
)

logger = logging.getLogger(__name__)


class MysqlDbPipeline(object):
    """Scrapy item pipeline that stores items in MySQL through a connection pool.

    The SQL statement is assembled from the item's own field names, so the
    field names declared in items.py must match the table's column names.
    """

    def __init__(self):
        # Pool-backed storage handle.
        self.pdb = MysqlPool(MYSQL_HOST, MYSQL_DATABASE, MYSQL_USER, MYSQL_PASSWORD, MYSQL_PORT)
        # Plain pymysql alternative (no pool): open one connection and one cursor.
        # self.pdb = pymysql.Connect(MYSQL_HOST, MYSQL_USER, MYSQL_PASSWORD, MYSQL_DATABASE, MYSQL_PORT)
        # self.cursor = self.pdb.cursor()

    def process_item(self, item, spider):
        """Upsert one item into ``MYSQL_DBTABLE`` and pass the item on.

        Storage is best-effort: a failure is logged and the item is still
        returned so later pipelines keep running.

        :param item: the scraped item; converted with ``dict(item)``
        :param spider: the spider that produced the item (unused)
        :return: the item, unchanged
        """
        data = dict(item)

        # Multi-table mode: have the spider store the table name under the
        # 'pop' key and take it out here, before the column list is built.
        # MYSQL_DBTABLE = data.pop('pop')

        # Prerequisite for statement 3 (UPDATE): build the WHERE clause from
        # the condition fields; they stay in ``data`` so the placeholders bind.
        # key = " AND ".join('`{0}`=%({0})s'.format(k) for k in ['<condition field 1>', '<condition field 2>'])

        if not data:
            # With no columns the generated statement would be malformed SQL.
            logger.warning("Skipping empty item: nothing to store in %s", MYSQL_DBTABLE)
            return item

        # Column list, named placeholders, and `col`=%(col)s assignment pairs.
        keys = ", ".join('`{}`'.format(k) for k in data)
        values = ", ".join('%({})s'.format(k) for k in data)
        kv = ", ".join('`{0}`=%({0})s'.format(k) for k in data)

        # Statement variants; enable exactly one.
        # 1. REPLACE: overwrite the existing row on a key conflict
        # sql = 'REPLACE INTO %s (%s) VALUES (%s)' % (MYSQL_DBTABLE, keys, values)
        # 2. Plain INSERT: a key conflict raises an error
        # sql = 'INSERT INTO %s (%s) VALUES (%s)' % (MYSQL_DBTABLE, keys, values)
        # 3. UPDATE (needs ``key`` from above)
        # sql = 'UPDATE %s SET %s WHERE %s;' % (MYSQL_DBTABLE, kv, key)
        # 4. INSERT, falling back to UPDATE when the key already exists
        sql = 'INSERT INTO %s(%s) VALUES(%s) ON DUPLICATE KEY UPDATE %s;' % (MYSQL_DBTABLE, keys, values, kv)

        try:
            self.pdb.insert(sql, data)  # pooled insert (commits internally)
            # Plain pymysql equivalent:
            # self.cursor.execute(sql, data)
            # self.pdb.commit()
        except Exception:
            # Deliberately broad: a storage failure must not stop the crawl.
            logger.exception("Failed to store item in table %s", MYSQL_DBTABLE)
        return item
功能来看:
1, 实现了上述三个目的
2,前提条件是items.py中字段名与数据库中一致
3,附加实现了【insert】【replace】【update】【insert on duplicate key update】四种 MySQL 数据操作方式
4,连接池的封装与否看个人爱好,功能单一化可自行设计
附上简单的封装代码:
您好,本帖含有特定内容,请回复后再查看。
# -*- coding: utf-8 -*- # @file: Yuan_PYMySql_poll.py import pymysql from DBUtils.PooledDB import PooledDB class MysqlPool(object): def __init__(self, host, database, user, password, port): self.POOL = PooledDB( creator=pymysql, # 使用链接数据库的模块 maxconnections=3, # 连接池允许的最大连接数,0和None表示不限制连接数 mincached=2, # 初始化时,链接池中至少创建的空闲的链接,0表示不创建 maxcached=2, # 链接池中最多闲置的链接,0和None不限制 maxshared=3, # 链接池中最多共享的链接数量,0和None表示全部共享。PS: 无用,因为pymysql和MySQLdb等模块的 threadsafety都为1, # 所有值无论设置为多少,_maxcached永远为0,所以永远是所有链接都共享。 blocking=True, # 连接池中如果没有可用连接后,是否阻塞等待。True,等待;False,不等待然后报错 maxusage=None, # 一个链接最多被重复使用的次数,None表示无限制 setsession=[], # 开始会话前执行的命令列表。如:["set datestyle to ...", "set time zone ..."] ping=0, # ping MySQL服务端,检查是否服务可用。# 如:0 = None = never, 1 = default = whenever it is requested, # 2 = when a cursor is created, 4 = when a query is executed, 7 = always host=host, port=port, user=user, password=password, database=database, charset='utf8' ) def __new__(cls, *args, **kw): """ 启用单例模式 :param args: :param kw: :return: """ if not hasattr(cls, '_instance'): cls._instance = object.__new__(cls) return cls._instance def connect(self): """ 启动连接 :return: """ conn = self.POOL.connection() cursor = conn.cursor(cursor=pymysql.cursors.DictCursor) return conn, cursor @staticmethod def connect_close(conn, cursor): """ 关闭连接 :param conn: :param cursor: :return: """ cursor.close() conn.close() def fetch_all(self, sql, args): """ 批量查询 :param sql: :param args: :return: """ conn, cursor = self.connect() cursor.execute(sql, args) record_list = cursor.fetchall() self.connect_close(conn, cursor) return record_list def fetch_one(self, sql, args): """ 查询单条数据 :param sql: :param args: :return: """ conn, cursor = self.connect() cursor.execute(sql, args) result = cursor.fetchone() self.connect_close(conn, cursor) return result def insert(self, sql, args): """ 插入数据 :param sql: :param args: :return: """ conn, cursor = self.connect() row = cursor.execute(sql, args) conn.commit() self.connect_close(conn, cursor) return 
row希望多多支持,素质三联,评论 转发 收藏
欢迎深入讨论,开发更简版
|
|