Python按日期水平分割Mysql-日常笔记

水平分割表:一种是当多个过程频繁访问数据表的不同行时,水平分割表,并消除新表中的冗余数据列;若个别过程要访问整个数据,则要用连接*作,这也无妨分割表;典型案例是电信话单按月分割存放。

垂直分割表(不破坏第三范式),一种是当多个过程频繁访问表的不同列时,可将表垂直分成几个表,减少磁盘I/O(每行的数据列少,每页存的数据行就多,相应占用的页就少),更新时不必考虑锁,没有冗余数据。缺点是要在插入或删除数据时要考虑数据的完整性,用存储过程维护。

# -*- coding: UTF-8 -*-
import MySQLdb
import datetime
import time
# Mysql数据库按照存储日期水平分割的脚本 # Au:Claves # Email:claves@claves.me # Date:2016/04/24
def check_db_data_count(db_handle, db_cursor_handle, field_name, source_table_name, new_table_name, start_time, end_time):       # 校验原表数据和新表数据
    source_sql =" select count(*) from "+ source_table_name+" where ("+field_name+"  >= '"+start_time+"') and ( "+field_name+" < '"+end_time+"')"
    new_table_sql = "select count(*) from "+new_table_name
    try:
        # db_handle.commit()
        db_cursor_handle.execute(source_sql)
        source_count = db_cursor_handle.fetchone()
        db_cursor_handle.execute(new_table_sql)
        new_count = db_cursor_handle.fetchone()
        db_handle.commit()
    except:
        db_handle.rollback()
    print start_time+"-"+end_time+"::::"+"source talbe count:"+str(source_count[0])+"   new talbe count:"+str(new_count[0])
    return (source_count[0],new_count[0])

def copy_db_struct(db_cursor_handle, source_table_name, new_table_name):    # 从原表中拷贝表结构到新表中
    sql = "create table if not EXISTS " + new_table_name + " like " + source_table_name
    db_cursor_handle.execute(sql)
def copy_db_data(db_handle,db_cursor_handle,field_name,source_table_name,new_table_name,start_time,end_time):#拷贝表中数据
    sql = " insert into " + new_table_name + " select * from " + source_table_name+" where ("+field_name+"  >= '" + start_time + "') and ( "+field_name+" < '"+end_time+"')";
    try:
        db_handle.commit()
        db_cursor_handle.execute(sql)
        db_handle.commit()
    except:
        db_handle.rollback()

start_date = datetime.date(2015, 12, 1)     # 开始时间
end_date = datetime.date(2016, 4, 30)      # 结束日期
source_table_name = "historydata"      # 原数据库中表名
field_name_date = "storetime"      # 元数据库中,保存时间的字段名
space = 1       # 水平分割的时间跨度(已天为单位)
source_count = 0     # 临时变量,保存原表中总数据量
new_count = 0     # 临时变量,保存所有创建新表中总数据量
db = MySQLdb.connect("localhost", "root", "password", "dbname");
cursor = db.cursor()  #mysql cursor handle
while(start_date <= end_date):
    tmp_start_time = start_date.strftime("%Y-%m-%d 00:00:00")
    tmp_nextday_time = datetime.date.fromordinal(start_date.toordinal()+space)
    tmp_nextday_time =tmp_nextday_time.strftime("%Y-%m-%d 00:00:00")
    tmp_new_table_name = source_table_name+"_" + start_date.strftime("%Y%m%d")
    copy_db_struct(cursor, source_table_name ,tmp_new_table_name)
    copy_db_data(db, cursor, field_name_date, source_table_name, tmp_new_table_name, tmp_start_time, tmp_nextday_time)

    tmp_count = check_db_data_count(db, cursor, field_name_date, source_table_name, tmp_new_table_name, tmp_start_time, tmp_nextday_time)
    source_count += tmp_count[0]
    new_count += tmp_count[1]
    start_date = datetime.date.fromordinal(start_date.toordinal()+space)
print "source_count:"+str(source_count)+"----"+"new_count:"+str(new_count)

 

分享到:更多 ()