当表中有上千万条数据时,对数据进行分页或者分段处理时,越后的数据用 limit 就越慢,直接 limit 的方法基本可以直接弃用。有如下两种方法可以比较好的解决这个问题:
# 方法一数组拆分法:
/**
 * Method 1 (array-splitting): page through the comment-relation table in
 * 100000-row windows with LIMIT offset, collect the cids of each window,
 * and hand each batch to splitData() for chunked processing.
 *
 * NOTE(review): $num is assumed to be (an upper bound on) the total row
 * count — confirm against the real table size before reuse.
 */
public function process() {
    $num = 1000000;
    // Split the total into windows of 100000 rows each.
    $count = ceil($num / 100000);
    for ($i = 0; $i < $count; $i++) {
        $ids = [];
        $start = $i * 100000;
        // ORDER BY cid is required: without it, row order between successive
        // LIMIT pages is undefined, so rows could be skipped or read twice.
        $cidSQL = "SELECT cid FROM rb_spacecomment_rel FORCE INDEX (PRIMARY) "
            . "WHERE uid != 0 AND cid > 0 ORDER BY cid LIMIT $start, 100000";
        $cidResults = $this->_db->fetchAll($cidSQL);
        foreach ($cidResults as $result) {
            $ids[] = $result['cid'];
        }
        $this->splitData($ids);
    }
}
/**
 * Process a list of comment ids in chunks of $step, fetching the full rows
 * for each chunk with a single IN (...) query and appending each cid to a
 * log file.
 *
 * Fix: the original ended with `if ($begin >= 100000) return;`, which
 * silently stopped after 100000 ids even when the caller passed more.
 * The empty-slice check already terminates the loop, so the cap is removed.
 *
 * @param array $ids  list of integer cids (values are interpolated into
 *                    SQL — callers must only pass trusted integer ids)
 */
public function splitData($ids) {
    $begin = 0;
    $step = 1000;
    while (true) {
        // Take the next slice of ids; an empty slice means we are done.
        $idArr = array_slice($ids, $begin, $step);
        if (empty($idArr)) {
            break;
        }
        $idStr = implode(',', $idArr);
        $sql = "SELECT * FROM rb_spacecomment_rel WHERE cid in ($idStr)";
        $results = $this->_db->fetchAll($sql);
        if (!$results) {
            break;
        }
        // Process each fetched row.
        foreach ($results as $row) {
            file_put_contents('test123.log', $row['cid'] . "\n", FILE_APPEND);
        }
        // Advance to the next chunk.
        $begin += $step;
    }
}
耗时:7分24秒
# 方法二 lastid 法:
/**
 * Method 2 (last-id keyset pagination): repeatedly fetch $step rows with
 * cid greater than the last cid seen, so every query starts from an index
 * seek instead of scanning past a growing LIMIT offset.
 *
 * Fixes over the original:
 *  - break out when fetchAll() returns no rows; the original kept looping
 *    on a stale $lastcid until $begin reached the cap,
 *  - $row was read after the foreach and is undefined when the result set
 *    is empty,
 *  - ORDER BY cid added so "last row = highest cid" is actually guaranteed,
 *  - the loop cap reuses $num instead of a duplicated magic 1000000.
 */
public function process() {
    $num = 1000000;
    $lastcid = 0;
    $begin = 0;
    $step = 1000;
    while (true) {
        // Keyset pagination: seek directly to cid > $lastcid on the PK.
        $cidSQL = "SELECT * FROM rb_spacecomment_rel FORCE INDEX (PRIMARY) "
            . "WHERE uid != 0 AND cid > $lastcid ORDER BY cid LIMIT $step";
        $cidResults = $this->_db->fetchAll($cidSQL);
        // Table exhausted — stop instead of re-issuing the same query.
        if (!$cidResults) {
            return;
        }
        foreach ($cidResults as $row) {
            file_put_contents('test1234.log', $row['cid'] . "\n", FILE_APPEND);
        }
        // $row now holds the last (highest-cid) row of this batch.
        $lastcid = $row['cid'];
        $begin += $step;
        if ($begin >= $num) {
            return;
        }
    }
}
耗时:6分47秒
从代码上及耗时上看很明显方法二要好很多,而且随着数据量的增大,方法一需要在外层拆分更多次,增加的耗时更多。