Merge pull request #2283 from annando/1601-worker-stuff
The worker now kills processes that run longer than 3 hours
This commit is contained in:
commit
b77dfb5737
1 changed files with 30 additions and 11 deletions
|
@ -39,8 +39,10 @@ function poller_run(&$argv, &$argc){
|
|||
}
|
||||
|
||||
// Checking the number of workers
|
||||
if (poller_too_much_workers(1))
|
||||
if (poller_too_much_workers(1)) {
|
||||
poller_kill_stale_workers();
|
||||
return;
|
||||
}
|
||||
|
||||
if(($argc <= 1) OR ($argv[1] != "no_cron")) {
|
||||
// Run the cron job that calls all other jobs
|
||||
|
@ -50,16 +52,7 @@ function poller_run(&$argv, &$argc){
|
|||
proc_run("php","include/cronhooks.php");
|
||||
|
||||
// Cleaning dead processes
|
||||
$r = q("SELECT DISTINCT(`pid`) FROM `workerqueue` WHERE `executed` != '0000-00-00 00:00:00'");
|
||||
foreach($r AS $pid)
|
||||
if (!posix_kill($pid["pid"], 0))
|
||||
q("UPDATE `workerqueue` SET `executed` = '0000-00-00 00:00:00', `pid` = 0 WHERE `pid` = %d",
|
||||
intval($pid["pid"]));
|
||||
else {
|
||||
/// @TODO Kill long running processes
|
||||
/// But: Update processes (like the database update) mustn't be killed
|
||||
}
|
||||
|
||||
poller_kill_stale_workers();
|
||||
} else
|
||||
// Sleep four seconds before checking for running processes again to avoid having too many workers
|
||||
sleep(4);
|
||||
|
@ -124,6 +117,32 @@ function poller_run(&$argv, &$argc){
|
|||
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief fix the queue entry if the worker process died
|
||||
*
|
||||
*/
|
||||
function poller_kill_stale_workers() {
|
||||
$r = q("SELECT `pid`, `executed` FROM `workerqueue` WHERE `executed` != '0000-00-00 00:00:00'");
|
||||
foreach($r AS $pid)
|
||||
if (!posix_kill($pid["pid"], 0))
|
||||
q("UPDATE `workerqueue` SET `executed` = '0000-00-00 00:00:00', `pid` = 0 WHERE `pid` = %d",
|
||||
intval($pid["pid"]));
|
||||
else {
|
||||
// Kill long running processes
|
||||
$duration = (time() - strtotime($pid["executed"])) / 60;
|
||||
if ($duration > 180) {
|
||||
logger("Worker process ".$pid["pid"]." took more than 3 hours. It will be killed now.");
|
||||
posix_kill($pid["pid"], SIGTERM);
|
||||
|
||||
// Question: If a process is stale: Should we remove it or should we reschedule it?
|
||||
// By now we rescheduling it. It's maybe not the wisest decision?
|
||||
q("UPDATE `workerqueue` SET `executed` = '0000-00-00 00:00:00', `pid` = 0 WHERE `pid` = %d",
|
||||
intval($pid["pid"]));
|
||||
} else
|
||||
logger("Worker process ".$pid["pid"]." now runs for ".round($duration)." minutes. That's okay.", LOGGER_DEBUG);
|
||||
}
|
||||
}
|
||||
|
||||
function poller_too_much_workers($stage) {
|
||||
|
||||
$queues = get_config("system", "worker_queues");
|
||||
|
|
Loading…
Reference in a new issue