From 17b1d459687ec068dd5bd968196f8afc8660b14d Mon Sep 17 00:00:00 2001 From: Michael Vogel Date: Wed, 9 Sep 2015 22:42:31 +0200 Subject: [PATCH 01/18] Worker: New method for running background processes --- boot.php | 22 +++++++++++-- include/dbstructure.php | 14 +++++++++ include/worker.php | 68 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 101 insertions(+), 3 deletions(-) create mode 100755 include/worker.php diff --git a/boot.php b/boot.php index 7451891fe..7f3238013 100644 --- a/boot.php +++ b/boot.php @@ -1432,8 +1432,25 @@ if(! function_exists('proc_run')) { if(! $arr['run_cmd']) return; - if(count($args) && $args[0] === 'php') + if(count($args) && $args[0] === 'php') { + $argv = $args; + array_shift($argv); + + $parameters = json_encode($argv); + $found = q("SELECT `id` FROM `workerqueue` WHERE `parameter` = '%s'", + dbesc($parameters)); + + if (!$found) + q("INSERT INTO `workerqueue` (`parameter`, `created`, `priority`) + VALUES ('%s', '%s', %d)", + dbesc($parameters), + dbesc(datetime_convert()), + intval(0)); + + // return; + $args[0] = ((x($a->config,'php_path')) && (strlen($a->config['php_path'])) ? $a->config['php_path'] : 'php'); + } // add baseurl to args. cli scripts can't construct it $args[] = $a->get_baseurl(); @@ -1441,9 +1458,8 @@ if(! function_exists('proc_run')) { for($x = 0; $x < count($args); $x ++) $args[$x] = escapeshellarg($args[$x]); - - $cmdline = implode($args," "); + if(get_config('system','proc_windows')) proc_close(proc_open('cmd /c start /b ' . $cmdline,array(),$foo,dirname(__FILE__))); else diff --git a/include/dbstructure.php b/include/dbstructure.php index 2b1ee84fd..deb6ddf53 100644 --- a/include/dbstructure.php +++ b/include/dbstructure.php @@ -1382,6 +1382,20 @@ function db_definition() { "username" => array("username"), ) ); + $database["workerqueue"] = array( + "fields" => array( + "id" => array("type" => "int(11)", "not null" => "1", "extra" => "auto_increment", "primary" => "1"), + "parameter" => array("type" => "text", "not null" => "1"), + "priority" => array("type" => "tinyint(3) unsigned", "not null" => "1", "default" => "0"), + "created" => array("type" => "datetime", "not null" => "1", "default" => "0000-00-00 00:00:00"), + "pid" => array("type" => "int(11)", "not null" => "1", "default" => "0"), + "executed" => array("type" => "datetime", "not null" => "1", "default" => "0000-00-00 00:00:00"), + ), + "indexes" => array( + "PRIMARY" => array("id"), + "created" => array("created"), + ) + ); return($database); } diff --git a/include/worker.php b/include/worker.php new file mode 100755 index 000000000..e1eee388c --- /dev/null +++ b/include/worker.php @@ -0,0 +1,68 @@ +#!/usr/bin/php += $threads) + return; + +while ($r = q("SELECT * FROM `workerqueue` WHERE `executed` = '0000-00-00 00:00:00' ORDER BY `created` LIMIT 1")) { + q("UPDATE `workerqueue` SET `executed` = '%s', `pid` = %d WHERE `id` = %d", + dbesc(datetime_convert()), + intval(getmypid()), + intval($r[0]["id"])); + + $argv = json_decode($r[0]["parameter"]); + + $argc = count($argv); + + // To-Do: Check for existance + require_once(basename($argv[0])); + + $funcname=str_replace(".php", "", basename($argv[0]))."_run"; + + if (function_exists($funcname)) { + logger("Process ".getmypid().": ".$funcname." ".$r[0]["parameter"]); + //$funcname($argv, $argc); + sleep(10); + logger("Process ".getmypid().": ".$funcname." - done"); + + q("DELETE FROM `workerqueue` WHERE `id` = %d", intval($r[0]["id"])); + } +} +?> From d3a6ebfe7e7ba2fca35968c0ffc602f332a7c4a1 Mon Sep 17 00:00:00 2001 From: Michael Vogel Date: Thu, 10 Sep 2015 23:10:31 +0200 Subject: [PATCH 02/18] The worker is now working --- boot.php | 27 +++++++++++++++------------ include/onepoll.php | 2 +- include/poller.php | 19 ++----------------- include/socgraph.php | 6 ++++-- include/worker.php | 31 +++++++++++++++++++++++++++---- 5 files changed, 49 insertions(+), 36 deletions(-) diff --git a/boot.php b/boot.php index 7f3238013..b3b926521 100644 --- a/boot.php +++ b/boot.php @@ -1433,21 +1433,24 @@ if(! function_exists('proc_run')) { return; if(count($args) && $args[0] === 'php') { - $argv = $args; - array_shift($argv); - $parameters = json_encode($argv); - $found = q("SELECT `id` FROM `workerqueue` WHERE `parameter` = '%s'", - dbesc($parameters)); + if (get_config("system", "worker")) { + $argv = $args; + array_shift($argv); - if (!$found) - q("INSERT INTO `workerqueue` (`parameter`, `created`, `priority`) - VALUES ('%s', '%s', %d)", - dbesc($parameters), - dbesc(datetime_convert()), - intval(0)); + $parameters = json_encode($argv); + $found = q("SELECT `id` FROM `workerqueue` WHERE `parameter` = '%s'", + dbesc($parameters)); - // return; + if (!$found) + q("INSERT INTO `workerqueue` (`parameter`, `created`, `priority`) + VALUES ('%s', '%s', %d)", + dbesc($parameters), + dbesc(datetime_convert()), + intval(0)); + + return; + } $args[0] = ((x($a->config,'php_path')) && (strlen($a->config['php_path'])) ? $a->config['php_path'] : 'php'); } diff --git a/include/onepoll.php b/include/onepoll.php index 1fc861afa..e8fc97b21 100644 --- a/include/onepoll.php +++ b/include/onepoll.php @@ -360,7 +360,7 @@ function onepoll_run(&$argv, &$argc){ ); logger("Mail: Connected to " . $mailconf[0]['user']); } else - logger("Mail: Connection error ".$mailconf[0]['user']." ".print_r(imap_errors())); + logger("Mail: Connection error ".$mailconf[0]['user']." ".print_r(imap_errors(), true)); } if($mbox) { diff --git a/include/poller.php b/include/poller.php index 28dc0c0cd..e47ab3782 100644 --- a/include/poller.php +++ b/include/poller.php @@ -75,22 +75,6 @@ function poller_run(&$argv, &$argc){ logger('poller: start'); - // run queue delivery process in the background - - proc_run('php',"include/queue.php"); - - // run diaspora photo queue process in the background - - proc_run('php',"include/dsprphotoq.php"); - - // run the process to discover global contacts in the background - - proc_run('php',"include/discover_poco.php"); - - // run the process to update locally stored global contacts in the background - - proc_run('php',"include/discover_poco.php", "checkcontact"); - // expire any expired accounts q("UPDATE user SET `account_expired` = 1 where `account_expired` = 0 @@ -119,7 +103,8 @@ function poller_run(&$argv, &$argc){ check_conversations(false); // Follow your friends from your legacy OStatus account - ostatus_check_follow_friends(); + // Doesn't work + // ostatus_check_follow_friends(); // update nodeinfo data nodeinfo_cron(); diff --git a/include/socgraph.php b/include/socgraph.php index 97daae1d2..6e2b6ea15 100644 --- a/include/socgraph.php +++ b/include/socgraph.php @@ -1338,8 +1338,10 @@ function poco_discover($complete = false) { q("UPDATE `gserver` SET `last_poco_query` = '%s' WHERE `nurl` = '%s'", dbesc(datetime_convert()), dbesc($server["nurl"])); if (!$complete AND (--$no_of_queries == 0)) break; - } else // If the server hadn't replied correctly, then force a sanity check - poco_check_server($server["url"], $server["network"], true); + // If the server hadn't replied correctly, then force a sanity check + } elseif (!poco_check_server($server["url"], $server["network"], true)) + q("UPDATE `gserver` SET `last_poco_query` = '%s' WHERE `nurl` = '%s'", dbesc(datetime_convert()), dbesc($server["nurl"])); + } } diff --git a/include/worker.php b/include/worker.php index e1eee388c..c1d9202ba 100755 --- a/include/worker.php +++ b/include/worker.php @@ -26,6 +26,25 @@ if(is_null($db)) { unset($db_host, $db_user, $db_pass, $db_data); }; +// run queue delivery process in the background + +proc_run('php',"include/queue.php"); + +// run diaspora photo queue process in the background + +proc_run('php',"include/dsprphotoq.php"); + +// run the process to discover global contacts in the background + +proc_run('php',"include/discover_poco.php"); + +// run the process to update locally stored global contacts in the background + +proc_run('php',"include/discover_poco.php", "checkcontact"); + +// When everything else is done ... +proc_run("php","include/poller.php"); + // Cleaning killed processes $r = q("SELECT DISTINCT(`pid`) FROM `workerqueue` WHERE `executed` != '0000-00-00 00:00:00'"); foreach($r AS $pid) @@ -36,9 +55,12 @@ foreach($r AS $pid) // Checking number of workers $workers = q("SELECT COUNT(*) AS `workers` FROM `workerqueue` WHERE `executed` != '0000-00-00 00:00:00'"); -$threads = 3; +$queues = intval(get_config("system", "worker_queues")); -if ($workers[0]["workers"] >= $threads) +if ($queues == 0) + $queues = 4; + +if ($workers[0]["workers"] >= $queues) return; while ($r = q("SELECT * FROM `workerqueue` WHERE `executed` = '0000-00-00 00:00:00' ORDER BY `created` LIMIT 1")) { @@ -58,11 +80,12 @@ while ($r = q("SELECT * FROM `workerqueue` WHERE `executed` = '0000-00-00 00:00: if (function_exists($funcname)) { logger("Process ".getmypid().": ".$funcname." ".$r[0]["parameter"]); - //$funcname($argv, $argc); - sleep(10); + $funcname($argv, $argc); + //sleep(10); logger("Process ".getmypid().": ".$funcname." - done"); q("DELETE FROM `workerqueue` WHERE `id` = %d", intval($r[0]["id"])); } } + ?> From ff739b0a2320350e27cb5bbb9d915d81734363e3 Mon Sep 17 00:00:00 2001 From: Michael Vogel Date: Thu, 10 Sep 2015 23:32:56 +0200 Subject: [PATCH 03/18] Just changed some script names --- include/cron.php | 308 +++++++++++++++++++++++++++++++++++++++++++++ include/poller.php | 299 +++++++------------------------------------ include/worker.php | 91 -------------- 3 files changed, 352 insertions(+), 346 deletions(-) create mode 100644 include/cron.php delete mode 100755 include/worker.php diff --git a/include/cron.php b/include/cron.php new file mode 100644 index 000000000..ea7fd2c60 --- /dev/null +++ b/include/cron.php @@ -0,0 +1,308 @@ + $maxsysload) { + logger('system: load ' . $load[0] . ' too high. cron deferred to next scheduled run.'); + return; + } + } + + $lockpath = get_lockpath(); + if ($lockpath != '') { + $pidfile = new pidfile($lockpath, 'cron'); + if($pidfile->is_already_running()) { + logger("cron: Already running"); + if ($pidfile->running_time() > 9*60) { + $pidfile->kill(); + logger("cron: killed stale process"); + // Calling a new instance + proc_run('php','include/cron.php'); + } + exit; + } + } + + + + $a->set_baseurl(get_config('system','url')); + + load_hooks(); + + logger('cron: start'); + + // expire any expired accounts + + q("UPDATE user SET `account_expired` = 1 where `account_expired` = 0 + AND `account_expires_on` != '0000-00-00 00:00:00' + AND `account_expires_on` < UTC_TIMESTAMP() "); + + // delete user and contact records for recently removed accounts + + $r = q("SELECT * FROM `user` WHERE `account_removed` = 1 AND `account_expires_on` < UTC_TIMESTAMP() - INTERVAL 3 DAY"); + if ($r) { + foreach($r as $user) { + q("DELETE FROM `contact` WHERE `uid` = %d", intval($user['uid'])); + q("DELETE FROM `user` WHERE `uid` = %d", intval($user['uid'])); + } + } + + $abandon_days = intval(get_config('system','account_abandon_days')); + if($abandon_days < 1) + $abandon_days = 0; + + // Check OStatus conversations + // Check only conversations with mentions (for a longer time) + check_conversations(true); + + // Check every conversation + check_conversations(false); + + // Follow your friends from your legacy OStatus account + // Doesn't work + // ostatus_check_follow_friends(); + + // update nodeinfo data + nodeinfo_cron(); + + // To-Do: Regenerate usage statistics + // q("ANALYZE TABLE `item`"); + + // once daily run birthday_updates and then expire in background + + $d1 = get_config('system','last_expire_day'); + $d2 = intval(datetime_convert('UTC','UTC','now','d')); + + if($d2 != intval($d1)) { + + update_contact_birthdays(); + + update_suggestions(); + + set_config('system','last_expire_day',$d2); + proc_run('php','include/expire.php'); + } + + $last = get_config('system','cache_last_cleared'); + + if($last) { + $next = $last + (3600); // Once per hour + $clear_cache = ($next <= time()); + } else + $clear_cache = true; + + if ($clear_cache) { + // clear old cache + Cache::clear(); + + // clear old item cache files + clear_cache(); + + // clear cache for photos + clear_cache($a->get_basepath(), $a->get_basepath()."/photo"); + + // clear smarty cache + clear_cache($a->get_basepath()."/view/smarty3/compiled", $a->get_basepath()."/view/smarty3/compiled"); + + // clear cache for image proxy + if (!get_config("system", "proxy_disabled")) { + clear_cache($a->get_basepath(), $a->get_basepath()."/proxy"); + + $cachetime = get_config('system','proxy_cache_time'); + if (!$cachetime) $cachetime = PROXY_DEFAULT_TIME; + + q('DELETE FROM `photo` WHERE `uid` = 0 AND `resource-id` LIKE "pic:%%" AND `created` < NOW() - INTERVAL %d SECOND', $cachetime); + } + + set_config('system','cache_last_cleared', time()); + } + + $manual_id = 0; + $generation = 0; + $force = false; + $restart = false; + + if(($argc > 1) && ($argv[1] == 'force')) + $force = true; + + if(($argc > 1) && ($argv[1] == 'restart')) { + $restart = true; + $generation = intval($argv[2]); + if(! $generation) + killme(); + } + + if(($argc > 1) && intval($argv[1])) { + $manual_id = intval($argv[1]); + $force = true; + } + + $interval = intval(get_config('system','poll_interval')); + if(! $interval) + $interval = ((get_config('system','delivery_interval') === false) ? 3 : intval(get_config('system','delivery_interval'))); + + $sql_extra = (($manual_id) ? " AND `id` = $manual_id " : ""); + + reload_plugins(); + + $d = datetime_convert(); + + if(! $restart) + proc_run('php','include/cronhooks.php'); + + // Only poll from those with suitable relationships, + // and which have a polling address and ignore Diaspora since + // we are unable to match those posts with a Diaspora GUID and prevent duplicates. + + $abandon_sql = (($abandon_days) + ? sprintf(" AND `user`.`login_date` > UTC_TIMESTAMP() - INTERVAL %d DAY ", intval($abandon_days)) + : '' + ); + + $contacts = q("SELECT `contact`.`id` FROM `contact` INNER JOIN `user` ON `user`.`uid` = `contact`.`uid` + WHERE `rel` IN (%d, %d) AND `poll` != '' AND `network` IN ('%s', '%s', '%s', '%s', '%s', '%s') + $sql_extra + AND NOT `self` AND NOT `contact`.`blocked` AND NOT `contact`.`readonly` AND NOT `contact`.`archive` + AND NOT `user`.`account_expired` AND NOT `user`.`account_removed` $abandon_sql ORDER BY RAND()", + intval(CONTACT_IS_SHARING), + intval(CONTACT_IS_FRIEND), + dbesc(NETWORK_DFRN), + dbesc(NETWORK_ZOT), + dbesc(NETWORK_OSTATUS), + dbesc(NETWORK_FEED), + dbesc(NETWORK_MAIL), + dbesc(NETWORK_MAIL2) + ); + + if(! count($contacts)) { + return; + } + + foreach($contacts as $c) { + + $res = q("SELECT * FROM `contact` WHERE `id` = %d LIMIT 1", + intval($c['id']) + ); + + if((! $res) || (! count($res))) + continue; + + foreach($res as $contact) { + + $xml = false; + + if($manual_id) + $contact['last-update'] = '0000-00-00 00:00:00'; + + if(in_array($contact['network'], array(NETWORK_DFRN, NETWORK_ZOT, NETWORK_OSTATUS))) + $contact['priority'] = 2; + + if($contact['subhub'] AND in_array($contact['network'], array(NETWORK_DFRN, NETWORK_ZOT, NETWORK_OSTATUS))) { + // We should be getting everything via a hub. But just to be sure, let's check once a day. + // (You can make this more or less frequent if desired by setting 'pushpoll_frequency' appropriately) + // This also lets us update our subscription to the hub, and add or replace hubs in case it + // changed. We will only update hubs once a day, regardless of 'pushpoll_frequency'. + + $poll_interval = get_config('system','pushpoll_frequency'); + $contact['priority'] = (($poll_interval !== false) ? intval($poll_interval) : 3); + } + + if($contact['priority'] AND !$force) { + + $update = false; + + $t = $contact['last-update']; + + /** + * Based on $contact['priority'], should we poll this site now? Or later? + */ + + switch ($contact['priority']) { + case 5: + if(datetime_convert('UTC','UTC', 'now') > datetime_convert('UTC','UTC', $t . " + 1 month")) + $update = true; + break; + case 4: + if(datetime_convert('UTC','UTC', 'now') > datetime_convert('UTC','UTC', $t . " + 1 week")) + $update = true; + break; + case 3: + if(datetime_convert('UTC','UTC', 'now') > datetime_convert('UTC','UTC', $t . " + 1 day")) + $update = true; + break; + case 2: + if(datetime_convert('UTC','UTC', 'now') > datetime_convert('UTC','UTC', $t . " + 12 hour")) + $update = true; + break; + case 1: + default: + if(datetime_convert('UTC','UTC', 'now') > datetime_convert('UTC','UTC', $t . " + 1 hour")) + $update = true; + break; + } + if(!$update) + continue; + } + + logger("Polling ".$contact["network"]." ".$contact["id"]." ".$contact["nick"]." ".$contact["name"]); + + proc_run('php','include/onepoll.php',$contact['id']); + + if($interval) + @time_sleep_until(microtime(true) + (float) $interval); + } + } + + logger('cron: end'); + + return; +} + +if (array_search(__file__,get_included_files())===0){ + cron_run($_SERVER["argv"],$_SERVER["argc"]); + killme(); +} diff --git a/include/poller.php b/include/poller.php index e47ab3782..05e936936 100644 --- a/include/poller.php +++ b/include/poller.php @@ -12,7 +12,6 @@ if (!file_exists("boot.php") AND (sizeof($_SERVER["argv"]) != 0)) { require_once("boot.php"); - function poller_run(&$argv, &$argc){ global $a, $db; @@ -21,288 +20,78 @@ function poller_run(&$argv, &$argc){ } if(is_null($db)) { - @include(".htconfig.php"); - require_once("include/dba.php"); - $db = new dba($db_host, $db_user, $db_pass, $db_data); - unset($db_host, $db_user, $db_pass, $db_data); - }; + @include(".htconfig.php"); + require_once("include/dba.php"); + $db = new dba($db_host, $db_user, $db_pass, $db_data); + unset($db_host, $db_user, $db_pass, $db_data); + }; + // run queue delivery process in the background - require_once('include/session.php'); - require_once('include/datetime.php'); - require_once('library/simplepie/simplepie.inc'); - require_once('include/items.php'); - require_once('include/Contact.php'); - require_once('include/email.php'); - require_once('include/socgraph.php'); - require_once('include/pidfile.php'); - require_once('mod/nodeinfo.php'); + proc_run('php',"include/queue.php"); - load_config('config'); - load_config('system'); + // run diaspora photo queue process in the background - $maxsysload = intval(get_config('system','maxloadavg')); - if($maxsysload < 1) - $maxsysload = 50; - if(function_exists('sys_getloadavg')) { - $load = sys_getloadavg(); - if(intval($load[0]) > $maxsysload) { - logger('system: load ' . $load[0] . ' too high. Poller deferred to next scheduled run.'); - return; - } - } + proc_run('php',"include/dsprphotoq.php"); - $lockpath = get_lockpath(); - if ($lockpath != '') { - $pidfile = new pidfile($lockpath, 'poller'); - if($pidfile->is_already_running()) { - logger("poller: Already running"); - if ($pidfile->running_time() > 9*60) { - $pidfile->kill(); - logger("poller: killed stale process"); - // Calling a new instance - proc_run('php','include/poller.php'); - } - exit; - } - } + // run the process to discover global contacts in the background + proc_run('php',"include/discover_poco.php"); + // run the process to update locally stored global contacts in the background - $a->set_baseurl(get_config('system','url')); + proc_run('php',"include/discover_poco.php", "checkcontact"); - load_hooks(); + // When everything else is done ... + proc_run("php","include/cron.php"); - logger('poller: start'); + // Cleaning killed processes + $r = q("SELECT DISTINCT(`pid`) FROM `workerqueue` WHERE `executed` != '0000-00-00 00:00:00'"); + foreach($r AS $pid) + if (!posix_kill($pid["pid"], 0)) + q("UPDATE `workerqueue` SET `executed` = '0000-00-00 00:00:00', `pid` = 0 WHERE `pid` = %d", + intval($pid["pid"])); - // expire any expired accounts + // Checking number of workers + $workers = q("SELECT COUNT(*) AS `workers` FROM `workerqueue` WHERE `executed` != '0000-00-00 00:00:00'"); - q("UPDATE user SET `account_expired` = 1 where `account_expired` = 0 - AND `account_expires_on` != '0000-00-00 00:00:00' - AND `account_expires_on` < UTC_TIMESTAMP() "); + $queues = intval(get_config("system", "worker_queues")); - // delete user and contact records for recently removed accounts + if ($queues == 0) + $queues = 4; - $r = q("SELECT * FROM `user` WHERE `account_removed` = 1 AND `account_expires_on` < UTC_TIMESTAMP() - INTERVAL 3 DAY"); - if ($r) { - foreach($r as $user) { - q("DELETE FROM `contact` WHERE `uid` = %d", intval($user['uid'])); - q("DELETE FROM `user` WHERE `uid` = %d", intval($user['uid'])); - } - } - - $abandon_days = intval(get_config('system','account_abandon_days')); - if($abandon_days < 1) - $abandon_days = 0; - - // Check OStatus conversations - // Check only conversations with mentions (for a longer time) - check_conversations(true); - - // Check every conversation - check_conversations(false); - - // Follow your friends from your legacy OStatus account - // Doesn't work - // ostatus_check_follow_friends(); - - // update nodeinfo data - nodeinfo_cron(); - - // To-Do: Regenerate usage statistics - // q("ANALYZE TABLE `item`"); - - // once daily run birthday_updates and then expire in background - - $d1 = get_config('system','last_expire_day'); - $d2 = intval(datetime_convert('UTC','UTC','now','d')); - - if($d2 != intval($d1)) { - - update_contact_birthdays(); - - update_suggestions(); - - set_config('system','last_expire_day',$d2); - proc_run('php','include/expire.php'); - } - - $last = get_config('system','cache_last_cleared'); - - if($last) { - $next = $last + (3600); // Once per hour - $clear_cache = ($next <= time()); - } else - $clear_cache = true; - - if ($clear_cache) { - // clear old cache - Cache::clear(); - - // clear old item cache files - clear_cache(); - - // clear cache for photos - clear_cache($a->get_basepath(), $a->get_basepath()."/photo"); - - // clear smarty cache - clear_cache($a->get_basepath()."/view/smarty3/compiled", $a->get_basepath()."/view/smarty3/compiled"); - - // clear cache for image proxy - if (!get_config("system", "proxy_disabled")) { - clear_cache($a->get_basepath(), $a->get_basepath()."/proxy"); - - $cachetime = get_config('system','proxy_cache_time'); - if (!$cachetime) $cachetime = PROXY_DEFAULT_TIME; - - q('DELETE FROM `photo` WHERE `uid` = 0 AND `resource-id` LIKE "pic:%%" AND `created` < NOW() - INTERVAL %d SECOND', $cachetime); - } - - set_config('system','cache_last_cleared', time()); - } - - $manual_id = 0; - $generation = 0; - $force = false; - $restart = false; - - if(($argc > 1) && ($argv[1] == 'force')) - $force = true; - - if(($argc > 1) && ($argv[1] == 'restart')) { - $restart = true; - $generation = intval($argv[2]); - if(! $generation) - killme(); - } - - if(($argc > 1) && intval($argv[1])) { - $manual_id = intval($argv[1]); - $force = true; - } - - $interval = intval(get_config('system','poll_interval')); - if(! $interval) - $interval = ((get_config('system','delivery_interval') === false) ? 3 : intval(get_config('system','delivery_interval'))); - - $sql_extra = (($manual_id) ? " AND `id` = $manual_id " : ""); - - reload_plugins(); - - $d = datetime_convert(); - - if(! $restart) - proc_run('php','include/cronhooks.php'); - - // Only poll from those with suitable relationships, - // and which have a polling address and ignore Diaspora since - // we are unable to match those posts with a Diaspora GUID and prevent duplicates. - - $abandon_sql = (($abandon_days) - ? sprintf(" AND `user`.`login_date` > UTC_TIMESTAMP() - INTERVAL %d DAY ", intval($abandon_days)) - : '' - ); - - $contacts = q("SELECT `contact`.`id` FROM `contact` INNER JOIN `user` ON `user`.`uid` = `contact`.`uid` - WHERE `rel` IN (%d, %d) AND `poll` != '' AND `network` IN ('%s', '%s', '%s', '%s', '%s', '%s') - $sql_extra - AND NOT `self` AND NOT `contact`.`blocked` AND NOT `contact`.`readonly` AND NOT `contact`.`archive` - AND NOT `user`.`account_expired` AND NOT `user`.`account_removed` $abandon_sql ORDER BY RAND()", - intval(CONTACT_IS_SHARING), - intval(CONTACT_IS_FRIEND), - dbesc(NETWORK_DFRN), - dbesc(NETWORK_ZOT), - dbesc(NETWORK_OSTATUS), - dbesc(NETWORK_FEED), - dbesc(NETWORK_MAIL), - dbesc(NETWORK_MAIL2) - ); - - if(! count($contacts)) { + if ($workers[0]["workers"] >= $queues) return; - } - foreach($contacts as $c) { + while ($r = q("SELECT * FROM `workerqueue` WHERE `executed` = '0000-00-00 00:00:00' ORDER BY `created` LIMIT 1")) { + q("UPDATE `workerqueue` SET `executed` = '%s', `pid` = %d WHERE `id` = %d", + dbesc(datetime_convert()), + intval(getmypid()), + intval($r[0]["id"])); - $res = q("SELECT * FROM `contact` WHERE `id` = %d LIMIT 1", - intval($c['id']) - ); + $argv = json_decode($r[0]["parameter"]); - if((! $res) || (! count($res))) - continue; + $argc = count($argv); - foreach($res as $contact) { + // To-Do: Check for existance + require_once(basename($argv[0])); - $xml = false; + $funcname=str_replace(".php", "", basename($argv[0]))."_run"; - if($manual_id) - $contact['last-update'] = '0000-00-00 00:00:00'; + if (function_exists($funcname)) { + logger("Process ".getmypid().": ".$funcname." ".$r[0]["parameter"]); + $funcname($argv, $argc); + //sleep(10); + logger("Process ".getmypid().": ".$funcname." - done"); - if(in_array($contact['network'], array(NETWORK_DFRN, NETWORK_ZOT, NETWORK_OSTATUS))) - $contact['priority'] = 2; - - if($contact['subhub'] AND in_array($contact['network'], array(NETWORK_DFRN, NETWORK_ZOT, NETWORK_OSTATUS))) { - // We should be getting everything via a hub. But just to be sure, let's check once a day. - // (You can make this more or less frequent if desired by setting 'pushpoll_frequency' appropriately) - // This also lets us update our subscription to the hub, and add or replace hubs in case it - // changed. We will only update hubs once a day, regardless of 'pushpoll_frequency'. - - $poll_interval = get_config('system','pushpoll_frequency'); - $contact['priority'] = (($poll_interval !== false) ? intval($poll_interval) : 3); - } - - if($contact['priority'] AND !$force) { - - $update = false; - - $t = $contact['last-update']; - - /** - * Based on $contact['priority'], should we poll this site now? Or later? - */ - - switch ($contact['priority']) { - case 5: - if(datetime_convert('UTC','UTC', 'now') > datetime_convert('UTC','UTC', $t . " + 1 month")) - $update = true; - break; - case 4: - if(datetime_convert('UTC','UTC', 'now') > datetime_convert('UTC','UTC', $t . " + 1 week")) - $update = true; - break; - case 3: - if(datetime_convert('UTC','UTC', 'now') > datetime_convert('UTC','UTC', $t . " + 1 day")) - $update = true; - break; - case 2: - if(datetime_convert('UTC','UTC', 'now') > datetime_convert('UTC','UTC', $t . " + 12 hour")) - $update = true; - break; - case 1: - default: - if(datetime_convert('UTC','UTC', 'now') > datetime_convert('UTC','UTC', $t . " + 1 hour")) - $update = true; - break; - } - if(!$update) - continue; - } - - logger("Polling ".$contact["network"]." ".$contact["id"]." ".$contact["nick"]." ".$contact["name"]); - - proc_run('php','include/onepoll.php',$contact['id']); - - if($interval) - @time_sleep_until(microtime(true) + (float) $interval); + q("DELETE FROM `workerqueue` WHERE `id` = %d", intval($r[0]["id"])); } } - logger('poller: end'); - - return; } if (array_search(__file__,get_included_files())===0){ poller_run($_SERVER["argv"],$_SERVER["argc"]); killme(); } +?> diff --git a/include/worker.php b/include/worker.php deleted file mode 100755 index c1d9202ba..000000000 --- a/include/worker.php +++ /dev/null @@ -1,91 +0,0 @@ -#!/usr/bin/php -= $queues) - return; - -while ($r = q("SELECT * FROM `workerqueue` WHERE `executed` = '0000-00-00 00:00:00' ORDER BY `created` LIMIT 1")) { - q("UPDATE `workerqueue` SET `executed` = '%s', `pid` = %d WHERE `id` = %d", - dbesc(datetime_convert()), - intval(getmypid()), - intval($r[0]["id"])); - - $argv = json_decode($r[0]["parameter"]); - - $argc = count($argv); - - // To-Do: Check for existance - require_once(basename($argv[0])); - - $funcname=str_replace(".php", "", basename($argv[0]))."_run"; - - if (function_exists($funcname)) { - logger("Process ".getmypid().": ".$funcname." ".$r[0]["parameter"]); - $funcname($argv, $argc); - //sleep(10); - logger("Process ".getmypid().": ".$funcname." - done"); - - q("DELETE FROM `workerqueue` WHERE `id` = %d", intval($r[0]["id"])); - } -} - -?> From 32e8f3468d17f9f84b308aa903f7efa1fa22441f Mon Sep 17 00:00:00 2001 From: Michael Vogel Date: Fri, 11 Sep 2015 21:35:58 +0200 Subject: [PATCH 04/18] Moved some functionality back to the cron. Speed up things --- include/cron.php | 32 ++++++++++++++++++++++++++++++++ include/follow.php | 8 +++++--- include/onepoll.php | 14 ++++++++++++-- include/poller.php | 20 ++------------------ include/queue.php | 23 +++++++++++++++-------- include/socgraph.php | 16 +++++++++++----- 6 files changed, 77 insertions(+), 36 deletions(-) diff --git a/include/cron.php b/include/cron.php index ea7fd2c60..712befb1f 100644 --- a/include/cron.php +++ b/include/cron.php @@ -52,6 +52,20 @@ function cron_run(&$argv, &$argc){ } } + $last = get_config('system','last_cron'); + + $poll_interval = intval(get_config('system','cron_interval')); + if(! $poll_interval) + $poll_interval = 10; + + if($last) { + $next = $last + ($poll_interval * 60); + if($next > time()) { + logger('cron intervall not reached'); + return; + } + } + $lockpath = get_lockpath(); if ($lockpath != '') { $pidfile = new pidfile($lockpath, 'cron'); @@ -75,6 +89,22 @@ function cron_run(&$argv, &$argc){ logger('cron: start'); + // run queue delivery process in the background + + proc_run('php',"include/queue.php"); + + // run diaspora photo queue process in the background + + proc_run('php',"include/dsprphotoq.php"); + + // run the process to discover global contacts in the background + + proc_run('php',"include/discover_poco.php"); + + // run the process to update locally stored global contacts in the background + + proc_run('php',"include/discover_poco.php", "checkcontact"); + // expire any expired accounts q("UPDATE user SET `account_expired` = 1 where `account_expired` = 0 @@ -299,6 +329,8 @@ function cron_run(&$argv, &$argc){ logger('cron: end'); + set_config('system','last_cron', time()); + return; } diff --git a/include/follow.php b/include/follow.php index 217b9d07b..ca0228cc0 100644 --- a/include/follow.php +++ b/include/follow.php @@ -9,13 +9,13 @@ function update_contact($id) { $r = q("SELECT `url`, `nurl`, `addr`, `alias`, `batch`, `notify`, `poll`, `poco`, `network` FROM `contact` WHERE `id` = %d", intval($id)); if (!$r) - return; + return false; $ret = probe_url($r[0]["url"]); // If probe_url fails the network code will be different if ($ret["network"] != $r[0]["network"]) - return; + return false; $update = false; @@ -29,7 +29,7 @@ function update_contact($id) { } if (!$update) - return; + return true; q("UPDATE `contact` SET `url` = '%s', `nurl` = '%s', `addr` = '%s', `alias` = '%s', `batch` = '%s', `notify` = '%s', `poll` = '%s', `poco` = '%s' WHERE `id` = %d", dbesc($ret['url']), @@ -42,6 +42,8 @@ function update_contact($id) { dbesc($ret['poco']), intval($id) ); + + return true; } // diff --git a/include/onepoll.php b/include/onepoll.php index e8fc97b21..0e58a776c 100644 --- a/include/onepoll.php +++ b/include/onepoll.php @@ -168,8 +168,18 @@ function onepoll_run(&$argv, &$argc){ ); // Update the contact entry - if(($contact['network'] === NETWORK_OSTATUS) || ($contact['network'] === NETWORK_DIASPORA) || ($contact['network'] === NETWORK_DFRN)) - update_contact($contact["id"]); + if(($contact['network'] === NETWORK_OSTATUS) || ($contact['network'] === NETWORK_DIASPORA) || ($contact['network'] === NETWORK_DFRN)) { + if (!poco_reachable($contact['url'])) { + logger("Skipping probably dead contact ".$contact['url']); + return; + } + + if (!update_contact($contact["id"])) { + mark_for_death($contact); + return; + } else + unmark_for_death($contact); + } if($contact['network'] === NETWORK_DFRN) { diff --git a/include/poller.php b/include/poller.php index 05e936936..74d23a548 100644 --- a/include/poller.php +++ b/include/poller.php @@ -26,23 +26,7 @@ function poller_run(&$argv, &$argc){ unset($db_host, $db_user, $db_pass, $db_data); }; - // run queue delivery process in the background - - proc_run('php',"include/queue.php"); - - // run diaspora photo queue process in the background - - proc_run('php',"include/dsprphotoq.php"); - - // run the process to discover global contacts in the background - - proc_run('php',"include/discover_poco.php"); - - // run the process to update locally stored global contacts in the background - - proc_run('php',"include/discover_poco.php", "checkcontact"); - - // When everything else is done ... + // Run the cron job that calls all other jobs proc_run("php","include/cron.php"); // Cleaning killed processes @@ -81,7 +65,7 @@ function poller_run(&$argv, &$argc){ if (function_exists($funcname)) { logger("Process ".getmypid().": ".$funcname." ".$r[0]["parameter"]); $funcname($argv, $argc); - //sleep(10); + logger("Process ".getmypid().": ".$funcname." - done"); q("DELETE FROM `workerqueue` WHERE `id` = %d", intval($r[0]["id"])); diff --git a/include/queue.php b/include/queue.php index 0edd64fdb..3f6686ec6 100644 --- a/include/queue.php +++ b/include/queue.php @@ -22,6 +22,7 @@ function queue_run(&$argv, &$argc){ require_once('include/items.php'); require_once('include/bbcode.php'); require_once('include/pidfile.php'); + require_once('include/socgraph.php'); load_config('config'); load_config('system'); @@ -88,7 +89,7 @@ function queue_run(&$argv, &$argc){ else { // For the first 12 hours we'll try to deliver every 15 minutes - // After that, we'll only attempt delivery once per hour. + // After that, we'll only attempt delivery once per hour. $r = q("SELECT `id` FROM `queue` WHERE (( `created` > UTC_TIMESTAMP() - INTERVAL 12 HOUR && `last` < UTC_TIMESTAMP() - INTERVAL 15 MINUTE ) OR ( `last` < UTC_TIMESTAMP() - INTERVAL 1 HOUR ))"); } @@ -107,7 +108,7 @@ function queue_run(&$argv, &$argc){ foreach($r as $q_item) { - // queue_predeliver hooks may have changed the queue db details, + // queue_predeliver hooks may have changed the queue db details, // so check again if this entry still needs processing if($queue_id) { @@ -132,12 +133,18 @@ function queue_run(&$argv, &$argc){ continue; } if(in_array($c[0]['notify'],$deadguys)) { - logger('queue: skipping known dead url: ' . $c[0]['notify']); - update_queue_time($q_item['id']); - continue; + logger('queue: skipping known dead url: ' . $c[0]['notify']); + update_queue_time($q_item['id']); + continue; } - $u = q("SELECT `user`.*, `user`.`pubkey` AS `upubkey`, `user`.`prvkey` AS `uprvkey` + if (!poco_reachable($c[0]['url'])) { + logger('queue: skipping probably dead url: ' . $c[0]['url']); + update_queue_time($q_item['id']); + continue; + } + + $u = q("SELECT `user`.*, `user`.`pubkey` AS `upubkey`, `user`.`prvkey` AS `uprvkey` FROM `user` WHERE `uid` = %d LIMIT 1", intval($c[0]['uid']) ); @@ -194,9 +201,9 @@ function queue_run(&$argv, &$argc){ call_hooks('queue_deliver', $a, $params); if($params['result']) - remove_queue_item($q_item['id']); + remove_queue_item($q_item['id']); else - update_queue_time($q_item['id']); + update_queue_time($q_item['id']); break; diff --git a/include/socgraph.php b/include/socgraph.php index 6e2b6ea15..1a92a7256 100644 --- a/include/socgraph.php +++ b/include/socgraph.php @@ -234,7 +234,7 @@ function poco_check($profile_url, $name, $network, $profile_photo, $about, $loca } if ((($network == "") OR ($name == "") OR ($profile_photo == "") OR ($server_url == "") OR $alternate) - AND poco_reachable($profile_url, $server_url, $network, true)) { + AND poco_reachable($profile_url, $server_url, $network, false)) { $data = probe_url($profile_url); $orig_profile = $profile_url; @@ -1296,8 +1296,11 @@ function poco_discover($complete = false) { if ($r) foreach ($r AS $server) { - if (!poco_check_server($server["url"], $server["network"])) + if (!poco_check_server($server["url"], $server["network"])) { + // The server is not reachable? Okay, then we will try it later + q("UPDATE `gserver` SET `last_poco_query` = '%s' WHERE `nurl` = '%s'", dbesc(datetime_convert()), dbesc($server["nurl"])); continue; + } // Fetch all users from the other server $url = $server["poco"]."/?fields=displayName,urls,photos,updated,network,aboutMe,currentLocation,tags,gender,generation"; @@ -1338,10 +1341,13 @@ function poco_discover($complete = false) { q("UPDATE `gserver` SET `last_poco_query` = '%s' WHERE `nurl` = '%s'", dbesc(datetime_convert()), dbesc($server["nurl"])); if (!$complete AND (--$no_of_queries == 0)) break; - // If the server hadn't replied correctly, then force a sanity check - } elseif (!poco_check_server($server["url"], $server["network"], true)) - q("UPDATE `gserver` SET `last_poco_query` = '%s' WHERE `nurl` = '%s'", dbesc(datetime_convert()), dbesc($server["nurl"])); + } else { + // If the server hadn't replied correctly, then force a sanity check + poco_check_server($server["url"], $server["network"], true); + // If we couldn't reach the server, we will try it some time later + q("UPDATE `gserver` SET `last_poco_query` = '%s' WHERE `nurl` = '%s'", dbesc(datetime_convert()), dbesc($server["nurl"])); + } } } From ce9b4e868b68269277a88bd34e9b05244366e9ab Mon Sep 17 00:00:00 2001 From: Michael Vogel Date: Fri, 11 Sep 2015 21:56:37 +0200 Subject: [PATCH 05/18] Database update for worker --- boot.php | 2 +- database.sql | 15 ++++++++++++++- update.php | 2 +- 3 files changed, 16 insertions(+), 3 deletions(-) diff --git a/boot.php b/boot.php index b3b926521..3bbbef3c9 100644 --- a/boot.php +++ b/boot.php @@ -19,7 +19,7 @@ define ( 'FRIENDICA_PLATFORM', 'Friendica'); define ( 'FRIENDICA_CODENAME', 'Lily of the valley'); define ( 'FRIENDICA_VERSION', '3.4.1' ); define ( 'DFRN_PROTOCOL_VERSION', '2.23' ); -define ( 'DB_UPDATE_VERSION', 1188 ); +define ( 'DB_UPDATE_VERSION', 1189 ); define ( 'EOL', "
\r\n" ); define ( 'ATOM_TIME', 'Y-m-d\TH:i:s\Z' ); diff --git a/database.sql b/database.sql index 76df6aec1..a6eb71ef3 100644 --- a/database.sql +++ b/database.sql @@ -1,6 +1,6 @@ -- ------------------------------------------ -- Friendica 3.4.1 (Lily of the valley) --- DB_UPDATE_VERSION 1188 +-- DB_UPDATE_VERSION 1189 -- ------------------------------------------ @@ -1020,3 +1020,16 @@ CREATE TABLE IF NOT EXISTS `userd` ( INDEX `username` (`username`) ) DEFAULT CHARSET=utf8; +-- +-- TABLE workerqueue +-- +CREATE TABLE IF NOT EXISTS `workerqueue` ( + `id` int(11) NOT NULL auto_increment PRIMARY KEY, + `parameter` text NOT NULL, + `priority` tinyint(3) unsigned NOT NULL DEFAULT 0, + `created` datetime NOT NULL DEFAULT '0000-00-00 00:00:00', + `pid` int(11) NOT NULL DEFAULT 0, + `executed` datetime NOT NULL DEFAULT '0000-00-00 00:00:00', + INDEX `created` (`created`) +) DEFAULT CHARSET=utf8; + diff --git a/update.php b/update.php index 761da7273..06aab577a 100644 --- a/update.php +++ b/update.php @@ -1,6 +1,6 @@ Date: Sat, 12 Sep 2015 17:51:27 +0200 Subject: [PATCH 06/18] Fork as many processes as possible from the start on. --- boot.php | 20 +++++++++++++++++++- include/poller.php | 21 +++++++++++++-------- 2 files changed, 32 insertions(+), 9 deletions(-) diff --git a/boot.php b/boot.php index 3bbbef3c9..3116bb94b 100644 --- a/boot.php +++ b/boot.php @@ -1449,7 +1449,25 @@ if(! function_exists('proc_run')) { dbesc(datetime_convert()), intval(0)); - return; + // Should we quit and wait for the poller to be called as a cronjob? + if (get_config("system", "worker_dont_fork")) + return; + + // Checking number of workers + $workers = q("SELECT COUNT(*) AS `workers` FROM `workerqueue` WHERE `executed` != '0000-00-00 00:00:00'"); + + // Get number of allowed number of worker threads + $queues = intval(get_config("system", "worker_queues")); + + if ($queues == 0) + $queues = 4; + + // If there are already enough workers running, don't fork another one + if ($workers[0]["workers"] >= $queues) + return; + + // Now call the poller to execute the jobs that we just added to the queue + $args = array("php", "include/poller.php", "no_cron"); } $args[0] = ((x($a->config,'php_path')) && (strlen($a->config['php_path'])) ? $a->config['php_path'] : 'php'); diff --git a/include/poller.php b/include/poller.php index 74d23a548..bdf6ba84e 100644 --- a/include/poller.php +++ b/include/poller.php @@ -26,15 +26,20 @@ function poller_run(&$argv, &$argc){ unset($db_host, $db_user, $db_pass, $db_data); }; - // Run the cron job that calls all other jobs - proc_run("php","include/cron.php"); + if(($argc <= 1) OR ($argv[1] != "no_cron")) { + // Run the cron job that calls all other jobs + proc_run("php","include/cron.php"); - // Cleaning killed processes - $r = q("SELECT DISTINCT(`pid`) FROM `workerqueue` WHERE `executed` != '0000-00-00 00:00:00'"); - foreach($r AS $pid) - if (!posix_kill($pid["pid"], 0)) - q("UPDATE `workerqueue` SET `executed` = '0000-00-00 00:00:00', `pid` = 0 WHERE `pid` = %d", - intval($pid["pid"])); + // Cleaning dead processes + $r = q("SELECT DISTINCT(`pid`) FROM `workerqueue` WHERE `executed` != '0000-00-00 00:00:00'"); + foreach($r AS $pid) + if (!posix_kill($pid["pid"], 0)) + q("UPDATE `workerqueue` SET `executed` = '0000-00-00 00:00:00', `pid` = 0 WHERE `pid` = %d", + intval($pid["pid"])); + + } else + // Sleep two seconds before checking for running processes to avoid having too many workers + sleep(2); // Checking number of workers $workers = q("SELECT COUNT(*) AS `workers` FROM `workerqueue` WHERE `executed` != '0000-00-00 00:00:00'"); From 7edce8e266259da095c181033ddfab9d259a7304 Mon Sep 17 00:00:00 2001 From: Michael Vogel Date: Sat, 12 Sep 2015 18:08:03 +0200 Subject: [PATCH 07/18] Don't use a delivery interval when using the worker --- include/cron.php | 4 ++++ include/notifier.php | 4 ++++ include/poller.php | 2 +- include/queue.php | 4 ++++ 4 files changed, 13 insertions(+), 1 deletion(-) diff --git a/include/cron.php b/include/cron.php index 712befb1f..0c9d6baa5 100644 --- a/include/cron.php +++ b/include/cron.php @@ -215,6 +215,10 @@ function cron_run(&$argv, &$argc){ if(! $interval) $interval = ((get_config('system','delivery_interval') === false) ? 3 : intval(get_config('system','delivery_interval'))); + // If we are using the worker we don't need a delivery interval + if (get_config("system", "worker")) + $interval = false; + $sql_extra = (($manual_id) ? " AND `id` = $manual_id " : ""); reload_plugins(); diff --git a/include/notifier.php b/include/notifier.php index 002b3c8d7..593933b32 100644 --- a/include/notifier.php +++ b/include/notifier.php @@ -615,6 +615,10 @@ function notifier_run(&$argv, &$argc){ $interval = ((get_config('system','delivery_interval') === false) ? 2 : intval(get_config('system','delivery_interval'))); + // If we are using the worker we don't need a delivery interval + if (get_config("system", "worker")) + $interval = false; + // delivery loop if(count($r)) { diff --git a/include/poller.php b/include/poller.php index bdf6ba84e..053880bc5 100644 --- a/include/poller.php +++ b/include/poller.php @@ -39,7 +39,7 @@ function poller_run(&$argv, &$argc){ } else // Sleep two seconds before checking for running processes to avoid having too many workers - sleep(2); + sleep(4); // Checking number of workers $workers = q("SELECT COUNT(*) AS `workers` FROM `workerqueue` WHERE `executed` != '0000-00-00 00:00:00'"); diff --git a/include/queue.php b/include/queue.php index 3f6686ec6..cb5fe28ad 100644 --- a/include/queue.php +++ b/include/queue.php @@ -60,6 +60,10 @@ function queue_run(&$argv, &$argc){ $interval = ((get_config('system','delivery_interval') === false) ? 2 : intval(get_config('system','delivery_interval'))); + // If we are using the worker we don't need a delivery interval + if (get_config("system", "worker")) + $interval = false; + $r = q("select * from deliverq where 1"); if($r) { foreach($r as $rr) { From 12659fc3a11b27ffe0647a1f73f5b4b23d9757ae Mon Sep 17 00:00:00 2001 From: Michael Vogel Date: Sat, 12 Sep 2015 20:22:58 +0200 Subject: [PATCH 08/18] Let the cronhook be called different from the cron job. --- include/cron.php | 7 ++----- include/cronhooks.php | 36 ++++++++++++++++++++++++++---------- include/poller.php | 3 +++ 3 files changed, 31 insertions(+), 15 deletions(-) diff --git a/include/cron.php b/include/cron.php index 0c9d6baa5..46067d76d 100644 --- a/include/cron.php +++ b/include/cron.php @@ -225,9 +225,6 @@ function cron_run(&$argv, &$argc){ $d = datetime_convert(); - if(! $restart) - proc_run('php','include/cronhooks.php'); - // Only poll from those with suitable relationships, // and which have a polling address and ignore Diaspora since // we are unable to match those posts with a Diaspora GUID and prevent duplicates. @@ -339,6 +336,6 @@ function cron_run(&$argv, &$argc){ } if (array_search(__file__,get_included_files())===0){ - cron_run($_SERVER["argv"],$_SERVER["argc"]); - killme(); + cron_run($_SERVER["argv"],$_SERVER["argc"]); + killme(); } diff --git a/include/cronhooks.php b/include/cronhooks.php index 26cab3cf9..d5b4f3bf6 100644 --- a/include/cronhooks.php +++ b/include/cronhooks.php @@ -11,11 +11,11 @@ function cronhooks_run(&$argv, &$argc){ } if(is_null($db)) { - @include(".htconfig.php"); - require_once("include/dba.php"); - $db = new dba($db_host, $db_user, $db_pass, $db_data); - unset($db_host, $db_user, $db_pass, $db_data); - }; + @include(".htconfig.php"); + require_once("include/dba.php"); + $db = new dba($db_host, $db_user, $db_pass, $db_data); + unset($db_host, $db_user, $db_pass, $db_data); + }; require_once('include/session.php'); require_once('include/datetime.php'); @@ -35,17 +35,31 @@ function cronhooks_run(&$argv, &$argc){ } } + $last = get_config('system','last_cronhook'); + + $poll_interval = intval(get_config('system','cronhook_interval')); + if(! $poll_interval) + $poll_interval = 9; + + if($last) { + $next = $last + ($poll_interval * 60); + if($next > time()) { + logger('cronhook intervall not reached'); + return; + } + } + $lockpath = get_lockpath(); if ($lockpath != '') { $pidfile = new pidfile($lockpath, 'cronhooks'); if($pidfile->is_already_running()) { logger("cronhooks: Already running"); if ($pidfile->running_time() > 19*60) { - $pidfile->kill(); - logger("cronhooks: killed stale process"); + $pidfile->kill(); + logger("cronhooks: killed stale process"); // Calling a new instance proc_run('php','include/cronhooks.php'); - } + } exit; } } @@ -62,10 +76,12 @@ function cronhooks_run(&$argv, &$argc){ logger('cronhooks: end'); + set_config('system','last_cronhook', time()); + return; } if (array_search(__file__,get_included_files())===0){ - cronhooks_run($_SERVER["argv"],$_SERVER["argc"]); - killme(); + cronhooks_run($_SERVER["argv"],$_SERVER["argc"]); + killme(); } diff --git a/include/poller.php b/include/poller.php index 053880bc5..e4b0b092f 100644 --- a/include/poller.php +++ b/include/poller.php @@ -30,6 +30,9 @@ function poller_run(&$argv, &$argc){ // Run the cron job that calls all other jobs proc_run("php","include/cron.php"); + // Run the cronhooks job separately from cron for being able to use a different timing + proc_run("php","include/cronhooks.php"); + // Cleaning dead processes $r = q("SELECT DISTINCT(`pid`) FROM `workerqueue` WHERE `executed` != '0000-00-00 00:00:00'"); foreach($r AS $pid) From 69daaa61ba1d1a6502835e9b1356b14c1fdc1cf5 Mon Sep 17 00:00:00 2001 From: Michael Vogel Date: Sun, 13 Sep 2015 08:08:13 +0200 Subject: [PATCH 09/18] With the new queue we don't need "delivery_batch_count" anymore --- include/notifier.php | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/include/notifier.php b/include/notifier.php index 593933b32..d4d254f1c 100644 --- a/include/notifier.php +++ b/include/notifier.php @@ -638,7 +638,7 @@ function notifier_run(&$argv, &$argc){ // This controls the number of deliveries to execute with each separate delivery process. // By default we'll perform one delivery per process. Assuming a hostile shared hosting - // provider, this provides the greatest chance of deliveries if processes start getting + // provider, this provides the greatest chance of deliveries if processes start getting // killed. We can also space them out with the delivery_interval to also help avoid them // getting whacked. @@ -646,8 +646,10 @@ function notifier_run(&$argv, &$argc){ // together into a single process. This will reduce the overall number of processes // spawned for each delivery, but they will run longer. + // When using the workerqueue, we don't need this functionality. + $deliveries_per_process = intval(get_config('system','delivery_batch_count')); - if($deliveries_per_process <= 0) + if (($deliveries_per_process <= 0) OR get_config("system", "worker")) $deliveries_per_process = 1; $this_batch = array(); From 3ace2136f062bd7e2f42328460a9e08859c856d5 Mon Sep 17 00:00:00 2001 From: Michael Vogel Date: Sun, 13 Sep 2015 18:47:10 +0200 Subject: [PATCH 10/18] Checking includes for valid paths --- boot.php | 28 ++++++++++++++++++++++++++++ include/poller.php | 15 ++++++++++++--- 2 files changed, 40 insertions(+), 3 deletions(-) diff --git a/boot.php b/boot.php index 3116bb94b..22cd34e06 100644 --- a/boot.php +++ b/boot.php @@ -1893,3 +1893,31 @@ if(!function_exists('exif_imagetype')) { return($size[2]); } } + +function validate_include(&$file) { + $orig_file = $file; + + $file = realpath($file); + + if (strpos($file, getcwd()) !== 0) + return false; + + $file = str_replace(getcwd()."/", "", $file, $count); + if ($count != 1) + return false; + + if ($orig_file !== $file) + return false; + + $valid = false; + if (strpos($file, "include/") === 0) + $valid = true; + + if (strpos($file, "addon/") === 0) + $valid = true; + + if (!$valid) + return false; + + return true; +} diff --git a/include/poller.php b/include/poller.php index e4b0b092f..b03dc84af 100644 --- a/include/poller.php +++ b/include/poller.php @@ -65,8 +65,16 @@ function poller_run(&$argv, &$argc){ $argc = count($argv); - // To-Do: Check for existance - require_once(basename($argv[0])); + // Check for existance and validity of the include file + $include = $argv[0]; + + if (!validate_include($include)) { + logger("Include file ".$argv[0]." is not valid!"); + q("DELETE FROM `workerqueue` WHERE `id` = %d", intval($r[0]["id"])); + continue; + } + + require_once($include); $funcname=str_replace(".php", "", basename($argv[0]))."_run"; @@ -77,7 +85,8 @@ function poller_run(&$argv, &$argc){ logger("Process ".getmypid().": ".$funcname." - done"); q("DELETE FROM `workerqueue` WHERE `id` = %d", intval($r[0]["id"])); - } + } else + logger("Function ".$funcname." does not exist"); } } From f8e4a71edae2f7a0fef8690e173612f30cf3630a Mon Sep 17 00:00:00 2001 From: Michael Vogel Date: Wed, 23 Sep 2015 08:56:48 +0200 Subject: [PATCH 11/18] Do a load check during execution of the queue. --- include/poller.php | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/include/poller.php b/include/poller.php index b03dc84af..c919b9d2a 100644 --- a/include/poller.php +++ b/include/poller.php @@ -26,6 +26,17 @@ function poller_run(&$argv, &$argc){ unset($db_host, $db_user, $db_pass, $db_data); }; + $maxsysload = intval(get_config('system','maxloadavg')); + if($maxsysload < 1) + $maxsysload = 50; + if(function_exists('sys_getloadavg')) { + $load = sys_getloadavg(); + if(intval($load[0]) > $maxsysload) { + logger('system: load ' . $load[0] . ' too high. poller deferred to next scheduled run.'); + return; + } + } + if(($argc <= 1) OR ($argv[1] != "no_cron")) { // Run the cron job that calls all other jobs proc_run("php","include/cron.php"); @@ -56,6 +67,15 @@ function poller_run(&$argv, &$argc){ return; while ($r = q("SELECT * FROM `workerqueue` WHERE `executed` = '0000-00-00 00:00:00' ORDER BY `created` LIMIT 1")) { + + if(function_exists('sys_getloadavg')) { + $load = sys_getloadavg(); + if(intval($load[0]) > $maxsysload) { + logger('system: load ' . $load[0] . ' too high. poller deferred to next scheduled run.'); + return; + } + } + q("UPDATE `workerqueue` SET `executed` = '%s', `pid` = %d WHERE `id` = %d", dbesc(datetime_convert()), intval(getmypid()), From 173d1390df9e7fbe1a23ce3e67c1b9ea6ddcb1f5 Mon Sep 17 00:00:00 2001 From: Michael Vogel Date: Fri, 25 Sep 2015 17:38:56 +0200 Subject: [PATCH 12/18] Mute warnings in pidfile/Quit poller after an hour. --- include/pidfile.php | 8 ++++---- include/poller.php | 6 ++++++ 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/include/pidfile.php b/include/pidfile.php index 4f5b25ad7..3093e149a 100644 --- a/include/pidfile.php +++ b/include/pidfile.php @@ -7,8 +7,8 @@ class pidfile { $this->_file = "$dir/$name.pid"; if (file_exists($this->_file)) { - $pid = trim(file_get_contents($this->_file)); - if (posix_kill($pid, 0)) { + $pid = trim(@file_get_contents($this->_file)); + if (($pid != "") AND posix_kill($pid, 0)) { $this->_running = true; } } @@ -21,7 +21,7 @@ class pidfile { public function __destruct() { if ((! $this->_running) && file_exists($this->_file)) { - unlink($this->_file); + @unlink($this->_file); } } @@ -30,7 +30,7 @@ class pidfile { } public function running_time() { - return(time() - filectime($this->_file)); + return(time() - @filectime($this->_file)); } public function kill() { diff --git a/include/poller.php b/include/poller.php index c919b9d2a..7255eaa6e 100644 --- a/include/poller.php +++ b/include/poller.php @@ -66,6 +66,8 @@ function poller_run(&$argv, &$argc){ if ($workers[0]["workers"] >= $queues) return; + $starttime = time(); + while ($r = q("SELECT * FROM `workerqueue` WHERE `executed` = '0000-00-00 00:00:00' ORDER BY `created` LIMIT 1")) { if(function_exists('sys_getloadavg')) { @@ -76,6 +78,10 @@ function poller_run(&$argv, &$argc){ } } + // Quit the poller once every hour + if (time() > ($starttime + 3600)) + return; + q("UPDATE `workerqueue` SET `executed` = '%s', `pid` = %d WHERE `id` = %d", dbesc(datetime_convert()), intval(getmypid()), From ae21c40f21ab4bb5a482d63ae35beff18efa13be Mon Sep 17 00:00:00 2001 From: Michael Vogel Date: Sun, 27 Sep 2015 13:56:20 +0200 Subject: [PATCH 13/18] Load depending number of workers --- include/poller.php | 44 ++++++++++++++++++++++++++++++++++++-------- 1 file changed, 36 insertions(+), 8 deletions(-) diff --git a/include/poller.php b/include/poller.php index 7255eaa6e..e33167c5b 100644 --- a/include/poller.php +++ b/include/poller.php @@ -56,14 +56,7 @@ function poller_run(&$argv, &$argc){ sleep(4); // Checking number of workers - $workers = q("SELECT COUNT(*) AS `workers` FROM `workerqueue` WHERE `executed` != '0000-00-00 00:00:00'"); - - $queues = intval(get_config("system", "worker_queues")); - - if ($queues == 0) - $queues = 4; - - if ($workers[0]["workers"] >= $queues) + if (poller_too_much_workers()) return; $starttime = time(); @@ -82,6 +75,9 @@ function poller_run(&$argv, &$argc){ if (time() > ($starttime + 3600)) return; + if (poller_too_much_workers()) + return; + q("UPDATE `workerqueue` SET `executed` = '%s', `pid` = %d WHERE `id` = %d", dbesc(datetime_convert()), intval(getmypid()), @@ -117,6 +113,38 @@ function poller_run(&$argv, &$argc){ } +function poller_too_much_workers() { + + if(function_exists('sys_getloadavg')) { + $load = sys_getloadavg(); + + // To-Do + if ($load < 1) + $queues = 10; + elseif ($load < 5) + $queues = 4; + elseif ($load < 10) + $queues = 2; + else + $queues = 1; + + } else { + $queues = intval(get_config("system", "worker_queues")); + + if ($queues == 0) + $queues = 4; + } + + if (poller_active_workers() >= $queues) + return; +} + +function poller_active_workers() { + $workers = q("SELECT COUNT(*) AS `workers` FROM `workerqueue` WHERE `executed` != '0000-00-00 00:00:00'"); + + return($workers[0]["workers"]); +} + if (array_search(__file__,get_included_files())===0){ poller_run($_SERVER["argv"],$_SERVER["argc"]); killme(); From 9767801be4bfff53afd60b09dd310bbefeb3a3d1 Mon Sep 17 00:00:00 2001 From: Michael Vogel Date: Mon, 28 Sep 2015 07:54:28 +0200 Subject: [PATCH 14/18] Load depending number of worker queues. --- include/poller.php | 48 ++++++++++++++++++++-------------------------- 1 file changed, 21 insertions(+), 27 deletions(-) diff --git a/include/poller.php b/include/poller.php index e33167c5b..fc592d206 100644 --- a/include/poller.php +++ b/include/poller.php @@ -26,10 +26,11 @@ function poller_run(&$argv, &$argc){ unset($db_host, $db_user, $db_pass, $db_data); }; - $maxsysload = intval(get_config('system','maxloadavg')); - if($maxsysload < 1) - $maxsysload = 50; if(function_exists('sys_getloadavg')) { + $maxsysload = intval(get_config('system','maxloadavg')); + if($maxsysload < 1) + $maxsysload = 50; + $load = sys_getloadavg(); if(intval($load[0]) > $maxsysload) { logger('system: load ' . $load[0] . ' too high. poller deferred to next scheduled run.'); @@ -63,18 +64,11 @@ function poller_run(&$argv, &$argc){ while ($r = q("SELECT * FROM `workerqueue` WHERE `executed` = '0000-00-00 00:00:00' ORDER BY `created` LIMIT 1")) { - if(function_exists('sys_getloadavg')) { - $load = sys_getloadavg(); - if(intval($load[0]) > $maxsysload) { - logger('system: load ' . $load[0] . ' too high. poller deferred to next scheduled run.'); - return; - } - } - // Quit the poller once every hour if (time() > ($starttime + 3600)) return; + // Count active workers and compare them with a maximum value that depends on the load if (poller_too_much_workers()) return; @@ -115,28 +109,28 @@ function poller_run(&$argv, &$argc){ function poller_too_much_workers() { + $queues = get_config("system", "worker_queues"); + + if ($queues == 0) + $queues = 4; + + $active = poller_active_workers(); + + // Decrease the number of workers at higher load if(function_exists('sys_getloadavg')) { - $load = sys_getloadavg(); + $load = max(sys_getloadavg()); - // To-Do - if ($load < 1) - $queues = 10; - elseif ($load < 5) - $queues = 4; - elseif ($load < 10) - $queues = 2; - else - $queues = 1; + $maxsysload = intval(get_config('system','maxloadavg')); + if($maxsysload < 1) + $maxsysload = 50; - } else { - $queues = intval(get_config("system", "worker_queues")); + $queues = max(0, ceil($queues * (($maxsysload - $load) / $maxsysload))); + + logger("Current load: ".$load." - maximum: ".$maxsysload." - current queues: ".$active." - maximum: ".$queues, LOGGER_DEBUG); - if ($queues == 0) - $queues = 4; } - if (poller_active_workers() >= $queues) - return; + return($active >= $queues); } function poller_active_workers() { From eb75d9532b008f832238ddeb693e65203032bde1 Mon Sep 17 00:00:00 2001 From: Michael Vogel Date: Mon, 28 Sep 2015 19:14:07 +0200 Subject: [PATCH 15/18] Magical mathematics to reduce the number of workers --- include/poller.php | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/include/poller.php b/include/poller.php index fc592d206..4cb6eb8d5 100644 --- a/include/poller.php +++ b/include/poller.php @@ -51,6 +51,10 @@ function poller_run(&$argv, &$argc){ if (!posix_kill($pid["pid"], 0)) q("UPDATE `workerqueue` SET `executed` = '0000-00-00 00:00:00', `pid` = 0 WHERE `pid` = %d", intval($pid["pid"])); + else { + // To-Do: Kill long running processes + // But: Update processes (like the database update) mustn't be killed + } } else // Sleep two seconds before checking for running processes to avoid having too many workers @@ -124,7 +128,12 @@ function poller_too_much_workers() { if($maxsysload < 1) $maxsysload = 50; - $queues = max(0, ceil($queues * (($maxsysload - $load) / $maxsysload))); + $maxworkers = $queues; + + // Some magical mathemathics to reduce the workers + $exponent = 3; + $slope = $maxworkers / pow($maxsysload, $exponent); + $queues = ceil($slope * pow(max(0, $maxsysload - $load), $exponent)); logger("Current load: ".$load." - maximum: ".$maxsysload." - current queues: ".$active." - maximum: ".$queues, LOGGER_DEBUG); From f0cf9ce5197ad7e49bf1581b8755953de5bca242 Mon Sep 17 00:00:00 2001 From: Michael Vogel Date: Mon, 28 Sep 2015 21:58:58 +0200 Subject: [PATCH 16/18] Prevent double execution --- include/poller.php | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/include/poller.php b/include/poller.php index 4cb6eb8d5..942fb6eaf 100644 --- a/include/poller.php +++ b/include/poller.php @@ -76,11 +76,18 @@ function poller_run(&$argv, &$argc){ if (poller_too_much_workers()) return; - q("UPDATE `workerqueue` SET `executed` = '%s', `pid` = %d WHERE `id` = %d", + q("UPDATE `workerqueue` SET `executed` = '%s', `pid` = %d WHERE `id` = %d AND `executed` = '0000-00-00 00:00:00'", dbesc(datetime_convert()), intval(getmypid()), intval($r[0]["id"])); + // Assure that there are no tasks executed twice + $id = q("SELECT `id` FROM `workerqueue` WHERE `id` = %d AND `pid` = %d", + intval($r[0]["id"]), + intval(getmypid())); + if (!$id) + continue; + $argv = json_decode($r[0]["parameter"]); $argc = count($argv); From ed8cdc7d8502b03ca9096b4d4a4c5fd9d149d08f Mon Sep 17 00:00:00 2001 From: Michael Vogel Date: Mon, 28 Sep 2015 22:32:56 +0200 Subject: [PATCH 17/18] Logging message added --- include/poller.php | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/include/poller.php b/include/poller.php index 942fb6eaf..ab7deb6e0 100644 --- a/include/poller.php +++ b/include/poller.php @@ -85,8 +85,10 @@ function poller_run(&$argv, &$argc){ $id = q("SELECT `id` FROM `workerqueue` WHERE `id` = %d AND `pid` = %d", intval($r[0]["id"]), intval(getmypid())); - if (!$id) + if (!$id) { + logger("Queue item ".$r[0]["id"]." was executed multiple times - quitting this one", LOGGER_DEBUG); continue; + } $argv = json_decode($r[0]["parameter"]); From c3ee255b0c9a3f8407001f58abbe6639275ec316 Mon Sep 17 00:00:00 2001 From: Michael Vogel Date: Tue, 29 Sep 2015 06:15:26 +0200 Subject: [PATCH 18/18] Reduction of double executions - hopefully --- include/poller.php | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/include/poller.php b/include/poller.php index ab7deb6e0..45740dab6 100644 --- a/include/poller.php +++ b/include/poller.php @@ -68,14 +68,6 @@ function poller_run(&$argv, &$argc){ while ($r = q("SELECT * FROM `workerqueue` WHERE `executed` = '0000-00-00 00:00:00' ORDER BY `created` LIMIT 1")) { - // Quit the poller once every hour - if (time() > ($starttime + 3600)) - return; - - // Count active workers and compare them with a maximum value that depends on the load - if (poller_too_much_workers()) - return; - q("UPDATE `workerqueue` SET `executed` = '%s', `pid` = %d WHERE `id` = %d AND `executed` = '0000-00-00 00:00:00'", dbesc(datetime_convert()), intval(getmypid()), @@ -86,7 +78,7 @@ function poller_run(&$argv, &$argc){ intval($r[0]["id"]), intval(getmypid())); if (!$id) { - logger("Queue item ".$r[0]["id"]." was executed multiple times - quitting this one", LOGGER_DEBUG); + logger("Queue item ".$r[0]["id"]." was executed multiple times - skip this execution", LOGGER_DEBUG); continue; } @@ -116,6 +108,14 @@ function poller_run(&$argv, &$argc){ q("DELETE FROM `workerqueue` WHERE `id` = %d", intval($r[0]["id"])); } else logger("Function ".$funcname." does not exist"); + + // Quit the poller once every hour + if (time() > ($starttime + 3600)) + return; + + // Count active workers and compare them with a maximum value that depends on the load + if (poller_too_much_workers()) + return; } }