tripal_core.jobs.api.inc 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605
  1. <?php
  2. /**
  3. * @file
  4. * Tripal offers a job management subsystem for managing tasks that may require
  5. * an extended period of time for completion.
  6. */
  7. /**
  8. * @defgroup tripal_jobs_api Tripal Jobs API
  9. * @ingroup tripal_core_api
  10. * @{
  11. * Tripal offers a job management subsystem for managing tasks that may require
  12. * an extended period of time for completion. Drupal uses a UNIX-based cron
  13. * job to handle tasks such as checking the availability of updates, indexing
  14. * new nodes for searching, etc. Drupal's cron uses the web interface for
  15. * launching these tasks; however, Tripal provides several administrative tasks
  16. * that may time out and not complete due to limitations of the web server.
  17. * Examples include syncing a large number of features between Chado and
  18. * Drupal. To circumvent this, as well as provide more fine-grained control
  19. * and monitoring, Tripal uses a jobs management sub-system built into the
  20. * Tripal Core module. It is anticipated that this functionality will be used
  21. * for managing analysis jobs provided by future tools, with eventual support
  22. * for distributed computing.
  23. *
  24. * The Tripal jobs management system allows administrators to submit tasks to
  25. * be performed which can then be launched through a UNIX command-line PHP
  26. * script or cron job. This command-line script can be added to a cron entry
  27. * along-side the Drupal cron entry for automatic, regular launching of Tripal
  28. * jobs. The order of execution of waiting jobs is determined first by
  29. * priority and second by the order the jobs were entered.
  30. *
  31. * The API functions described below provide a programmatic interface for
  32. * adding, checking and viewing jobs.
  33. * @}
  34. */
  /**
   * Adds a job to the Tripal Job queue.
   *
   * @param $job_name
   *   The human readable name for the job.
   * @param $modulename
   *   The name of the module adding the job.
   * @param $callback
   *   The name of a function to be called when the job is executed. The
   *   function must already exist when the job is added.
   * @param $arguments
   *   An array of arguments to be passed on to the callback. It is stored
   *   serialized in the tripal_jobs table.
   * @param $uid
   *   The uid of the user adding the job.
   * @param $priority
   *   The priority at which to run the job, a number between 1 and 10. The
   *   lower the number the higher the priority, so 1 is the highest priority
   *   and 10 (the default) is the lowest.
   *
   * @return
   *   The job_id of the registered job, or FALSE if any argument is invalid.
   *
   * Example usage:
   * @code
   *   $args = array($dfile, $organism_id, $type, $library_id, $re_name, $re_uname,
   *     $re_accession, $db_id, $rel_type, $re_subject, $parent_type, $method,
   *     $user->uid, $analysis_id, $match_type);
   *
   *   tripal_add_job("Import FASTA file: $dfile", 'tripal_feature',
   *     'tripal_feature_load_fasta', $args, $user->uid);
   * @endcode
   * The code above is copied from the tripal_feature/fasta_loader.php file. The
   * snippet first builds an array of arguments that will then be passed to the
   * tripal_add_job function. The number of arguments provided in the $arguments
   * variable should match the argument set for the callback function provided
   * as the third argument.
   *
   * @ingroup tripal_jobs_api
   */
  function tripal_add_job($job_name, $modulename, $callback, $arguments, $uid, $priority = 10) {
    // Validate every argument up front. The first problem found is logged and
    // aborts the submission.
    $error = NULL;
    if (!$job_name) {
      $error = "Must provide a \$job_name argument to the tripal_add_job() function.";
    }
    elseif (!$modulename) {
      $error = "Must provide a \$modulename argument to the tripal_add_job() function.";
    }
    elseif (!$callback) {
      $error = "Must provide a \$callback argument to the tripal_add_job() function.";
    }
    elseif (!is_string($callback) or !function_exists($callback)) {
      $error = "Must provide a valid callback function to the tripal_add_job() function.";
    }
    elseif (!is_numeric($uid)) {
      $error = "Must provide a numeric \$uid argument to the tripal_add_job() function.";
    }
    elseif (!$priority or !is_numeric($priority) or $priority < 1 or $priority > 10) {
      $error = "Must provide a numeric \$priority argument between 1 and 10 to the tripal_add_job() function.";
    }
    elseif (!is_array($arguments)) {
      $error = "Must provide an array as the \$arguments argument to the tripal_add_job() function.";
    }
    if ($error) {
      // The third watchdog() argument is the placeholder array; the severity
      // is the fourth.
      watchdog('tripal', $error, array(), WATCHDOG_ERROR);
      return FALSE;
    }

    $job_id = db_insert('tripal_jobs')
      ->fields(array(
        'job_name' => $job_name,
        'modulename' => $modulename,
        'callback' => $callback,
        'status' => 'Waiting',
        'submit_date' => REQUEST_TIME,
        'uid' => $uid,
        // The lower the number the higher the priority.
        'priority' => $priority,
        // The arguments are converted into a string for storage.
        'arguments' => serialize($arguments),
      ))
      ->execute();

    if ($job_id) {
      drupal_set_message(t("Job '%job_name' submitted.", array('%job_name' => $job_name)));
      if (user_access('administer tripal')) {
        $jobs_url = url("admin/tripal/tripal_jobs");
        drupal_set_message(t("Check the <a href='!jobs_url'>jobs page</a> for status.",
          array('!jobs_url' => $jobs_url)));
        // The Drush hint needs the submitting user's name, so it is only shown
        // when that account could actually be loaded.
        $account = user_load($uid);
        if ($account) {
          drupal_set_message(t("You can execute the job queue manually on the command line " .
            "using the following Drush command: <br>drush trp-run-jobs --username=%uname --root=%base_path",
            array('%base_path' => DRUPAL_ROOT, '%uname' => $account->name)));
        }
      }
    }
    else {
      drupal_set_message(t("Failed to add job: %job_name.", array('%job_name' => $job_name)), 'error');
    }
    return $job_id;
  }
  /**
   * Retrieve information regarding a tripal job.
   *
   * @param $job_id
   *   The unique identifier of the job.
   *
   * @return
   *   An object holding the job's tripal_jobs row if a job is found, or FALSE
   *   when $job_id is not numeric or no such job exists. Three display strings
   *   are added to the object: submit_date_string, start_time_string and
   *   end_time_string. The start and end strings are empty when the job has no
   *   start or end time yet.
   *
   * @ingroup tripal_jobs_api
   */
  function tripal_get_job($job_id) {
    if (!$job_id or !is_numeric($job_id)) {
      watchdog('tripal', "Must provide a numeric \$job_id to the tripal_get_job() function.");
      return FALSE;
    }
    $job = db_query('SELECT j.* FROM {tripal_jobs} j WHERE j.job_id=:job_id', array(':job_id' => $job_id))
      ->fetchObject();
    // fetchObject() yields FALSE when there is no matching row.
    if (!$job) {
      return FALSE;
    }
    $job->submit_date_string = format_date($job->submit_date);
    // A job that has not started or ended has no timestamp to format;
    // formatting the empty value would display the UNIX epoch instead.
    $job->start_time_string = ($job->start_time > 0) ? format_date($job->start_time) : '';
    $job->end_time_string = ($job->end_time > 0) ? format_date($job->end_time) : '';
    return $job;
  }
  /**
   * Indicates if any jobs are running.
   *
   * This function looks at every job that has a start time but no end time and
   * checks whether the process ID recorded for it is still alive. Jobs found
   * not to be running (or that have no process ID at all) are given an end
   * time and the status 'Error' along the way.
   *
   * @return
   *   TRUE as soon as one job is found to be still running, FALSE otherwise.
   *
   * @ingroup tripal_jobs_api
   */
  function tripal_is_job_running() {
    // Select every job that has started but not ended.
    $sql = "SELECT * FROM {tripal_jobs} TJ " .
      "WHERE TJ.end_time IS NULL and NOT TJ.start_time IS NULL ";
    $jobs = db_query($sql);
    foreach ($jobs as $job) {
      // Only ask the operating system about the process when a pid was
      // actually recorded; 'ps -p' with an empty argument is just an error.
      // 'ps -p <pid> -o pid=' prints the pid when the process exists and
      // prints nothing otherwise.
      if ($job->pid && shell_exec('ps -p ' . escapeshellarg($job->pid) . ' -o pid=')) {
        // The job is still running so let it go and report that fact.
        return TRUE;
      }
      // The job is not running although it never recorded an end time, so
      // close it out as failed.
      $record = new stdClass();
      $record->job_id = $job->job_id;
      $record->end_time = REQUEST_TIME;
      $record->status = 'Error';
      $record->error_msg = 'Job has terminated unexpectedly.';
      drupal_write_record('tripal_jobs', $record, 'job_id');
    }
    // No jobs are currently running.
    return FALSE;
  }
  /**
   * Returns the start time for a given job.
   *
   * @param $job
   *   An object describing the job. Its start_time is read, and its status is
   *   taken from the job_status property when present, otherwise from the
   *   status property (the name used when writing to the tripal_jobs table).
   *
   * @return
   *   The formatted start time of the job if it has one; otherwise 'Cancelled'
   *   when the job's status is 'Cancelled' and 'Not Yet Started' in all other
   *   cases.
   *
   * @ingroup tripal_jobs_api
   */
  function tripal_get_job_start($job) {
    if ($job->start_time > 0) {
      return format_date($job->start_time);
    }
    // Accept either property name so that both aliased query results and
    // plain tripal_jobs rows report a cancellation correctly.
    $status = '';
    if (isset($job->job_status)) {
      $status = $job->job_status;
    }
    elseif (isset($job->status)) {
      $status = $job->status;
    }
    return ((string) $status === 'Cancelled') ? 'Cancelled' : 'Not Yet Started';
  }
  /**
   * Returns the end time for a given job.
   *
   * @param $job
   *   An object describing the job; only its end_time is read.
   *
   * @return
   *   The formatted end time when the job has an end time greater than zero,
   *   and an empty string otherwise.
   *
   * @ingroup tripal_jobs_api
   */
  function tripal_get_job_end($job) {
    $has_ended = $job->end_time > 0;
    return $has_ended ? format_date($job->end_time) : '';
  }
  /**
   * Check for too many concurrent jobs.
   *
   * Every job that has a start time but no end time is inspected. Jobs whose
   * recorded process is still alive are counted; the rest are given an end
   * time and the status 'Error'.
   *
   * @param $max_jobs
   *   The maximum number of concurrent jobs to allow; a negative value (e.g.
   *   -1) means no limit.
   *
   * @return
   *   TRUE if the number of jobs found running is strictly greater than
   *   $max_jobs, FALSE otherwise (always FALSE when there is no limit).
   *
   * @ingroup tripal_jobs_api
   */
  function tripal_max_jobs_exceeded($max_jobs) {
    if ($max_jobs < 0) {
      // No limit on concurrent jobs.
      return FALSE;
    }
    $num_jobs_running = 0;
    // Select every job that has started but not ended.
    $sql = "SELECT * FROM {tripal_jobs} TJ " .
      "WHERE TJ.end_time IS NULL and NOT TJ.start_time IS NULL ";
    $jobs = db_query($sql);
    foreach ($jobs as $job) {
      // Only ask the operating system about the process when a pid was
      // actually recorded; 'ps -p' with an empty argument is just an error.
      if ($job->pid && shell_exec('ps -p ' . escapeshellarg($job->pid) . ' -o pid=')) {
        // The job is still running.
        $num_jobs_running++;
        continue;
      }
      // The job is not running although it never recorded an end time, so
      // close it out as failed.
      $record = new stdClass();
      $record->job_id = $job->job_id;
      $record->end_time = REQUEST_TIME;
      $record->status = 'Error';
      $record->error_msg = 'Job has terminated unexpectedly.';
      drupal_write_record('tripal_jobs', $record, 'job_id');
    }
    return ($num_jobs_running > $max_jobs);
  }
  /**
   * Set a job to be re-ran (ie: add it back into the job queue).
   *
   * The original job is left untouched; a new job with the same name, module,
   * callback, arguments and priority is added on behalf of the current user.
   *
   * @param $job_id
   *   The job_id of the job to be re-ran.
   * @param $goto_jobs_page
   *   If set to TRUE then after the re run job is added Drupal will redirect to
   *   the jobs page.
   *
   * @return
   *   The job_id of the newly added job, or FALSE if the original job could not
   *   be found or the new job could not be added.
   *
   * @ingroup tripal_jobs_api
   */
  function tripal_rerun_job($job_id, $goto_jobs_page = TRUE) {
    global $user;

    $sql = "SELECT * FROM {tripal_jobs} WHERE job_id = :job_id";
    $job = db_query($sql, array(':job_id' => $job_id))->fetchObject();

    $new_job_id = FALSE;
    if (!$job) {
      // fetchObject() yields FALSE when there is no such job.
      drupal_set_message(t("Job %job_id could not be found and was not re-run.", array('%job_id' => $job_id)), 'error');
    }
    else {
      // Arguments for jobs used to be stored as plain string with a double
      // colon separating them. But as of Tripal v2.0 the arguments are stored
      // as a serialized array. To be backwards compatible, fall back to the old
      // style only when the stored value does not unserialize to an array; an
      // empty argument list is a valid (but falsy) array and must be kept.
      // NOTE(review): unserialize() is applied to a database value; confirm
      // that only trusted code can write the arguments column.
      $args = unserialize($job->arguments);
      if (!is_array($args)) {
        $args = explode("::", $job->arguments);
      }
      $new_job_id = tripal_add_job($job->job_name, $job->modulename, $job->callback, $args, $user->uid, $job->priority);
    }

    if ($goto_jobs_page) {
      drupal_goto("admin/tripal/tripal_jobs");
    }
    return $new_job_id;
  }
  /**
   * Cancel a Tripal Job currently waiting in the job queue.
   *
   * A job can only be cancelled while it has no start time. Cancelling sets
   * its end time, its status to 'Cancelled' and its progress to 0.
   *
   * @param $job_id
   *   The job_id of the job to be cancelled.
   * @param $redirect
   *   If TRUE (the default) Drupal redirects to the jobs page once the attempt
   *   has been made.
   *
   * @return
   *   FALSE if an error occurred or the job could not be cancelled, TRUE
   *   otherwise.
   *
   * @ingroup tripal_jobs_api
   */
  function tripal_cancel_job($job_id, $redirect = TRUE) {
    if (!$job_id or !is_numeric($job_id)) {
      watchdog('tripal', "Must provide a numeric \$job_id to the tripal_cancel_job() function.");
      return FALSE;
    }

    $sql = "SELECT * FROM {tripal_jobs} WHERE job_id = :job_id";
    $job = db_query($sql, array(':job_id' => $job_id))->fetchObject();

    $cancelled = FALSE;
    if (!$job) {
      // fetchObject() yields FALSE when there is no such job.
      drupal_set_message(t("Job %job_id could not be found.", array('%job_id' => $job_id)), 'error');
    }
    elseif ($job->start_time == 0) {
      // The job has not started yet: close it out as cancelled.
      $record = new stdClass();
      $record->job_id = $job->job_id;
      $record->end_time = REQUEST_TIME;
      $record->status = 'Cancelled';
      $record->progress = '0';
      drupal_write_record('tripal_jobs', $record, 'job_id');
      drupal_set_message(t("Job #%job_id cancelled", array('%job_id' => $job_id)));
      $cancelled = TRUE;
    }
    else {
      drupal_set_message(t("Job %job_id cannot be cancelled. It is in progress or has finished.", array('%job_id' => $job_id)));
    }

    if ($redirect) {
      drupal_goto("admin/tripal/tripal_jobs");
    }
    return $cancelled;
  }
  /**
   * Execute a specific Tripal Job.
   *
   * The job is only launched when it has neither a start time nor an end time.
   *
   * @param $job_id
   *   The job id to be executed.
   * @param bool $redirect [optional]
   *   Whether to redirect to the job page or not. When the job does not exist
   *   the redirect goes to the jobs list instead.
   *
   * @ingroup tripal_jobs_api
   */
  function tripal_execute_job($job_id, $redirect = TRUE) {
    $sql = "SELECT * FROM {tripal_jobs} WHERE job_id = :job_id";
    $job = db_query($sql, array(':job_id' => $job_id))->fetchObject();

    if (!$job) {
      // fetchObject() yields FALSE when there is no such job.
      drupal_set_message(t("Job %job_id could not be found.", array('%job_id' => $job_id)), 'error');
    }
    elseif ($job->start_time == 0 and $job->end_time == 0) {
      // The job has never been run. Launch it in parallel mode so that other
      // running jobs do not prevent it from starting.
      tripal_launch_job(1, $job_id);
      drupal_set_message(t("Job %job_id has finished executing. See below for more information.", array('%job_id' => $job_id)));
    }
    else {
      drupal_set_message(t("Job %job_id cannot be executed. It has already finished.", array('%job_id' => $job_id)));
    }

    if ($redirect) {
      drupal_goto($job ? "admin/tripal/tripal_jobs/view/$job_id" : "admin/tripal/tripal_jobs");
    }
  }
  /**
   * A function used to manually launch all queued tripal jobs.
   *
   * Waiting jobs (no start time and no end time) are run one after another in
   * this process, ordered by ascending priority value and then by job_id.
   * Progress messages are printed to standard output.
   *
   * @param $do_parallel
   *   A boolean indicating whether jobs should be attempted to run in parallel.
   *   When false, nothing is launched while another job is still running.
   * @param $job_id
   *   To launch a specific job provide the job id. This option should be
   *   used sparingly as the jobs queue management system should launch jobs
   *   based on order and priority. However there are times when a specific
   *   job needs to be launched and this argument will allow it. Only jobs
   *   which have not been run previously will run.
   * @param $max_jobs
   *   The maximum number of concurrent jobs to allow; -1 (the default) means
   *   no limit. It is checked before starting (in parallel mode) and again
   *   after each job completes.
   * @param $single
   *   If true, stop after running one job.
   *
   * @ingroup tripal_jobs_api
   */
  function tripal_launch_job($do_parallel = 0, $job_id = NULL, $max_jobs = -1, $single = 0) {
    // First check if any jobs are currently running. If they are, don't
    // continue, we don't want to have more than one job script running at a
    // time unless parallel execution was requested.
    if (!$do_parallel and tripal_is_job_running()) {
      print date('Y-m-d H:i:s') . ": Jobs are still running. Use the --parallel=1 option with the Drush command to run jobs in parallel.\n";
      return;
    }
    if ($do_parallel && tripal_max_jobs_exceeded($max_jobs)) {
      print date('Y-m-d H:i:s') . ": More than $max_jobs jobs are still running. At least one of these jobs must complete before a new job can start.\n";
      return;
    }

    // Get all jobs that have not started (optionally restricted to a single
    // job) and order them by priority first and submission order second.
    $sql = "SELECT * FROM {tripal_jobs} TJ " .
      "WHERE TJ.start_time IS NULL and TJ.end_time IS NULL ";
    $sql_args = array();
    if ($job_id) {
      $sql .= "and TJ.job_id = :job_id ";
      $sql_args[':job_id'] = $job_id;
    }
    $sql .= "ORDER BY priority ASC,job_id ASC";
    $job_res = db_query($sql, $sql_args);

    print date('Y-m-d H:i:s') . ": There are " . $job_res->rowCount() . " jobs queued.\n";
    foreach ($job_res as $job) {
      // Set the start time for this job. time() is used rather than
      // REQUEST_TIME because the latter is fixed for the whole process and
      // would give every job in a long run the same timestamps.
      $record = new stdClass();
      $record->job_id = $job->job_id;
      $record->start_time = time();
      $record->status = 'Running';
      $record->pid = getmypid();
      drupal_write_record('tripal_jobs', $record, 'job_id');

      // Call the function provided in the callback column.
      $callback = $job->callback;

      // Arguments for jobs used to be stored as plain string with a double
      // colon separating them. But as of Tripal v2.0 the arguments are stored
      // as a serialized array. To be backwards compatible, fall back to the old
      // style only when the stored value does not unserialize to an array; an
      // empty argument list is a valid (but falsy) array and must be kept.
      // NOTE(review): unserialize() is applied to a database value; confirm
      // that only trusted code can write the arguments column.
      $args = unserialize($job->arguments);
      if (!is_array($args)) {
        $args = explode("::", $job->arguments);
      }
      // Add the job_id as the last item in the list of arguments. All
      // callback functions should support this argument.
      $args[] = $job->job_id;

      // We need to do some additional processing for printing since the switch
      // to serialized arrays now allows nested arrays which cause errors when
      // printed using implode alone.
      $string_args = array();
      foreach ($args as $k => $a) {
        if (is_array($a)) {
          $string_args[$k] = 'Array';
        }
        elseif (is_object($a)) {
          $string_args[$k] = 'Object';
        }
        else {
          $string_args[$k] = $a;
        }
      }
      print date('Y-m-d H:i:s') . ": Calling: $callback(" . implode(", ", $string_args) . ")\n";

      try {
        // Pass the arguments positionally. On PHP 8 string keys would
        // otherwise be interpreted as named parameters.
        call_user_func_array($callback, array_values($args));
      }
      catch (Exception $e) {
        // Do not leave the job stuck in 'Running' when the callback throws:
        // record the failure, then let the exception continue to the caller.
        $record->end_time = time();
        $record->status = 'Error';
        $record->error_msg = $e->getMessage();
        drupal_write_record('tripal_jobs', $record, 'job_id');
        throw $e;
      }

      // Set the end time for this job.
      $record->end_time = time();
      $record->status = 'Completed';
      $record->progress = '100';
      drupal_write_record('tripal_jobs', $record, 'job_id');

      if ($single) {
        // Don't start any more jobs.
        break;
      }
      if (tripal_max_jobs_exceeded($max_jobs)) {
        break;
      }
      // TODO: send an email to the user advising that the job has finished.
    }
  }
  /**
   * An internal function for setting the progress for a current job.
   *
   * @param $job_id
   *   The job_id to set the progress for.
   * @param $percentage
   *   The progress to set the job to: a whole number between 0 and 100.
   *
   * @return
   *   TRUE if the percentage is valid and the job record was written, FALSE
   *   otherwise.
   *
   * @ingroup tripal_core
   */
  function tripal_set_job_progress($job_id, $percentage) {
    // Accept only one or two digits, or exactly 100. The 'D' modifier stops
    // '$' from matching before a trailing newline.
    if (!is_scalar($percentage) || !preg_match('/^(?:\d{1,2}|100)$/D', (string) $percentage)) {
      return FALSE;
    }
    $record = new stdClass();
    $record->job_id = $job_id;
    $record->progress = $percentage;
    if (drupal_write_record('tripal_jobs', $record, 'job_id')) {
      return TRUE;
    }
    return FALSE;
  }
  /**
   * Returns a list of jobs that are active.
   *
   * A job counts as active when its status is neither 'Completed' nor
   * 'Cancelled'.
   *
   * @param $modulename
   *   Limit the list returned to those that were added by a specific module. If
   *   no module name is provided then all active jobs are returned.
   *
   * @return
   *   An array of objects where each object describes a tripal job. If no
   *   jobs were found then an empty array is returned. Each object will have
   *   the following members:
   *   - job_id: The unique ID number for the job.
   *   - uid: The ID of the user that submitted the job.
   *   - job_name: The human-readable name of the job.
   *   - modulename: The name of the module that submitted the job.
   *   - callback: The callback function to be called when the job is run.
   *   - arguments: An array of arguments to be passed to the callback function.
   *   - progress: The percent progress of completion if the job is running.
   *   - status: The status of the job: Waiting, Completed, Running or Cancelled.
   *   - submit_date: The UNIX timestamp when the job was submitted.
   *   - start_time: The UNIX timestamp for when the job started running.
   *   - end_time: The UNIX timestamp when the job completed running.
   *   - error_msg: Any error message that occurred during execution of the job.
   *   - priority: The execution priority of the job (value between 1 and 10).
   *
   * @ingroup tripal_jobs_api
   */
  function tripal_get_active_jobs($modulename = NULL) {
    $query = db_select('tripal_jobs', 'TJ')
      ->fields('TJ', array('job_id', 'uid', 'job_name', 'modulename', 'callback',
        'arguments', 'progress', 'status', 'submit_date', 'start_time',
        'end_time', 'error_msg', 'priority'));

    // The status filter always applies so that only active jobs are returned
    // whether or not a module name was given.
    $query->where("NOT (TJ.status = 'Completed' or TJ.status = 'Cancelled')");
    if ($modulename) {
      $query->condition('TJ.modulename', $modulename);
    }
    $results = $query->execute();

    $jobs = array();
    while ($job = $results->fetchObject()) {
      // The arguments are stored serialized; hand them back as stored by
      // unserializing them on the job object itself.
      $job->arguments = unserialize($job->arguments);
      $jobs[] = $job;
    }
    return $jobs;
  }
  /**
   * Returns the date the job was added to the queue.
   *
   * @param $job
   *   An object describing the job; only its submit_date is read.
   *
   * @return
   *   The date the job was submitted, as formatted by format_date().
   *
   * @ingroup tripal_jobs_api
   */
  function tripal_get_job_submit_date($job) {
    $submitted = $job->submit_date;
    return format_date($submitted);
  }