jobs.php 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540
  1. <?php
  2. /**
  3. * @file
  4. * Contains functions related to the Tripal Jobs API
  5. *
  6. * @defgroup tripal_jobs_api Core Module Jobs API
  7. * @{
  8. * Tripal offers a job management subsystem for managing tasks that may require an extended period of time for
  9. * completion. Drupal uses a UNIX-based cron job to handle tasks such as checking the availability of updates,
  10. * indexing new nodes for searching, etc. Drupal's cron uses the web interface for launching these tasks, however,
  11. * Tripal provides several administrative tasks that may time out and not complete due to limitations of the web
  12. * server. Examples include the syncing of a large number of features between chado and Drupal. To circumvent this,
  13. * as well as provide more fine-grained control and monitoring, Tripal uses a jobs management sub-system built into
  14. * the Tripal Core module. It is anticipated that this functionality will be used for managing analysis jobs provided by
  15. * future tools, with eventual support for distributed computing.
  16. *
  17. * The Tripal jobs management system allows administrators to submit tasks to be performed which can then be
  18. * launched through a UNIX command-line PHP script or cron job. This command-line script can be added to a cron
  19. * entry along-side the Drupal cron entry for automatic, regular launching of Tripal jobs. The order of execution of
  20. * waiting jobs is determined first by priority and second by the order the jobs were entered.
  21. *
  22. * The API functions described below provide a programmatic interface for adding, checking and viewing jobs.
  23. * @}
  24. * @ingroup tripal_api
  25. */
  /**
   * Adds a job to the Tripal job queue
   *
   * @param $job_name
   *   The human readable name for the job
   * @param $modulename
   *   The name of the module adding the job
   * @param $callback
   *   The name of a function to be called when the job is executed
   * @param $arguments
   *   An array of arguments to be passed on to the callback. The values are
   *   joined with "::" for storage, so an individual value must not contain
   *   the "::" sequence.
   * @param $uid
   *   The uid of the user adding the job
   * @param $priority
   *   The priority at which to run the job. Waiting jobs are launched in
   *   ascending order of this number, so the lower the number the higher the
   *   priority. The default priority is 10.
   *
   * @return
   *   The job_id of the registered job, or NULL if the job could not be saved
   *
   * Example usage:
   * @code
   *  $args = array($dfile, $organism_id, $type, $library_id, $re_name, $re_uname,
   *    $re_accession, $db_id, $rel_type, $re_subject, $parent_type, $method,
   *    $user->uid, $analysis_id, $match_type);
   *
   *  tripal_add_job("Import FASTA file: $dfile", 'tripal_feature',
   *    'tripal_feature_load_fasta', $args, $user->uid);
   * @endcode
   * The code above is copied from the tripal_feature/fasta_loader.php file. The
   * snippet first builds an array of arguments that will then be passed to the
   * tripal_add_job function. The number of arguments provided in the $arguments
   * variable should match the argument set for the callback function provided
   * as the third argument.
   *
   * @ingroup tripal_jobs_api
   */
  function tripal_add_job($job_name, $modulename, $callback, $arguments, $uid, $priority = 10) {

    // convert the arguments into a string for storage in the database. The
    // array cast lets a NULL or a single scalar value be handled gracefully.
    $args = implode("::", (array) $arguments);

    $record = new stdClass();
    $record->job_name    = $job_name;
    $record->modulename  = $modulename;
    $record->callback    = $callback;
    $record->status      = 'Waiting';
    $record->submit_date = time();
    $record->uid         = $uid;
    $record->priority    = $priority;  # the lower the number the higher the priority
    if ($args) {
      $record->arguments = $args;
    }

    if (drupal_write_record('tripal_jobs', $record)) {
      $jobs_url = url("admin/tripal/tripal_jobs");
      drupal_set_message(t("Job '%job_name' submitted.  Check the <a href='!jobs_url'>jobs page</a> for status", array('%job_name' => $job_name, '!jobs_url' => $jobs_url)));
    }
    else {
      drupal_set_message(t("Failed to add job %job_name.", array('%job_name' => $job_name)), 'error');
    }

    // the job_id is only present on the record when the write succeeded
    return isset($record->job_id) ? $record->job_id : NULL;
  }
  /**
   * An internal function for setting the progress for a current job
   *
   * @param $job_id
   *   The job_id to set the progress for
   * @param $percentage
   *   The progress to set the job to. Must be a whole number between 0 and 100
   *   (an integer or a string of digits).
   *
   * @return
   *   True on success and False otherwise (including when $percentage is not
   *   a whole number in the range 0-100)
   *
   * @ingroup tripal_core
   */
  function tripal_job_set_progress($job_id, $percentage) {

    // reject anything that is not made up solely of digits (negative numbers,
    // decimals, text, arrays, ...)
    if (!is_scalar($percentage) or !preg_match('/^\d+$/D', (string) $percentage)) {
      return FALSE;
    }

    // a progress above 100% is meaningless
    $progress = (int) $percentage;
    if ($progress > 100) {
      return FALSE;
    }

    $record = new stdClass();
    $record->job_id   = $job_id;
    $record->progress = $progress;
    if (drupal_write_record('tripal_jobs', $record, 'job_id')) {
      return TRUE;
    }

    return FALSE;
  }
  /**
   * Looks up an unfinished job that belongs to the given module
   *
   * @param $modulename
   *   The name of the module whose jobs should be searched
   *
   * @return
   *   The result of a single db_fetch_object() call on the query for this
   *   module's jobs that have no end time, i.e. one job record rather than a
   *   list of them
   *
   * @ingroup tripal_jobs_api
   */
  function tripal_get_module_active_jobs($modulename) {
    $query = "SELECT * FROM {tripal_jobs} TJ WHERE TJ.end_time IS NULL and TJ.modulename = '%s' ";
    $result = db_query($query, $modulename);

    return db_fetch_object($result);
  }
  /**
   * Builds the form used to filter the jobs report by job status
   *
   * @param $form
   *   Ignored; the form array is always built from scratch
   * @param $form_state
   *   The form state. A submitted 'job_status' value, when present, is used as
   *   the default selection; otherwise the filter stored in the session by
   *   tripal_jobs_report_form_submit() is used.
   *
   * @return
   *   The form array containing the status select list and a submit button
   *
   * @ingroup tripal_core
   */
  function tripal_jobs_report_form($form, &$form_state = NULL) {
    $form = array();

    // set the default values. Both sources are optional, so check that they
    // exist before reading them.
    $default_status = NULL;
    if (isset($form_state['values']['job_status'])) {
      $default_status = $form_state['values']['job_status'];
    }
    if (!$default_status and isset($_SESSION['tripal_job_status_filter'])) {
      $default_status = $_SESSION['tripal_job_status_filter'];
    }

    $form['job_status'] = array(
      '#type' => 'select',
      '#title' => t('Filter by Job Status'),
      '#default_value' => $default_status,
      '#options' => array(
        0 => 'All Jobs',
        'Running' => 'Running',
        'Waiting' => 'Waiting',
        'Completed' => 'Completed',
        'Cancelled' => 'Cancelled',
        'Error' => 'Error',
      ),
    );

    $form['submit'] = array(
      '#type' => 'submit',
      '#value' => t('Filter'),
    );

    return $form;
  }
  /**
   * Submit handler for the jobs report filter form
   *
   * Copies the chosen job status into the session under the
   * 'tripal_job_status_filter' key.
   *
   * @param $form
   *   The form array (unused)
   * @param $form_state
   *   The form state holding the submitted 'job_status' value
   *
   * @ingroup tripal_core
   */
  function tripal_jobs_report_form_submit($form, &$form_state = NULL) {
    $_SESSION['tripal_job_status_filter'] = $form_state['values']['job_status'];
  }
  /**
   * Returns the Tripal Job Report
   *
   * Lists jobs 25 per page, newest first, optionally restricted to the job
   * status stored in the session by the filter form.
   *
   * @return
   *   The HTML to be rendered which describes the job report
   *
   * @ingroup tripal_core
   */
  function tripal_jobs_report() {

    // run the following function which will
    // change the status of jobs that have errored out
    tripal_jobs_check_running();

    // the filter is only present once the filter form has been submitted
    $jobs_status_filter = isset($_SESSION['tripal_job_status_filter']) ? $_SESSION['tripal_job_status_filter'] : NULL;

    $sql = "
      SELECT
        TJ.job_id,TJ.uid,TJ.job_name,TJ.modulename,TJ.progress,
        TJ.status as job_status, TJ.submit_date,TJ.start_time,
        TJ.end_time,TJ.priority,U.name as username
      FROM {tripal_jobs} TJ
        INNER JOIN {users} U on TJ.uid = U.uid ";
    if ($jobs_status_filter) {
      $sql .= "WHERE TJ.status = '%s' ";
    }
    $sql .= "ORDER BY job_id DESC";

    $jobs = pager_query($sql, 25, 0, "SELECT count(*) FROM ($sql) as t1", $jobs_status_filter);

    $header = array(
      'Job ID',
      'User',
      'Job Name',
      array('data' => 'Dates', 'style'=> "white-space: nowrap"),
      'Priority',
      'Progress',
      'Status',
      'Action');
    $rows = array();

    // iterate through the jobs
    while ($job = db_fetch_object($jobs)) {
      $submit = tripal_jobs_get_submit_date($job);
      $start = tripal_jobs_get_start_time($job);
      $end = tripal_jobs_get_end_time($job);

      // only jobs that have neither started nor ended can be cancelled
      $cancel_link = '';
      if ($job->start_time == 0 and $job->end_time == 0) {
        $cancel_link = "<a href=\"" . url("admin/tripal/tripal_jobs/cancel/" . $job->job_id) . "\">Cancel</a><br />";
      }
      $rerun_link = "<a href=\"" . url("admin/tripal/tripal_jobs/rerun/" . $job->job_id) . "\">Re-run</a><br />";
      $view_link ="<a href=\"" . url("admin/tripal/tripal_jobs/view/" . $job->job_id) . "\">View</a>";

      // the user name and job name are free text, so escape them before they
      // are placed in the table markup
      $rows[] = array(
        $job->job_id,
        check_plain($job->username),
        check_plain($job->job_name),
        "Submit Date: $submit<br>Start Time: $start<br>End Time: $end",
        $job->priority,
        $job->progress . '%',
        $job->job_status,
        "$cancel_link $rerun_link $view_link",
      );
    }

    // create the report page
    $output = "Waiting jobs are executed first by priority level (the lower the ".
              "number the higher the priority) and second by the order they ".
              "were entered";
    $output .= drupal_get_form('tripal_jobs_report_form');
    $output .= theme('table', $header, $rows);
    $output .= theme('pager');

    return $output;
  }
  /**
   * Produces the text shown as the start time of a job
   *
   * @param $job
   *   An object describing the job; its start_time and job_status members are
   *   read
   *
   * @return
   *   The formatted start time when the job has one, otherwise "Cancelled" for
   *   a job whose job_status is 'Cancelled' and "Not Yet Started" for any other
   *
   * @ingroup tripal_jobs_api
   */
  function tripal_jobs_get_start_time($job) {

    // a positive timestamp means the job has been launched
    if ($job->start_time > 0) {
      return format_date($job->start_time);
    }

    // no start time: say whether that is because the job was cancelled
    return (0 == strcmp($job->job_status, 'Cancelled')) ? 'Cancelled' : 'Not Yet Started';
  }
  /**
   * Produces the text shown as the end time of a job
   *
   * @param $job
   *   An object describing the job; its end_time member is read
   *
   * @return
   *   The formatted end time when the job has one and an empty string otherwise
   *
   * @ingroup tripal_jobs_api
   */
  function tripal_jobs_get_end_time($job) {
    return ($job->end_time > 0) ? format_date($job->end_time) : '';
  }
  /**
   * Produces the text shown as the date a job was added to the queue
   *
   * @param $job
   *   An object describing the job; its submit_date member is read
   *
   * @return
   *   The date the job was submitted, as formatted by format_date()
   *
   * @ingroup tripal_jobs_api
   */
  function tripal_jobs_get_submit_date($job) {
    $submitted = $job->submit_date;

    return format_date($submitted);
  }
  /**
   * A function used to manually launch all queued tripal jobs
   *
   * Each waiting job is marked as 'Running', its callback is invoked with the
   * stored arguments followed by the job_id, and the job is then marked as
   * 'Completed'. A job whose callback cannot be called is marked as 'Error'
   * instead.
   *
   * @param $do_parallel
   *   A boolean indicating whether jobs should be attempted to run in parallel.
   *   When false, nothing is launched while another job is still running.
   *
   * @param $job_id
   *   To launch a specific job provide the job id.  This option should be
   *   used sparingly as the jobs queue management system should launch jobs
   *   based on order and priority.  However there are times when a specific
   *   job needs to be launched and this argument will allow it.  Only jobs
   *   which have not been run previously will run.
   *
   * @ingroup tripal_jobs_api
   */
  function tripal_jobs_launch($do_parallel = 0, $job_id = NULL) {

    // first check if any jobs are currently running
    // if they are, don't continue, we don't want to have
    // more than one job script running at a time
    if (!$do_parallel and tripal_jobs_check_running()) {
      print "Jobs are still running. Use the --parallel=1 option with the Drush command to run jobs in parallel.";
      return;
    }

    // get all jobs that have not started and order them such that
    // they are processed by priority first and then in a FIFO manner.
    if ($job_id) {
      $sql =  "SELECT * FROM {tripal_jobs} TJ ".
              "WHERE TJ.start_time IS NULL and TJ.end_time IS NULL and TJ.job_id = %d ".
              "ORDER BY priority ASC,job_id ASC";
      $job_res = db_query($sql,$job_id);
    }
    else {
      $sql =  "SELECT * FROM {tripal_jobs} TJ ".
              "WHERE TJ.start_time IS NULL and TJ.end_time IS NULL ".
              "ORDER BY priority ASC,job_id ASC";
      $job_res = db_query($sql);
    }

    while ($job = db_fetch_object($job_res)) {
      $callback = $job->callback;

      // a job whose callback does not exist can never run, so flag it as
      // failed rather than reporting it as completed
      if (!is_callable($callback)) {
        $failed = new stdClass();
        $failed->job_id = $job->job_id;
        $failed->end_time = time();
        $failed->status = 'Error';
        $failed->error_msg = 'The job callback could not be called.';
        drupal_write_record('tripal_jobs', $failed, 'job_id');
        print "Skipping job " . $job->job_id . ": the callback '$callback' could not be called\n";
        continue;
      }

      // set the start time for this job
      $record = new stdClass();
      $record->job_id = $job->job_id;
      $record->start_time = time();
      $record->status = 'Running';
      $record->pid = getmypid();
      drupal_write_record('tripal_jobs', $record, 'job_id');

      // call the function provided in the callback column.
      // Add the job_id as the last item in the list of arguments. All
      // callback functions should support this argument.
      // explode() replaces split(), which is no longer available in PHP 7.
      $args = explode("::", (string) $job->arguments);
      $args[] = $job->job_id;
      print "Calling: $callback(" . implode(", ", $args) . ")\n";
      call_user_func_array($callback, $args);

      // set the end time for this job
      $record->end_time = time();
      $record->status = 'Completed';
      $record->progress = '100';
      drupal_write_record('tripal_jobs', $record, 'job_id');

      // send an email to the user advising that the job has finished
    }
  }
  /**
   * Checks whether any tripal job is still running
   *
   * Every job that has a start time but no end time is examined. If the
   * process recorded for the job can no longer be found with the "ps" command,
   * the job is given an end time and its status is set to 'Error'.
   *
   * @return
   *   TRUE if at least one started job still has a live process and FALSE if
   *   no jobs are running
   *
   * @ingroup tripal_jobs_api
   */
  function tripal_jobs_check_running() {

    // iterate through each job that has not ended
    // and see if it is still running. If it is not
    // running but does not have an end_time then
    // set the end time and set the status to 'Error'
    $sql =  "SELECT * FROM {tripal_jobs} TJ ".
            "WHERE TJ.end_time IS NULL and NOT TJ.start_time IS NULL ";
    $jobs = db_query($sql);

    $running = FALSE;
    while ($job = db_fetch_object($jobs)) {

      // only ask the operating system about the process when a pid was
      // recorded. The integer cast keeps anything but a number out of the
      // shell command.
      $pid = (int) $job->pid;
      $status = '';
      if ($pid > 0) {
        $status = `ps --pid={$pid} --no-header`;
      }

      if ($status) {
        // the job is still running so let it go. Keep looking at the
        // remaining jobs so that any which have died are still flagged.
        $running = TRUE;
        continue;
      }

      // the job is not running so terminate it
      $record = new stdClass();
      $record->job_id = $job->job_id;
      $record->end_time = time();
      $record->status = 'Error';
      $record->error_msg = 'Job has terminated unexpectedly.';
      drupal_write_record('tripal_jobs', $record, 'job_id');
    }

    // TRUE when a running job was found, FALSE when none are running
    return $running;
  }
  /**
   * Renders a single job through the tripal_core_job_view theme hook
   *
   * @param $job_id
   *   The job_id of the job to display
   *
   * @return
   *   Whatever theme() returns for the 'tripal_core_job_view' hook when given
   *   the job_id
   *
   * @ingroup tripal_core
   */
  function tripal_jobs_view($job_id) {
    $html = theme('tripal_core_job_view', $job_id);

    return $html;
  }
  /**
   * Registers variables for the tripal_core_job_view themeing function
   *
   * Loads the job named by $variables['job_id'], replaces its "::" separated
   * argument string with an array (described by the owning module when it
   * provides a MODULENAME_job_describe_args() function), formats its dates and
   * stores the result in $variables['job'].
   *
   * @param $variables
   *   An array containing all variables supplied to this template
   *
   * @ingroup tripal_core
   */
  function tripal_core_preprocess_tripal_core_job_view(&$variables) {

    // get the job record
    $job_id = $variables['job_id'];
    $sql =
      "SELECT TJ.job_id,TJ.uid,TJ.job_name,TJ.modulename,TJ.progress,
              TJ.status as job_status, TJ.submit_date,TJ.start_time,
              TJ.end_time,TJ.priority,U.name as username,TJ.arguments,
              TJ.callback,TJ.error_msg,TJ.pid
       FROM {tripal_jobs} TJ
         INNER JOIN {users} U on TJ.uid = U.uid
       WHERE TJ.job_id = %d";
    $job = db_fetch_object(db_query($sql, $job_id));

    // nothing can be prepared for a job that does not exist
    // NOTE(review): the template is not visible here; confirm that it copes
    // with a FALSE job
    if (!$job) {
      $variables['job'] = FALSE;
      return;
    }

    // we do not know what the arguments are for and we want to provide a
    // meaningful description to the end-user. So we use a callback function
    // defined in the module that created the job to describe in an array
    // the arguments provided.  If the callback fails then just use the
    // arguments as they are
    $args = explode("::", (string) $job->arguments);
    $arg_hook = $job->modulename . "_job_describe_args";
    $described = NULL;
    if (is_callable($arg_hook)) {
      $described = call_user_func_array($arg_hook, array($job->callback, $args));
    }
    if (is_array($described) and count($described)) {
      $job->arguments = $described;
    }
    else {
      $job->arguments = $args;
    }

    // make our start and end times more legible
    $job->submit_date = tripal_jobs_get_submit_date($job);
    $job->start_time = tripal_jobs_get_start_time($job);
    $job->end_time = tripal_jobs_get_end_time($job);

    // add the job to the variables that get exported to the template
    $variables['job'] = $job;
  }
  /**
   * Set a job to be re-ran (ie: add it back into the job queue)
   *
   * A new job is queued for the current user with the same name, module,
   * callback, arguments and priority as the original job.
   *
   * @param $job_id
   *   The job_id of the job to be re-ran
   * @param $goto_jobs_page
   *   If TRUE (the default) the user is redirected to the jobs page afterwards
   *
   * @return
   *   The job_id of the newly queued job, or FALSE if the original job could
   *   not be found
   *
   * @ingroup tripal_jobs_api
   */
  function tripal_jobs_rerun($job_id, $goto_jobs_page = TRUE) {
    global $user;

    $sql = "SELECT * FROM {tripal_jobs} WHERE job_id = %d";
    $job = db_fetch_object(db_query($sql, $job_id));

    // without the original job there is nothing to copy, so do not queue an
    // empty job
    if (!$job) {
      drupal_set_message(t("Job %job_id could not be found and was not re-run.", array('%job_id' => $job_id)), 'error');
      if ($goto_jobs_page) {
        drupal_goto("admin/tripal/tripal_jobs");
      }
      return FALSE;
    }

    $args = explode("::", (string) $job->arguments);
    $new_job_id = tripal_add_job(
      $job->job_name,
      $job->modulename,
      $job->callback,
      $args,
      $user->uid,
      $job->priority);

    if ($goto_jobs_page) {
      drupal_goto("admin/tripal/tripal_jobs");
    }
    return $new_job_id;
  }
  /**
   * Cancel a Tripal Job currently waiting in the job queue
   *
   * Only a job that has neither started nor ended is cancelled; it is given an
   * end time, a status of 'Cancelled' and a progress of 0.
   *
   * @param $job_id
   *   The job_id of the job to be cancelled
   * @param $redirect
   *   If TRUE (the default) the user is redirected to the jobs page afterwards
   *
   * @ingroup tripal_jobs_api
   */
  function tripal_jobs_cancel($job_id, $redirect = TRUE) {
    $sql = "SELECT * FROM {tripal_jobs} WHERE job_id = %d";
    $job = db_fetch_object(db_query($sql, $job_id));

    if (!$job) {
      // an unknown job id must not be reported as cancelled
      drupal_set_message(t("Job %job_id could not be found.", array('%job_id' => $job_id)), 'error');
    }
    elseif ($job->start_time == 0 and $job->end_time == 0) {
      // set the end time for this job. The end time check stops a job that was
      // already cancelled from being cancelled a second time.
      $record = new stdClass();
      $record->job_id = $job->job_id;
      $record->end_time = time();
      $record->status = 'Cancelled';
      $record->progress = '0';
      drupal_write_record('tripal_jobs', $record, 'job_id');
      drupal_set_message(t("Job #%job_id cancelled", array('%job_id' => $job_id)));
    }
    else {
      drupal_set_message(t("Job %job_id cannot be cancelled. It is in progress or has finished.", array('%job_id' => $job_id)), 'warning');
    }

    if ($redirect) {
      drupal_goto("admin/tripal/tripal_jobs");
    }
  }