tripal_core.jobs.api.inc 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536
  1. <?php
  2. /**
  3. * @file
  4. * Tripal offers a job management subsystem for managing tasks that may require an extended period of time for
  5. * completion.
  6. */
  7. /**
  8. * @defgroup tripal_jobs_api Tripal Jobs API
  9. * @ingroup tripal_core_api
  10. * @{
  11. * Tripal offers a job management subsystem for managing tasks that may require an extended period of time for
  12. * completion. Drupal uses a UNIX-based cron job to handle tasks such as checking the availability of updates,
  13. * indexing new nodes for searching, etc. Drupal's cron uses the web interface for launching these tasks, however,
  14. * Tripal provides several administrative tasks that may time out and not complete due to limitations of the web
  15. * server. Examples include syncing of a large number of features between chado and Drupal. To circumvent this,
  16. * as well as provide more fine-grained control and monitoring, Tripal uses a jobs management sub-system built into
  17. * the Tripal Core module. It is anticipated that this functionality will be used for managing analysis jobs provided by
  18. * future tools, with eventual support for distributed computing.
  19. *
  20. * The Tripal jobs management system allows administrators to submit tasks to be performed which can then be
  21. * launched through a UNIX command-line PHP script or cron job. This command-line script can be added to a cron
  22. * entry along-side the Drupal cron entry for automatic, regular launching of Tripal jobs. The order of execution of
  23. * waiting jobs is determined first by priority and second by the order the jobs were entered.
  24. *
  25. * The API functions described below provide a programmatic interface for adding, checking and viewing jobs.
  26. * @}
  27. */
  28. /**
  29. * Adds a job to the Tripal Jbo queue
  30. *
  31. * @param $job_name
  32. * The human readable name for the job
  33. * @param $modulename
  34. * The name of the module adding the job
  35. * @param $callback
  36. * The name of a function to be called when the job is executed
  37. * @param $arguments
  38. * An array of arguments to be passed on to the callback
  39. * @param $uid
  40. * The uid of the user adding the job
  41. * @param $priority
  42. * The priority at which to run the job where the highest priority is 10 and the lowest priority
  43. * is 1. The default priority is 10.
  44. *
  45. * @return
  46. * The job_id of the registered job
  47. *
  48. * Example usage:
  49. * @code
  50. * $args = array($dfile, $organism_id, $type, $library_id, $re_name, $re_uname,
  51. * $re_accession, $db_id, $rel_type, $re_subject, $parent_type, $method,
  52. * $user->uid, $analysis_id, $match_type);
  53. *
  54. * tripal_add_job("Import FASTA file: $dfile", 'tripal_feature',
  55. * 'tripal_feature_load_fasta', $args, $user->uid);
  56. * @endcode
  57. * The code above is copied from the tripal_feature/fasta_loader.php file. The
  58. * snipped first builds an array of arguments that will then be passed to the
  59. * tripal_add_job function. The number of arguments provided in the $arguments
  60. * variable should match the argument set for the callback function provided
  61. * as the third argument.
  62. *
  63. * @ingroup tripal_jobs_api
  64. */
  65. function tripal_add_job($job_name, $modulename, $callback, $arguments, $uid, $priority = 10) {
  66. if (!$job_name) {
  67. watchdog('tripal', "Must provide a \$job_name argument to the tripal_add_job() function.", 'error');
  68. return FALSE;
  69. }
  70. if (!$modulename) {
  71. watchdog('tripal', "Must provide a \$modulename argument to the tripal_add_job() function.", 'error');
  72. return FALSE;
  73. }
  74. if (!$callback) {
  75. watchdog('tripal', "Must provide a \$callback argument to the tripal_add_job() function.", 'error');
  76. return FALSE;
  77. }
  78. foreach ($includes as $include) {
  79. require_once($include);
  80. }
  81. if (!function_exists($callback)) {
  82. watchdog('tripal', "Must provide a valid callback function to the tripal_add_job() function.", 'error');
  83. return FALSE;
  84. }
  85. if (!is_numeric($uid)) {
  86. watchdog('tripal', "Must provide a numeric \$uid argument to the tripal_add_job() function.", 'error');
  87. return FALSE;
  88. }
  89. if (!$priority or !is_numeric($priority) or $priority < 1 or $priority > 10) {
  90. watchdog('tripal', "Must provide a numeric \$priority argument between 1 and 10 to the tripal_add_job() function.", 'error');
  91. return FALSE;
  92. }
  93. if (!is_array($arguments)) {
  94. watchdog('tripal', "Must provide an array as the \$arguments argument to the tripal_add_job() function.", 'error');
  95. return FALSE;
  96. }
  97. $user = user_load($uid);
  98. // convert the arguments into a string for storage in the database
  99. $args = array();
  100. if (is_array($arguments)) {
  101. $args = serialize($arguments);
  102. }
  103. $job_id = db_insert('tripal_jobs')
  104. ->fields(array(
  105. 'job_name' => $job_name,
  106. 'modulename' => $modulename,
  107. 'callback' => $callback,
  108. 'status' => 'Waiting',
  109. 'submit_date' => REQUEST_TIME,
  110. 'uid' => $uid,
  111. # The lower the number the higher the priority.
  112. 'priority' => $priority,
  113. 'arguments' => $args,
  114. ))
  115. ->execute();
  116. if ($job_id) {
  117. drupal_set_message(t("Job '%job_name' submitted.", array('%job_name' => $job_name)));
  118. if (user_access('administer tripal')) {
  119. $jobs_url = url("admin/tripal/tripal_jobs");
  120. drupal_set_message(t("Check the <a href='!jobs_url'>jobs page</a> for status.",
  121. array('!jobs_url' => $jobs_url)));
  122. drupal_set_message(t("You can execute the job queue manually on the command line " .
  123. "using the following Drush command: <br>drush trp-run-jobs --username=%uname --root=%base_path",
  124. array('%base_path' => DRUPAL_ROOT, '%uname' => $user->name)));
  125. }
  126. }
  127. else {
  128. drupal_set_message(t("Failed to add job: %job_name.", array('%job_name' => $job_name)), 'error');
  129. }
  130. return $job_id;
  131. }
  132. /**
  133. * Retrieve information regarding a tripal job
  134. *
  135. * @param $job_id
  136. * The unique identifier of the job
  137. *
  138. * @return
  139. * An object describing the job if a job is found or FALSE on failure.
  140. *
  141. * @ingroup tripal_jobs_api
  142. */
  143. function tripal_get_job($job_id) {
  144. if (!$job_id or !is_numeric($job_id)) {
  145. watchdog('tripal', "Must provide a numeric \$job_id to the tripal_cancel_job() function.");
  146. return FALSE;
  147. }
  148. $job = db_query('SELECT j.* FROM {tripal_jobs} j WHERE j.job_id=:job_id', array(':job_id' => $job_id))
  149. ->fetchObject();
  150. $job->submit_date_string = format_date($job->submit_date);
  151. $job->start_time_string = format_date($job->start_time);
  152. $job->end_time_string = format_date($job->end_time);
  153. return $job;
  154. }
  155. /**
  156. * Returns a list of running tripal jobs
  157. *
  158. * @return
  159. * and array of objects where each object describes a running job or FALSE if no jobs are running
  160. *
  161. * @ingroup tripal_jobs_api
  162. */
  163. function tripal_is_job_running() {
  164. // iterate through each job that has not ended
  165. // and see if it is still running. If it is not
  166. // running but does not have an end_time then
  167. // set the end time and set the status to 'Error'
  168. $sql = "SELECT * FROM {tripal_jobs} TJ " .
  169. "WHERE TJ.end_time IS NULL and NOT TJ.start_time IS NULL ";
  170. $jobs = db_query($sql);
  171. foreach ($jobs as $job) {
  172. $status = shell_exec('ps -p ' . escapeshellarg($job->pid) . ' -o pid=');
  173. if ($job->pid && $status) {
  174. // the job is still running so let it go
  175. // we return 1 to indicate that a job is running
  176. return TRUE;
  177. }
  178. else {
  179. // the job is not running so terminate it
  180. $record = new stdClass();
  181. $record->job_id = $job->job_id;
  182. $record->end_time = REQUEST_TIME;
  183. $record->status = 'Error';
  184. $record->error_msg = 'Job has terminated unexpectedly.';
  185. drupal_write_record('tripal_jobs', $record, 'job_id');
  186. }
  187. }
  188. // return 1 to indicate that no jobs are currently running.
  189. return FALSE;
  190. }
  191. /**
  192. * Returns the start time for a given job
  193. *
  194. * @param $job
  195. * An object describing the job
  196. *
  197. * @return
  198. * The start time of the job if it was already run and either "Cancelled" or "Not Yet Started" otherwise
  199. *
  200. * @ingroup tripal_jobs_api
  201. */
  202. function tripal_get_job_start($job) {
  203. if ($job->start_time > 0) {
  204. $start = format_date($job->start_time);
  205. }
  206. else {
  207. if (strcmp($job->job_status, 'Cancelled')==0) {
  208. $start = 'Cancelled';
  209. }
  210. else {
  211. $start = 'Not Yet Started';
  212. }
  213. }
  214. return $start;
  215. }
  216. /**
  217. * Returns the end time for a given job
  218. *
  219. * @param $job
  220. * An object describing the job
  221. *
  222. * @return
  223. * The end time of the job if it was already run and empty otherwise
  224. *
  225. * @ingroup tripal_jobs_api
  226. */
  227. function tripal_get_job_end($job) {
  228. if ($job->end_time > 0) {
  229. $end = format_date($job->end_time);
  230. }
  231. else {
  232. $end = '';
  233. }
  234. return $end;
  235. }
  236. /**
  237. * Set a job to be re-ran (ie: add it back into the job queue)
  238. *
  239. * @param $job_id
  240. * The job_id of the job to be re-ran
  241. * @param $goto_jobs_page
  242. * If set to TRUE then after the re run job is added Drupal will redirect to the jobs page
  243. *
  244. * @ingroup tripal_jobs_api
  245. */
  246. function tripal_rerun_job($job_id, $goto_jobs_page = TRUE) {
  247. global $user;
  248. $user_id = $user->uid;
  249. $sql = "SELECT * FROM {tripal_jobs} WHERE job_id = :job_id";
  250. $results = db_query($sql, array(':job_id' => $job_id));
  251. $job = $results->fetchObject();
  252. // arguments for jobs used to be stored as plain string with a double colon
  253. // separating them. But as of Tripal v2.0 the arguments are stored as
  254. // a serialized array. To be backwards compatible, we should check for serialization
  255. // and if not then we will use the old style
  256. $args = unserialize($job->arguments);
  257. if (!$args) {
  258. $args = explode("::", $job->arguments);
  259. }
  260. $job_id = tripal_add_job($job->job_name, $job->modulename, $job->callback, $args, $user_id, $job->priority);
  261. if ($goto_jobs_page) {
  262. drupal_goto("admin/tripal/tripal_jobs");
  263. }
  264. return $job_id;
  265. }
  266. /**
  267. * Cancel a Tripal Job currently waiting in the job queue
  268. *
  269. * @param $job_id
  270. * The job_id of the job to be cancelled
  271. *
  272. * @return
  273. * FALSE if the an error occured or the job could not be canceled, TRUE
  274. * otherwise.
  275. *
  276. * @ingroup tripal_jobs_api
  277. */
  278. function tripal_cancel_job($job_id, $redirect = TRUE) {
  279. if (!$job_id or !is_numeric($job_id)) {
  280. watchdog('tripal', "Must provide a numeric \$job_id to the tripal_cancel_job() function.");
  281. return FALSE;
  282. }
  283. $sql = "SELECT * FROM {tripal_jobs} WHERE job_id = :job_id";
  284. $results = db_query($sql, array(':job_id' => $job_id));
  285. $job = $results->fetchObject();
  286. // set the end time for this job
  287. if ($job->start_time == 0) {
  288. $record = new stdClass();
  289. $record->job_id = $job->job_id;
  290. $record->end_time = REQUEST_TIME;
  291. $record->status = 'Cancelled';
  292. $record->progress = '0';
  293. drupal_write_record('tripal_jobs', $record, 'job_id');
  294. drupal_set_message(t("Job #%job_id cancelled", array('%job_id' => $job_id)));
  295. }
  296. else {
  297. drupal_set_message(t("Job %job_id cannot be cancelled. It is in progress or has finished.", array('%job_id' => $job_id)));
  298. }
  299. if ($redirect) {
  300. drupal_goto("admin/tripal/tripal_jobs");
  301. }
  302. }
  303. /**
  304. * Execute a specific Tripal Job.
  305. *
  306. * @param $job_id
  307. * The job id to be exeuted
  308. * @param bool $redirect [optional]
  309. * Whether to redirect to the job page or not
  310. */
  311. function tripal_execute_job($job_id, $redirect = TRUE) {
  312. $sql = "SELECT * FROM {tripal_jobs} WHERE job_id = :job_id";
  313. $results = db_query($sql, array(':job_id' => $job_id));
  314. $job = $results->fetchObject();
  315. // set the end time for this job
  316. if ($job->start_time == 0 and $job->end_time == 0) {
  317. tripal_launch_job(1, $job_id);
  318. drupal_set_message(t("Job %job_id has finished executing. See below for more information.", array('%job_id' => $job_id)));
  319. }
  320. else {
  321. drupal_set_message(t("Job %job_id cannot be executed. It has already finished.", array('%job_id' => $job_id)));
  322. }
  323. if ($redirect) {
  324. drupal_goto("admin/tripal/tripal_jobs/view/$job_id");
  325. }
  326. }
  327. /**
  328. * A function used to manually launch all queued tripal jobs
  329. *
  330. * @param $do_parallel
  331. * A boolean indicating whether jobs should be attempted to run in parallel
  332. *
  333. * @param $job_id
  334. * To launch a specific job provide the job id. This option should be
  335. * used sparingly as the jobs queue managment system should launch jobs
  336. * based on order and priority. However there are times when a specific
  337. * job needs to be launched and this argument will allow it. Only jobs
  338. * which have not been run previously will run.
  339. *
  340. * @ingroup tripal_jobs_api
  341. */
  342. function tripal_launch_job($do_parallel = 0, $job_id = NULL) {
  343. // first check if any jobs are currently running
  344. // if they are, don't continue, we don't want to have
  345. // more than one job script running at a time
  346. if (!$do_parallel and tripal_is_job_running()) {
  347. print "Jobs are still running. Use the --parallel=1 option with the Drush command to run jobs in parallel.";
  348. return;
  349. }
  350. // get all jobs that have not started and order them such that
  351. // they are processed in a FIFO manner.
  352. if ($job_id) {
  353. $sql = "SELECT * FROM {tripal_jobs} TJ " .
  354. "WHERE TJ.start_time IS NULL and TJ.end_time IS NULL and TJ.job_id = :job_id " .
  355. "ORDER BY priority ASC,job_id ASC";
  356. $job_res = db_query($sql, array(':job_id' => $job_id));
  357. }
  358. else {
  359. $sql = "SELECT * FROM {tripal_jobs} TJ " .
  360. "WHERE TJ.start_time IS NULL and TJ.end_time IS NULL " .
  361. "ORDER BY priority ASC,job_id ASC";
  362. $job_res = db_query($sql);
  363. }
  364. foreach ($job_res as $job) {
  365. // set the start time for this job
  366. $record = new stdClass();
  367. $record->job_id = $job->job_id;
  368. $record->start_time = REQUEST_TIME;
  369. $record->status = 'Running';
  370. $record->pid = getmypid();
  371. drupal_write_record('tripal_jobs', $record, 'job_id');
  372. // call the function provided in the callback column.
  373. // Add the job_id as the last item in the list of arguments. All
  374. // callback functions should support this argument.
  375. $callback = $job->callback;
  376. // arguments for jobs used to be stored as plain string with a double colon
  377. // separating them. But as of Tripal v2.0 the arguments are stored as
  378. // a serialized array. To be backwards compatible, we should check for serialization
  379. // and if not then we will use the old style
  380. $args = unserialize($job->arguments);
  381. if (!$args) {
  382. $args = explode("::", $job->arguments);
  383. }
  384. $args[] = $job->job_id;
  385. // We need to do some additional processing for printing since the switch
  386. // to serialized arrays now allows nested arrays which cause errors when
  387. // printed using implode alone.
  388. $string_args = array();
  389. foreach ($args as $k => $a) {
  390. if (is_array($a)) {
  391. $string_args[$k] = 'Array';
  392. }
  393. elseif (is_object($a)) {
  394. $string_args[$k] = 'Object';
  395. }
  396. else {
  397. $string_args[$k] = $a;
  398. }
  399. }
  400. print "Calling: $callback(" . implode(", ", $string_args) . ")\n";
  401. call_user_func_array($callback, $args);
  402. // set the end time for this job
  403. $record->end_time = REQUEST_TIME;
  404. $record->status = 'Completed';
  405. $record->progress = '100';
  406. drupal_write_record('tripal_jobs', $record, 'job_id');
  407. // send an email to the user advising that the job has finished
  408. }
  409. }
  410. /**
  411. * An internal function for setting the progress for a current job
  412. *
  413. * @param $job_id
  414. * The job_id to set the progress for
  415. * @param $percentage
  416. * The progress to set the job to
  417. *
  418. * @return
  419. * True on success and False otherwise
  420. *
  421. * @ingroup tripal_core
  422. */
  423. function tripal_set_job_progress($job_id, $percentage) {
  424. if (preg_match("/^(\d+|100)$/", $percentage)) {
  425. $record = new stdClass();
  426. $record->job_id = $job_id;
  427. $record->progress = $percentage;
  428. if (drupal_write_record('tripal_jobs', $record, 'job_id')) {
  429. return TRUE;
  430. }
  431. }
  432. return FALSE;
  433. }
  434. /**
  435. * Returns a list of jobs that are active.
  436. *
  437. * @param $modulename
  438. * Limit the list returned to those that were added by a specific module. If
  439. * no module name is provided then all active jobs are returned.
  440. *
  441. * @return
  442. * An array of objects where each object describes a tripal job. If no
  443. * jobs were found then an empty array is returned. Each object will have
  444. * the following members:
  445. * - job_id: The unique ID number for the job.
  446. * - uid: The ID of the user that submitted the job.
  447. * - job_name: The human-readable name of the job.
  448. * - modulename: The name of the module that submitted the job.
  449. * - callback: The callback function to be called when the job is run.
  450. * - arguments: An array of arguments to be passed to the callback function.
  451. * - progress: The percent progress of completion if the job is running.
  452. * - status: The status of the job: Waiting, Completed, Running or Cancelled.
  453. * - submit_date: The UNIX timestamp when the job was submitted.
  454. * - start_time: The UNIX timestamp for when the job started running.
  455. * - end_time: The UNIX timestampe when the job completed running.
  456. * - error_msg: Any error message that occured during execution of the job.
  457. * - prirotiy: The execution priority of the job (value between 1 and 10)
  458. *
  459. * @ingroup tripal_jobs_api
  460. */
  461. function tripal_get_active_jobs($modulename = NULL) {
  462. $query = db_select('tripal_jobs', 'TJ')
  463. ->fields('TJ', array('job_id', 'uid', 'job_name', 'modulename', 'callback',
  464. 'arguments', 'progress', 'status', 'submit_date', 'start_time',
  465. 'end_time', 'error_msg', 'priority'));
  466. if ($modulename) {
  467. $query->where(
  468. "TJ.modulename = :modulename and NOT (TJ.status = 'Completed' or TJ.status = 'Cancelled')",
  469. array(':modulename' => $modulename)
  470. );
  471. }
  472. $results = $query->execute();
  473. $jobs = array();
  474. while($job = $results->fetchobject()) {
  475. $jobs->arguments = unserialize($job->arguments);
  476. $jobs[] = $job;
  477. }
  478. return $jobs;
  479. }
  480. /**
  481. * Returns the date the job was added to the queue
  482. *
  483. * @param $job
  484. * An object describing the job
  485. *
  486. * @return
  487. * The date teh job was submitted
  488. *
  489. * @ingroup tripal_jobs_api
  490. */
  491. function tripal_get_job_submit_date($job) {
  492. return format_date($job->submit_date);
  493. }