tripal.jobs.api.inc 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451
  1. <?php
  2. /**
  3. * @file
  4. * Tripal offers a job management subsystem for managing tasks that may require an extended period of time for
  5. * completion.
  6. */
/**
 * @defgroup tripal_jobs_api Tripal Jobs API
 * @ingroup tripal_api
 * @{
 * Tripal offers a job management subsystem for managing tasks that may require
 * an extended period of time for completion. Drupal uses a UNIX-based cron
 * job to handle tasks such as checking the availability of updates,
 * indexing new nodes for searching, etc. Drupal's cron uses the web interface
 * for launching these tasks. However, Tripal provides several administrative
 * tasks that may time out and not complete due to limitations of the web
 * server. To circumvent this, as well as provide more fine-grained control and
 * monitoring, Tripal uses a jobs management subsystem. It is anticipated
 * that this functionality will be used for managing analysis jobs provided by
 * future tools, with eventual support for distributed computing.
 *
 * The Tripal jobs management system allows administrators to submit tasks
 * to be performed, which can then be launched through a UNIX command-line PHP
 * script or cron job. This command-line script can be added to a cron
 * entry alongside the Drupal cron entry for automatic, regular launching of
 * Tripal jobs. The order of execution of waiting jobs is determined first by
 * priority and second by the order in which the jobs were entered.
 *
 * The API functions described below provide a programmatic interface for
 * adding, checking and viewing jobs.
 * @}
 */
  33. /**
  34. * Adds a job to the Tripal Jbo queue
  35. *
  36. * @param $job_name
  37. * The human readable name for the job
  38. * @param $modulename
  39. * The name of the module adding the job
  40. * @param $callback
  41. * The name of a function to be called when the job is executed
  42. * @param $arguments
  43. * An array of arguments to be passed on to the callback
  44. * @param $uid
  45. * The uid of the user adding the job
  46. * @param $priority
  47. * The priority at which to run the job where the highest priority is 10 and the lowest priority
  48. * is 1. The default priority is 10.
  49. * @param $includes
  50. * An array of paths to files that should be included in order to execute
  51. * the job. Use the module_load_include function to get a path for a given
  52. * file.
  53. * @return
  54. * The job_id of the registered job
  55. *
  56. * Example usage:
  57. *
  58. * @code
  59. * $args = array($dfile, $organism_id, $type, $library_id, $re_name, $re_uname,
  60. * $re_accession, $db_id, $rel_type, $re_subject, $parent_type, $method,
  61. * $user->uid, $analysis_id, $match_type);
  62. *
  63. * $includes = array()
  64. * $includes[] = module_load_include('inc', 'tripal_chado', 'includes/loaders/tripal_chado.fasta_loader');
  65. *
  66. * tripal_add_job("Import FASTA file: $dfile", 'tripal_feature',
  67. * 'tripal_feature_load_fasta', $args, $user->uid, 10, $includes);
  68. * @endcode
  69. *
  70. * The code above is copied from the tripal_feature/fasta_loader.php file. The
  71. * snipped first builds an array of arguments that will then be passed to the
  72. * tripal_add_job function. The number of arguments provided in the $arguments
  73. * variable should match the argument set for the callback function provided
  74. * as the third argument.
  75. *
  76. * @ingroup tripal_jobs_api
  77. */
  78. function tripal_add_job($job_name, $modulename, $callback, $arguments, $uid,
  79. $priority = 10, $includes = array()) {
  80. global $user;
  81. // convert the arguments into a string for storage in the database
  82. $args = array();
  83. if (is_array($arguments)) {
  84. $args = serialize($arguments);
  85. }
  86. $job_id = db_insert('tripal_jobs')
  87. ->fields(array(
  88. 'job_name' => $job_name,
  89. 'modulename' => $modulename,
  90. 'callback' => $callback,
  91. 'status' => 'Waiting',
  92. 'submit_date' => REQUEST_TIME,
  93. 'uid' => $uid,
  94. # The lower the number the higher the priority.
  95. 'priority' => $priority,
  96. 'arguments' => $args,
  97. 'includes' => serialize($includes),
  98. ))
  99. ->execute();
  100. if ($job_id) {
  101. drupal_set_message(t("Job '%job_name' submitted.", array('%job_name' => $job_name)));
  102. if (user_access('administer tripal')) {
  103. $jobs_url = url("admin/tripal/tripal_jobs");
  104. drupal_set_message(t("Check the <a href='!jobs_url'>jobs page</a> for status.",
  105. array('!jobs_url' => $jobs_url)));
  106. drupal_set_message(t("You can execute the job queue manually on the command line " .
  107. "using the following Drush command: <br>drush trp-run-jobs --username=%uname --root=%base_path",
  108. array('%base_path' => DRUPAL_ROOT, '%uname' => $user->name)));
  109. }
  110. }
  111. else {
  112. drupal_set_message(t("Failed to add job %job_name.", array('%job_name' => $job_name)), 'error');
  113. }
  114. return $job_id;
  115. }
  116. /**
  117. * Retrieve information regarding a tripal job
  118. *
  119. * @param $job_id
  120. * The unique identifier of the job
  121. *
  122. * @return
  123. * An object describing the job
  124. *
  125. * @ingroup tripal_jobs_api
  126. */
  127. function tripal_get_job($job_id) {
  128. $job = db_query('SELECT j.* FROM {tripal_jobs} j WHERE j.job_id=:job_id', array(':job_id' => $job_id))
  129. ->fetchObject();
  130. return $job;
  131. }
  132. /**
  133. * Returns a list of running tripal jobs
  134. *
  135. * @return
  136. * and array of objects where each object describes a running job or FALSE if no jobs are running
  137. *
  138. * @ingroup tripal_jobs_api
  139. */
  140. function tripal_is_job_running() {
  141. // iterate through each job that has not ended
  142. // and see if it is still running. If it is not
  143. // running but does not have an end_time then
  144. // set the end time and set the status to 'Error'
  145. $sql = "SELECT * FROM {tripal_jobs} TJ " .
  146. "WHERE TJ.end_time IS NULL and NOT TJ.start_time IS NULL ";
  147. $jobs = db_query($sql);
  148. foreach ($jobs as $job) {
  149. $status = shell_exec('ps -p ' . escapeshellarg($job->pid) . ' -o pid=');
  150. if ($job->pid && $status) {
  151. // the job is still running so let it go
  152. // we return 1 to indicate that a job is running
  153. return TRUE;
  154. }
  155. else {
  156. // the job is not running so terminate it
  157. $record = new stdClass();
  158. $record->job_id = $job->job_id;
  159. $record->end_time = REQUEST_TIME;
  160. $record->status = 'Error';
  161. $record->error_msg = 'Job has terminated unexpectedly.';
  162. drupal_write_record('tripal_jobs', $record, 'job_id');
  163. }
  164. }
  165. // return 1 to indicate that no jobs are currently running.
  166. return FALSE;
  167. }
  168. /**
  169. * Returns the start time for a given job
  170. *
  171. * @param $job
  172. * An object describing the job
  173. *
  174. * @return
  175. * The start time of the job if it was already run and either "Cancelled" or "Not Yet Started" otherwise
  176. *
  177. * @ingroup tripal_jobs_api
  178. */
  179. function tripal_get_job_start($job) {
  180. if ($job->start_time > 0) {
  181. $start = format_date($job->start_time);
  182. }
  183. else {
  184. if (strcmp($job->job_status, 'Cancelled')==0) {
  185. $start = 'Cancelled';
  186. }
  187. else {
  188. $start = 'Not Yet Started';
  189. }
  190. }
  191. return $start;
  192. }
  193. /**
  194. * Returns the end time for a given job
  195. *
  196. * @param $job
  197. * An object describing the job
  198. *
  199. * @return
  200. * The end time of the job if it was already run and empty otherwise
  201. *
  202. * @ingroup tripal_jobs_api
  203. */
  204. function tripal_get_job_end($job) {
  205. if ($job->end_time > 0) {
  206. $end = format_date($job->end_time);
  207. }
  208. else {
  209. $end = '';
  210. }
  211. return $end;
  212. }
  213. /**
  214. * Set a job to be re-ran (ie: add it back into the job queue)
  215. *
  216. * @param $job_id
  217. * The job_id of the job to be re-ran
  218. * @param $goto_jobs_page
  219. * If set to TRUE then after the re run job is added Drupal will redirect to the jobs page
  220. *
  221. * @ingroup tripal_jobs_api
  222. */
  223. function tripal_rerun_job($job_id, $goto_jobs_page = TRUE) {
  224. global $user;
  225. $user_id = $user->uid;
  226. $sql = "SELECT * FROM {tripal_jobs} WHERE job_id = :job_id";
  227. $results = db_query($sql, array(':job_id' => $job_id));
  228. $job = $results->fetchObject();
  229. // arguments for jobs used to be stored as plain string with a double colon
  230. // separating them. But as of Tripal v2.0 the arguments are stored as
  231. // a serialized array. To be backwards compatible, we should check for serialization
  232. // and if not then we will use the old style
  233. $args = unserialize($job->arguments);
  234. if (!$args) {
  235. $args = explode("::", $job->arguments);
  236. }
  237. $job_id = tripal_add_job($job->job_name, $job->modulename, $job->callback, $args, $user_id, $job->priority);
  238. if ($goto_jobs_page) {
  239. drupal_goto("admin/tripal/tripal_jobs");
  240. }
  241. return $job_id;
  242. }
  243. /**
  244. * Cancel a Tripal Job currently waiting in the job queue
  245. *
  246. * @param $job_id
  247. * The job_id of the job to be cancelled
  248. *
  249. * @ingroup tripal_jobs_api
  250. */
  251. function tripal_cancel_job($job_id, $redirect = TRUE) {
  252. $sql = "SELECT * FROM {tripal_jobs} WHERE job_id = :job_id";
  253. $results = db_query($sql, array(':job_id' => $job_id));
  254. $job = $results->fetchObject();
  255. // set the end time for this job
  256. if ($job->start_time == 0) {
  257. $record = new stdClass();
  258. $record->job_id = $job->job_id;
  259. $record->end_time = REQUEST_TIME;
  260. $record->status = 'Cancelled';
  261. $record->progress = '0';
  262. drupal_write_record('tripal_jobs', $record, 'job_id');
  263. drupal_set_message(t("Job #%job_id cancelled", array('%job_id' => $job_id)));
  264. }
  265. else {
  266. drupal_set_message(t("Job %job_id cannot be cancelled. It is in progress or has finished.", array('%job_id' => $job_id)));
  267. }
  268. if ($redirect) {
  269. drupal_goto("admin/tripal/tripal_jobs");
  270. }
  271. }
  272. /**
  273. * A function used to manually launch all queued tripal jobs
  274. *
  275. * @param $do_parallel
  276. * A boolean indicating whether jobs should be attempted to run in parallel
  277. *
  278. * @param $job_id
  279. * To launch a specific job provide the job id. This option should be
  280. * used sparingly as the jobs queue managment system should launch jobs
  281. * based on order and priority. However there are times when a specific
  282. * job needs to be launched and this argument will allow it. Only jobs
  283. * which have not been run previously will run.
  284. *
  285. * @ingroup tripal_jobs_api
  286. */
  287. function tripal_launch_job($do_parallel = 0, $job_id = NULL) {
  288. // first check if any jobs are currently running
  289. // if they are, don't continue, we don't want to have
  290. // more than one job script running at a time
  291. if (!$do_parallel and tripal_is_job_running()) {
  292. print "Jobs are still running. Use the --parallel=1 option with the Drush command to run jobs in parallel.";
  293. return;
  294. }
  295. // get all jobs that have not started and order them such that
  296. // they are processed in a FIFO manner.
  297. if ($job_id) {
  298. $sql = "SELECT * FROM {tripal_jobs} TJ " .
  299. "WHERE TJ.start_time IS NULL and TJ.end_time IS NULL and TJ.job_id = :job_id " .
  300. "ORDER BY priority ASC,job_id ASC";
  301. $job_res = db_query($sql, array(':job_id' => $job_id));
  302. }
  303. else {
  304. $sql = "SELECT * FROM {tripal_jobs} TJ " .
  305. "WHERE TJ.start_time IS NULL and TJ.end_time IS NULL " .
  306. "ORDER BY priority ASC,job_id ASC";
  307. $job_res = db_query($sql);
  308. }
  309. foreach ($job_res as $job) {
  310. // Include the necessary files
  311. foreach (unserialize($job->includes) as $path) {
  312. if ($path) {
  313. require_once $path;
  314. }
  315. }
  316. // set the start time for this job
  317. $record = new stdClass();
  318. $record->job_id = $job->job_id;
  319. $record->start_time = REQUEST_TIME;
  320. $record->status = 'Running';
  321. $record->pid = getmypid();
  322. drupal_write_record('tripal_jobs', $record, 'job_id');
  323. // call the function provided in the callback column.
  324. // Add the job_id as the last item in the list of arguments. All
  325. // callback functions should support this argument.
  326. $callback = $job->callback;
  327. // arguments for jobs used to be stored as plain string with a double colon
  328. // separating them. But as of Tripal v2.0 the arguments are stored as
  329. // a serialized array. To be backwards compatible, we should check for serialization
  330. // and if not then we will use the old style
  331. $args = unserialize($job->arguments);
  332. if (!$args) {
  333. $args = explode("::", $job->arguments);
  334. }
  335. $args[] = $job->job_id;
  336. // We need to do some additional processing for printing since the switch
  337. // to serialized arrays now allows nested arrays which cause errors when
  338. // printed using implode alone.
  339. $string_args = array();
  340. foreach ($args as $k => $a) {
  341. if (is_array($a)) {
  342. $string_args[$k] = 'Array';
  343. }
  344. elseif (is_object($a)) {
  345. $string_args[$k] = 'Object';
  346. }
  347. else {
  348. $string_args[$k] = $a;
  349. }
  350. }
  351. print "Calling: $callback(" . implode(", ", $string_args) . ")\n";
  352. call_user_func_array($callback, $args);
  353. // set the end time for this job
  354. $record->end_time = REQUEST_TIME;
  355. $record->status = 'Completed';
  356. $record->progress = '100';
  357. drupal_write_record('tripal_jobs', $record, 'job_id');
  358. // send an email to the user advising that the job has finished
  359. }
  360. }
  361. /**
  362. * An internal function for setting the progress for a current job
  363. *
  364. * @param $job_id
  365. * The job_id to set the progress for
  366. * @param $percentage
  367. * The progress to set the job to
  368. *
  369. * @return
  370. * True on success and False otherwise
  371. *
  372. * @ingroup tripal
  373. */
  374. function tripal_set_job_progress($job_id, $percentage) {
  375. if (preg_match("/^(\d+|100)$/", $percentage)) {
  376. $record = new stdClass();
  377. $record->job_id = $job_id;
  378. $record->progress = $percentage;
  379. if (drupal_write_record('tripal_jobs', $record, 'job_id')) {
  380. return TRUE;
  381. }
  382. }
  383. return FALSE;
  384. }
  385. /**
  386. * Returns a list of jobs associated with the given module
  387. *
  388. * @param $modulename
  389. * The module to return a list of jobs for
  390. *
  391. * @return
  392. * An array of objects where each object describes a tripal job
  393. *
  394. * @ingroup tripal_jobs_api
  395. */
  396. function tripal_get_active_jobs($modulename) {
  397. $sql = "SELECT * FROM {tripal_jobs} TJ " .
  398. "WHERE TJ.end_time IS NULL and TJ.modulename = :modulename ";
  399. $results = db_query($sql, array(':modulename' => $modulename));
  400. return $results->fetchObject();
  401. }
  402. /**
  403. * Returns the date the job was added to the queue
  404. *
  405. * @param $job
  406. * An object describing the job
  407. *
  408. * @return
  409. * The date teh job was submitted
  410. *
  411. * @ingroup tripal_jobs_api
  412. */
  413. function tripal_get_job_submit_date($job) {
  414. return format_date($job->submit_date);
  415. }