tripal_core.jobs.api.inc 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374
  1. <?php
  2. /**
  3. * @defgroup tripal_jobs_api Jobs API
  4. * @ingroup tripal_core_api
  5. * @{
  6. * Tripal offers a job management subsystem for managing tasks that may require an extended period of time for
  7. * completion. Drupal uses a UNIX-based cron job to handle tasks such as checking the availability of updates,
  8. * indexing new nodes for searching, etc. Drupal's cron uses the web interface for launching these tasks, however,
  9. * Tripal provides several administrative tasks that may time out and not complete due to limitations of the web
  10. * server. Examples include syncing of a large number of features between chado and Drupal. To circumvent this,
  11. * as well as provide more fine-grained control and monitoring, Tripal uses a jobs management sub-system built into
  12. * the Tripal Core module. It is anticipated that this functionality will be used for managing analysis jobs provided by
  13. * future tools, with eventual support for distributed computing.
  14. *
  15. * The Tripal jobs management system allows administrators to submit tasks to be performed which can then be
  16. * launched through a UNIX command-line PHP script or cron job. This command-line script can be added to a cron
  17. * entry along-side the Drupal cron entry for automatic, regular launching of Tripal jobs. The order of execution of
  18. * waiting jobs is determined first by priority and second by the order the jobs were entered.
  19. *
  20. * The API functions described below provide a programmatic interface for adding, checking and viewing jobs.
  21. * @}
  22. */
  /**
   * Adds a job to the Tripal Job queue.
   *
   * @param $job_name
   *   The human readable name for the job.
   * @param $modulename
   *   The name of the module adding the job.
   * @param $callback
   *   The name of a function to be called when the job is executed.
   * @param $arguments
   *   An array of arguments to be passed on to the callback. Any value that is
   *   not an array is treated as "no arguments" and stored as an empty array.
   * @param $uid
   *   The uid of the user adding the job.
   * @param $priority
   *   The priority at which to run the job. tripal_jobs_launch() selects waiting
   *   jobs ordered by ascending priority, so the lower the number the sooner the
   *   job runs. The default priority is 10.
   *
   * @return
   *   The job_id of the registered job, or FALSE if the job could not be saved.
   *
   * Example usage:
   * @code
   *   $args = array($dfile, $organism_id, $type, $library_id, $re_name, $re_uname,
   *     $re_accession, $db_id, $rel_type, $re_subject, $parent_type, $method,
   *     $user->uid, $analysis_id, $match_type);
   *
   *   tripal_add_job("Import FASTA file: $dfile", 'tripal_feature',
   *     'tripal_feature_load_fasta', $args, $user->uid);
   * @endcode
   * The code above is copied from the tripal_feature/fasta_loader.php file. The
   * snippet first builds an array of arguments that will then be passed to the
   * tripal_add_job function. The number of arguments provided in the $arguments
   * variable should match the argument set for the callback function provided
   * as the third argument.
   *
   * @ingroup tripal_jobs_api
   */
  function tripal_add_job($job_name, $modulename, $callback, $arguments, $uid, $priority = 10) {
    // The arguments are stored as a serialized array. Always serialize, so the
    // record never carries a raw PHP array when the caller passed a non-array.
    $args = serialize(is_array($arguments) ? $arguments : array());

    $record = new stdClass();
    $record->job_name = $job_name;
    $record->modulename = $modulename;
    $record->callback = $callback;
    $record->status = 'Waiting';
    $record->submit_date = REQUEST_TIME;
    $record->uid = $uid;
    // The lower the number the higher the priority.
    $record->priority = $priority;
    $record->arguments = $args;

    if (!drupal_write_record('tripal_jobs', $record)) {
      drupal_set_message(t("Failed to add job %job_name.", array('%job_name' => $job_name)), 'error');
      // No job_id exists for a record that was never written.
      return FALSE;
    }

    $jobs_url = url("admin/tripal/tripal_jobs");
    drupal_set_message(t("Job '%job_name' submitted. Check the <a href='!jobs_url'>jobs page</a> for status", array('%job_name' => $job_name, '!jobs_url' => $jobs_url)));

    // The job_id is read back from the record after the insert.
    return $record->job_id;
  }
  /**
   * Checks whether a started Tripal job is still running.
   *
   * Every job that has a start time but no end time is examined in turn. If the
   * process recorded for the job is still alive the function returns
   * immediately. Otherwise the job is closed out: its end time is set and its
   * status becomes 'Error'.
   *
   * @return
   *   TRUE as soon as a job with a live process is found, FALSE if no started
   *   job is still running.
   *
   * @ingroup tripal_jobs_api
   */
  function tripal_jobs_check_running() {
    $sql = "SELECT * FROM {tripal_jobs} TJ " .
           "WHERE TJ.end_time IS NULL and NOT TJ.start_time IS NULL ";
    $jobs = db_query($sql);

    foreach ($jobs as $job) {
      // Only ask the operating system about a real process id. The integer
      // cast keeps anything unexpected in the pid column out of the shell
      // command, and an empty pid no longer runs a malformed 'ps' call.
      $pid = (int) $job->pid;
      $status = '';
      if ($pid > 0) {
        $status = trim((string) shell_exec('ps -p ' . $pid . ' -o pid='));
      }

      if ($status !== '') {
        // The job is still running so let it go. Return TRUE to indicate
        // that a job is running.
        return TRUE;
      }

      // The job has no live process but was never given an end time, so
      // record that it terminated abnormally.
      $record = new stdClass();
      $record->job_id = $job->job_id;
      $record->end_time = REQUEST_TIME;
      $record->status = 'Error';
      $record->error_msg = 'Job has terminated unexpectedly.';
      drupal_write_record('tripal_jobs', $record, 'job_id');
    }

    // Return FALSE to indicate that no jobs are currently running.
    return FALSE;
  }
  /**
   * Returns the start time for a given job.
   *
   * @param $job
   *   An object describing the job. The job status is read from the 'job_status'
   *   property when present and from the 'status' property otherwise.
   *
   * @return
   *   The formatted start time of the job if it has started. Otherwise
   *   "Cancelled" for a cancelled job and "Not Yet Started" for any other job.
   *
   * @ingroup tripal_jobs_api
   */
  function tripal_jobs_get_start_time($job) {
    if ($job->start_time > 0) {
      return format_date($job->start_time);
    }

    // Records written by this file store the state in 'status', so a row
    // loaded with SELECT * has no 'job_status' property. Accept both names;
    // presumably some callers alias the column as 'job_status' (confirm
    // against the callers).
    $status = '';
    if (isset($job->job_status)) {
      $status = $job->job_status;
    }
    elseif (isset($job->status)) {
      $status = $job->status;
    }

    if (strcmp($status, 'Cancelled') == 0) {
      return 'Cancelled';
    }
    return 'Not Yet Started';
  }
  /**
   * Returns the end time for a given job.
   *
   * @param $job
   *   An object describing the job.
   *
   * @return
   *   The formatted end time when the job's end_time is greater than zero, and
   *   an empty string otherwise.
   *
   * @ingroup tripal_jobs_api
   */
  function tripal_jobs_get_end_time($job) {
    // A job without a positive end time has no end time to display.
    return ($job->end_time > 0) ? format_date($job->end_time) : '';
  }
  /**
   * Sets a job to be re-run (i.e. adds a copy of it back into the job queue).
   *
   * The new job is submitted under the uid of the current global $user and
   * keeps the name, module, callback, arguments and priority of the original.
   *
   * @param $job_id
   *   The job_id of the job to be re-run.
   * @param $goto_jobs_page
   *   If set to TRUE then after the re-run job is added Drupal will redirect to
   *   the jobs page.
   *
   * @return
   *   The job_id of the newly added job, or FALSE if no job with the given
   *   job_id exists. NOTE(review): when $goto_jobs_page is TRUE the redirect is
   *   issued first; presumably drupal_goto() ends the request so the value is
   *   never returned in that case - confirm.
   *
   * @ingroup tripal_jobs_api
   */
  function tripal_jobs_rerun($job_id, $goto_jobs_page = TRUE) {
    global $user;

    $sql = "SELECT * FROM {tripal_jobs} WHERE job_id = :job_id";
    $job = db_query($sql, array(':job_id' => $job_id))->fetchObject();

    // Without this guard a missing job would be re-queued as an empty job.
    if (!$job) {
      drupal_set_message(t("Job %job_id cannot be re-run because it does not exist.", array('%job_id' => $job_id)), 'error');
      if ($goto_jobs_page) {
        drupal_goto("admin/tripal/tripal_jobs");
      }
      return FALSE;
    }

    // Arguments for jobs used to be stored as a plain string with a double
    // colon separating them, but as of Tripal v2.0 they are stored as a
    // serialized array. Anything that does not unserialize to an array is
    // treated as the old style. Testing with is_array() (rather than for a
    // falsy value) keeps a serialized empty array from being mistaken for the
    // old format. The error is suppressed because a failed unserialize() is the
    // expected outcome for old-style strings.
    $args = @unserialize($job->arguments);
    if (!is_array($args)) {
      $args = explode("::", $job->arguments);
    }

    $new_job_id = tripal_add_job($job->job_name, $job->modulename, $job->callback, $args, $user->uid, $job->priority);

    if ($goto_jobs_page) {
      drupal_goto("admin/tripal/tripal_jobs");
    }
    return $new_job_id;
  }
  /**
   * Cancels a Tripal Job currently waiting in the job queue.
   *
   * Only a job that has not started (its start_time is empty) is cancelled. A
   * cancelled job receives an end time, the status 'Cancelled' and a progress
   * of 0.
   *
   * @param $job_id
   *   The job_id of the job to be cancelled.
   * @param $redirect
   *   If set to TRUE then Drupal will redirect to the jobs page afterwards.
   *
   * @ingroup tripal_jobs_api
   */
  function tripal_jobs_cancel($job_id, $redirect = TRUE) {
    $sql = "SELECT * FROM {tripal_jobs} WHERE job_id = :job_id";
    $job = db_query($sql, array(':job_id' => $job_id))->fetchObject();

    if (!$job) {
      // Without this guard a missing job passes the "not started" test below
      // and a record with an empty job_id is written.
      drupal_set_message(t("Job %job_id cannot be cancelled because it does not exist.", array('%job_id' => $job_id)), 'error');
    }
    elseif ($job->start_time == 0) {
      // The job has not started: set the end time and flag it as cancelled.
      $record = new stdClass();
      $record->job_id = $job->job_id;
      $record->end_time = REQUEST_TIME;
      $record->status = 'Cancelled';
      $record->progress = '0';
      drupal_write_record('tripal_jobs', $record, 'job_id');
      drupal_set_message(t("Job #%job_id cancelled", array('%job_id' => $job_id)));
    }
    else {
      drupal_set_message(t("Job %job_id cannot be cancelled. It is in progress or has finished.", array('%job_id' => $job_id)));
    }

    if ($redirect) {
      drupal_goto("admin/tripal/tripal_jobs");
    }
  }
  /**
   * A function used to manually launch all queued tripal jobs.
   *
   * Waiting jobs (those with neither a start nor an end time) are run one after
   * another in this process, ordered by ascending priority and then by
   * ascending job_id. Each job is flagged 'Running' before its callback is
   * called and 'Completed' afterwards. A job whose callback cannot be called is
   * flagged 'Error' and the next job is tried.
   *
   * @param $do_parallel
   *   A boolean indicating whether jobs should be attempted to run in parallel.
   *   When it is not set, nothing is launched while another job is still
   *   running.
   *
   * @param $job_id
   *   To launch a specific job provide the job id. This option should be
   *   used sparingly as the jobs queue management system should launch jobs
   *   based on order and priority. However there are times when a specific
   *   job needs to be launched and this argument will allow it. Only jobs
   *   which have not been run previously will run.
   *
   * @ingroup tripal_jobs_api
   */
  function tripal_jobs_launch($do_parallel = 0, $job_id = NULL) {
    // First check if any jobs are currently running. If they are, don't
    // continue: we don't want more than one job script running at a time.
    if (!$do_parallel and tripal_jobs_check_running()) {
      print "Jobs are still running. Use the --parallel=1 option with the Drush command to run jobs in parallel.";
      return;
    }

    // Get all jobs that have not started and order them such that they are
    // processed by priority first and in a FIFO manner second.
    if ($job_id) {
      $sql = "SELECT * FROM {tripal_jobs} TJ " .
             "WHERE TJ.start_time IS NULL and TJ.end_time IS NULL and TJ.job_id = :job_id " .
             "ORDER BY priority ASC,job_id ASC";
      // The placeholder must be the array key; a plain two-element list does
      // not bind :job_id.
      $job_res = db_query($sql, array(':job_id' => $job_id));
    }
    else {
      $sql = "SELECT * FROM {tripal_jobs} TJ " .
             "WHERE TJ.start_time IS NULL and TJ.end_time IS NULL " .
             "ORDER BY priority ASC,job_id ASC";
      $job_res = db_query($sql);
    }

    foreach ($job_res as $job) {
      // Set the start time for this job. The current clock is used instead of
      // REQUEST_TIME because that constant never changes while this process
      // works through the queue, which would give every job the same start
      // time and an end time equal to its start time.
      $record = new stdClass();
      $record->job_id = $job->job_id;
      $record->start_time = time();
      $record->status = 'Running';
      $record->pid = getmypid();
      drupal_write_record('tripal_jobs', $record, 'job_id');

      $callback = $job->callback;

      // Arguments for jobs used to be stored as a plain string with a double
      // colon separating them, but as of Tripal v2.0 they are stored as a
      // serialized array. Anything that does not unserialize to an array is
      // treated as the old style. Testing with is_array() keeps a serialized
      // empty array from being mistaken for the old format. The error is
      // suppressed because a failed unserialize() is the expected outcome for
      // old-style strings.
      $args = @unserialize($job->arguments);
      if (!is_array($args)) {
        $args = explode("::", $job->arguments);
      }

      // Add the job_id as the last item in the list of arguments. All
      // callback functions should support this argument.
      $args[] = $job->job_id;

      // A callback that cannot be called must not be reported as completed.
      if (!is_callable($callback)) {
        $record->end_time = time();
        $record->status = 'Error';
        $record->error_msg = 'Job callback "' . $callback . '" cannot be called.';
        drupal_write_record('tripal_jobs', $record, 'job_id');
        continue;
      }

      // Call the function provided in the callback column.
      print "Calling: $callback(" . implode(", ", $args) . ")\n";
      call_user_func_array($callback, $args);

      // Set the end time for this job.
      $record->end_time = time();
      $record->status = 'Completed';
      $record->progress = '100';
      drupal_write_record('tripal_jobs', $record, 'job_id');
    }
  }
  /**
   * An internal function for setting the progress for a current job.
   *
   * @param $job_id
   *   The job_id to set the progress for.
   * @param $percentage
   *   The progress to set the job to: a whole number from 0 to 100.
   *
   * @return
   *   TRUE on success. FALSE if $percentage is not a whole number from 0 to 100
   *   or if the record could not be written.
   *
   * @ingroup tripal_core
   */
  function tripal_job_set_progress($job_id, $percentage) {
    // One or two digits, or exactly 100. The previous pattern matched any
    // run of digits, so values above 100 were accepted.
    if (!preg_match("/^(\d{1,2}|100)$/", $percentage)) {
      return FALSE;
    }

    $record = new stdClass();
    $record->job_id = $job_id;
    $record->progress = $percentage;
    if (drupal_write_record('tripal_jobs', $record, 'job_id')) {
      return TRUE;
    }
    return FALSE;
  }
  /**
   * Looks up an active job (one without an end time) for the given module.
   *
   * @param $modulename
   *   The module to look up an active job for.
   *
   * @return
   *   The first row of the query result as an object describing a tripal job.
   *   Only a single row is fetched, even when several jobs match. When no job
   *   matches, the return value is whatever fetchObject() yields for an empty
   *   result (presumably FALSE - confirm).
   *
   * @ingroup tripal_jobs_api
   */
  function tripal_get_module_active_jobs($modulename) {
    $query = "SELECT * FROM {tripal_jobs} TJ WHERE TJ.end_time IS NULL and TJ.modulename = :modulename ";
    $placeholders = array(':modulename' => $modulename);
    return db_query($query, $placeholders)->fetchObject();
  }
  /**
   * Returns the date the job was added to the queue.
   *
   * @param $job
   *   An object describing the job.
   *
   * @return
   *   The date the job was submitted, formatted with format_date().
   *
   * @ingroup tripal_jobs_api
   */
  function tripal_jobs_get_submit_date($job) {
    $submitted = $job->submit_date;
    return format_date($submitted);
  }