tripal_core.jobs.api.inc 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424
  1. <?php
  2. /**
  3. * @file
  4. * Tripal offers a job management subsystem for managing tasks that may require an extended period of time for
  5. * completion.
  6. */
  7. /**
  8. * @defgroup tripal_jobs_api Tripal Jobs API
  9. * @ingroup tripal_core_api
  10. * @{
  11. * Tripal offers a job management subsystem for managing tasks that may require an extended period of time for
  12. * completion. Drupal uses a UNIX-based cron job to handle tasks such as checking the availability of updates,
  13. * indexing new nodes for searching, etc. Drupal's cron uses the web interface for launching these tasks, however,
  14. * Tripal provides several administrative tasks that may time out and not complete due to limitations of the web
  15. * server. Examples include syncing a large number of features between chado and Drupal. To circumvent this,
  16. * as well as provide more fine-grained control and monitoring, Tripal uses a jobs management sub-system built into
  17. * the Tripal Core module. It is anticipated that this functionality will be used for managing analysis jobs provided by
  18. * future tools, with eventual support for distributed computing.
  19. *
  20. * The Tripal jobs management system allows administrators to submit tasks to be performed which can then be
  21. * launched through a UNIX command-line PHP script or cron job. This command-line script can be added to a cron
  22. * entry along-side the Drupal cron entry for automatic, regular launching of Tripal jobs. The order of execution of
  23. * waiting jobs is determined first by priority and second by the order the jobs were entered.
  24. *
  25. * The API functions described below provide a programmatic interface for adding, checking and viewing jobs.
  26. * @}
  27. */
  /**
   * Adds a job to the Tripal job queue.
   *
   * @param $job_name
   *   The human readable name for the job.
   * @param $modulename
   *   The name of the module adding the job.
   * @param $callback
   *   The name of a function to be called when the job is executed.
   * @param $arguments
   *   An array of arguments to be passed on to the callback. A value that is
   *   not an array is stored as an empty argument list.
   * @param $uid
   *   The uid of the user adding the job.
   * @param $priority
   *   The priority at which to run the job. The job queue is processed in
   *   ascending priority order, so the lower the number the sooner the job is
   *   run. The default priority is 10.
   *
   * @return
   *   The job_id of the registered job, or NULL if the job could not be saved.
   *
   * Example usage:
   * @code
   *   $args = array($dfile, $organism_id, $type, $library_id, $re_name, $re_uname,
   *     $re_accession, $db_id, $rel_type, $re_subject, $parent_type, $method,
   *     $user->uid, $analysis_id, $match_type);
   *
   *   tripal_add_job("Import FASTA file: $dfile", 'tripal_feature',
   *     'tripal_feature_load_fasta', $args, $user->uid);
   * @endcode
   * The code above is copied from the tripal_feature/fasta_loader.php file. The
   * snippet first builds an array of arguments that will then be passed to the
   * tripal_add_job function. The number of arguments provided in the $arguments
   * variable should match the argument set for the callback function provided
   * as the third argument.
   *
   * @ingroup tripal_jobs_api
   */
  function tripal_add_job($job_name, $modulename, $callback, $arguments, $uid, $priority = 10) {
    global $user;

    // The arguments column always holds a serialized array. A non-array value
    // is stored as an empty list rather than as a raw PHP array.
    $args = serialize(is_array($arguments) ? $arguments : array());

    $record = new stdClass();
    $record->job_name = $job_name;
    $record->modulename = $modulename;
    $record->callback = $callback;
    $record->status = 'Waiting';
    $record->submit_date = REQUEST_TIME;
    $record->uid = $uid;
    // The lower the number the higher the priority.
    $record->priority = $priority;
    $record->arguments = $args;

    if (!drupal_write_record('tripal_jobs', $record)) {
      drupal_set_message(t("Failed to add job %job_name.", array('%job_name' => $job_name)), 'error');
      // Nothing was saved, so there is no job_id to hand back.
      return NULL;
    }

    drupal_set_message(t("Job '%job_name' submitted.", array('%job_name' => $job_name)));
    if (user_access('administer tripal')) {
      $jobs_url = url("admin/tripal/tripal_jobs");
      drupal_set_message(t("Check the <a href='!jobs_url'>jobs page</a> for status.",
        array('!jobs_url' => $jobs_url)));
      drupal_set_message(t("You can execute the job queue manually on the command line " .
        "using the following Drush command: <br>drush trp-run-jobs --username=%uname --root=%base_path",
        array('%base_path' => DRUPAL_ROOT, '%uname' => $user->name)));
    }

    return $record->job_id;
  }
  /**
   * Looks up a single Tripal job by its identifier.
   *
   * @param $job_id
   *   The unique identifier of the job.
   *
   * @return
   *   The result of fetchObject() on the query: an object holding the columns
   *   of the matching tripal_jobs row.
   *
   * @ingroup tripal_jobs_api
   */
  function tripal_get_job($job_id) {
    $sql = 'SELECT j.* FROM {tripal_jobs} j WHERE j.job_id=:job_id';
    $placeholders = array(':job_id' => $job_id);
    return db_query($sql, $placeholders)->fetchObject();
  }
  /**
   * Indicates whether any Tripal job is currently running.
   *
   * Every job that has a start time but no end time is checked against the
   * process table. The first job whose process is still alive causes an
   * immediate TRUE return. Jobs examined before that point whose process is
   * gone are closed out with an 'Error' status.
   *
   * @return
   *   TRUE if a started job still has a live process, FALSE otherwise.
   *
   * @ingroup tripal_jobs_api
   */
  function tripal_is_job_running() {
    $sql = "SELECT * FROM {tripal_jobs} TJ " .
      "WHERE TJ.end_time IS NULL and NOT TJ.start_time IS NULL ";
    $jobs = db_query($sql);

    foreach ($jobs as $job) {
      // Only ask the shell about a real process id. The value is cast to an
      // integer and escaped so nothing else can reach the command line.
      $pid = (int) $job->pid;
      if ($pid > 0) {
        $status = shell_exec('ps -p ' . escapeshellarg((string) $pid) . ' -o pid=');
        if (trim((string) $status) !== '') {
          // The job is still running, so report that and leave it alone.
          return TRUE;
        }
      }

      // The job has no live process but never recorded an end time: close it
      // out as failed.
      $record = new stdClass();
      $record->job_id = $job->job_id;
      $record->end_time = REQUEST_TIME;
      $record->status = 'Error';
      $record->error_msg = 'Job has terminated unexpectedly.';
      drupal_write_record('tripal_jobs', $record, 'job_id');
    }

    // No started job has a live process.
    return FALSE;
  }
  /**
   * Returns the start time for a given job.
   *
   * @param $job
   *   An object describing the job. The status is read from the job_status
   *   property when present and from the status property otherwise.
   *
   * @return
   *   The formatted start time of the job if it has started. Otherwise
   *   "Cancelled" for a cancelled job and "Not Yet Started" for any other job.
   *
   * @ingroup tripal_jobs_api
   */
  function tripal_get_job_start($job) {
    if ($job->start_time > 0) {
      return format_date($job->start_time);
    }

    // This function historically read the status from "job_status" (presumably
    // a query alias used by callers), while rows selected straight from
    // tripal_jobs expose the column as "status". Accept either one.
    $status = '';
    if (isset($job->job_status)) {
      $status = $job->job_status;
    }
    elseif (isset($job->status)) {
      $status = $job->status;
    }

    return strcmp($status, 'Cancelled') == 0 ? 'Cancelled' : 'Not Yet Started';
  }
  /**
   * Gives the formatted end time of a job.
   *
   * @param $job
   *   An object describing the job.
   *
   * @return
   *   The end time passed through format_date() when the job has a positive
   *   end_time, or an empty string otherwise.
   *
   * @ingroup tripal_jobs_api
   */
  function tripal_get_job_end($job) {
    return ($job->end_time > 0) ? format_date($job->end_time) : '';
  }
  /**
   * Set a job to be re-run (i.e. add a copy of it back into the job queue).
   *
   * @param $job_id
   *   The job_id of the job to be re-run.
   * @param $goto_jobs_page
   *   If set to TRUE then, after the re-run job is added, Drupal will redirect
   *   to the jobs page.
   *
   * @return
   *   The job_id of the newly queued job as returned by tripal_add_job(), or
   *   FALSE if no job with the given job_id exists.
   *
   * @ingroup tripal_jobs_api
   */
  function tripal_rerun_job($job_id, $goto_jobs_page = TRUE) {
    global $user;
    $user_id = $user->uid;

    $sql = "SELECT * FROM {tripal_jobs} WHERE job_id = :job_id";
    $job = db_query($sql, array(':job_id' => $job_id))->fetchObject();

    // Without a matching row there is nothing to copy into a new job.
    if (!$job) {
      drupal_set_message(t("Job %job_id could not be found.", array('%job_id' => $job_id)), 'error');
      if ($goto_jobs_page) {
        drupal_goto("admin/tripal/tripal_jobs");
      }
      return FALSE;
    }

    // Arguments for jobs used to be stored as a plain string with a double
    // colon separating them. As of Tripal v2.0 the arguments are stored as a
    // serialized array. Testing for an array (rather than for a truthy value)
    // keeps a valid but empty argument list from being mistaken for the old
    // format.
    $args = unserialize($job->arguments);
    if (!is_array($args)) {
      $args = explode("::", $job->arguments);
    }

    $new_job_id = tripal_add_job($job->job_name, $job->modulename, $job->callback, $args, $user_id, $job->priority);

    if ($goto_jobs_page) {
      drupal_goto("admin/tripal/tripal_jobs");
    }
    return $new_job_id;
  }
  /**
   * Cancel a Tripal job currently waiting in the job queue.
   *
   * Only a job that has not started can be cancelled. A job that is in
   * progress or has finished is left untouched and a message is shown instead.
   *
   * @param $job_id
   *   The job_id of the job to be cancelled.
   * @param $redirect
   *   If set to TRUE then Drupal will redirect to the jobs page afterwards.
   *
   * @ingroup tripal_jobs_api
   */
  function tripal_cancel_job($job_id, $redirect = TRUE) {
    $sql = "SELECT * FROM {tripal_jobs} WHERE job_id = :job_id";
    $job = db_query($sql, array(':job_id' => $job_id))->fetchObject();

    if (!$job) {
      // An unknown id must not fall through to the update below, which would
      // otherwise be issued without a usable key.
      drupal_set_message(t("Job %job_id could not be found.", array('%job_id' => $job_id)), 'error');
    }
    elseif ($job->start_time == 0) {
      // The job has not started: close it out as cancelled.
      $record = new stdClass();
      $record->job_id = $job->job_id;
      $record->end_time = REQUEST_TIME;
      $record->status = 'Cancelled';
      $record->progress = '0';
      drupal_write_record('tripal_jobs', $record, 'job_id');
      drupal_set_message(t("Job #%job_id cancelled", array('%job_id' => $job_id)));
    }
    else {
      drupal_set_message(t("Job %job_id cannot be cancelled. It is in progress or has finished.", array('%job_id' => $job_id)));
    }

    if ($redirect) {
      drupal_goto("admin/tripal/tripal_jobs");
    }
  }
  /**
   * A function used to manually launch all queued Tripal jobs.
   *
   * Waiting jobs are run one after another, ordered by ascending priority and
   * then by ascending job_id. Each job's callback receives the stored
   * arguments followed by the job_id as the last argument.
   *
   * @param $do_parallel
   *   A boolean indicating whether jobs should be attempted to run in
   *   parallel. When not set, nothing is launched while another job is still
   *   running.
   * @param $job_id
   *   To launch a specific job provide the job id. This option should be
   *   used sparingly as the jobs queue management system should launch jobs
   *   based on order and priority. However there are times when a specific
   *   job needs to be launched and this argument will allow it. Only jobs
   *   which have not been run previously will run.
   *
   * @ingroup tripal_jobs_api
   */
  function tripal_launch_job($do_parallel = 0, $job_id = NULL) {
    // First check if any jobs are currently running. If they are, don't
    // continue: we don't want more than one job script running at a time.
    if (!$do_parallel && tripal_is_job_running()) {
      print "Jobs are still running. Use the --parallel=1 option with the Drush command to run jobs in parallel.";
      return;
    }

    // Get all jobs that have not started, optionally restricted to one job.
    $sql = "SELECT * FROM {tripal_jobs} TJ " .
      "WHERE TJ.start_time IS NULL and TJ.end_time IS NULL ";
    $params = array();
    if ($job_id) {
      $sql .= "and TJ.job_id = :job_id ";
      $params[':job_id'] = $job_id;
    }
    $sql .= "ORDER BY priority ASC,job_id ASC";
    $job_res = db_query($sql, $params);

    foreach ($job_res as $job) {
      // Record the start of this job. The wall clock is used instead of
      // REQUEST_TIME because that constant is fixed for the whole script run,
      // which would give every job the same start and end time.
      $record = new stdClass();
      $record->job_id = $job->job_id;
      $record->start_time = time();
      $record->status = 'Running';
      $record->pid = getmypid();
      drupal_write_record('tripal_jobs', $record, 'job_id');

      $callback = $job->callback;

      // A callback that cannot be called must not be reported as completed.
      if (!is_callable($callback)) {
        $record->end_time = time();
        $record->status = 'Error';
        $record->error_msg = "The callback '$callback' is not callable.";
        drupal_write_record('tripal_jobs', $record, 'job_id');
        print "Skipping job " . $job->job_id . ": the callback '$callback' is not callable.\n";
        continue;
      }

      // Arguments for jobs used to be stored as a plain string with a double
      // colon separating them. As of Tripal v2.0 the arguments are stored as a
      // serialized array. Testing for an array (rather than for a truthy
      // value) keeps a valid but empty argument list from being mistaken for
      // the old format.
      $args = unserialize($job->arguments);
      if (!is_array($args)) {
        $args = explode("::", $job->arguments);
      }

      // Add the job_id as the last item in the list of arguments. All
      // callback functions should support this argument.
      $args[] = $job->job_id;

      // Serialized arguments may contain nested arrays or objects, which
      // cannot be printed with implode() alone, so show a placeholder for
      // those.
      $string_args = array();
      foreach ($args as $k => $a) {
        if (is_array($a)) {
          $string_args[$k] = 'Array';
        }
        elseif (is_object($a)) {
          $string_args[$k] = 'Object';
        }
        else {
          $string_args[$k] = $a;
        }
      }
      print "Calling: $callback(" . implode(", ", $string_args) . ")\n";
      call_user_func_array($callback, $args);

      // Record the end of this job.
      $record->end_time = time();
      $record->status = 'Completed';
      $record->progress = '100';
      drupal_write_record('tripal_jobs', $record, 'job_id');
    }
  }
  /**
   * An internal function for setting the progress for a current job.
   *
   * @param $job_id
   *   The job_id to set the progress for.
   * @param $percentage
   *   The progress to set the job to: a whole number from 0 to 100.
   *
   * @return
   *   TRUE on success. FALSE if the percentage is not a whole number from 0 to
   *   100 or if the record could not be written.
   *
   * @ingroup tripal_core
   */
  function tripal_set_job_progress($job_id, $percentage) {
    // Accept digits only (the D modifier rules out a trailing newline) and
    // nothing above 100.
    if (!preg_match('/^\d{1,3}$/D', (string) $percentage) || (int) $percentage > 100) {
      return FALSE;
    }

    $record = new stdClass();
    $record->job_id = $job_id;
    $record->progress = $percentage;

    return drupal_write_record('tripal_jobs', $record, 'job_id') ? TRUE : FALSE;
  }
  /**
   * Looks up an unfinished job belonging to the given module.
   *
   * @param $modulename
   *   The module whose jobs are searched. Only jobs without an end time are
   *   considered.
   *
   * @return
   *   The result of a single fetchObject() call on the query, i.e. one object
   *   describing a matching tripal job rather than a list of all of them.
   *
   * @ingroup tripal_jobs_api
   */
  function tripal_get_active_jobs($modulename) {
    $query = "SELECT * FROM {tripal_jobs} TJ WHERE TJ.end_time IS NULL and TJ.modulename = :modulename ";
    $placeholders = array(':modulename' => $modulename);
    return db_query($query, $placeholders)->fetchObject();
  }
  /**
   * Gives the formatted date on which a job was added to the queue.
   *
   * @param $job
   *   An object describing the job.
   *
   * @return
   *   The job's submit_date passed through format_date().
   *
   * @ingroup tripal_jobs_api
   */
  function tripal_get_job_submit_date($job) {
    $submitted = $job->submit_date;
    return format_date($submitted);
  }
  390. }