jobs.php 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451
  1. <?php
  2. /**
  3. * @file
  4. * Contains functions related to the Tripal Jobs API
  5. *
  6. * @defgroup tripal_jobs_api Core Module Jobs API
  7. * @{
  8. * Tripal offers a job management subsystem for managing tasks that may require an extended period of time for
  9. * completion. Drupal uses a UNIX-based cron job to handle tasks such as checking the availability of updates,
  10. * indexing new nodes for searching, etc. Drupal's cron uses the web interface for launching these tasks, however,
  11. * Tripal provides several administrative tasks that may time out and not complete due to limitations of the web
  12. * server. Examples include syncing a large number of features between chado and Drupal. To circumvent this,
  13. * as well as provide more fine-grained control and monitoring, Tripal uses a jobs management sub-system built into
  14. * the Tripal Core module. It is anticipated that this functionality will be used for managing analysis jobs provided by
  15. * future tools, with eventual support for distributed computing.
  16. *
  17. * The Tripal jobs management system allows administrators to submit tasks to be performed which can then be
  18. * launched through a UNIX command-line PHP script or cron job. This command-line script can be added to a cron
  19. * entry along-side the Drupal cron entry for automatic, regular launching of Tripal jobs. The order of execution of
  20. * waiting jobs is determined first by priority and second by the order the jobs were entered.
  21. *
  22. * The API functions described below provide a programmatic interface for adding, checking and viewing jobs.
  23. * @}
  24. * @ingroup tripal_api
  25. */
  /**
   * Adds a job to the Tripal Job queue
   *
   * @param $job_name
   *   The human readable name for the job
   * @param $modulename
   *   The name of the module adding the job
   * @param $callback
   *   The name of a function to be called when the job is executed
   * @param $arguments
   *   An array of arguments to be passed on to the callback. The values are
   *   stored as a single "::"-delimited string, so individual values must not
   *   themselves contain "::".
   * @param $uid
   *   The uid of the user adding the job
   * @param $priority
   *   The priority at which to run the job. Waiting jobs are launched in
   *   ascending order of this number, so the lower the number the higher the
   *   priority. The default priority is 10.
   *
   * @return
   *   The job_id of the registered job, or FALSE if the job could not be saved
   *
   * @ingroup tripal_jobs_api
   */
  function tripal_add_job($job_name, $modulename, $callback, $arguments, $uid, $priority = 10) {
    // convert the arguments into a string for storage in the database
    $args = implode("::", (array) $arguments);

    $record = new stdClass();
    $record->job_name = $job_name;
    $record->modulename = $modulename;
    $record->callback = $callback;
    $record->status = 'Waiting';
    $record->submit_date = time();
    $record->uid = $uid;
    $record->priority = $priority; # the lower the number the higher the priority
    if ($args) {
      $record->arguments = $args;
    }

    if (!drupal_write_record('tripal_jobs', $record)) {
      drupal_set_message(t("Failed to add job %job_name.", array('%job_name' => $job_name)), 'error');
      return FALSE;
    }

    // l() takes the link text first and an internal path second. The link is
    // inserted with the "!" placeholder because "%" would escape the markup.
    $jobs_link = l(t('jobs page'), 'admin/tripal/tripal_jobs');
    drupal_set_message(t("Job '%job_name' submitted. Check the !jobs_link for status", array('%job_name' => $job_name, '!jobs_link' => $jobs_link)));

    return $record->job_id;
  }
  /**
   * An internal function for setting the progress for a current job
   *
   * @param $job_id
   *   The job_id to set the progress for
   * @param $percentage
   *   The progress to set the job to. Must be a whole number from 0 to 100.
   *
   * @return
   *   True on success and False otherwise (including when $percentage is not a
   *   whole number between 0 and 100)
   *
   * @ingroup tripal_core
   */
  function tripal_job_set_progress($job_id, $percentage) {
    // only digits are accepted (no sign, no decimals) and the value is capped at
    // 100 so that an out-of-range progress is never stored
    if (!preg_match('/^\d+$/D', (string) $percentage) or (int) $percentage > 100) {
      return FALSE;
    }

    $record = new stdClass();
    $record->job_id = $job_id;
    $record->progress = $percentage;
    if (drupal_write_record('tripal_jobs', $record, 'job_id')) {
      return TRUE;
    }
    return FALSE;
  }
  /**
   * Returns a list of jobs associated with the given module
   *
   * Only jobs that have no end time recorded (i.e. waiting or running jobs)
   * are returned.
   *
   * @param $modulename
   *   The module to return a list of jobs for
   *
   * @return
   *   An array of objects where each object describes a tripal job. The array
   *   is empty if the module has no active jobs.
   *
   * @ingroup tripal_jobs_api
   */
  function tripal_get_module_active_jobs($modulename) {
    $sql = "SELECT * FROM {tripal_jobs} TJ ".
           "WHERE TJ.end_time IS NULL and TJ.modulename = '%s' ";
    $result = db_query($sql, $modulename);

    // collect every matching row, not just the first one
    $jobs = array();
    while ($job = db_fetch_object($result)) {
      $jobs[] = $job;
    }
    return $jobs;
  }
  /**
   * Returns the Tripal Job Report
   *
   * Builds a paged HTML table (10 jobs per page, newest job first) listing
   * each job together with links to cancel, re-run and view it. The cancel
   * link is only shown for jobs that have neither started nor ended.
   *
   * @return
   *   The HTML to be rendered which describes the job report
   *
   * @ingroup tripal_core
   */
  function tripal_jobs_report() {
    $jobs = pager_query(
      "SELECT TJ.job_id,TJ.uid,TJ.job_name,TJ.modulename,TJ.progress,
              TJ.status as job_status, TJ.submit_date,TJ.start_time,
              TJ.end_time,TJ.priority,U.name as username
       FROM {tripal_jobs} TJ
         INNER JOIN {users} U on TJ.uid = U.uid
       ORDER BY job_id DESC", 10, 0, "SELECT count(*) FROM {tripal_jobs}");

    // create a table with each row containing stats for
    // an individual job in the results set.
    $output = "Waiting jobs are executed first by priority level (the lower the ".
              "number the higher the priority) and second by the order they ".
              "were entered";
    $output .= "<table class=\"tripal-table tripal-table-horz\">".
               "  <tr>".
               "    <th>Job ID</th>".
               "    <th>User</th>".
               "    <th>Job Name</th>".
               "    <th nowrap>Dates</th>".
               "    <th>Priority</th>".
               "    <th>Progress</th>".
               "    <th>Status</th>".
               "    <th>Actions</th>".
               "  </tr>";

    $i = 0;
    while ($job = db_fetch_object($jobs)) {
      $class = 'tripal-table-odd-row';
      if ($i % 2 == 0 ) {
        $class = 'tripal-table-even-row';
      }
      $submit = tripal_jobs_get_submit_date($job);
      $start = tripal_jobs_get_start_time($job);
      $end = tripal_jobs_get_end_time($job);

      // only jobs that are still waiting can be cancelled
      $cancel_link = '';
      if ($job->start_time == 0 and $job->end_time == 0) {
        $cancel_link = "<a href=\"" . url("admin/tripal/tripal_jobs/cancel/" . $job->job_id) . "\">Cancel</a><br />";
      }
      $rerun_link = "<a href=\"" . url("admin/tripal/tripal_jobs/rerun/" . $job->job_id) . "\">Re-run</a><br />";
      $view_link ="<a href=\"" . url("admin/tripal/tripal_jobs/view/" . $job->job_id) . "\">View</a>";

      // free-text values come from the database and must be escaped before
      // being placed into the HTML
      $username = check_plain($job->username);
      $job_name = check_plain($job->job_name);
      $job_status = check_plain($job->job_status);

      $output .= "  <tr class=\"$class\">";
      $output .= "    <td>$job->job_id</td>".
                 "    <td>$username</td>".
                 "    <td>$job_name</td>".
                 "    <td nowrap>Submit Date: $submit".
                 "    <br />Start Time: $start".
                 "    <br />End Time: $end</td>".
                 "    <td>$job->priority</td>".
                 "    <td>$job->progress%</td>".
                 "    <td>$job_status</td>".
                 "    <td>$cancel_link $rerun_link $view_link</td>".
                 "  </tr>";
      $i++;
    }
    $output .= "</table>";

    // go through the theme layer so that the pager can be overridden by themes
    $output .= theme('pager', NULL, 10, 0);
    return $output;
  }
  /**
   * Returns a displayable start time for a given job
   *
   * @param $job
   *   An object describing the job. Its start_time and job_status properties
   *   are read.
   *
   * @return
   *   The formatted start time if the job has started. Otherwise the text
   *   "Cancelled" for a cancelled job and "Not Yet Started" for any other job.
   *
   * @ingroup tripal_jobs_api
   */
  function tripal_jobs_get_start_time($job) {
    // a job that has started always reports its formatted start time
    if ($job->start_time > 0) {
      return format_date($job->start_time);
    }

    // the job never started: either it is still queued or it was cancelled
    if (strcmp($job->job_status, 'Cancelled') != 0) {
      return 'Not Yet Started';
    }
    return 'Cancelled';
  }
  /**
   * Returns a displayable end time for a given job
   *
   * @param $job
   *   An object describing the job. Its end_time property is read.
   *
   * @return
   *   The formatted end time if one has been recorded and an empty string
   *   otherwise
   *
   * @ingroup tripal_jobs_api
   */
  function tripal_jobs_get_end_time($job) {
    return ($job->end_time > 0) ? format_date($job->end_time) : '';
  }
  /**
   * Returns the date the job was added to the queue
   *
   * @param $job
   *   An object describing the job. Its submit_date property is read.
   *
   * @return
   *   The formatted date the job was submitted
   *
   * @ingroup tripal_jobs_api
   */
  function tripal_jobs_get_submit_date($job) {
    $submitted = $job->submit_date;
    return format_date($submitted);
  }
  /**
   * A function used to manually launch all queued tripal jobs
   *
   * Every job that has neither a start nor an end time is run in turn, ordered
   * first by priority (lowest number first) and then by job_id. Each callback
   * receives the job's stored arguments followed by the job_id as the final
   * argument.
   *
   * @param $do_parallel
   *   A boolean indicating whether jobs should be attempted to run in parallel.
   *   When false, nothing is launched if another job is still running.
   *
   * @ingroup tripal_jobs_api
   */
  function tripal_jobs_launch($do_parallel = 0) {
    // first check if any jobs are currently running
    // if they are, don't continue, we don't want to have
    // more than one job script running at a time
    if (!$do_parallel and tripal_jobs_check_running()) {
      return;
    }

    // get all jobs that have not started and order them such that
    // they are processed by priority and then in a FIFO manner.
    $sql = "SELECT * FROM {tripal_jobs} TJ ".
           "WHERE TJ.start_time IS NULL and TJ.end_time IS NULL ".
           "ORDER BY priority ASC,job_id ASC";
    $job_res = db_query($sql);
    while ($job = db_fetch_object($job_res)) {
      // set the start time for this job
      $record = new stdClass();
      $record->job_id = $job->job_id;
      $record->start_time = time();
      $record->status = 'Running';
      $record->pid = getmypid();
      drupal_write_record('tripal_jobs', $record, 'job_id');

      // call the function provided in the callback column.
      // Add the job_id as the last item in the list of arguments. All
      // callback functions should support this argument.
      // explode() is used because split() no longer exists as of PHP 7.
      $callback = $job->callback;
      $args = explode("::", (string) $job->arguments);
      $args[] = $job->job_id;

      // a callback that does not exist cannot be run, so record the job as
      // failed instead of reporting it as completed
      if (!is_callable($callback)) {
        print "Cannot run job $job->job_id: callback '$callback' is not callable\n";
        $record->end_time = time();
        $record->status = 'Error';
        $record->error_msg = "Callback '$callback' is not callable.";
        drupal_write_record('tripal_jobs', $record, 'job_id');
        continue;
      }

      print "Calling: $callback(" . implode(", ", $args) . ")\n";
      call_user_func_array($callback, $args);

      // set the end time for this job
      $record->end_time = time();
      $record->status = 'Completed';
      $record->progress = '100';
      drupal_write_record('tripal_jobs', $record, 'job_id');
    }
  }
  /**
   * Checks whether any tripal job is currently running
   *
   * Each job that has a start time but no end time is checked against the
   * process table using its recorded pid. A job whose process can no longer be
   * found is given an end time and the status 'Error'. The check stops at the
   * first job found to be still running.
   *
   * @return
   *   TRUE if a job is still running and FALSE if no jobs are running
   *
   * @ingroup tripal_jobs_api
   */
  function tripal_jobs_check_running() {
    // iterate through each job that has not ended
    // and see if it is still running. If it is not
    // running but does not have an end_time then
    // set the end time and set the status to 'Error'
    $sql = "SELECT * FROM {tripal_jobs} TJ ".
           "WHERE TJ.end_time IS NULL and NOT TJ.start_time IS NULL ";
    $jobs = db_query($sql);
    while ($job = db_fetch_object($jobs)) {
      // only consult ps when there is a usable pid. The integer cast also
      // guarantees that nothing but digits reaches the shell command.
      $pid = (int) $job->pid;
      $status = '';
      if ($pid > 0) {
        $status = shell_exec('ps --pid=' . $pid . ' --no-header');
      }

      if ($status) {
        // the job is still running so let it go
        // we return TRUE to indicate that a job is running
        print "Job is still running (pid $job->pid)\n";
        return TRUE;
      }

      // the job is not running so terminate it
      $record = new stdClass();
      $record->job_id = $job->job_id;
      $record->end_time = time();
      $record->status = 'Error';
      $record->error_msg = 'Job has terminated unexpectedly.';
      drupal_write_record('tripal_jobs', $record, 'job_id');
    }

    // return FALSE to indicate that no jobs are currently running.
    return FALSE;
  }
  /**
   * Returns the HTML code to display a given job
   *
   * The rendering is delegated to the 'tripal_core_job_view' theme hook.
   *
   * @param $job_id
   *   The job_id of the job to display
   *
   * @return
   *   The HTML describing the indicated job
   *
   * @ingroup tripal_core
   */
  function tripal_jobs_view($job_id) {
    $html = theme('tripal_core_job_view', $job_id);
    return $html;
  }
  /**
   * Registers variables for the tripal_core_job_view themeing function
   *
   * Loads the job identified by $variables['job_id'] and stores it in
   * $variables['job'] with its arguments split into an array and its submit,
   * start and end times converted to display strings. If the job cannot be
   * found, $variables['job'] is set to NULL.
   *
   * @param $variables
   *   An array containing all variables supplied to this template
   *
   * @ingroup tripal_core
   */
  function tripal_core_preprocess_tripal_core_job_view(&$variables) {
    // get the job record
    $job_id = $variables['job_id'];
    $sql =
      "SELECT TJ.job_id,TJ.uid,TJ.job_name,TJ.modulename,TJ.progress,
              TJ.status as job_status, TJ.submit_date,TJ.start_time,
              TJ.end_time,TJ.priority,U.name as username,TJ.arguments,
              TJ.callback,TJ.error_msg,TJ.pid
       FROM {tripal_jobs} TJ
         INNER JOIN {users} U on TJ.uid = U.uid
       WHERE TJ.job_id = %d";
    $job = db_fetch_object(db_query($sql, $job_id));

    // nothing to describe if the job does not exist
    if (!$job) {
      $variables['job'] = NULL;
      return;
    }

    // we do not know what the arguments are for and we want to provide a
    // meaningful description to the end-user. So we use a callback function
    // defined in the module that created the job to describe in an array
    // the arguments provided. If the callback fails then just use the
    // arguments as they are
    $args = explode("::", (string) $job->arguments);
    $job->arguments = $args;
    $arg_hook = $job->modulename . "_job_describe_args";
    if (is_callable($arg_hook)) {
      $new_args = call_user_func_array($arg_hook, array($job->callback, $args));
      if (is_array($new_args) and count($new_args)) {
        $job->arguments = $new_args;
      }
    }

    // make our start and end times more legible
    $job->submit_date = tripal_jobs_get_submit_date($job);
    $job->start_time = tripal_jobs_get_start_time($job);
    $job->end_time = tripal_jobs_get_end_time($job);

    // add the job to the variables that get exported to the template
    $variables['job'] = $job;
  }
  /**
   * Set a job to be re-run (ie: add it back into the job queue)
   *
   * A new job is queued with the same name, module, callback, arguments and
   * priority as the original, owned by the current user. The browser is then
   * redirected to the jobs page.
   *
   * @param $job_id
   *   The job_id of the job to be re-run
   *
   * @ingroup tripal_jobs_api
   */
  function tripal_jobs_rerun($job_id) {
    global $user;

    $sql = "SELECT * FROM {tripal_jobs} WHERE job_id = %d";
    $job = db_fetch_object(db_query($sql, $job_id));

    if ($job) {
      $args = explode("::", (string) $job->arguments);
      tripal_add_job($job->job_name, $job->modulename, $job->callback, $args, $user->uid,
        $job->priority);
    }
    else {
      // do not queue an empty job when the original cannot be found
      drupal_set_message(t("Job %job_id could not be found.", array('%job_id' => $job_id)), 'error');
    }
    drupal_goto("admin/tripal/tripal_jobs");
  }
  /**
   * Cancel a Tripal Job currently waiting in the job queue
   *
   * Only a job that has neither started nor ended can be cancelled. The
   * browser is redirected to the jobs page afterwards in every case.
   *
   * @param $job_id
   *   The job_id of the job to be cancelled
   *
   * @ingroup tripal_jobs_api
   */
  function tripal_jobs_cancel($job_id) {
    $sql = "SELECT * FROM {tripal_jobs} WHERE job_id = %d";
    $job = db_fetch_object(db_query($sql, $job_id));

    if (!$job) {
      // never write a record for a job that does not exist
      drupal_set_message(t("Job %job_id could not be found.", array('%job_id' => $job_id)), 'error');
    }
    elseif ($job->start_time == 0 and $job->end_time == 0) {
      // the job is still waiting: set the end time so it is never launched
      $record = new stdClass();
      $record->job_id = $job->job_id;
      $record->end_time = time();
      $record->status = 'Cancelled';
      $record->progress = '0';
      drupal_write_record('tripal_jobs', $record, 'job_id');
      drupal_set_message(t("Job #%job_id cancelled", array('%job_id' => $job_id)));
    }
    else {
      drupal_set_message(t("Job %job_id cannot be cancelled. It is in progress or has finished.", array('%job_id' => $job_id)), 'warning');
    }
    drupal_goto("admin/tripal/tripal_jobs");
  }