tripal.jobs.api.inc 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561
  1. <?php
  2. /**
  3. * @file
  4. * Tripal offers a job management subsystem for managing tasks that may require an extended period of time for
  5. * completion.
  6. */
  7. /**
  8. * @defgroup tripal_jobs_api Tripal Jobs API
  9. * @ingroup tripal_api
  10. * @{
  11. * Tripal offers a job management subsystem for managing tasks that may require
  12. * an extended period of time for completion. Drupal uses a UNIX-based cron
  13. * job to handle tasks such as checking the availability of updates,
  14. * indexing new nodes for searching, etc. Drupal's cron uses the web interface
  15. * for launching these tasks, however, Tripal provides several administrative
  16. * tasks that may time out and not complete due to limitations of the web
  17. * server. To circumvent this, as well as provide more fine-grained control and
  18. * monitoring, Tripal uses a jobs management sub-system. It is anticipated
  19. * that this functionality will be used for managing analysis jobs provided by
  20. * future tools, with eventual support for distributed computing.
  21. *
  22. * The Tripal jobs management system allows administrators to submit tasks
  23. * to be performed which can then be launched through a UNIX command-line PHP
  24. * script or cron job. This command-line script can be added to a cron
  25. * entry along-side the Drupal cron entry for automatic, regular launching of
  26. * Tripal jobs. The order of execution of waiting jobs is determined first by
  27. * priority and second by the order the jobs were entered.
  28. *
  29. * The API functions described below provide a programmatic interface for
  30. * adding, checking and viewing jobs.
  31. * @}
  32. */
  /**
   * Adds a job to the Tripal Job queue.
   *
   * @param $job_name
   *   The human readable name for the job.
   * @param $modulename
   *   The name of the module adding the job.
   * @param $callback
   *   The name of a function to be called when the job is executed.
   * @param $arguments
   *   An array of arguments to be passed on to the callback.
   * @param $uid
   *   The uid of the user adding the job.
   * @param $priority
   *   The priority at which to run the job where the highest priority is 10 and
   *   the lowest priority is 1. The default priority is 10.
   *   NOTE(review): tripal_launch_job() orders waiting jobs by "priority ASC",
   *   i.e. smaller numbers are launched first -- confirm which end of the scale
   *   is really meant to be "highest".
   * @param $includes
   *   An array of paths to files that should be included in order to execute
   *   the job. Use the module_load_include function to get a path for a given
   *   file.
   *
   * @return
   *   The job_id of the registered job, or FALSE on failure.
   *
   * Example usage:
   *
   * @code
   *   $args = array($dfile, $organism_id, $type, $library_id, $re_name, $re_uname,
   *     $re_accession, $db_id, $rel_type, $re_subject, $parent_type, $method,
   *     $user->uid, $analysis_id, $match_type);
   *
   *   $includes = array();
   *   $includes[] = module_load_include('inc', 'tripal_chado', 'includes/loaders/tripal_chado.fasta_loader');
   *
   *   tripal_add_job("Import FASTA file: $dfile", 'tripal_feature',
   *     'tripal_feature_load_fasta', $args, $user->uid, 10, $includes);
   * @endcode
   *
   * The code above is copied from the tripal_feature/fasta_loader.php file. The
   * snippet first builds an array of arguments that will then be passed to the
   * tripal_add_job function. The number of arguments provided in the $arguments
   * variable should match the argument set for the callback function provided
   * as the third argument.
   *
   * @ingroup tripal_jobs_api
   */
  function tripal_add_job($job_name, $modulename, $callback, $arguments, $uid,
      $priority = 10, $includes = array()) {

    try {
      $job = new TripalJob();
      $job->create(array(
        'job_name' => $job_name,
        'modulename' => $modulename,
        'callback' => $callback,
        'arguments' => $arguments,
        'uid' => $uid,
        'priority' => $priority,
        'includes' => $includes,
      ));

      // If no exceptions were thrown then we know the creation worked. So
      // let the user know!
      drupal_set_message(t("Job '%job_name' submitted.", array('%job_name' => $job_name)));

      // If the current user may administer Tripal then give a bit more
      // information about how to run the job.
      if (user_access('administer tripal')) {
        $jobs_url = url("admin/tripal/tripal_jobs");
        drupal_set_message(t("Check the <a href='!jobs_url'>jobs page</a> for status.",
          array('!jobs_url' => $jobs_url)));

        // user_load() returns FALSE when the account cannot be loaded, so do
        // not dereference the result blindly; fall back to a placeholder.
        $user = user_load($uid);
        $uname = $user ? $user->name : '[username]';
        drupal_set_message(t("You can execute the job queue manually on the command line " .
          "using the following Drush command: <br>drush trp-run-jobs --username=%uname --root=%base_path",
          array('%base_path' => DRUPAL_ROOT, '%uname' => $uname)));
      }
      return $job->getJobID();
    }
    catch (Exception $e) {
      tripal_report_error('tripal', TRIPAL_ERROR, $e->getMessage());
      drupal_set_message($e->getMessage(), 'error');
      return FALSE;
    }
  }
  /**
   * Looks up a single Tripal job.
   *
   * @param $job_id
   *   The unique identifier of the job.
   *
   * @return
   *   The value of TripalJob::getJob() for the loaded job (described by this
   *   API as an object representing a record of the tripal_jobs table), or
   *   FALSE if loading the job raised an exception.
   *
   * @ingroup tripal_jobs_api
   */
  function tripal_get_job($job_id) {
    $record = FALSE;
    try {
      $loader = new TripalJob();
      $loader->load($job_id);
      $record = $loader->getJob();
    }
    catch (Exception $exception) {
      // Log the failure and also surface it to the current user.
      tripal_report_error('tripal', TRIPAL_ERROR, $exception->getMessage());
      drupal_set_message($exception->getMessage(), 'error');
    }
    return $record;
  }
  /**
   * Indicates if any jobs are running.
   *
   * Walks over every job that has a start time but no end time and asks the
   * operating system (via `ps`) whether the recorded process ID still exists.
   * Jobs whose process is gone (or that never recorded a PID) are closed with
   * status 'Error'. The function returns as soon as the first live job is
   * found, so jobs later in the result set are not examined in that call.
   *
   * @return
   *   Returns TRUE if any job is running or FALSE otherwise.
   *
   * @ingroup tripal_jobs_api
   */
  function tripal_is_job_running() {
    // Select each job that has started but not ended.
    $sql = "SELECT * FROM {tripal_jobs} TJ " .
           "WHERE TJ.end_time IS NULL and NOT TJ.start_time IS NULL ";
    $jobs = db_query($sql);

    foreach ($jobs as $job) {
      // Only ask the OS about the process when a PID was actually recorded.
      // Running `ps -p ''` spawns a useless process and makes ps complain.
      if ($job->pid && shell_exec('ps -p ' . escapeshellarg($job->pid) . ' -o pid=')) {
        // The job is still running: report that straight away.
        return TRUE;
      }

      // The job is not running but has no end time, so close it as failed.
      $record = new stdClass();
      $record->job_id = $job->job_id;
      $record->end_time = time();
      $record->status = 'Error';
      $record->error_msg = 'Job has terminated unexpectedly.';
      drupal_write_record('tripal_jobs', $record, 'job_id');
    }

    // No started-but-unfinished job has a live process.
    return FALSE;
  }
  /**
   * Check for too many concurrent jobs.
   *
   * Counts the jobs that have started, have not ended and whose recorded
   * process ID is still known to the operating system (checked with `ps`).
   * Started jobs whose process is gone (or that never recorded a PID) are
   * closed with status 'Error' as a side effect.
   *
   * @param $max_jobs
   *   The maximum number of concurrent jobs to allow; -1 = no limit
   *
   * @return
   *   FALSE when $max_jobs is negative (no limit). Otherwise TRUE when the
   *   number of running jobs is greater than or equal to $max_jobs, and FALSE
   *   if it is lower.
   *
   * @ingroup tripal_jobs_api
   */
  function tripal_max_jobs_exceeded($max_jobs) {
    if ($max_jobs < 0) {
      // No limit on concurrent jobs
      return FALSE;
    }

    $num_jobs_running = 0;

    // Select each job that has started but not ended.
    $sql = "SELECT * FROM {tripal_jobs} TJ " .
           "WHERE TJ.end_time IS NULL and NOT TJ.start_time IS NULL ";
    $jobs = db_query($sql);

    foreach ($jobs as $job) {
      // Only ask the OS about the process when a PID was actually recorded.
      // Running `ps -p ''` spawns a useless process and makes ps complain.
      if ($job->pid && shell_exec('ps -p ' . escapeshellarg($job->pid) . ' -o pid=')) {
        // The job is still running.
        $num_jobs_running++;
        continue;
      }

      // The job is not running but has no end time, so close it as failed.
      $record = new stdClass();
      $record->job_id = $job->job_id;
      $record->end_time = time();
      $record->status = 'Error';
      $record->error_msg = 'Job has terminated unexpectedly.';
      drupal_write_record('tripal_jobs', $record, 'job_id');
    }

    return ($num_jobs_running >= $max_jobs);
  }
  /**
   * Set a job to be re-run (i.e. add a copy of it back into the job queue).
   *
   * The existing job is loaded and a new job is created with the same name,
   * module, callback, arguments, priority and includes, but owned by the
   * currently logged-in user. Errors while loading or creating the job are
   * reported to the user and to the Tripal error log.
   *
   * @param $job_id
   *   The job_id of the job to be re-run.
   * @param $goto_jobs_page
   *   If set to TRUE then after the re-run job is added Drupal will redirect
   *   to the jobs page.
   *
   * @ingroup tripal_jobs_api
   */
  function tripal_rerun_job($job_id, $goto_jobs_page = TRUE) {
    global $user;
    $user_id = $user->uid;

    try {
      // Load the job to copy. This is inside the try block so that an unknown
      // job_id is reported like any other failure instead of going uncaught.
      $job = new TripalJob();
      $job->load($job_id);

      // Create the copy on a fresh object rather than on the loaded job.
      $newJob = new TripalJob();
      $newJob->create(array(
        'job_name' => $job->getJobName(),
        'modulename' => $job->getModuleName(),
        'callback' => $job->getCallback(),
        'arguments' => $job->getArguments(),
        'uid' => $user_id,
        'priority' => $job->getPriority(),
        'includes' => $job->getIncludes(),
      ));

      // If no exceptions were thrown then we know the creation worked. So
      // let the user know!
      drupal_set_message(t("Job '%job_name' submitted.", array('%job_name' => $job->getJobName())));

      // If the current user may administer Tripal then give a bit more
      // information about how to run the job.
      if (user_access('administer tripal')) {
        $jobs_url = url("admin/tripal/tripal_jobs");
        drupal_set_message(t("Check the <a href='!jobs_url'>jobs page</a> for status.",
          array('!jobs_url' => $jobs_url)));
        drupal_set_message(t("You can execute the job queue manually on the command line " .
          "using the following Drush command: <br>drush trp-run-jobs --username=%uname --root=%base_path",
          array('%base_path' => DRUPAL_ROOT, '%uname' => $user->name)));
      }

      if ($goto_jobs_page) {
        drupal_goto("admin/tripal/tripal_jobs");
      }
    }
    catch (Exception $e) {
      drupal_set_message($e->getMessage(), 'error');
      tripal_report_error('tripal', TRIPAL_ERROR, $e->getMessage());
    }
  }
  /**
   * Cancel a Tripal Job currently waiting in the job queue.
   *
   * @param $job_id
   *   The job_id of the job to be cancelled. Must be numeric.
   * @param $redirect
   *   If TRUE (the default) Drupal redirects to the jobs page once the
   *   cancellation has been attempted, whether or not it succeeded. Pass FALSE
   *   to skip the redirect and use the return value.
   *
   * @return
   *   FALSE if an error occurred or the job could not be cancelled, TRUE
   *   otherwise. Per the Drupal 7 API, drupal_goto() ends the request, so the
   *   return value is normally only observable when $redirect is FALSE or
   *   $job_id is invalid.
   *
   * @ingroup tripal_jobs_api
   */
  function tripal_cancel_job($job_id, $redirect = TRUE) {

    if (!$job_id or !is_numeric($job_id)) {
      watchdog('tripal', "Must provide a numeric \$job_id to the tripal_cancel_job() function.");
      return FALSE;
    }

    $cancelled = FALSE;
    try {
      $job = new TripalJob();
      $job->load($job_id);
      $job->cancel();

      drupal_set_message('Job is now cancelled.');
      $cancelled = TRUE;
    }
    catch (Exception $e) {
      tripal_report_error('tripal', TRIPAL_ERROR, $e->getMessage());
      drupal_set_message($e->getMessage(), 'error');
    }

    // Honour the caller's choice: only redirect when asked to.
    if ($redirect) {
      drupal_goto("admin/tripal/tripal_jobs");
    }
    return $cancelled;
  }
  /**
   * A function used to manually launch all queued tripal jobs.
   *
   * Progress messages are printed to standard output, each prefixed with the
   * current date and time.
   *
   * @param $do_parallel
   *   A boolean indicating whether jobs should be attempted to run in parallel.
   *   When false, nothing is launched while tripal_is_job_running() reports a
   *   running job.
   * @param $job_id
   *   To launch a specific job provide the job id. This option should be
   *   used sparingly as the jobs queue management system should launch jobs
   *   based on order and priority. However there are times when a specific
   *   job needs to be launched and this argument will allow it. Only jobs
   *   which have not been run previously will run.
   * @param $max_jobs
   *   The maximum number of jobs that should be run concurrently. If -1 then
   *   unlimited.
   * @param $single
   *   Ensures only a single job is run rather than the entire queue.
   *
   * @ingroup tripal_jobs_api
   */
  function tripal_launch_job($do_parallel = 0, $job_id = NULL, $max_jobs = -1, $single = 0) {

    // First check if any jobs are currently running. If they are, don't
    // continue: we don't want more than one job script running at a time.
    if (!$do_parallel and tripal_is_job_running()) {
      print date('Y-m-d H:i:s') . ": Jobs are still running. Use the --parallel=1 option with the Drush command to run jobs in parallel.\n";
      return;
    }

    // In parallel mode, respect the cap on concurrently running jobs.
    if ($do_parallel && tripal_max_jobs_exceeded($max_jobs)) {
      print date('Y-m-d H:i:s') . ": More than $max_jobs jobs are still running. At least one of these jobs must complete before a new job can start.\n";
      return;
    }

    // Get the jobs that have not started, ordered by ascending priority value
    // and then by ascending job_id (i.e. submission order within a priority).
    if ($job_id) {
      $sql = "
        SELECT TJ.job_id
        FROM {tripal_jobs} TJ
        WHERE
          TJ.start_time IS NULL AND
          TJ.end_time IS NULL AND
          TJ.job_id = :job_id
        ORDER BY priority ASC, job_id ASC
      ";
      $jobs = db_query($sql, array(':job_id' => $job_id));
    }
    else {
      $sql = "
        SELECT TJ.job_id
        FROM {tripal_jobs} TJ
        WHERE
          TJ.start_time IS NULL AND
          TJ.end_time IS NULL AND
          NOT TJ.status = 'Cancelled'
        ORDER BY priority ASC,job_id ASC
      ";
      $jobs = db_query($sql);
    }

    if ($jobs) {
      print date('Y-m-d H:i:s') . ": There are " . $jobs->rowCount() . " jobs queued.\n";
    }

    foreach ($jobs as $jid) {
      $job_id = $jid->job_id;

      // Create the TripalJob object.
      $job = new TripalJob();
      $job->load($job_id);

      // Arguments are stored as serialized arrays and may therefore contain
      // nested arrays or objects, which implode() cannot print. Replace those
      // with a short label for the log line below.
      $args = $job->getArguments();
      $string_args = array();
      foreach ($args as $k => $a) {
        if (is_array($a)) {
          $string_args[$k] = 'Array';
        }
        elseif (is_object($a)) {
          $string_args[$k] = 'Object';
        }
        else {
          $string_args[$k] = $a;
        }
      }

      // Run the job.
      $callback = $job->getCallback();
      print date('Y-m-d H:i:s') . ": Calling: $callback(" . implode(", ", $string_args) . ")\n";
      try {
        $job->run();
      }
      catch (Exception $e) {
        // A failing job is logged against the job itself and does not stop
        // the remaining queue from being processed.
        $job->logMessage($e->getMessage(), array(), TRIPAL_ERROR);
        drupal_set_message($e->getMessage(), 'error');
      }

      if ($single) {
        // Don't start any more jobs
        break;
      }
      if (tripal_max_jobs_exceeded($max_jobs)) {
        break;
      }

      // TODO: Send an email to the user advising that the job has finished
    }
  }
  /**
   * An internal function for setting the progress for a current job.
   *
   * @param $job_id
   *   The job_id to set the progress for.
   * @param $percentage
   *   The progress value handed to TripalJob::setProgress().
   *
   * @return
   *   TRUE when no exception was raised while loading or updating the job,
   *   FALSE otherwise.
   *
   * @ingroup tripal
   */
  function tripal_set_job_progress($job_id, $percentage) {
    $succeeded = TRUE;
    try {
      $tracker = new TripalJob();
      $tracker->load($job_id);
      $tracker->setProgress($percentage);
    }
    catch (Exception $exception) {
      // Log the failure and also surface it to the current user.
      tripal_report_error('tripal', TRIPAL_ERROR, $exception->getMessage());
      drupal_set_message($exception->getMessage(), 'error');
      $succeeded = FALSE;
    }
    return $succeeded;
  }
  /**
   * Retrieves the current progress of a job.
   *
   * @param $job_id
   *   The job_id to get the progress for.
   *
   * @return
   *   The value of TripalJob::getProgress() for the job (described by this API
   *   as a value between 0 and 100 indicating the percentage complete), or
   *   FALSE if loading the job raised an exception.
   */
  function tripal_get_job_progress($job_id) {
    try {
      $tracker = new TripalJob();
      $tracker->load($job_id);
      return $tracker->getProgress();
    }
    catch (Exception $exception) {
      // Log the failure and also surface it to the current user.
      tripal_report_error('tripal', TRIPAL_ERROR, $exception->getMessage());
      drupal_set_message($exception->getMessage(), 'error');
    }
    return FALSE;
  }
  /**
   * Returns a list of jobs that are active.
   *
   * A job is treated as active when its status is neither 'Completed' nor
   * 'Cancelled'.
   *
   * @param $modulename
   *   Limit the list returned to those that were added by a specific module. If
   *   no module name is provided then all active jobs are returned.
   *
   * @return
   *   An array of objects where each object describes a tripal job. If no
   *   jobs were found then an empty array is returned. Each object will have
   *   the following members:
   *   - job_id: The unique ID number for the job.
   *   - uid: The ID of the user that submitted the job.
   *   - job_name: The human-readable name of the job.
   *   - modulename: The name of the module that submitted the job.
   *   - callback: The callback function to be called when the job is run.
   *   - arguments: An array of arguments to be passed to the callback function.
   *   - progress: The percent progress of completion if the job is running.
   *   - status: The status of the job: Waiting, Completed, Running or Cancelled.
   *   - submit_date: The UNIX timestamp when the job was submitted.
   *   - start_time: The UNIX timestamp for when the job started running.
   *   - end_time: The UNIX timestamp when the job completed running.
   *   - error_msg: Any error message that occurred during execution of the job.
   *   - priority: The execution priority of the job (value between 1 and 10)
   *
   * @ingroup tripal_jobs_api
   */
  function tripal_get_active_jobs($modulename = NULL) {
    $query = db_select('tripal_jobs', 'TJ')
      ->fields('TJ', array('job_id', 'uid', 'job_name', 'modulename', 'callback',
        'arguments', 'progress', 'status', 'submit_date', 'start_time',
        'end_time', 'error_msg', 'priority'));

    // Always restrict to active jobs, not only when a module name is given.
    $query->where("NOT (TJ.status = 'Completed' or TJ.status = 'Cancelled')");
    if ($modulename) {
      $query->where('TJ.modulename = :modulename', array(':modulename' => $modulename));
    }
    $results = $query->execute();

    $jobs = array();
    while ($job = $results->fetchObject()) {
      // The arguments column holds a serialized array; expand it on the job
      // object itself before adding the job to the result list.
      $job->arguments = unserialize($job->arguments);
      $jobs[] = $job;
    }
    return $jobs;
  }
  /**
   * Execute a specific Tripal Job.
   *
   * The job is launched through tripal_launch_job() only when both its start
   * time and its end time compare equal to 0; otherwise a message is shown
   * saying it cannot be executed.
   *
   * @param $job_id
   *   The job id to be executed.
   * @param bool $redirect [optional]
   *   Whether to redirect to the job's view page afterwards or not.
   */
  function tripal_execute_job($job_id, $redirect = TRUE) {
    $job = new TripalJob();
    $job->load($job_id);

    $placeholders = array('%job_id' => $job_id);

    if ($job->getStartTime() != 0 || $job->getEndTime() != 0) {
      // The job already has a start or end time recorded: do not run it again.
      drupal_set_message(t("Job %job_id cannot be executed. It has already finished.", $placeholders));
    }
    else {
      // Run the job (parallel mode, restricted to this job id).
      tripal_launch_job(1, $job_id);
      drupal_set_message(t("Job %job_id has finished executing. See below for more information.", $placeholders));
    }

    if ($redirect) {
      drupal_goto("admin/tripal/tripal_jobs/view/$job_id");
    }
  }