TripalImporter.inc 23 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719
  1. <?php
  2. class TripalImporter {
  3. // --------------------------------------------------------------------------
  4. // EDITABLE STATIC CONSTANTS
  5. //
  6. // The following constants SHOULD be set for each descendent class. They are
  7. // used by the static functions to provide information to Drupal about
  8. // the field and its default widget and formatter.
  9. // --------------------------------------------------------------------------
  10. /**
  11. * The name of this loader. This name will be presented to the site
  12. * user.
  13. */
  14. public static $name = 'Tripal Loader';
  15. /**
  16. * The machine name for this loader. This name will be used to construct
  17. * the URL for the loader.
  18. */
  19. public static $machine_name = 'tripal_loader';
  20. /**
  21. * A brief description for this loader. This description will be
  22. * presented to the site user.
  23. */
  24. public static $description = 'A base loader for all Tripal loaders';
  25. /**
  26. * An array containing the extensions of allowed file types.
  27. */
  28. public static $file_types = array();
  29. /**
  30. * Provides information to the user about the file upload. Typically this
  31. * may include a description of the file types allowed.
  32. */
  33. public static $upload_description = '';
  34. /**
  35. * The title that should appear above the upload button.
  36. */
  37. public static $upload_title = 'File Upload';
  38. /**
  39. * If the loader should require an analysis record. To maintain provenance
  40. * we should always indicate where the data we are uploading comes from.
  41. * The method that Tripal attempts to use for this is by associating upload files
  42. * with an analysis record. The analysis record provides the details for
  43. * how the file was created or obtained. Set this to FALSE if the loader
  44. * should not require an analysis when loading. if $use_analysis is set to
  45. * true then the form values will have an 'analysis_id' key in the $form_state
  46. * array on submitted forms.
  47. */
  48. public static $use_analysis = TRUE;
  49. /**
  50. * If the $use_analysis value is set above then this value indicates if the
  51. * analysis should be required.
  52. */
  53. public static $require_analysis = TRUE;
  54. /**
  55. * Text that should appear on the button at the bottom of the importer
  56. * form.
  57. */
  58. public static $button_text = 'Import File';
  59. /**
  60. * Indicates the methods that the file uploader will support.
  61. */
  62. public static $methods = array(
  63. // Allow the user to upload a file to the server.
  64. 'file_upload' => TRUE,
  65. // Allow the user to provide the path on the Tripal server for the file.
  66. 'file_local' => TRUE,
  67. // Allow the user to provide a remote URL for the file.
  68. 'file_remote' => TRUE,
  69. );
  70. /**
  71. * Indicates if the file must be provided. An example when it may not be
  72. * necessary to require that the user provide a file for uploading if the
  73. * loader keeps track of previous files and makes those available for
  74. * selection.
  75. */
  76. public static $file_required = TRUE;
  77. /**
  78. * The array of arguments used for this loader. Each argument should
  79. * be a separate array containing a machine_name, name, and description
  80. * keys. This information is used to build the help text for the loader.
  81. */
  82. public static $argument_list = array();
  83. /**
  84. * Indicates how many files are allowed to be uploaded. By default this is
  85. * set to allow only one file. Change to any positive number. A value of
  86. * zero indicates an unlimited number of uploaded files are allowed.
  87. */
  88. public static $cardinality = 1;
  89. /**
  90. * By default, all loaders are automatically added to the Admin >
  91. * Tripal > Data Loaders menu. However, if this loader should be
  92. * made available via a different menu path, then set it here. If the
  93. * value is empty then the path will be the default.
  94. */
  95. public static $menu_path = '';
  96. /**
  97. * If your importer requires more flexibility and advanced features than
  98. * the TripalImporter provides you can indicate a callback function. If set,
  99. * the callback will be used to provide the importer interface to the
  100. * end-user. However, because this bypasses the class infrastructure the
  101. * run() function will also not be available and your importer must be
  102. * fully self-sufficient outside of this class. The benefit for using a
  103. * TripalImporter despite your loader being self-sufficient is that Tripal
  104. * will treat your loader like all others providing a consistent location
  105. * in the menu and set of permissions.
  106. */
  107. public static $callback = '';
  108. /**
  109. * The name of the module that provides the callback function.
  110. */
  111. public static $callback_module = '';
  112. /**
  113. * An include path for the callback function. Use a relative path within
  114. * the scope of this module
  115. * (e.g. includes/loaders/tripal_chado_pub_importers).
  116. */
  117. public static $callback_path = '';
  118. // --------------------------------------------------------------------------
  119. // PRIVATE MEMBERS -- DO NOT EDIT or OVERRIDE
  120. // --------------------------------------------------------------------------
  121. /**
  122. * The number of items that this importer needs to process. The progress
  123. * can be calculated by dividing the number of items processed by this
  124. * number.
  125. */
  126. private $total_items;
  127. /**
  128. * The number of items that have been handled so far. This must never
  129. * be below 0 and never exceed $total_items;
  130. */
  131. private $num_handled;
  132. /**
  133. * The interval when the job progress should be updated. Updating the job
  134. * progress incurs a database write which takes time and if it occurs too
  135. * frequently can slow down the loader. This should be a value between
  136. * 0 and 100 to indicate a percent interval (e.g. 1 means update the
  137. * progress every time the num_handled increases by 1%).
  138. */
  139. private $interval;
  140. /**
  141. * Each time the job progress is updated this variable gets set. It is
  142. * used to calculate if the $interval has passed for the next update.
  143. */
  144. private $prev_update;
  145. /**
  146. * The job that this importer is associated with. This is needed for
  147. * updating the status of the job.
  148. */
  149. protected $job;
  150. /**
  151. * The arguments needed for the importer. This is a list of key/value
  152. * pairs in an associative array.
  153. */
  154. protected $arguments;
  155. /**
  156. * The ID for this import record.
  157. */
  158. protected $import_id;
  159. /**
  160. * Prior to running an importer it must be prepared to make sure the file
  161. * is available. Preparing the importer will download all the necessary
  162. * files. This value is set to TRUE after the importer is prepared for
  163. * running.
  164. */
  165. protected $is_prepared;
  166. // --------------------------------------------------------------------------
  167. // CONSTRUCTORS
  168. // --------------------------------------------------------------------------
  /**
   * Instantiates a new TripalImporter object.
   *
   * All bookkeeping members are reset to their starting values: no import
   * record, no arguments, zero items counted and a 1% progress interval.
   *
   * @param TripalJob $job
   *   An optional TripalJob object that this loader is associated with.
   */
  public function __construct(TripalJob $job = NULL) {
    // Initialize the member variables.
    $this->is_prepared = FALSE;
    $this->import_id = NULL;
    $this->arguments = array();
    // Keep the supplied job (NULL when none was given). The argument used to
    // be discarded, leaving the importer without a job even when the caller
    // provided one.
    $this->job = $job;
    $this->total_items = 0;
    $this->interval = 1;
    $this->num_handled = 0;
    $this->prev_update = 0;
  }
  /**
   * Builds the importer object that belongs to a stored import record.
   *
   * The record is looked up in the tripal_import table, the class named in
   * that record is loaded and instantiated, and the new object is then
   * populated through its load() method.
   *
   * @param $import_id
   *   The ID of the import record.
   *
   * @return
   *   An object of the class named in the import record.
   *
   * @throws Exception
   *   If no record matches the ID or the recorded class does not exist.
   */
  static public function byID($import_id) {
    // Fetch the stored import record.
    $query = db_select('tripal_import', 'ti');
    $query->fields('ti');
    $query->condition('ti.import_id', $import_id);
    $record = $query->execute()->fetchObject();

    if (!$record) {
      throw new Exception('Cannot find an importer that matches the given import ID.');
    }

    // Make sure the file that declares the importer class has been included.
    $importer_class = $record->class;
    tripal_load_include_importer_class($importer_class);

    if (!class_exists($importer_class)) {
      throw new Exception('Cannot find the matching class for this import record.');
    }

    $importer = new $importer_class();
    $importer->load($import_id);
    return $importer;
  }
  /**
   * Attaches the Tripal job that is running this importer.
   *
   * Once a job is attached, logMessage() forwards messages to it and
   * setItemsHandled() reports progress to it.
   *
   * @param TripalJob $job
   *   An instance of a TripalJob.
   */
  public function setJob(TripalJob $job) {
    $this->job = $job;
  }
  /**
   * Creates a new importer record.
   *
   * The run arguments and the file details are combined into one arguments
   * array, stored on this object, serialized and inserted into the
   * tripal_import table. The ID of the new record is kept in $import_id.
   *
   * @param $run_args
   *   An associative array of the arguments needed to run the importer. Each
   *   importer will have its own defined set of arguments.
   * @param $file_details
   *   An associative array with one of the following keys:
   *   - fid: provides the Drupal managed File ID for the file. Several IDs
   *     may be given as one string separated by '|'.
   *   - file_local: provides the full path to the file on the server.
   *   - file_remote: provides the remote URL for the file.
   *   This argument is optional if the loader does not use the built-in
   *   file loader.
   *
   * @throws Exception
   *   If a required file is missing, a managed file cannot be loaded, or the
   *   record cannot be inserted. The original exception is attached as the
   *   previous exception.
   */
  public function create($run_args, $file_details = array()) {
    global $user;
    $class = get_called_class();

    try {
      // Build the values for the tripal_import table insert.
      $values = array(
        'uid' => $user->uid,
        'class' => $class,
        'submit_date' => time(),
      );

      // Build the arguments array, which consists of the run arguments
      // and the file(s).
      $arguments = array(
        'run_args' => $run_args,
        'files' => array(),
      );

      // Collect the file arguments, counting how many files were supplied.
      $has_file = 0;
      if (array_key_exists('file_local', $file_details)) {
        $arguments['files'][] = array(
          'file_local' => $file_details['file_local'],
          'file_path' => $file_details['file_local'],
        );
        $has_file++;
      }
      if (array_key_exists('file_remote', $file_details)) {
        // The file_path for a remote file is filled in by prepareFiles().
        $arguments['files'][] = array(
          'file_remote' => $file_details['file_remote'],
        );
        $has_file++;
      }
      if (array_key_exists('fid', $file_details)) {
        $values['fid'] = $file_details['fid'];

        // Multiple uploads arrive as one '|'-separated string of IDs. A single
        // upload is kept as-is so its stored value does not change type.
        $fid_value = $file_details['fid'];
        $is_multiple = (strpos((string) $fid_value, '|') !== FALSE);
        $fids = $is_multiple ? explode('|', $fid_value) : array($fid_value);

        $file_entry = NULL;
        foreach ($fids as $fid) {
          $file = file_load($fid);
          // file_load() returns FALSE for an unknown ID. Fail with a clear
          // message rather than dereferencing a non-object.
          if (!$file) {
            throw new Exception("Cannot find a managed file with the ID '$fid'.");
          }
          $file_entry = array(
            'file_path' => base_path() . drupal_realpath($file->uri),
            'fid' => $fid,
          );
          $arguments['files'][] = $file_entry;
          $has_file++;
        }

        // For backwards compatibility add the old 'file' element when a
        // single file was uploaded.
        if (!$is_multiple) {
          $arguments['file'] = $file_entry;
        }
      }

      // Validate the $file_details argument.
      if ($has_file == 0 and $class::$file_required == TRUE) {
        throw new Exception("Must provide a proper file identifier for the \$file_details argument.");
      }

      // Store the arguments in the class and serialize for table insertion.
      $this->arguments = $arguments;
      $values['arguments'] = serialize($arguments);

      // Insert the importer record.
      $this->import_id = db_insert('tripal_import')
        ->fields($values)
        ->execute();
    }
    catch (Exception $e) {
      // Chain the original exception so its stack trace is not lost.
      throw new Exception('Cannot create importer: ' . $e->getMessage(), 0, $e);
    }
  }
  /**
   * Populates this object from an existing import record.
   *
   * The record is read from the tripal_import table. Its stored arguments
   * are unserialized into $arguments and its ID is kept in $import_id.
   *
   * @param $import_id
   *   The ID of the import record.
   *
   * @throws Exception
   *   If the record does not exist or was created by a different importer
   *   class than the one load() is called on.
   */
  public function load($import_id) {
    $expected_class = get_called_class();

    // Fetch the stored import record.
    $query = db_select('tripal_import', 'ti');
    $query->fields('ti');
    $query->condition('ti.import_id', $import_id);
    $record = $query->execute()->fetchObject();

    if (!$record) {
      throw new Exception('Cannot find an importer that matches the given import ID.');
    }

    // A record may only be loaded by the class that created it.
    if ($record->class != $expected_class) {
      throw new Exception('The importer specified by the given ID does not match this importer class.');
    }

    $this->arguments = unserialize($record->arguments);
    $this->import_id = $import_id;
  }
  /**
   * Queues this importer for execution as a Tripal job.
   *
   * The job is registered for the current user with the class's button text
   * as its name and 'tripal_run_importer' as its callback, which receives the
   * import record ID as its only argument.
   *
   * @return
   *   The value returned by tripal_add_job() for the newly submitted job.
   *
   * @throws Exception
   *   If no import record exists yet or the job cannot be added.
   */
  public function submitJob() {
    global $user;
    $importer_class = get_called_class();

    // A job can only refer to an import record that already exists.
    if (!$this->import_id) {
      throw new Exception('Cannot submit an importer job without an import record. Please run create() first.');
    }

    try {
      $job_args = array($this->import_id);
      // The importer API file is passed along as the job's include.
      $include_file = module_load_include('inc', 'tripal', 'api/tripal.importer.api');
      return tripal_add_job($importer_class::$button_text, 'tripal',
        'tripal_run_importer', $job_args, $user->uid, 10, array($include_file));
    }
    catch (Exception $e) {
      throw new Exception('Cannot create importer job: ' . $e->getMessage());
    }
  }
  /**
   * Prepares the importer files for execution.
   *
   * This function must be run prior to the run() function to ensure that
   * the import files are ready to go. Every remote file is downloaded to a
   * temporary file and every file whose path ends in '.gz' is uncompressed.
   * The 'file_path' of each file entry is updated to point at the file the
   * importer should read. $is_prepared is set to TRUE once every file has
   * been handled.
   *
   * @throws Exception
   *   If a file cannot be downloaded or uncompressed. The original exception
   *   is attached as the previous exception.
   */
  public function prepareFiles() {
    $class = get_called_class();

    // If no file is required then just indicate that all is good to go.
    if ($class::$file_required == FALSE) {
      $this->is_prepared = TRUE;
      return;
    }

    try {
      $num_files = isset($this->arguments['files']) ? count($this->arguments['files']) : 0;
      for ($i = 0; $i < $num_files; $i++) {
        // Download remote files and point the importer at the local copy.
        if (!empty($this->arguments['files'][$i]['file_remote'])) {
          $file_remote = $this->arguments['files'][$i]['file_remote'];
          $this->arguments['files'][$i]['file_path'] = $this->downloadRemoteFile($file_remote);
        }

        // An entry without a path has nothing that could be uncompressed.
        if (empty($this->arguments['files'][$i]['file_path'])) {
          continue;
        }

        // Is this file compressed? If so, then uncompress it.
        $file_path = $this->arguments['files'][$i]['file_path'];
        $matches = array();
        if (preg_match('/^(.*?)\.gz$/', $file_path, $matches)) {
          $this->logMessage("Uncompressing: !file", array('!file' => $file_path));
          $new_file_path = $matches[1];
          $this->uncompressFile($file_path, $new_file_path);

          // Now remove the .gz file and reset the file_path to the new
          // uncompressed version.
          // NOTE(review): this also deletes a compressed file that was
          // supplied through 'file_local' or 'fid' -- confirm that is intended.
          unlink($file_path);
          $this->arguments['files'][$i]['file_path'] = $new_file_path;
        }
      }

      // Every file was handled without error, including local and uploaded
      // files that needed no download.
      $this->is_prepared = TRUE;
    }
    catch (Exception $e) {
      throw new Exception('Cannot prepare the importer: ' . $e->getMessage(), 0, $e);
    }
  }

  /**
   * Downloads a remote file into a new temporary file.
   *
   * @param $file_remote
   *   The URL of the remote file.
   *
   * @return
   *   The path of the temporary file holding the download. The '.gz'
   *   extension is kept when the URL has one.
   *
   * @throws Exception
   *   If the temporary file cannot be created or the download fails.
   */
  private function downloadRemoteFile($file_remote) {
    $this->logMessage('Download file: !file_remote...', array('!file_remote' => $file_remote));

    // tempnam() needs a real directory, not a stream wrapper URI.
    $temp_dir = drupal_realpath('temporary://');
    if (!$temp_dir) {
      $temp_dir = sys_get_temp_dir();
    }
    $temp = tempnam($temp_dir, 'import_');
    if ($temp === FALSE) {
      throw new Exception("Cannot download file: a temporary file cannot be created.");
    }

    // If this file is compressed then keep the .gz extension so we can
    // uncompress it. tempnam() has already created the file, so rename it
    // rather than leaving the extension-less placeholder behind.
    if (preg_match('/^(.*?)\.gz$/', $file_remote)) {
      if (!rename($temp, $temp . '.gz')) {
        unlink($temp);
        throw new Exception("Cannot download file: temporary file, '$temp', cannot be renamed.");
      }
      $temp .= '.gz';
    }
    $this->logMessage("Saving as: !file", array('!file' => $temp));

    $url_fh = fopen($file_remote, "r");
    if (!$url_fh) {
      unlink($temp);
      throw new Exception(t("Unable to download the remote file at %url. Could a firewall be blocking outgoing connections?",
        array('%url' => $file_remote)));
    }
    $tmp_fh = fopen($temp, "w");
    if (!$tmp_fh) {
      fclose($url_fh);
      unlink($temp);
      throw new Exception("Cannot download file: temporary file, '$temp', cannot be opened for writing.");
    }

    // Write the contents of the remote file to the temp file.
    $failed = FALSE;
    while (!feof($url_fh)) {
      $chunk = fread($url_fh, 8192);
      if ($chunk === FALSE or fwrite($tmp_fh, $chunk) === FALSE) {
        $failed = TRUE;
        break;
      }
    }

    // Always release both handles so the data is on disk before it is read.
    fclose($url_fh);
    fclose($tmp_fh);

    if ($failed) {
      unlink($temp);
      throw new Exception("Cannot download file: copying the remote file to '$temp' failed.");
    }
    return $temp;
  }

  /**
   * Uncompresses a gzip file into a new file.
   *
   * @param $gz_path
   *   The path of the compressed file.
   * @param $new_file_path
   *   The path the uncompressed contents are written to.
   *
   * @throws Exception
   *   If either file cannot be opened or the compressed data cannot be read.
   */
  private function uncompressFile($gz_path, $new_file_path) {
    $gzfile = gzopen($gz_path, 'rb');
    if (!$gzfile) {
      throw new Exception("Cannot uncompress file: '$gz_path' cannot be opened for reading.");
    }
    $out_file = fopen($new_file_path, 'wb');
    if (!$out_file) {
      gzclose($gzfile);
      throw new Exception("Cannot uncompress file: new temporary file, '$new_file_path', cannot be created.");
    }

    // Keep repeating until the end of the input file. Both fwrite and gzread
    // are binary-safe.
    $buffer_size = 4096;
    $failed = FALSE;
    while (!gzeof($gzfile)) {
      $chunk = gzread($gzfile, $buffer_size);
      if ($chunk === FALSE) {
        $failed = TRUE;
        break;
      }
      fwrite($out_file, $chunk);
    }

    // Files are done, close files.
    fclose($out_file);
    gzclose($gzfile);

    if ($failed) {
      throw new Exception("Cannot uncompress file: '$gz_path' cannot be read.");
    }
  }
  /**
   * Cleans up any temporary files that were created by prepareFiles().
   *
   * This function should be called after a run() to remove any temporary
   * files and keep them from building up on the server. Only entries that
   * came from a remote URL are removed. $is_prepared is reset to FALSE
   * whenever a file is removed.
   *
   * @throws Exception
   *   If the clean up fails. The original exception is attached as the
   *   previous exception.
   */
  public function cleanFile() {
    try {
      $num_files = isset($this->arguments['files']) ? count($this->arguments['files']) : 0;
      for ($i = 0; $i < $num_files; $i++) {
        $file = $this->arguments['files'][$i];

        // Only downloaded files are removed. An entry that was never
        // prepared has no file_path and is skipped.
        if (empty($file['file_remote']) or empty($file['file_path'])) {
          continue;
        }
        if (file_exists($file['file_path'])) {
          $this->logMessage('Removing downloaded file...');
          unlink($file['file_path']);
          $this->is_prepared = FALSE;
        }
      }
    }
    catch (Exception $e) {
      throw new Exception('Cannot clean up the importer files: ' . $e->getMessage(), 0, $e);
    }
  }
  /**
   * Logs a message for the importer.
   *
   * When this importer has a job, the untranslated message, its variables
   * and the severity are handed to the job's logMessage(). Without a job,
   * TRIPAL_CRITICAL and TRIPAL_ERROR messages are shown as Drupal 'error'
   * messages and TRIPAL_WARNING messages as Drupal 'warning' messages.
   * Messages of any other severity are not shown.
   *
   * @param $message
   *   The message to store in the log. Keep $message translatable by not
   *   concatenating dynamic values into it. Put placeholders in the message
   *   and supply their values through $variables. See t() for how the two
   *   interact.
   * @param $variables
   *   Array of placeholder values to substitute into the message.
   * @param $severity
   *   The severity of the message; one of the following values:
   *   - TRIPAL_CRITICAL: Critical conditions.
   *   - TRIPAL_ERROR: Error conditions.
   *   - TRIPAL_WARNING: Warning conditions.
   *   - TRIPAL_NOTICE: Normal but significant conditions.
   *   - TRIPAL_INFO: (default) Informational messages.
   *   - TRIPAL_DEBUG: Debug-level messages.
   */
  public function logMessage($message, $variables = array(), $severity = TRIPAL_INFO) {
    // Translate up front; the result is only used when there is no job.
    $translated = t($message, $variables);

    // With a job attached, the job takes care of the message.
    if ($this->job) {
      $this->job->logMessage($message, $variables, $severity);
      return;
    }

    // Otherwise show the message on screen according to its severity.
    $is_error = ($severity == TRIPAL_CRITICAL || $severity == TRIPAL_ERROR);
    if ($is_error) {
      drupal_set_message($translated, 'error');
    }
    if ($severity == TRIPAL_WARNING) {
      drupal_set_message($translated, 'warning');
    }
  }
  /**
   * Records how many items this importer has to process in total.
   *
   * Call this near the start of loading. setItemsHandled() divides by this
   * number to work out the percent complete.
   *
   * @param $total_items
   *   The total number of items to process.
   */
  protected function setTotalItems($total_items) {
    $this->total_items = $total_items;
  }
  /**
   * Increases the count of handled items and reports the new total.
   *
   * @param $num_handled
   *   The number of additional items that have been handled.
   */
  protected function addItemsHandled($num_handled) {
    $new_total = $this->num_handled + $num_handled;
    $this->num_handled = $new_total;
    // Progress reporting is done by setItemsHandled().
    $this->setItemsHandled($new_total);
  }
  /**
   * Sets the number of items that have been processed.
   *
   * This should be called anytime the loader wants to indicate how many
   * items have been processed. The percent complete is calculated from this
   * number and the total set with setTotalItems(). Progress is printed, and
   * passed to the job if there is one, only when it has advanced by at least
   * the configured interval since the last report.
   *
   * @param $total_handled
   *   The total number of items that have been processed.
   */
  protected function setItemsHandled($total_handled) {
    // First set the number of items handled.
    $this->num_handled = $total_handled;

    if ($total_handled == 0) {
      // Starting (or restarting) the count: the next report is measured
      // from zero again.
      $this->prev_update = 0;
      $memory = number_format(memory_get_usage());
      print "Percent complete: 0%. Memory: " . $memory . " bytes.\r";
      return;
    }

    // Without a positive total a percentage cannot be calculated. The total
    // is 0 until setTotalItems() is called, so avoid dividing by zero.
    if ($this->total_items <= 0) {
      return;
    }

    // Now see if we need to report to the user the percent done. A message
    // will be printed on the command-line if the job is run there.
    $percent = sprintf("%.2f", ($this->num_handled / $this->total_items) * 100);
    if ($percent - $this->prev_update >= $this->interval) {
      $memory = number_format(memory_get_usage());
      print "Percent complete: " . $percent . "%. Memory: " . $memory . " bytes.\r";

      // If we have a job then update the job progress too.
      if ($this->job) {
        $this->job->setProgress($percent);
      }

      // Remember the percent at which this report was made. Storing the
      // difference instead made later reports ignore the interval.
      $this->prev_update = (float) $percent;
    }
  }
  /**
   * Sets how far progress must advance before it is reported again.
   *
   * setItemsHandled() only prints progress and updates the job once the
   * percent complete has grown by at least this amount. For example, 1 means
   * report every time the percent complete increases by 1%.
   *
   * @param $interval
   *   A number between 0 and 100.
   */
  protected function setInterval($interval) {
    $this->interval = $interval;
  }
  585. // --------------------------------------------------------------------------
  586. // OVERRIDEABLE FUNCTIONS
  587. // --------------------------------------------------------------------------
  /**
   * Supplies extra form elements for the loader form.
   *
   * The base implementation adds nothing and returns the form unchanged.
   * Child classes override this to add their own elements.
   *
   * @param $form
   *   The form array to add elements to.
   * @param $form_state
   *   The form state array, passed by reference.
   *
   * @return
   *   A $form array.
   */
  public function form($form, &$form_state) {
    return $form;
  }
  /**
   * Handles submission of the form elements added by form().
   *
   * The base implementation does nothing. Child classes override this when
   * their form elements need special handling on submit.
   *
   * @param $form
   *   The form array.
   * @param $form_state
   *   The form state array, passed by reference.
   */
  public function formSubmit($form, &$form_state) {
  }
  /**
   * Handles validation of the form elements added by form().
   *
   * The base implementation does nothing. Child classes override this to
   * validate the elements they provide.
   *
   * @param $form
   *   The form array.
   * @param $form_state
   *   The form state array, passed by reference.
   */
  public function formValidate($form, &$form_state) {
  }
  /**
   * Performs the import.
   *
   * The base implementation does nothing. Child classes override this with
   * the actual loading logic.
   */
  public function run() {
  }
  /**
   * Performs any work that should follow the import.
   *
   * The base implementation does nothing. Child classes may override it.
   * NOTE(review): presumably this is invoked after run() has completed --
   * confirm against the code that executes importers.
   */
  public function postRun() {
  }
  626. }