TripalFieldDownloader.inc 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637
  1. <?php
  /**
   * Base class for writing the contents of a Tripal data collection to a file.
   *
   * A data collection is made up of one or more "collection bundles". Each
   * collection bundle lists the entity IDs and the field IDs of a single
   * content type, which may live on the local site or on a remote Tripal site.
   * Child classes implement formatEntity() and getHeader() to produce the
   * lines of a specific file format.
   */
  abstract class TripalFieldDownloader {

    /**
     * Sets the label shown to the user describing this formatter. It
     * should be a short identifier. Use the $full_label for a more
     * descriptive label.
     */
    static public $label = 'Generic';

    /**
     * A more verbose label that better describes the formatter.
     */
    static public $full_label = 'Generic File format';

    /**
     * Indicates the default extension for the output file.
     */
    static public $default_extension = 'txt';

    /**
     * The data collection record assigned to this downloader.
     */
    protected $collection = NULL;

    /**
     * The collection ID.
     */
    protected $collection_id = NULL;

    /**
     * An array of collection_bundle records for the content types that
     * belong to this data collection.
     */
    protected $collection_bundles = NULL;

    /**
     * The output file URI.
     */
    protected $outfile = '';

    /**
     * An array of printable fields. Because fields can come from multiple
     * bundles and those bundles can be from multiple sites, it is possible that
     * 1) two bundles use the same field and we want to consolidate to a
     * single printable field; and 2) that a remote site may use the same term
     * for a field as a bundle on the local site. The only way to sort out this
     * mess is to use the term accession numbers. Therefore, the array contains
     * a unique list of printable fields using their accession numbers as keys
     * and a field label as the value.
     */
    protected $printable_fields = array();

    /**
     * The remote site JSON data returned for the entity.
     */
    protected $remote_entity = '';

    /**
     * An array that associates a field ID with a term.
     *
     * The first-level key is the site ID. For the local site this will be
     * the word 'local'; for all others it will be the numeric ID. The
     * second-level key is the bundle name. For local bundles this will
     * always be bio_data_xxxx. Third are two subkeys: by_field and
     * by_accession. To look up a field's term use the 'by_field' subkey
     * with the field_id as the next level. To look up the field ID for a term
     * use the 'by_accession' subkey with the accession as the next level. Below
     * is an example of the structure of this array.
     *
     * @code
     * Array (
     *   [local] => Array(
     *     [bio_data_7] => Array(
     *       [by_field] => Array(
     *         [56] => data:2091,
     *         [57] => OBI:0100026,
     *         [17] => schema:name,
     *         [58] => data:2044,
     *         [34] => data:0842,
     *         [67] => schema:alternateName,
     *       ),
     *       [by_accession] => Array (
     *         [data:2091] => 56,
     *         [OBI:0100026] => 57,
     *         [schema:name] => 17,
     *         [data:2044] => 58,
     *         [data:0842] => 34,
     *         [schema:alternateName] => 67,
     *       ),
     *     ),
     *   ),
     * )
     * @endcode
     */
    protected $fields2terms = array();

    /**
     * A list of field and instance items, indexed first by site_id with 'local'
     * being the key for local fields and the numeric site_id for remote
     * fields. The second-level key is the bundle_name and the third is the
     * field_id. Below the field_id are the keys 'field' and 'instance' where
     * the value of each is the field or instance details respectively.
     */
    protected $fields = array();

    /**
     * Constructs a new instance of the TripalFieldDownloader class.
     *
     * @param $collection_id
     *   The ID for the collection.
     * @param $outfile_name
     *   The name of the output file to create. The name should not include
     *   a path.
     *
     * @throws Exception
     *   If no output file name is given, if the collection record cannot be
     *   found, or if the owner's directory cannot be prepared.
     */
    public function __construct($collection_id, $outfile_name) {
      if (!$outfile_name) {
        throw new Exception("Please provide an outputfilename");
      }

      // Get the collection record and store it.
      $this->collection = db_select('tripal_collection', 'tc')
        ->fields('tc')
        ->condition('collection_id', $collection_id, '=')
        ->execute()
        ->fetchObject();
      if (!$this->collection) {
        throw new Exception("Could not find a data collection with the ID: " . $collection_id);
      }

      // The file is stored in a directory named after the collection owner.
      $user_dir = 'public://tripal/users/' . $this->collection->uid;
      if (!file_prepare_directory($user_dir, FILE_CREATE_DIRECTORY)) {
        // Without the directory nothing else in this class can work, so fail
        // loudly instead of handing back a partially initialized object.
        $message = 'Could not access the directory on the server for storing this file.';
        watchdog('tripal', $message, array(), WATCHDOG_ERROR);
        throw new Exception($message);
      }

      // Set the collection ID of the collection that this downloader will use.
      $this->collection_id = $collection_id;
      $this->outfile = $user_dir . '/' . $outfile_name;

      // A data collection may have multiple bundles. We'll need to get
      // them all and store them. Start from an empty array so that a
      // collection without bundles can still be iterated safely.
      $this->collection_bundles = array();
      $collection_bundles = db_select('tripal_collection_bundle')
        ->fields('tripal_collection_bundle')
        ->condition('collection_id', $collection_id, '=')
        ->execute();
      while ($collection_bundle = $collection_bundles->fetchObject()) {
        $collection_bundle->ids = unserialize($collection_bundle->ids);
        $collection_bundle->fields = unserialize($collection_bundle->fields);
        $this->collection_bundles[] = $collection_bundle;
      }

      // Map the fields to their term accessions.
      $this->setFields();
      $this->setFields2Terms();
      $this->setPrintableFields();
    }

    /**
     * Indicates if a given field is supported by this Downloader class.
     *
     * A field is supported when the class name of this downloader appears in
     * the list of download formatters declared for the field.
     *
     * @param $field
     *   A field info array.
     * @param $instance
     *   A field instance info array. For remote fields the 'formatters' key
     *   holds the list of supported downloader class names.
     *
     * @return
     *   TRUE if the field can be written by this downloader, FALSE otherwise.
     */
    public function isFieldSupported($field, $instance) {
      $this_formatter = get_class($this);

      // Remote fields are the "fake" fields built by setRemoteFields(). Their
      // supported formatters come from the remote site's documentation and
      // are stored on the instance.
      $is_remote = isset($field['storage']['type']) && $field['storage']['type'] == 'tripal_remote_site';
      if ($is_remote) {
        return isset($instance['formatters']) && is_array($instance['formatters'])
          && in_array($this_formatter, $instance['formatters'], TRUE);
      }

      // If a field is a TripalField then check its supported downloaders.
      $field_type = isset($field['type']) ? $field['type'] : '';
      if ($field_type && tripal_load_include_field_class($field_type)) {
        if (property_exists($field_type, 'download_formatters')) {
          $formatters = $field_type::$download_formatters;
          if (is_array($formatters) && in_array($this_formatter, $formatters, TRUE)) {
            return TRUE;
          }
        }
      }
      return FALSE;
    }

    /**
     * Retrieves the location of the downloadable file.
     *
     * @return
     *   The value of the output file property, which the constructor builds
     *   as a 'public://' stream wrapper URI.
     */
    public function getURL() {
      return $this->outfile;
    }

    /**
     * Removes the downloadable file.
     *
     * Looks up the managed file record by URI, drops the 'tripal'
     * 'data-collection' usage and then force-deletes the file.
     */
    public function delete() {
      $fid = db_select('file_managed', 'fm')
        ->fields('fm', array('fid'))
        ->condition('uri', $this->outfile)
        ->execute()
        ->fetchField();
      if ($fid) {
        $file = file_load($fid);
        file_usage_delete($file, 'tripal', 'data-collection');
        file_delete($file, TRUE);
      }
    }

    /**
     * Creates the downloadable file.
     *
     * @param $job
     *   If this function is run as a Tripal Job then this argument can be
     *   set to the TripalJob object for keeping track of progress.
     *
     * @throws Exception
     *   If the output file cannot be opened for writing. Exceptions thrown
     *   while formatting are rethrown after the file handle is closed.
     */
    public function write(TripalJob $job = NULL) {
      $fh = fopen(drupal_realpath($this->outfile), "w");
      if (!$fh) {
        throw new Exception("Cannot open collection file: " . $this->outfile);
      }

      $bundle_collections = is_array($this->collection_bundles) ? $this->collection_bundles : array();
      $num_handled = 0;

      try {
        $headers = $this->getHeader();
        if ($headers) {
          foreach ($headers as $line) {
            fwrite($fh, $line . "\r\n");
          }
        }

        // Count the total number of entities.
        $total_entities = 0;
        foreach ($bundle_collections as $bundle_collection) {
          $total_entities += count($bundle_collection->ids);
        }
        if ($job) {
          $job->setTotalItems($total_entities);
        }

        foreach ($bundle_collections as $bundle_collection) {
          $bundle_name = $bundle_collection->bundle_name;
          $entity_ids = $bundle_collection->ids;
          $fields = $bundle_collection->fields;
          $site_id = $bundle_collection->site_id;

          foreach ($entity_ids as $entity_id) {
            $num_handled++;
            if ($job) {
              $job->setItemsHandled($num_handled);
            }

            // If we have a site_id then we need to get the entity from the
            // remote service. Otherwise load the entity from the local system.
            if ($site_id) {
              $entity = $this->loadRemoteEntity($entity_id, $site_id, $bundle_name);
            }
            else {
              $result = tripal_load_entity('TripalEntity', array($entity_id), FALSE, $fields);
              $entity = isset($result[$entity_id]) ? $result[$entity_id] : NULL;
            }

            // Entities that could not be loaded are skipped.
            if (!$entity) {
              continue;
            }

            $lines = $this->formatEntity($entity);
            foreach ($lines as $line) {
              fwrite($fh, $line . "\r\n");
            }
          }
        }
      }
      catch (Exception $e) {
        // Do not leak the file handle when formatting fails.
        fclose($fh);
        throw $e;
      }
      fclose($fh);

      $file = new stdClass();
      $file->uri = $this->outfile;
      $file->filename = basename($this->outfile);
      $file->filemime = file_get_mimetype($this->outfile);
      $file->uid = $this->collection->uid;
      $file->status = FILE_STATUS_PERMANENT;

      // Check if this file already exists. If it does then just update
      // the stats.
      $fid = db_select('file_managed', 'fm')
        ->fields('fm', array('fid'))
        ->condition('uri', $this->outfile)
        ->execute()
        ->fetchField();
      if ($fid) {
        $file->fid = $fid;
        $file = file_save($file);
      }
      else {
        $file = file_save($file);
        $fid = $file->fid;
        $file = file_load($fid);
      }

      // We use the fid for the last argument because these files
      // aren't really associated with any entity, but we need a value.
      // Only add the usage if it doesn't already exist. This must also
      // cover a brand new file that has no 'tripal' usage at all.
      $usage = file_usage_list($file);
      if (!isset($usage['tripal']['data-collection'])) {
        file_usage_add($file, 'tripal', 'data-collection', $fid);
      }

      if ($job) {
        $job->setItemsHandled($num_handled);
      }
    }

    /**
     * Sets up a download stream for the file.
     *
     * Not implemented in this base class.
     */
    public function download() {
    }

    /**
     * Build and return a fake entity from a remote site using
     * Tripal web services calls.
     *
     * @param $remote_id
     *   The ID of the entity on the remote site.
     * @param $site_id
     *   Unique site ID assigned in the tripal_sites table when
     *   a new site is created via the web services interface.
     * @param $bundle_name
     *   Bundle name of the remote entity. It is used as the first part of
     *   the query path sent to the remote site.
     *
     * @return
     *   A fake entity (stdClass) structured to allow formatEntity() to
     *   process it, or FALSE if the remote entity could not be retrieved.
     */
    protected function loadRemoteEntity($remote_id, $site_id, $bundle_name) {
      $site = empty($site_id) ? 'local' : $site_id;

      // Get the remote entity.
      $query = $bundle_name . '/' . $remote_id;
      $remote_entity = tripal_query_remote_site($site_id, $query);
      if (!$remote_entity) {
        return FALSE;
      }

      // Start building the fake entity.
      $entity = new stdClass();
      $entity->entityType = 'TripalEntity';
      $entity->entityInfo = [];
      $entity->id = $remote_id;
      $entity->type = 'TripalEntity';
      $entity->bundle = $bundle_name;
      $entity->site_id = $site_id;

      // Get the context JSON for this remote entity, we'll use it to map
      // the properties to the correct fields. If the context cannot be
      // retrieved we continue with an empty one, in which case each field is
      // looked up by its accession.
      $context = array();
      if (!empty($remote_entity['@context'])) {
        $response = drupal_http_request($remote_entity['@context']);
        if (empty($response->error) && !empty($response->data)) {
          $decoded = drupal_json_decode($response->data);
          if (is_array($decoded) && isset($decoded['@context']) && is_array($decoded['@context'])) {
            $context = $decoded['@context'];
          }
        }
      }

      // Iterate through the fields that are printable and get those values
      // from the results.
      foreach ($this->printable_fields as $accession => $label) {
        // If the field isn't part of this bundle then skip it.
        if (empty($this->fields2terms[$site][$bundle_name]['by_accession'][$accession])) {
          continue;
        }
        $field_id = $this->fields2terms[$site][$bundle_name]['by_accession'][$accession];
        if (!isset($this->fields[$site][$bundle_name][$field_id]['field'])) {
          continue;
        }
        $field = $this->fields[$site][$bundle_name][$field_id]['field'];
        $field_name = $field['field_name'];

        // Get the key for this field from the context. When the context has
        // no entry for the accession, the accession itself is used as key.
        $field_key = $accession;
        foreach ($context as $k => $v) {
          if (!is_array($v) and $v == $accession) {
            $field_key = $k;
          }
        }

        // If the key is for a field that is not "auto attached" then the
        // value would have to be retrieved through a separate call, which is
        // not done here; such fields get an empty value.
        $needs_query = FALSE;
        if (array_key_exists($field_key, $context) and is_array($context[$field_key]) and
            array_key_exists('@type', $context[$field_key]) and $context[$field_key]['@type'] == '@id') {
          $needs_query = TRUE;
        }

        // If the field is not in this remote entity then add an empty value
        // for it.
        $value = '';
        if (!$needs_query and is_array($remote_entity) and array_key_exists($field_key, $remote_entity)) {
          $value = $remote_entity[$field_key];
        }
        $entity->{$field_name}['und'][0]['value'] = $value;
      }
      return $entity;
    }

    /**
     * Retrieves the API documentation for a remote Tripal web service.
     *
     * The documentation is cached per site once it has been retrieved
     * successfully.
     *
     * @param $site_id
     *   The ID of the remote site.
     *
     * @return
     *   The value returned by tripal_get_remote_site_doc() for the site, or
     *   the cached copy of it.
     */
    protected function retrieveRemoteAPIDoc($site_id) {
      $cache_name = 'tripal_web_services_doc_' . $site_id;
      if ($cache = cache_get($cache_name)) {
        return $cache->data;
      }

      $site_doc = tripal_get_remote_site_doc($site_id);
      // Only cache a successful lookup so that failures are retried.
      if ($site_doc) {
        cache_set($cache_name, $site_doc);
      }
      return $site_doc;
    }

    /**
     * A helper function for the setFields() function.
     *
     * Adds local fields to the list of fields.
     */
    private function setLocalFields() {
      if (!is_array($this->collection_bundles)) {
        return;
      }
      foreach ($this->collection_bundles as $collection_bundle) {
        // Skip remote fields.
        if ($collection_bundle->site_id) {
          continue;
        }
        if (!is_array($collection_bundle->fields)) {
          continue;
        }
        $bundle_name = $collection_bundle->bundle_name;
        foreach ($collection_bundle->fields as $field_id) {
          $field = field_info_field_by_id($field_id);
          if (!$field) {
            // The field no longer exists on this site.
            continue;
          }
          $instance = field_info_instance('TripalEntity', $field['field_name'], $bundle_name);
          if (!$instance) {
            continue;
          }
          $this->fields['local'][$bundle_name][$field_id]['field'] = $field;
          $this->fields['local'][$bundle_name][$field_id]['instance'] = $instance;
        }
      }
    }

    /**
     * A helper function for the setFields() function.
     *
     * Adds remote fields to the list of fields. Remote fields are keyed by
     * the numeric site ID of their collection bundle.
     */
    private function setRemoteFields() {
      if (!is_array($this->collection_bundles)) {
        return;
      }
      foreach ($this->collection_bundles as $collection_bundle) {
        // Skip local fields.
        if (!$collection_bundle->site_id) {
          continue;
        }
        if (!is_array($collection_bundle->fields)) {
          continue;
        }
        $bundle_name = $collection_bundle->bundle_name;
        $site = $collection_bundle->site_id;
        $site_doc = $this->retrieveRemoteAPIDoc($site);

        // Get the class that matches this bundle.
        $class = $this->getRemoteClass($bundle_name, $site_doc);

        // Iterate through the fields of this collection and get the
        // info for each one from the class. We will create "fake" field and
        // instance info arrays.
        foreach ($collection_bundle->fields as $field_id) {
          // Get the property from the document for this field.
          $property = $this->getRemoteClassProperty($class, $field_id, $site_doc);

          // Now create the fake field and instance. Remote field IDs have
          // the form "vocabulary:accession".
          $parts = explode(':', $field_id, 2);
          $vocab = $parts[0];
          $accession = isset($parts[1]) ? $parts[1] : '';
          $field_name = 'tripal_remote_site_' . $site . '_' . $field_id;
          $field = array(
            'field_name' => $field_name,
            'type' => $field_name,
            'storage' => array(
              'type' => 'tripal_remote_site'
            ),
          );
          $instance = array(
            'label' => isset($property['hydra:title']) ? $property['hydra:title'] : '',
            'description' => isset($property['hydra:description']) ? $property['hydra:description'] : '',
            'formatters' => isset($property['tripal_formatters']) ? $property['tripal_formatters'] : array(),
            'settings' => array(
              'term_vocabulary' => $vocab,
              'term_accession' => $accession
            ),
            'field_name' => $field_name,
            'entity_type' => 'TripalEntity',
            'bundle_name' => $bundle_name,
          );
          $this->fields[$site][$bundle_name][$field_id]['field'] = $field;
          $this->fields[$site][$bundle_name][$field_id]['instance'] = $instance;
        }
      }
    }

    /**
     * Finds a class in a remote site's API documentation.
     *
     * @param $class_id
     *   The value to match against the '@id' of each supported class.
     * @param $site_doc
     *   The remote site documentation array.
     *
     * @return
     *   The matching class array (the last one if several match) or NULL.
     */
    private function getRemoteClass($class_id, $site_doc){
      $class = NULL;
      if (!is_array($site_doc) || !isset($site_doc['supportedClass']) || !is_array($site_doc['supportedClass'])) {
        return $class;
      }
      foreach ($site_doc['supportedClass'] as $item) {
        if (isset($item['@id']) && $item['@id'] == $class_id) {
          $class = $item;
        }
      }
      return $class;
    }

    /**
     * Finds a supported property of a remote class.
     *
     * @param $class
     *   A class array as returned by getRemoteClass().
     * @param $prop_id
     *   The value to match against the 'property' key of each item.
     * @param $site_doc
     *   The remote site documentation array (not used by this lookup).
     *
     * @return
     *   The first matching supported property array or NULL.
     */
    private function getRemoteClassProperty($class, $prop_id, $site_doc){
      if (!is_array($class) || !isset($class['supportedProperty']) || !is_array($class['supportedProperty'])) {
        return NULL;
      }
      foreach ($class['supportedProperty'] as $item) {
        if (isset($item['property']) && $item['property'] == $prop_id) {
          return $item;
        }
      }
      return NULL;
    }

    /**
     * Sets the fields array.
     */
    protected function setFields() {
      $this->setLocalFields();
      $this->setRemoteFields();
    }

    /**
     * Sets the fields2terms array.
     *
     * The fields2terms array provides an easy lookup for mapping a field
     * to its term accession and a term accession back to its field.
     */
    protected function setFields2Terms() {
      foreach ($this->fields as $site => $bundles) {
        foreach ($bundles as $bundle_name => $bundle_fields) {
          foreach ($bundle_fields as $field_id => $info) {
            $instance = $info['instance'];
            $accession = $instance['settings']['term_vocabulary'] . ':' . $instance['settings']['term_accession'];
            $this->fields2terms[$site][$bundle_name]['by_field'][$field_id] = $accession;
            $this->fields2terms[$site][$bundle_name]['by_accession'][$accession] = $field_id;
          }
        }
      }
    }

    /**
     * Consolidates all the fields into a single list of accession numbers.
     *
     * The array of printable fields is keyed by accession number with the
     * field label as the value. The label used is from the first supported
     * occurrence of an accession.
     */
    protected function setPrintableFields() {
      foreach ($this->fields as $site => $bundles) {
        foreach ($bundles as $bundle_name => $bundle_fields) {
          foreach ($bundle_fields as $field_id => $info) {
            $field = $info['field'];
            $instance = $info['instance'];
            $accession = $instance['settings']['term_vocabulary'] . ':' . $instance['settings']['term_accession'];
            if (array_key_exists($accession, $this->printable_fields)) {
              continue;
            }
            // Only include fields that support this downloader type in
            // our list of printable fields.
            if ($this->isFieldSupported($field, $instance)) {
              $this->printable_fields[$accession] = $instance['label'];
            }
          }
        }
      }
    }

    /**
     * Formats the entity and the specified fields for output.
     *
     * This function should be implemented by a child class. It should iterate
     * over the fields for the entity and return the appropriate format. It may
     * return multiple lines of output if necessary.
     *
     * @param $entity
     *   The entity object. The fields that should be formatted are already
     *   loaded.
     *
     * @return
     *   An array of strings (one per line of output).
     */
    abstract protected function formatEntity($entity);

    /**
     * Retrieves header lines.
     *
     * This function should be implemented by a child class. It should return
     * the header lines for an output file.
     */
    abstract protected function getHeader();

  }