TripalFieldDownloader.inc 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504
  1. <?php
  2. abstract class TripalFieldDownloader {
  3. /**
  4. * Sets the label shown to the user describing this formatter. It
  5. * should be a short identifier. Use the $full_label for a more
  6. * descriptive label.
  7. */
  8. static public $label = 'Generic';
  9. /**
  10. * A more verbose label that better describes the formatter.
  11. */
  12. static public $full_label = 'Generic File format';
  13. /**
  14. * Indicates the default extension for the outputfile.
  15. */
  16. static public $default_extension = 'txt';
  17. /**
  18. * The data collection assigned to this downloader.
  19. */
  20. protected $collection = NULL;
  21. /**
  22. * The collection ID
  23. */
  24. protected $collection_id = NULL;
  25. /**
  26. * An array of collection_bundle records for the content types that
  27. * belong to this data collection.
  28. */
  29. protected $collection_bundles = NULL;
  30. /**
  31. * The output file URI.
  32. */
  33. protected $outfile = '';
  34. /**
  35. * An array of printable fields. Because fields can come from multiple
  36. * bundles and those bundles can be from multiple sites, it is possible that
  37. * 1) two bundles use the same field and we want to conslidate to a
  38. * single printable field; and 2) that a remote site may use the same term
  39. * for a field as a bundle on the local site. The only way to sort out this
  40. * mess is to use the term accession numbers. Therefore, the array contains
  41. * a unique list of printable fields using their accession numbers as keys
  42. * and a field label as the value.
  43. *
  44. */
  45. protected $printable_fields = array();
  46. /**
  47. * The remote site json data returned for the entity
  48. */
  49. protected $remote_entity = '';
  50. /**
  51. * An array that associates a field ID with a term.
  52. *
  53. * The first-level key is the site ID. For the local site this will be
  54. * the word 'local' for all others it will be the numeric id. The second
  55. * level key is the bundle bundle name. For local bundles this will
  56. * always be bio_data_xxxx. Third, are two subkeys: by_field and
  57. * by_accession. To lookup a field's term you use the 'by_field' subkey
  58. * with the field_id as the next level. To lookup the field ID for a term
  59. * use the 'by_accession' subkey with the accession as the next level. Below
  60. * is an example of the structure of this array.
  61. *
  62. * @code
  63. Array (
  64. [local] => Array(
  65. [bio_data_7] => Array(
  66. [by_field] => Array(
  67. [56] => data:2091,
  68. [57] => OBI:0100026,
  69. [17] => schema:name,
  70. [58] => data:2044,
  71. [34] => data:0842,
  72. [67] => schema:alternateName,
  73. ),
  74. [by_accession] => Array (
  75. [data:2091] => 56,
  76. [OBI:0100026] => 57,
  77. [schema:name] => 17,
  78. [data:2044] => 58,
  79. [data:0842] => 34,
  80. [schema:alternateName] => 67,
  81. ),
  82. ),
  83. ),
  84. )
  85. * @endcode
  86. */
  87. protected $fields2terms = array();
  88. /**
  89. * A list of field and instance items, indexed first by site_id with 'local'
  90. * being the key for local fields and the numeric site_id for remote
  91. * fields. The second-levle key is the bundle_name and the the field_id.
  92. * Below the field_id are the keys 'field' or 'instance' where the
  93. * value of each is the field or instance details respectively.
  94. */
  95. protected $fields = array();
  96. /**
  97. * The file handle for an opeend file using during writing.
  98. */
  99. protected $fh;
  100. /**
  101. * Constructs a new instance of the TripalFieldDownloader class.
  102. *
  103. * @param $collection_id
  104. * The ID for the collection.
  105. * @param $outfile_name
  106. * The name of the output file to create. The name should not include
  107. * a path.
  108. */
  109. public function __construct($collection_id, $outfile_name) {
  110. if (!$outfile_name) {
  111. throw new Exception("Please provide an outputfilename");
  112. }
  113. // Get the collection record and store it.
  114. $collection = db_select('tripal_collection', 'tc')
  115. ->fields('tc')
  116. ->condition('collection_id', $collection_id, '=')
  117. ->execute()
  118. ->fetchObject();
  119. if (!$collection) {
  120. throw new Exception(t("Cannot find a collection that matches the provided id: !id", array('!id' => $collection_id)));
  121. }
  122. $this->collection = $collection;
  123. // Make sure the user directory exists
  124. $user = user_load($this->collection->uid);
  125. $user_dir = tripal_get_user_files_dir($user);
  126. if (!tripal_is_user_files_dir_writeable($user)) {
  127. throw new Exception(t("The user's data directory is not writeable: !user_dir.", array('!user_dir' => $user_dir)));
  128. }
  129. // Set the collection ID of the collection that this downloader will use.
  130. $this->collection_id = $collection_id;
  131. // Make sure the data_collections directory exists.
  132. $collections_dir = $user_dir . '/data_collections';
  133. if (!file_exists($collections_dir)) {
  134. if (!file_prepare_directory($collections_dir, [FILE_CREATE_DIRECTORY])) {
  135. throw new Exception(t("The data_collection directory cannot be created: !collections_dir.", array('!collections_dir' => $collections_dir)));
  136. }
  137. }
  138. $this->outfile = $collections_dir . '/' . $outfile_name;
  139. // A data collection may have multiple bundles. We'll need to get
  140. // them all and store them.
  141. $collection_bundles = db_select('tripal_collection_bundle')
  142. ->fields('tripal_collection_bundle')
  143. ->condition('collection_id', $collection_id, '=')
  144. ->execute();
  145. while ($collection_bundle = $collection_bundles->fetchObject()) {
  146. $collection_bundle->ids = unserialize($collection_bundle->ids);
  147. $collection_bundle->fields = unserialize($collection_bundle->fields);
  148. $this->collection_bundles[] = $collection_bundle;
  149. }
  150. // Map the fields to their term accessions.
  151. $this->setFields();
  152. $this->setFields2Terms();
  153. $this->setPrintableFields();
  154. }
  155. /**
  156. * Inidcates if a given field is supported by this Downloader class.
  157. *
  158. * @param $field
  159. * A field info array.
  160. */
  161. public function isFieldSupported($field, $instance) {
  162. $field_name = $field['field_name'];
  163. $field_type = $field['type'];
  164. $this_formatter = get_class($this);
  165. // If a field is a TripalField then check its supported downloaders.
  166. if (tripal_load_include_field_class($field_type)) {
  167. $formatters = $field_type::$download_formatters;
  168. if (in_array($this_formatter, $formatters)) {
  169. return TRUE;
  170. }
  171. }
  172. // If this is a remote field then check that differently.
  173. if ($field['storage']['type'] == 'tripal_remote_field' ) {
  174. if (in_array($this_formatter, $instance['formatters'])) {
  175. return TRUE;
  176. }
  177. }
  178. }
  179. /**
  180. * Retrieves the URL for the downloadable file.
  181. */
  182. public function getURL() {
  183. return $this->outfile;
  184. }
  185. /**
  186. * Removes the downloadable file.
  187. */
  188. public function delete() {
  189. $fid = db_select('file_managed', 'fm')
  190. ->fields('fm', array('fid'))
  191. ->condition('uri', $this->outfile)
  192. ->execute()
  193. ->fetchField();
  194. if ($fid) {
  195. $file = file_load($fid);
  196. file_usage_delete($file, 'tripal', 'data-collection');
  197. file_delete($file, TRUE);
  198. }
  199. }
  200. /**
  201. *
  202. * @param TripalJob $job
  203. */
  204. public function writeInit(TripalJob $job = NULL) {
  205. $user = user_load($this->collection->uid);
  206. $this->fh = fopen(drupal_realpath($this->outfile), "w");
  207. if (!$this->fh) {
  208. throw new Exception("Cannout open collection file: " . $this->outfile);
  209. }
  210. // Add the headers to the file.
  211. $headers = $this->getHeader();
  212. if ($headers) {
  213. foreach ($headers as $line) {
  214. fwrite($this->fh, $line . "\r\n");
  215. }
  216. }
  217. }
  218. /**
  219. * Write a single entity to the file.
  220. *
  221. * Before calling this function call the initWrite() function to
  222. * establish the file and write headers.
  223. *
  224. * @param $entity
  225. * The Entity to write.
  226. * @param TripalJob $job
  227. */
  228. public function writeEntity($entity, TripalJob $job = NULL){
  229. $lines = $this->formatEntity($entity);
  230. foreach ($lines as $line) {
  231. fwrite($this->fh, $line . "\r\n");
  232. }
  233. }
  234. /**
  235. * Closes the output file once writing of all entities is completed.
  236. *
  237. * @param TripalJob $job
  238. */
  239. public function writeDone(TripalJob $job = NULL) {
  240. fclose($this->fh);
  241. $user = user_load($this->collection->uid);
  242. $file = new stdClass();
  243. $file->uri = $this->outfile;
  244. $file->filename = basename($this->outfile);
  245. $file->filemime = file_get_mimetype($this->outfile);
  246. $file->uid = $user->uid;
  247. $file->status = FILE_STATUS_PERMANENT;
  248. // Check if this file already exists. If it does then just update
  249. // the stats.
  250. $fid = db_select('file_managed', 'fm')
  251. ->fields('fm', array('fid'))
  252. ->condition('uri', $this->outfile)
  253. ->execute()
  254. ->fetchField();
  255. if ($fid) {
  256. $file->fid = $fid;
  257. $file = file_save($file);
  258. }
  259. else {
  260. $file = file_save($file);
  261. $fid = $file->fid;
  262. $file = file_load($fid);
  263. }
  264. // Associate this file with data collections if it isn't already
  265. $usage = file_usage_list($file);
  266. if (!array_key_exists('tripal', $usage) or
  267. !array_key_exists('data_collection', $usage['tripal'])) {
  268. file_usage_add($file, 'tripal', 'data_collection', $this->collection_id);
  269. }
  270. }
  271. /**
  272. * Creates the downloadable file.
  273. *
  274. * @param $job
  275. * If this function is run as a Tripal Job then this argument can be
  276. * set to the Tripaljob object for keeping track of progress.
  277. */
  278. public function write(TripalJob $job = NULL) {
  279. $this->initWrite($job);
  280. $num_handled = 0;
  281. foreach ($this->collection_bundles as $bundle_collection) {
  282. $collection_bundle_id = $bundle_collection->collection_bundle_id;
  283. $bundle_name = $bundle_collection->bundle_name;
  284. $entity_ids = $bundle_collection->ids;
  285. $fields = $bundle_collection->fields;
  286. $site_id = $bundle_collection->site_id;
  287. foreach ($entity_ids as $entity_id) {
  288. $num_handled++;
  289. if ($job) {
  290. $job->setItemsHandled($num_handled);
  291. }
  292. // if we have a site_id then we need to get the entity from the
  293. // remote service. Otherwise create the entity from the local system.
  294. if ($site_id) {
  295. $entity = $this->loadRemoteEntity($entity_id, $site_id, $bundle_name);
  296. if (!$entity) {
  297. continue;
  298. }
  299. }
  300. else {
  301. $result = tripal_load_entity('TripalEntity', array($entity_id), FALSE, $fields, FALSE);
  302. $entity = $result[$entity_id];
  303. }
  304. if (!$entity) {
  305. continue;
  306. }
  307. $lines = $this->formatEntity($entity);
  308. foreach ($lines as $line) {
  309. fwrite($this->fh, $line . "\r\n");
  310. }
  311. }
  312. }
  313. $this->finishWrite($job);
  314. }
  315. /**
  316. * Setups a download stream for the file.
  317. */
  318. public function download() {
  319. }
  320. /**
  321. * A helper function for the setFields() function.
  322. *
  323. * Adds local fields to the list of fields.
  324. */
  325. private function setLocalFields() {
  326. foreach ($this->collection_bundles as $collection_bundle) {
  327. $bundle_name = $collection_bundle->bundle_name;
  328. if ($collection_bundle->site_id) {
  329. continue;
  330. }
  331. foreach ($collection_bundle->fields as $field_id) {
  332. $field = field_info_field_by_id($field_id);
  333. $instance = field_info_instance('TripalEntity', $field['field_name'], $bundle_name);
  334. $this->fields['local'][$bundle_name][$field_id]['field'] = $field;
  335. $this->fields['local'][$bundle_name][$field_id]['instance'] = $instance;
  336. }
  337. }
  338. }
  339. /**
  340. * A helper function for the setFields() function.
  341. *
  342. * Adds remote fields to the list of fields.
  343. */
  344. private function setRemoteFields() {
  345. // We can't use the Tripal ws API extensions if the
  346. // tripal_ws module is not enabled.
  347. if (!module_exists('tripal_ws')) {
  348. return;
  349. }
  350. foreach ($this->collection_bundles as $collection_bundle) {
  351. $bundle_name = $collection_bundle->bundle_name;
  352. $site_id = $collection_bundle->site_id;
  353. // Skip local fields.
  354. if (!$site_id) {
  355. continue;
  356. }
  357. // Iterate through the fields of this collection and get the
  358. // info for each one from the class. We will create "fake" field and
  359. // instance info arrays.
  360. foreach ($collection_bundle->fields as $field_id) {
  361. $field = tripal_get_remote_field_info($site_id, $bundle_name, $field_id);
  362. $instance = tripal_get_remote_field_instance_info($site_id, $bundle_name, $field_id);
  363. $this->fields[$site_id][$bundle_name][$field_id]['field'] = $field;
  364. $this->fields[$site_id][$bundle_name][$field_id]['instance'] = $instance;
  365. }
  366. }
  367. }
  368. /**
  369. * Sets the fields array
  370. */
  371. protected function setFields() {
  372. $this->setLocalFields();
  373. $this->setRemoteFields();
  374. }
  375. /**
  376. * Sets the fields2term array.
  377. *
  378. * The fields2term array provides an easy lookup for mapping a term
  379. * to it's accession number.
  380. **/
  381. protected function setFields2Terms() {
  382. foreach ($this->fields as $site => $bundles) {
  383. foreach ($bundles as $bundle_name => $bundle_fields) {
  384. foreach ($bundle_fields as $field_id => $info) {
  385. $instance = $info['instance'];
  386. $accession = $instance['settings']['term_vocabulary'] . ':' . $instance['settings']['term_accession'];
  387. $this->fields2terms[$site][$bundle_name]['by_field'][$field_id] = $accession;
  388. $this->fields2terms[$site][$bundle_name]['by_accession'][$accession] = $field_id;
  389. }
  390. }
  391. }
  392. }
  393. /**
  394. * Conslidates all the fields into a single list of accession numbers.
  395. *
  396. * The array of printable fields will contain an array containing the
  397. * accession number and the label. The title used is from the first
  398. * occurance of an accession.
  399. */
  400. protected function setPrintableFields() {
  401. foreach ($this->fields as $site => $bundles) {
  402. foreach ($bundles as $bundle_name => $bundle_fields) {
  403. foreach ($bundle_fields as $field_id => $info) {
  404. $field = $info['field'];
  405. $instance = $info['instance'];
  406. $accession = $instance['settings']['term_vocabulary'] . ':' . $instance['settings']['term_accession'];
  407. if (!array_key_exists($accession, $this->printable_fields)) {
  408. // Only include fields that support this downloader type in
  409. // or list of printable fields.
  410. if ($this->isFieldSupported($field, $instance)) {
  411. $this->printable_fields[$accession] = $instance['label'];
  412. }
  413. }
  414. }
  415. }
  416. }
  417. }
  418. /**
  419. * Formats the entity and the specified fields for output.
  420. *
  421. * This function should be implemented by a child class. It should iterate
  422. * over the fields for the entity and return the appropriate format. It may
  423. * return multiple lines of output if necessary.
  424. *
  425. * @param $entity
  426. * The entity object. The fields that should be formatted are already
  427. * loaded.
  428. *
  429. * @return
  430. * An array of strings (one per line of output.
  431. */
  432. abstract protected function formatEntity($entity);
  433. /**
  434. * Retrieves header lines
  435. *
  436. * This function should be implemented by a child class. It should return
  437. * the header lines for an output file.
  438. */
  439. abstract protected function getHeader();
  440. }