tripal_pub.api.inc 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509
  1. <?php
  2. /**
  3. * @file
  4. * The Tripal Pub API
  5. *
  6. * @defgroup tripal_pub_api Publication Module API
  7. * @ingroup tripal_api
  8. */
  9. /*
  10. * Retrieves a list of publications as an associated array where
  11. * keys correspond directly with Tripal Pub CV terms.
  12. *
  13. * @param remote_db
  14. * The name of the remote publication database to query. These names should
  15. * match the name of the databases in the Chado 'db' table. Currently
  16. * supported databass include
  17. * 'PMID': PubMed
  18. *
  19. * @param search_array
  20. * An associate array containing the search criteria. The following key
  21. * are expected
  22. * 'remote_db': Specifies the name of the remote publication database
  23. * 'num_criteria': Specifies the number of criteria present in the search array
  24. * 'days': The number of days to include in the search starting from today
  25. * 'criteria': An associate array containing the search critiera. There should
  26. * be no less than 'num_criteria' elements in this array.
  27. *
  28. * The following keys are expected in the 'criteria' array
  29. * 'search_terms': A list of terms to search on, separated by spaces.
  30. * 'scope': The fields to search in the remote database. Valid values
  31. * include: 'title', 'abstract', 'author' and 'any'
  32. * 'operation': The logical operation to use for this criteria. Valid
  33. * values include: 'AND', 'OR' and 'NOT'.
  34. * @param $num_to_retrieve
  35. * The number of records to retrieve. In cases with large numbers of
  36. * records to retrieve, the remote database may limit the size of each
  37. * retrieval.
  38. * @param $pager_id
  39. * Optional. This function uses the 'tripal_pager_callback' function
  40. * to page a set of results. This is helpful when generating results to
  41. * be view online. The pager works identical to the pager_query function
  42. * of drupal. Simply provide a unique integer value for this argument. Each
  43. * form on a single page should have a unique $pager_id.
  44. * @param $page
  45. * Optional. If this function is called where the
  46. * page for the pager cannot be set using the $_GET variable, use this
  47. * argument to specify the page to retrieve.
  48. *
  49. * @return
  50. * Returns an array of pubs where each element is
  51. * an associative array where the keys are Tripal Pub CV terms.
  52. *
  53. * @ingroup tripal_pub_api
  54. */
  55. function tripal_pub_get_remote_search_results($remote_db, $search_array,
  56. $num_to_retrieve, $pager_id = 0, $page = 0) {
  57. // construct the callback function using the remote database name
  58. $callback = 'tripal_pub_remote_search_' . strtolower($remote_db);
  59. // manually set the $_GET['page'] parameter to trick the pager
  60. // into giving us the requested page
  61. if (is_int($page) and $page > 0) {
  62. $_GET['page'] = $page;
  63. }
  64. // now call the callback function to get the rsults
  65. $pubs = array();
  66. if (function_exists($callback)) {
  67. $pubs = call_user_func($callback, $search_array, $num_to_retrieve, $pager_id);
  68. }
  69. return $pubs;
  70. }
  71. /*
  72. * @ingroup tripal_pub_api
  73. */
  74. function tripal_pub_update_publications($do_contact = FALSE) {
  75. // get a persistent connection
  76. $connection = tripal_db_persistent_chado();
  77. if (!$connection) {
  78. print "A persistant connection was not obtained. Loading will be slow\n";
  79. }
  80. // if we cannot get a connection then let the user know the loading will be slow
  81. tripal_db_start_transaction();
  82. if ($connection) {
  83. print "\nNOTE: Loading of publications is performed using a database transaction. \n" .
  84. "If the load fails or is terminated prematurely then the entire set of \n" .
  85. "insertions/updates is rolled back and will not be found in the database\n\n";
  86. }
  87. // get a list of all publications that have
  88. // supported databases
  89. $sql = "
  90. SELECT DB.name as db_name, DBX.accession
  91. FROM pub P
  92. INNER JOIN pub_dbxref PDBX ON P.pub_id = PDBX.pub_id
  93. INNER JOIN dbxref DBX ON DBX.dbxref_id = PDBX.dbxref_id
  94. INNER JOIN db DB ON DB.db_id = DBX.db_id
  95. ORDER BY DB.name
  96. ";
  97. $results = chado_query($sql);
  98. $num_to_retrieve = 100;
  99. $i = 0; // count the number of IDs. When we hit $num_to_retrieve we'll do the query
  100. $curr_db = ''; // keeps track of the current current database
  101. $ids = array(); // the list of IDs for the database
  102. $search = array(); // the search array passed to the search function
  103. // iterate through the pub IDs
  104. while ($pub = db_fetch_object($results)) {
  105. $accession = $pub->accession;
  106. $remote_db = $pub->db_name;
  107. // if we're switching databases then reset the search array
  108. if($remote_db != $curr_db) {
  109. // if we had a previous DB then do the update.
  110. if ($curr_db) {
  111. $search['num_criteria'] = $i - 1;
  112. $pubs = tripal_pub_get_remote_search_results($remote_db, $search, $i, 0);
  113. tripal_pub_add_publications($pubs, $do_contact);
  114. }
  115. $curr_db = $remote_db;
  116. $search = array(
  117. 'remote_db' => $remote_db,
  118. 'criteria' => array(),
  119. );
  120. $ids = array();
  121. $i = 0;
  122. }
  123. // if we've hit the maximum number to retrieve then do the search
  124. if($i == $num_to_retrieve) {
  125. $search['num_criteria'] = $i - 1;
  126. $pubs = tripal_pub_get_remote_search_results($remote_db, $search, $i, 0);
  127. tripal_pub_add_publications($pubs, $do_contact);
  128. $search['criteria'] = array();
  129. $i = 0;
  130. }
  131. // add each accession to the search criteria
  132. $search['criteria'][] = array(
  133. 'search_terms' => $accession,
  134. 'scope' => 'id',
  135. 'operation' => 'OR'
  136. );
  137. $i++;
  138. }
  139. // now update any remaining in the search criteria array
  140. $search['num_criteria'] = $i - 1;
  141. $pubs = tripal_pub_get_remote_search_results($remote_db, $search, $i, 0);
  142. tripal_pub_add_publications($pubs, $do_contact);
  143. // sync the newly added publications with Drupal
  144. print "Syncing publications with Drupal...\n";
  145. tripal_pub_sync_pubs();
  146. // if the caller wants to create contacts then we should sync them
  147. if ($do_contact) {
  148. print "Syncing contacts with Drupal...\n";
  149. tripal_contact_sync_contacts();
  150. }
  151. // transaction is complete
  152. tripal_db_commit_transaction();
  153. print "Done.\n";
  154. }
  155. /*
  156. * @ingroup tripal_pub_api
  157. */
  158. function tripal_pub_import_publications() {
  159. $num_to_retrieve = 100;
  160. $pager_id = 0;
  161. $page = 0;
  162. $num_pubs = 0;
  163. // get a persistent connection
  164. $connection = tripal_db_persistent_chado();
  165. if (!$connection) {
  166. print "A persistant connection was not obtained. Loading will be slow\n";
  167. }
  168. // if we cannot get a connection then let the user know the loading will be slow
  169. tripal_db_start_transaction();
  170. if ($connection) {
  171. print "\nNOTE: Loading of publications is performed using a database transaction. \n" .
  172. "If the load fails or is terminated prematurely then the entire set of \n" .
  173. "insertions/updates is rolled back and will not be found in the database\n\n";
  174. }
  175. // get all of the loaders
  176. $sql = "SELECT * FROM {tripal_pub_import} WHERE disabled = 0";
  177. $results = db_query($sql);
  178. $do_contact = FALSE;
  179. while ($import = db_fetch_object($results)) {
  180. // keep track if any of the importers want to create contacts from authors
  181. if ($import->do_contact == 1) {
  182. $do_contact = TRUE;
  183. }
  184. $criteria = unserialize($import->criteria);
  185. $remote_db = $criteria['remote_db'];
  186. do {
  187. // retrieve the pubs for this page. We'll retreive 10 at a time
  188. $pubs = tripal_pub_get_remote_search_results($remote_db, $criteria, $num_to_retrieve, $pager_id, $page);
  189. tripal_pub_add_publications($pubs, $import->do_contact);
  190. $page++;
  191. }
  192. // continue looping until we have a $pubs array that does not have
  193. // our requested numer of records. This means we've hit the end
  194. while (count($pubs) == $num_to_retrieve);
  195. }
  196. // sync the newly added publications with Drupal
  197. print "Syncing publications with Drupal...\n";
  198. tripal_pub_sync_pubs();
  199. // if any of the importers wanted to create contacts from the authors then sync them
  200. if($do_contact) {
  201. print "Syncing contacts with Drupal...\n";
  202. tripal_contact_sync_contacts();
  203. }
  204. // transaction is complete
  205. tripal_db_commit_transaction();
  206. print "Done.\n";
  207. }
  208. /*
  209. *
  210. */
  211. function tripal_pub_add_publications($pubs, $do_contact) {
  212. // iterate through the publications and add each one
  213. foreach ($pubs as $pub) {
  214. // add the publication to Chado and sync it with Chado
  215. $pub_id = tripal_pub_add_publication($pub, $do_contact);
  216. // add the publication cross reference (e.g. to PubMed)
  217. if ($pub_id) {
  218. $pub_dbxref = tripal_pub_add_pub_dbxref($pub_id, $pub);
  219. }
  220. $num_pubs++;
  221. print $num_pubs . ". " . $pub['Publication Database'] . ' ' . $pub['Pub Accession'] . "\n";
  222. } // end for loop
  223. }
  224. /*
  225. *
  226. */
  227. function tripal_pub_add_pub_dbxref($pub_id, $pub) {
  228. // check to see if the pub_dbxref record already exist
  229. $values = array(
  230. 'dbxref_id' => array(
  231. 'accession' => $pub['Pub Accession'],
  232. 'db_id' => array(
  233. 'name' => $pub['Publication Database'],
  234. ),
  235. ),
  236. 'pub_id' => $pub_id,
  237. );
  238. $options = array('statement_name' => 'sel_pubdbxref_dbpu');
  239. $results = tripal_core_chado_select('pub_dbxref', array('*'), $values, $options);
  240. // if the pub_dbxref record exist then we don't need to re-add it.
  241. if(count($results) > 0) {
  242. return $results[0];
  243. }
  244. // make sure our database already exists
  245. $db = tripal_db_add_db($pub['Publication Database']);
  246. // get the database cross-reference
  247. $dbxvalues = array(
  248. 'accession' => $pub['Pub Accession'],
  249. 'db_id' => $db->db_id,
  250. );
  251. $dbxoptions = array('statement_name' => 'sel_dbxref_acdb');
  252. $results = tripal_core_chado_select('dbxref', array('dbxref_id'), $dbxvalues, $dbxoptions);
  253. // if the accession doesn't exist then add it
  254. if(count($results) == 0){
  255. $dbxref = tripal_db_add_dbxref($db->db_id, $pub['Pub Accession']);
  256. }
  257. else {
  258. $dbxref = $results[0];
  259. }
  260. // now add the record
  261. $options = array('statement_name' => 'ins_pubdbxref_dbpu');
  262. $results = tripal_core_chado_insert('pub_dbxref', $values, $options);
  263. if (!$results) {
  264. watchdog('tripal_pub', "Cannot add publication dbxref: %db:%accession.",
  265. array('%db' => $pub['Publication Database'], '%accession' => $pub['Pub Accession']). WATCHDOG_ERROR);
  266. return FALSE;
  267. }
  268. return $results;
  269. }
  270. /*
  271. *
  272. */
  273. function tripal_pub_add_publication($pub_details, $do_contact) {
  274. $pub_id = 0;
  275. // first try to find the publication using the accession number. It will ahve
  276. // one if the pub has already been loaded for the publication database
  277. if ($pub_details['Pub Accession'] and $pub_details['Publication Database']) {
  278. $values = array(
  279. 'dbxref_id' => array (
  280. 'accession' => $pub_details['Pub Accession'],
  281. 'db_id' => array(
  282. 'name' => $pub_details['Publication Database']
  283. ),
  284. ),
  285. );
  286. $options = array('statement_name' => 'sel_pubdbxref_db');
  287. $results = tripal_core_chado_select('pub_dbxref', array('pub_id'), $values, $options);
  288. if(count($results) == 1) {
  289. $pub_id = $results[0]->pub_id;
  290. }
  291. elseif(count($results) > 1) {
  292. watchdog('tripal_pub', "There are two publications with this accession: %db:%accession. Cannot determine which to update.",
  293. array('%db' => $pub_details['Publication Database'], '%accession' => $pub_details['Pub Accession']), WATCHDOG_ERROR);
  294. return FALSE;
  295. }
  296. }
  297. // if we couldn't find a publication by the accession (which means it doesn't
  298. // yet exist or it has been added using a different publication database) then
  299. // try to find it using the title and publication year.
  300. elseif ($pub_details['Title']) {
  301. $values = array();
  302. $values['title'] = $pub_details['Title'];
  303. $stmnt_suffix = 'ti';
  304. if ($pub_details['Year']) {
  305. $values['pyear'] = $pub_details['Year'];
  306. $stmnt_suffix .= 'py';
  307. }
  308. $options = array('statement_name' => 'sel_pub_');
  309. $results = tripal_core_chado_select('pub', array('*'), $values, $options);
  310. if (count($results) == 1) {
  311. $pub_id = $results[0]->pub_id;
  312. }
  313. elseif (count($results) > 1) {
  314. watchdog('tripal_pub', "The publication with the same title is present multiple times. Cannot ".
  315. "determine which to use. Title: %title", array('%title' => $pub_details['Title']), WATCHDOG_ERROR);
  316. return FALSE;
  317. }
  318. }
  319. // if we couldn't find the publication using the database accession or the title/year then add it
  320. if(!$pub_id) {
  321. // get the publication type (use the first publication type, any others will get stored as properties)
  322. $pub_type = tripal_cv_get_cvterm_by_name($pub_details['Publication Type'][0], NULL, 'tripal_pub');
  323. if (!$pub_type) {
  324. watchdog('tripal_pub', "Cannot find publication type: %type",
  325. array('%type' => $pub_type), WATCHDOG_ERROR);
  326. return FALSE;
  327. }
  328. // if the publication does not exist then create it.
  329. $values = array(
  330. 'title' => $pub_details['Title'],
  331. 'volume' => $pub_details['Volume'],
  332. 'series_name' => $pub_details['Journal Name'],
  333. 'issue' => $pub_details['Issue'],
  334. 'pyear' => $pub_details['Year'],
  335. 'pages' => $pub_details['Pages'],
  336. 'uniquename' => $pub_details['Citation'],
  337. 'type_id' => $pub_type->cvterm_id,
  338. );
  339. $options = array('statement_name' => 'ins_pub_tivoseispypaunty');
  340. $pub = tripal_core_chado_insert('pub', $values, $options);
  341. if (!$pub) {
  342. watchdog('tripal_pub', "Cannot insert the publication with title: %title",
  343. array('%title' => $pub_details['Title']), WATCHDOG_ERROR);
  344. return FALSE;
  345. }
  346. $pub_id = $pub['pub_id'];
  347. }
  348. // now add in any other items that remain as properties of the publication
  349. foreach ($pub_details as $key => $value) {
  350. // get the cvterm by name or synonym
  351. $cvterm = tripal_cv_get_cvterm_by_name($key, NULL, 'tripal_pub');
  352. if (!$cvterm) {
  353. $cvterm = tripal_cv_get_cvterm_by_synonym($key, NULL, 'tripal_pub');
  354. }
  355. if (!$cvterm) {
  356. print_r($cvterm);
  357. watchdog('tripal_pub', "Cannot find term: '%prop'. Skipping.", array('%prop' => $key), WATCHDOG_ERROR);
  358. continue;
  359. }
  360. // skip details that won't be stored as properties
  361. if ($key == 'Authors') {
  362. tripal_pub_add_authors($pub_id, $value, $do_contact);
  363. continue;
  364. }
  365. if ($key == 'Title' or $key == 'Volume' or $key == 'Journal Name' or $key == 'Issue' or
  366. $key == 'Year' or $key == 'Pages') {
  367. continue;
  368. }
  369. $success = 0;
  370. if (is_array($value)) {
  371. foreach ($value as $subkey => $subvalue) {
  372. // if the key is an integer then this array is a simple list and
  373. // we will insert using the primary key. Otheriwse, use the new key
  374. if(is_int($subkey)) {
  375. $success = tripal_core_insert_property('pub', $pub_id, $key, 'tripal_pub', $subvalue, TRUE);
  376. }
  377. else {
  378. $success = tripal_core_insert_property('pub', $pub_id, $subkey, 'tripal_pub', $subvalue, TRUE);
  379. }
  380. }
  381. }
  382. else {
  383. $success = tripal_core_insert_property('pub', $pub_id, $key, 'tripal_pub', $value, TRUE);
  384. }
  385. if (!$success) {
  386. watchdog('tripal_pub', "Cannot add property '%prop' to publication. Skipping.",
  387. array('%prop' => $key), WATCHDOG_ERROR);
  388. continue;
  389. }
  390. }
  391. return $pub_id;
  392. }
  393. /*
  394. *
  395. */
  396. function tripal_pub_add_authors($pub_id, $authors, $do_contact) {
  397. $rank = 0;
  398. // first remove any of the existing pubauthor entires
  399. $sql = "DELETE FROM {pubauthor} WHERE pub_id = %d";
  400. chado_query($sql, $pub_id);
  401. // iterate through the authors and add them to the pubauthors and contact
  402. // tables of chado, then link them through the custom pubauthors_contact table
  403. foreach ($authors as $author) {
  404. // skip invalid author entires
  405. if ($author['valid'] == 'N') {
  406. continue;
  407. }
  408. // remove the 'valid' property as we don't have a CV term for it
  409. unset($author['valid']);
  410. // construct the contact.name field using the author information
  411. $name = '';
  412. $type = 'Person';
  413. if ($author['Given Name']) {
  414. $name .= $author['Given Name'];
  415. }
  416. if ($author['Surname']) {
  417. $name .= ' ' . $author['Surname'];
  418. }
  419. if ($author['Suffix']) {
  420. $name .= ' ' . $author['Suffix'];
  421. }
  422. if ($author['Collective']) {
  423. $name = $author['Collective'];
  424. $type = 'Collective';
  425. }
  426. $name = trim($name);
  427. // add an entry to the pubauthors table
  428. $values = array(
  429. 'pub_id' => $pub_id,
  430. 'rank' => $rank,
  431. 'surname' => $author['Surname'] ? $author['Surname'] : $author['Collective'],
  432. 'givennames' => $author['Given Name'],
  433. 'suffix' => $author['Suffix'],
  434. );
  435. $options = array('statement_name' => 'ins_pubauthor_idrasugisu');
  436. $pubauthor = tripal_core_chado_insert('pubauthor', $values, $options);
  437. // if the user wants us to create a contact for each author then do it.
  438. if ($do_contact) {
  439. // Add the contact
  440. $contact = tripal_contact_add_contact($name, '', $type, $author);
  441. // if we have succesfully added the contact and the pubauthor entries then we want to
  442. // link them together
  443. if ($contact and $pubauthor) {
  444. // link the pubauthor entry to the contact
  445. $values = array(
  446. 'pubauthor_id' => $pubauthor['pubauthor_id'],
  447. 'contact_id' => $contact['contact_id'],
  448. );
  449. $options = array('statement_name' => 'ins_pubauthorcontact_puco');
  450. $pubauthor_contact = tripal_core_chado_insert('pubauthor_contact', $values, $options);
  451. if (!$pubauthor_contact) {
  452. watchdog('tripal_pub', "Cannot link pub authro and contact.", array(), WATCHDOG_ERROR);
  453. }
  454. }
  455. }
  456. $rank++;
  457. }
  458. }