tripal_chado.pub.api.inc 22 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607
  1. <?php
  2. /**
  3. * @file
  4. * Provides an application programming interface (API) to manage chado publications
  5. */
  6. /**
  7. * @defgroup tripal_pub_api Publication Module API
  8. * @ingroup tripal_api
  9. * @{
  10. * Provides an application programming interface (API) to manage chado publications
  11. *
  12. * @stephen add documentation here for how to add a new importer.
  13. *
  14. * @}
  15. */
  /**
   * Retrieves a single chado publication.
   *
   * @param $identifiers
   *   An array used to uniquely identify a publication. This array has the same
   *   format as that used by the chado_generate_var(). The following keys can be
   *   useful for uniquely identifying a publication as they should be unique:
   *    - pub_id: the chado pub.pub_id primary key
   *    - nid: the drupal nid of the publication
   *    - uniquename: A value to match with the pub.uniquename field
   *   There are also some specially handled keys. They are:
   *    - property: An array describing the property to select records for. It
   *      should at least have either a 'type_name' key (if unique across cvs) or
   *      'type_id' key. Other supported keys include: 'cv_id', 'cv_name' (of the
   *      type), 'value' and 'rank'.
   *      NOTE(review): the value is forwarded to
   *      chado_get_record_with_property() wrapped as
   *      array('type_name' => $property); confirm that this matches the array
   *      format described here.
   *    - dbxref: The database cross reference accession. It should be in the
   *      form DB:ACCESSION, where DB is the database name and ACCESSION is the
   *      unique publication identifier (e.g. PMID:4382934)
   *    - dbxref_id: The dbxref.dbxref_id of the publication.
   *   The special keys are checked in the order property, dbxref, dbxref_id and
   *   only the first one present is used.
   * @param $options
   *   An array of options. Supported keys include:
   *    - Any keys supported by chado_generate_var(). See that function
   *      definition for additional details.
   *
   * NOTE: the $identifiers parameter can really be any array similar to $values
   * passed into chado_select_record(). It should fully specify the pub record
   * to be returned.
   *
   * @return
   *   If a single publication is retrieved using the identifiers, then the
   *   publication is returned in the same format returned by the
   *   chado_generate_var() function. Otherwise (invalid identifiers, no match
   *   or more than one match), FALSE is returned.
   *
   * @ingroup tripal_pub_api
   */
  function tripal_get_publication($identifiers, $options = array()) {

    // Error checking of parameters. Nothing can be looked up without a
    // non-empty array, so report the problem and stop here.
    if (!is_array($identifiers)) {
      tripal_report_error('tripal_pub_api', TRIPAL_ERROR,
        "tripal_get_publication: The identifier passed in is expected to be an array with the key\n" .
        "matching a column name in the pub table (ie: pub_id or name). You passed in %identifier.",
        array('%identifier' => print_r($identifiers, TRUE))
      );
      return FALSE;
    }
    if (empty($identifiers)) {
      tripal_report_error('tripal_pub_api', TRIPAL_ERROR,
        "tripal_get_publication: You did not pass in anything to identify the publication you want. The identifier\n" .
        "is expected to be an array with the key matching a column name in the pub table\n" .
        "(ie: pub_id or name). You passed in %identifier.",
        array('%identifier' => print_r($identifiers, TRUE))
      );
      return FALSE;
    }

    // If one of the identifiers is property then use
    // chado_get_record_with_property().
    if (array_key_exists('property', $identifiers)) {
      $property = $identifiers['property'];
      unset($identifiers['property']);
      $pub = chado_get_record_with_property(
        array('table' => 'pub', 'base_records' => $identifiers),
        array('type_name' => $property),
        $options
      );
    }
    // A DB:ACCESSION cross reference: resolve dbxref -> pub_dbxref -> pub.
    elseif (array_key_exists('dbxref', $identifiers)) {
      $matches = array();
      if (!is_string($identifiers['dbxref']) ||
          !preg_match('/^(.*?):(.*?)$/', $identifiers['dbxref'], $matches)) {
        tripal_report_error('tripal_pub_api', TRIPAL_ERROR,
          "tripal_get_publication: The dbxref identifier is not correctly formatted. You passed in %identifier.",
          array('%identifier' => print_r($identifiers, TRUE))
        );
        return FALSE;
      }
      $dbname = $matches[1];
      $accession = $matches[2];

      // First make sure the dbxref is present.
      $values = array(
        'accession' => $accession,
        'db_id' => array(
          'name' => $dbname,
        ),
      );
      // empty() also covers a FALSE result from a failed select.
      $dbxref = chado_select_record('dbxref', array('dbxref_id'), $values);
      if (empty($dbxref)) {
        return FALSE;
      }
      $pub_dbxref = chado_select_record('pub_dbxref', array('pub_id'), array('dbxref_id' => $dbxref[0]->dbxref_id));
      if (empty($pub_dbxref)) {
        return FALSE;
      }
      $pub = chado_generate_var('pub', array('pub_id' => $pub_dbxref[0]->pub_id), $options);
    }
    elseif (array_key_exists('dbxref_id', $identifiers)) {
      // First get the pub_dbxref record.
      $values = array('dbxref_id' => $identifiers['dbxref_id']);
      $pub_dbxref = chado_select_record('pub_dbxref', array('pub_id'), $values);
      if (empty($pub_dbxref)) {
        return FALSE;
      }
      // Now get the pub.
      $pub = chado_generate_var('pub', array('pub_id' => $pub_dbxref[0]->pub_id), $options);
    }
    // Else we have a simple case and we can just use chado_generate_var to get
    // the pub.
    else {
      $pub = chado_generate_var('pub', $identifiers, $options);
    }

    // Ensure the pub is singular. If it's an array then it is not singular.
    if (is_array($pub)) {
      tripal_report_error('tripal_pub_api', TRIPAL_ERROR,
        "tripal_get_publication: The identifiers did not find a single unique record. Identifiers passed: %identifier.",
        array('%identifier' => print_r($identifiers, TRUE))
      );
      return FALSE;
    }

    // Report an error if $pub is FALSE since then the lookup has failed.
    if ($pub === FALSE) {
      tripal_report_error('tripal_pub_api', TRIPAL_ERROR,
        "tripal_get_publication: Could not find a publication using the identifiers\n" .
        "provided. Check that the identifiers are correct. Identifiers passed: %identifier.",
        array('%identifier' => print_r($identifiers, TRUE))
      );
      return FALSE;
    }

    // As far as we know everything is fine, so return the pub.
    return $pub;
  }
  /**
   * Checks whether a publication already exists in Chado.
   *
   * The publication table of Chado only has a unique constraint for the
   * uniquename of the publication, but in reality a publication can be
   * considered unique by a combination of the title, publication type,
   * published year and series name (e.g. journal name or conference name). The
   * site administrator can configure how publications are determined to be
   * unique. This function uses the configuration specified by the administrator
   * (the 'tripal_pub_import_duplicate_check' variable) to look for publications
   * that match the details specified by the $pub_details argument and indicates
   * if one or more publications match the criteria.
   *
   * The checks run in this order and the first match wins: the
   * 'Publication Dbxref', then the 'Citation', then the configured
   * title/year based check.
   *
   * @param $pub_details
   *   An associative array with details about the publications. The expected
   *   keys are:
   *    - 'Title': The title of the publication
   *    - 'Year': The published year of the publication
   *    - 'Publication Type': An array of publication types (only the first is
   *      used) or a single type name. Required once the dbxref and citation
   *      checks have found nothing.
   *    - 'Series Name': The series name of the publication
   *    - 'Journal Name': An alternative to 'Series Name'
   *    - 'Conference Name': An alternative to 'Series Name'
   *    - 'Citation': The publication citation (this is the value saved in the
   *      pub.uniquename field and must be unique). If this key is present it
   *      will also be checked
   *    - 'Publication Dbxref': A database cross reference of the form
   *      DB:ACCESSION where DB is the name of the database and ACCESSION is the
   *      unique identifier (e.g PMID:3483139)
   *
   * @return
   *   An array containing the pub_id's of matching publications. Returns an
   *   empty array if no pubs match, or if the publication type is missing or
   *   unknown.
   *
   * @ingroup tripal_pub_api
   */
  function tripal_publication_exists($pub_details) {

    // First try to find the publication using the accession number if that key
    // exists in the details array.
    if (array_key_exists('Publication Dbxref', $pub_details)) {
      $pub = tripal_get_publication(array('dbxref' => $pub_details['Publication Dbxref']));
      if ($pub) {
        return array($pub->pub_id);
      }
    }

    // Make sure the citation is unique.
    if (array_key_exists('Citation', $pub_details)) {
      $pub = tripal_get_publication(array('uniquename' => $pub_details['Citation']));
      if ($pub) {
        return array($pub->pub_id);
      }
    }

    // Get the publication type (use the first publication type).
    if (!array_key_exists('Publication Type', $pub_details)) {
      tripal_report_error('tripal_pub', TRIPAL_ERROR,
        "tripal_publication_exists(): The Publication Type is a " .
        "required property but is missing", array());
      return array();
    }
    $type_name = $pub_details['Publication Type'];
    if (is_array($type_name)) {
      // An empty list has no first type; fall through to the lookup failure.
      $type_name = isset($type_name[0]) ? $type_name[0] : '';
    }
    $pub_type = tripal_get_cvterm(array(
      'name' => $type_name,
      'cv_id' => array(
        'name' => 'tripal_pub',
      ),
    ));
    if (!$pub_type) {
      // Report the resolved name: indexing the raw value with [0] would log
      // only the first character when the type was given as a string.
      tripal_report_error('tripal_pub', TRIPAL_ERROR,
        "tripal_publication_exists(): Cannot find publication type: '%type'",
        array('%type' => $type_name));
      return array();
    }

    // Get the series name. The pub.series_name field is only 255 chars so we
    // must truncate to be safe. When several of the alternative keys are
    // present the last one checked below wins.
    $series_name = '';
    if (array_key_exists('Series Name', $pub_details)) {
      $series_name = substr($pub_details['Series Name'], 0, 255);
    }
    if (array_key_exists('Journal Name', $pub_details)) {
      $series_name = substr($pub_details['Journal Name'], 0, 255);
    }
    if (array_key_exists('Conference Name', $pub_details)) {
      $series_name = substr($pub_details['Conference Name'], 0, 255);
    }

    // Read the title and year without raising notices when they are absent.
    $title = isset($pub_details['Title']) ? $pub_details['Title'] : NULL;
    $year = isset($pub_details['Year']) ? $pub_details['Year'] : NULL;

    // Make sure the publication is unique using the preferred import
    // duplication check.
    $import_dups_check = variable_get('tripal_pub_import_duplicate_check', 'title_year_media');
    $pubs = array();
    switch ($import_dups_check) {
      case 'title_year':
        $identifiers = array(
          'title' => $title,
          'pyear' => $year,
        );
        $pubs = chado_select_record('pub', array('pub_id'), $identifiers);
        break;

      case 'title_year_type':
        $identifiers = array(
          'title' => $title,
          'pyear' => $year,
          'type_id' => $pub_type->cvterm_id,
        );
        $pubs = chado_select_record('pub', array('pub_id'), $identifiers);
        break;

      case 'title_year_media':
        $identifiers = array(
          'title' => $title,
          'pyear' => $year,
          'series_name' => $series_name,
        );
        $pubs = chado_select_record('pub', array('pub_id'), $identifiers);
        break;
    }

    // A failed select yields a non-iterable value; treat it as "no matches".
    if (!$pubs) {
      return array();
    }

    $return = array();
    foreach ($pubs as $pub) {
      $return[] = $pub->pub_id;
    }
    return $return;
  }
  /**
   * Used for autocomplete in forms for identifying publications.
   *
   * The matches are written to the response as JSON; nothing is returned to the
   * caller.
   *
   * @param $field
   *   The column of the Chado pub table to search on. Only the textual pub
   *   columns listed in the function body are accepted; any other value
   *   produces an empty result set.
   * @param $string
   *   The string to search for.
   *
   * @return
   *   Nothing. A JSON object is output that maps the uniquename of each
   *   matching publication to its value in $field. Only values that begin with
   *   the provided string match, and at most 25 are returned ordered by title.
   *
   * @ingroup tripal_chado_api
   */
  function tripal_autocomplete_pub($field, $string = '') {
    $items = array();

    // A column name cannot be supplied through a query placeholder: binding it
    // would compare the literal field name against the pattern rather than the
    // column contents. The name is therefore placed in the SQL directly, which
    // is only safe for names taken from this fixed list.
    $searchable = array(
      'title',
      'volumetitle',
      'volume',
      'series_name',
      'issue',
      'pyear',
      'pages',
      'miniref',
      'uniquename',
      'publisher',
      'pubplace',
    );
    if (!in_array($field, $searchable, TRUE)) {
      drupal_json_output($items);
      return;
    }

    // Always fetch uniquename (the key of each item) and title (the sort
    // column), plus the searched column when it is neither of those.
    $columns = array_unique(array('uniquename', 'title', $field));
    $sql = "
      SELECT " . implode(', ', $columns) . "
      FROM {pub}
      WHERE $field like :str
      ORDER by title
      LIMIT 25 OFFSET 0
    ";
    $pubs = chado_query($sql, array(':str' => $string . '%'));
    foreach ($pubs as $pub) {
      $items[$pub->uniquename] = $pub->$field;
    }

    drupal_json_output($items);
  }
  /**
   * Imports a single publication specified by a remote database cross reference.
   *
   * The import runs inside a database transaction which is rolled back if an
   * exception is thrown. Progress messages are printed to standard output.
   *
   * @param $pub_dbxref
   *   The unique database ID for the record to import. This value must
   *   be of the format DB_NAME:ACCESSION where DB_NAME is the name of the
   *   database (e.g. PMID or AGL) and the ACCESSION is the unique identifier
   *   for the record in the database. A value in any other format is reported
   *   as an error and nothing is imported.
   * @param $do_contact
   *   Set to TRUE if authors should automatically have a contact record added
   *   to Chado.
   * @param $do_update
   *   If set to TRUE then the publication will be updated if it already exists
   *   in the database. Defaults to FALSE.
   *
   * @ingroup tripal_pub
   */
  function tripal_import_pub_by_dbxref($pub_dbxref, $do_contact = FALSE, $do_update = FALSE) {
    $num_to_retrieve = 1;
    $page = 0;

    // Without a DB_NAME:ACCESSION pair there is nothing to search for, so stop
    // before opening a transaction or syncing anything.
    $matches = array();
    if (!is_string($pub_dbxref) || !preg_match('/^(.*?):(.*?)$/', $pub_dbxref, $matches)) {
      tripal_report_error('tripal_pub', TRIPAL_ERROR,
        "tripal_import_pub_by_dbxref(): The publication dbxref must be of the form DB_NAME:ACCESSION. You passed in %dbxref.",
        array('%dbxref' => print_r($pub_dbxref, TRUE))
      );
      return;
    }
    $dbname = $matches[1];
    $accession = $matches[2];

    print "\nNOTE: Loading of publications is performed using a database transaction. \n" .
      "If the load fails or is terminated prematurely then the entire set of \n" .
      "insertions/updates is rolled back and will not be found in the database\n\n";

    $transaction = db_transaction();
    try {
      // Build a one-criterion search that looks the record up by its ID.
      $criteria = array(
        'num_criteria' => 1,
        'remote_db' => $dbname,
        'criteria' => array(
          '1' => array(
            'search_terms' => "$dbname:$accession",
            'scope' => 'id',
            'operation' => '',
            'is_phrase' => 0,
          ),
        ),
      );
      $results = tripal_get_remote_pubs($dbname, $criteria, $num_to_retrieve, $page);
      tripal_pub_add_publications($results['pubs'], $do_contact, $do_update);

      // For backwards compatibility check to see if the legacy pub module
      // is enabled. If so, then sync the nodes.
      if (module_exists('tripal_pub')) {
        // Sync the newly added publications with Drupal.
        print "Syncing publications with Drupal...\n";
        chado_node_sync_records('pub');

        // If contacts were created from the authors then sync them too.
        if ($do_contact) {
          print "Syncing contacts with Drupal...\n";
          chado_node_sync_records('contact');
        }
      }
    }
    catch (Exception $e) {
      $transaction->rollback();
      print "\n"; // make sure we start errors on new line
      watchdog_exception('T_pub_import', $e);
      print "FAILED: Rolling back database changes...\n";
      return;
    }
    print "Done.\n";
  }
  /**
   * Imports all publications for all active import setups.
   *
   * Every row of the tripal_pub_import table that is not disabled is executed.
   * The whole run happens inside a database transaction which is rolled back
   * if an exception is thrown. Progress messages are printed to standard
   * output.
   *
   * @param $report_email
   *   A list of email address, separated by commas, that should be notified
   *   once importing has completed. The message lists the publications that
   *   were newly inserted by each importer. Use FALSE to send no report.
   * @param $do_update
   *   If set to TRUE then publications that already exist in the Chado database
   *   will be updated, whereas if FALSE only new publications will be added
   *
   * @ingroup tripal_pub
   */
  function tripal_execute_active_pub_importers($report_email = FALSE, $do_update = FALSE) {
    $num_to_retrieve = 100;

    print "\nNOTE: Loading of publications is performed using a database transaction. \n" .
      "If the load fails or is terminated prematurely then the entire set of \n" .
      "insertions/updates is rolled back and will not be found in the database\n\n";

    // Start the transaction.
    $transaction = db_transaction();
    try {
      // Get all of the loaders.
      $sql = "SELECT * FROM {tripal_pub_import} WHERE disabled = 0 ";
      $importers = db_query($sql, array());

      $do_contact = FALSE;
      $reports = array();
      foreach ($importers as $import) {
        print "Executing importer: '" . $import->name . "'\n";

        // Keep track if any of the importers want to create contacts from
        // authors.
        if ($import->do_contact == 1) {
          $do_contact = TRUE;
        }

        // The search criteria are stored serialized in the importer record.
        $criteria = unserialize($import->criteria);
        if (!is_array($criteria) || !isset($criteria['remote_db'])) {
          tripal_report_error('tripal_pub', TRIPAL_ERROR,
            "tripal_execute_active_pub_importers(): The search criteria of importer '%name' could not be read. Skipping it.",
            array('%name' => $import->name)
          );
          continue;
        }
        $remote_db = $criteria['remote_db'];

        // Retrieve the pubs one page at a time and accumulate the outcome of
        // every page, so the final report covers the whole importer run rather
        // than only its last page.
        $report = array();
        $page = 0;
        do {
          $remote = tripal_get_remote_pubs($remote_db, $criteria, $num_to_retrieve, $page);
          $pubs = $remote['pubs'];
          $page_report = tripal_pub_add_publications($pubs, $import->do_contact, $do_update);
          $report = _tripal_pub_merge_import_reports($report, $page_report);
          $page++;
        }
        // Continue looping until we have a $pubs array that does not have
        // our requested number of records. This means we've hit the end.
        while (count($pubs) == $num_to_retrieve);

        $reports[$import->name] = $report;
      }

      // For backwards compatibility check to see if the legacy pub module
      // is enabled. If so, then sync the newly added publications with Drupal.
      if (module_exists('tripal_pub')) {
        print "Syncing publications with Drupal...\n";
        chado_node_sync_records('pub');
      }

      // Iterate through each of the reports and generate a final report with
      // HTML links.
      if ($report_email) {
        global $base_url;
        $HTML_report = "<html>";
        foreach ($reports as $importer => $report) {
          $inserted = array();
          if (isset($report['inserted']) && is_array($report['inserted'])) {
            $inserted = $report['inserted'];
          }
          $total = count($inserted);
          $HTML_report .= "<b>$total new publications from importer: " . check_plain($importer) . "</b><br><ol>\n";
          foreach ($inserted as $pub) {
            if (array_key_exists('pub_id', $pub)) {
              // l() escapes the link text itself.
              $item = l($pub['Title'], "$base_url/pub/" . $pub['pub_id']);
            }
            else {
              // Titles come from the remote database, so escape them before
              // they are placed in the HTML message.
              $item = check_plain($pub['Title']);
            }
            $HTML_report .= "<li>$item</li>\n";
          }
          $HTML_report .= "</ol>\n";
        }
        $HTML_report .= "</html>";

        $site_email = variable_get('site_mail', '');
        $params = array(
          'message' => $HTML_report,
        );
        drupal_mail('tripal_pub', 'import_report', $report_email, language_default(), $params, $site_email, TRUE);
      }

      // For backwards compatibility check to see if the legacy pub module
      // is enabled. If so, and if any of the importers wanted to create
      // contacts from the authors, then sync the contacts.
      if (module_exists('tripal_pub') && $do_contact) {
        print "Syncing contacts with Drupal...\n";
        chado_node_sync_records('contact');
      }
    }
    catch (Exception $e) {
      $transaction->rollback();
      print "\n"; // make sure we start errors on new line
      watchdog_exception('T_pub_import', $e);
      print "FAILED: Rolling back database changes...\n";
      return;
    }
    print "Done.\n";
  }

  /**
   * Folds the report of one page of imported publications into a running total.
   *
   * @param $combined
   *   The report accumulated so far (an array keyed by outcome, e.g.
   *   'inserted').
   * @param $page_report
   *   The report returned for the latest page. Anything that is not an array
   *   is ignored.
   *
   * @return
   *   The combined report: list entries are appended, numeric entries are
   *   summed and any other entry takes the most recent value.
   */
  function _tripal_pub_merge_import_reports($combined, $page_report) {
    if (!is_array($page_report)) {
      return $combined;
    }
    foreach ($page_report as $outcome => $entries) {
      if (!array_key_exists($outcome, $combined)) {
        $combined[$outcome] = $entries;
      }
      elseif (is_array($combined[$outcome]) && is_array($entries)) {
        $combined[$outcome] = array_merge($combined[$outcome], $entries);
      }
      elseif (is_numeric($combined[$outcome]) && is_numeric($entries)) {
        $combined[$outcome] += $entries;
      }
      else {
        $combined[$outcome] = $entries;
      }
    }
    return $combined;
  }
  /**
   * Updates publication records.
   *
   * Updates publication records that currently exist in the Chado pub table
   * with the most recent data in the remote database. Only publications whose
   * database is listed in the 'tripal_pub_supported_dbs' variable are updated.
   * The update runs inside a database transaction which is rolled back if an
   * exception is thrown. Progress messages are printed to standard output.
   *
   * @param $do_contact
   *   Set to TRUE if authors should automatically have a contact record added
   *   to Chado. Contacts are added using the name provided by the remote
   *   database.
   * @param $dbxref
   *   The unique database ID for the record to update. This value must
   *   be of the format DB_NAME:ACCESSION where DB_NAME is the name of the
   *   database (e.g. PMID or AGL) and the ACCESSION is the unique identifier
   *   for the record in the database. A value in any other format is reported
   *   as an error and nothing is updated.
   * @param $db
   *   The name of the remote database to update. If this value is provided and
   *   no dbxref then all of the publications currently in the Chado database
   *   for this remote database will be updated. If neither $dbxref nor $db is
   *   provided then all publications with a cross reference are considered.
   *
   * @ingroup tripal_pub
   */
  function tripal_reimport_publications($do_contact = FALSE, $dbxref = NULL, $db = NULL) {

    // A dbxref that was supplied but cannot be parsed must not silently widen
    // the update to every publication, so reject it up front.
    $matches = array();
    if ($dbxref && (!is_string($dbxref) || !preg_match('/^(.*?):(.*?)$/', $dbxref, $matches))) {
      tripal_report_error('tripal_pub', TRIPAL_ERROR,
        "tripal_reimport_publications(): The dbxref must be of the form DB_NAME:ACCESSION. You passed in %dbxref.",
        array('%dbxref' => print_r($dbxref, TRUE))
      );
      return;
    }

    print "\nNOTE: Loading of publications is performed using a database transaction. \n" .
      "If the load fails or is terminated prematurely then the entire set of \n" .
      "insertions/updates is rolled back and will not be found in the database\n\n";

    $transaction = db_transaction();
    try {
      // Get a list of all publications by their dbxrefs. Chado tables are
      // enclosed in curly brackets, as in the other Chado query of this file.
      $sql = "
        SELECT DB.name as db_name, DBX.accession
        FROM {pub} P
          INNER JOIN {pub_dbxref} PDBX ON P.pub_id = PDBX.pub_id
          INNER JOIN {dbxref} DBX ON DBX.dbxref_id = PDBX.dbxref_id
          INNER JOIN {db} DB ON DB.db_id = DBX.db_id
      ";
      $args = array();
      if ($dbxref) {
        $sql .= "WHERE DBX.accession = :accession and DB.name = :dbname ";
        $args[':accession'] = $matches[2];
        $args[':dbname'] = $matches[1];
      }
      elseif ($db) {
        $sql .= " WHERE DB.name = :dbname ";
        $args[':dbname'] = $db;
      }
      $sql .= "ORDER BY DB.name, P.pub_id";
      $results = chado_query($sql, $args);

      // We only update publications for databases we support. The list does
      // not change while we iterate, so read it once.
      $supported_dbs = variable_get('tripal_pub_supported_dbs', array());

      // Iterate through the publication cross references.
      while ($pub = $results->fetchObject()) {
        $accession = $pub->accession;
        $remote_db = $pub->db_name;
        if (!in_array($remote_db, $supported_dbs)) {
          continue;
        }

        // Build a one-criterion search that looks the record up by its ID.
        $search = array(
          'num_criteria' => 1,
          'remote_db' => $remote_db,
          'criteria' => array(
            '1' => array(
              'search_terms' => "$remote_db:$accession",
              'scope' => 'id',
              'operation' => '',
              'is_phrase' => 0,
            ),
          ),
        );

        // The remote search returns a structure whose 'pubs' element holds the
        // publications; pass only that list on, as the importers in this file
        // do.
        $remote = tripal_get_remote_pubs($remote_db, $search, 1, 0);
        $pubs = isset($remote['pubs']) ? $remote['pubs'] : array();
        tripal_pub_add_publications($pubs, $do_contact, TRUE);
      }

      // For backwards compatibility check to see if the legacy pub module
      // is enabled. If so, then sync the nodes.
      if (module_exists('tripal_pub')) {
        // Sync the updated publications with Drupal.
        print "Syncing publications with Drupal...\n";
        chado_node_sync_records('pub');

        // If the caller wants to create contacts then we should sync them.
        if ($do_contact) {
          print "Syncing contacts with Drupal...\n";
          chado_node_sync_records('contact');
        }
      }
    }
    catch (Exception $e) {
      $transaction->rollback();
      print "\n"; // make sure we start errors on new line
      watchdog_exception('T_pub_import', $e);
      print "FAILED: Rolling back database changes...\n";
      return;
    }
    print "Done.\n";
  }