tripal_chado.pub.api.inc 38 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164
  1. <?php
  2. /**
  3. * @file
  4. * Provides an application programming interface (API) to manage chado publications
  5. */
  6. /**
  7. * @defgroup tripal_pub_api Chado Publication
  8. * @ingroup tripal_chado_api
  9. * @{
  10. * @}
  11. */
  /**
   * Retrieves a chado publication array.
   *
   * @param $identifiers
   *   An array used to uniquely identify a publication. This array has the same
   *   format as that used by the chado_generate_var(). The following keys can be
   *   useful for uniquely identifying a publication as they should be unique:
   *    - pub_id: the chado pub.pub_id primary key.
   *    - nid: the drupal nid of the publication.
   *    - uniquename: A value to match with the pub.uniquename field.
   *   There are also some specially handled keys. They are:
   *    - property: An array describing the property to select records for. It
   *      should at least have either a 'type_name' key (if unique across cvs) or
   *      'type_id' key. Other supported keys include: 'cv_id', 'cv_name'
   *      (of the type), 'value' and 'rank'
   *    - dbxref: The database cross reference accession. It should be in the
   *      form DB:ACCESSION, where DB is the database name and ACCESSION is the
   *      unique publication identifier (e.g. PMID:4382934)
   *    - dbxref_id: The dbxref.dbxref_id of the publication.
   *   The special keys are checked in the order property, dbxref, dbxref_id and
   *   only the first one present is used.
   * @param $options
   *   An array of options. Supported keys include:
   *    - Any keys supported by chado_generate_var(). See that function
   *      definition for additional details.
   *
   * NOTE: the $identifiers parameter can really be any array similar to $values
   * passed into chado_select_record(). It should fully specify the pub record to
   * be returned.
   *
   * @return
   *   If a single publication is retrieved using the identifiers, then a
   *   publication array will be returned. The array is of the same format
   *   returned by the chado_generate_var() function. Otherwise (invalid
   *   identifiers, no match, or more than one match), FALSE will be returned.
   *
   * @ingroup tripal_pub_api
   */
  function chado_get_publication($identifiers, $options = array()) {

    // Error checking of parameters. Nothing below can work without a non-empty
    // array, so report the problem and stop instead of falling through.
    if (!is_array($identifiers)) {
      tripal_report_error('tripal_pub_api', TRIPAL_ERROR,
        "chado_get_publication: The identifier passed in is expected to be an array with the key
         matching a column name in the pub table (ie: pub_id or name). You passed in %identifier.",
        array('%identifier'=> print_r($identifiers, TRUE))
      );
      return FALSE;
    }
    if (empty($identifiers)) {
      tripal_report_error('tripal_pub_api', TRIPAL_ERROR,
        "chado_get_publication: You did not pass in anything to identify the publication you want. The identifier
         is expected to be an array with the key matching a column name in the pub table
         (ie: pub_id or name). You passed in %identifier.",
        array('%identifier'=> print_r($identifiers, TRUE))
      );
      return FALSE;
    }

    // If one of the identifiers is property then use
    // chado_get_record_with_property().
    if (array_key_exists('property', $identifiers)) {
      $property = $identifiers['property'];
      unset($identifiers['property']);
      // NOTE(review): the docblock describes 'property' as an array that itself
      // holds a 'type_name' key, yet the whole value is forwarded as the
      // 'type_name' element here. Confirm against
      // chado_get_record_with_property() before changing.
      $pub = chado_get_record_with_property(
        array('table' => 'pub', 'base_records' => $identifiers),
        array('type_name' => $property),
        $options
      );
    }
    elseif (array_key_exists('dbxref', $identifiers)) {
      // The lazy first group splits on the first colon: DB:ACCESSION.
      if (!preg_match('/^(.*?):(.*?)$/', $identifiers['dbxref'], $matches)) {
        tripal_report_error('tripal_pub_api', TRIPAL_ERROR,
          "chado_get_publication: The dbxref identifier is not correctly formatted.",
          array('%identifier'=> print_r($identifiers, TRUE))
        );
        // Without a parsable dbxref there is no publication to look up.
        return FALSE;
      }
      $dbname = $matches[1];
      $accession = $matches[2];

      // First make sure the dbxref is present.
      $values = array(
        'accession' => $accession,
        'db_id' => array(
          'name' => $dbname
        ),
      );
      $dbxref = chado_select_record('dbxref', array('dbxref_id'), $values);
      // empty() covers both "no rows" and a failed (non-array) select.
      if (empty($dbxref)) {
        return FALSE;
      }
      $pub_dbxref = chado_select_record('pub_dbxref', array('pub_id'), array('dbxref_id' => $dbxref[0]->dbxref_id));
      if (empty($pub_dbxref)) {
        return FALSE;
      }
      $pub = chado_generate_var('pub', array('pub_id' => $pub_dbxref[0]->pub_id), $options);
    }
    elseif (array_key_exists('dbxref_id', $identifiers)) {
      // First get the pub_dbxref record.
      $values = array('dbxref_id' => $identifiers['dbxref_id']);
      $pub_dbxref = chado_select_record('pub_dbxref', array('pub_id'), $values);

      // Now get the pub.
      if (empty($pub_dbxref)) {
        return FALSE;
      }
      $pub = chado_generate_var('pub', array('pub_id' => $pub_dbxref[0]->pub_id), $options);
    }
    // Else we have a simple case and we can just use chado_generate_var to get
    // the pub.
    else {
      // Try to get the pub.
      $pub = chado_generate_var('pub', $identifiers, $options);
    }

    // Ensure the pub is singular. If it's an array then it is not singular.
    if (is_array($pub)) {
      tripal_report_error('tripal_pub_api', TRIPAL_ERROR,
        "chado_get_publication: The identifiers did not find a single unique record. Identifiers passed: %identifier.",
        array('%identifier'=> print_r($identifiers, TRUE))
      );
      return FALSE;
    }

    // Report an error if $pub is FALSE since then chado_generate_var has failed.
    if ($pub === FALSE) {
      tripal_report_error('tripal_pub_api', TRIPAL_ERROR,
        "chado_get_publication: Could not find a publication using the identifiers
         provided. Check that the identifiers are correct. Identifiers passed: %identifier.",
        array('%identifier'=> print_r($identifiers, TRUE))
      );
      return FALSE;
    }

    // Else, as far we know, everything is fine so give them their pub :)
    return $pub;
  }
  /**
   * Checks whether a publication matching the given details already exists.
   *
   * The publication table of Chado only has a unique constraint for the
   * uniquename of the publication, but in reality a publication can be
   * considered unique by a combination of the title, publication type,
   * published year and series name (e.g. journal name or conference name). The
   * site administrator can configure how publications are determined to be
   * unique. This function uses the configuration specified by the administrator
   * to look for publications that match the details specified by the
   * $pub_details argument and indicates if one or more publications match the
   * criteria.
   *
   * @param $pub_details
   *   An associative array with details about the publications. The expected keys
   *   are:
   *     'Title':              The title of the publication.
   *     'Year':               The published year of the publication.
   *     'Publication Type':   An array of publication types (only the first is
   *                           used) or a single type name. Required.
   *     'Series Name':        The series name of the publication.
   *     'Journal Name':       An alternative to 'Series Name'.
   *     'Conference Name':    An alternative to 'Series Name'.
   *     'Citation':           The publication citation (this is the value saved
   *                           in the pub.uniquename field and must be unique).
   *
   *   If this key is present it will also be checked:
   *     'Publication Dbxref': A database cross reference of the form DB:ACCESSION
   *                           where DB is the name of the database and ACCESSION
   *                           is the unique identifier (e.g PMID:3483139).
   *
   * @return
   *   An array containing the pub_id's of matching publications. Returns an
   *   empty array if no pubs match, or if the publication type is missing or
   *   cannot be found in the 'tripal_pub' vocabulary.
   *
   * @ingroup tripal_pub_api
   */
  function chado_publication_exists($pub_details) {

    // First try to find the publication using the accession number if that key
    // exists in the details array.
    if (array_key_exists('Publication Dbxref', $pub_details)) {
      $pub = chado_get_publication(array('dbxref' => $pub_details['Publication Dbxref']));
      if ($pub) {
        return array($pub->pub_id);
      }
    }

    // Make sure the citation is unique.
    if (array_key_exists('Citation', $pub_details)) {
      $pub = chado_get_publication(array('uniquename' => $pub_details['Citation']));
      if ($pub) {
        return array($pub->pub_id);
      }
    }

    // The publication type is required.
    if (!array_key_exists('Publication Type', $pub_details)) {
      tripal_report_error('tripal_pub', TRIPAL_ERROR,
        "chado_publication_exists(): The Publication Type is a " .
        "required property but is missing", array());
      return array();
    }

    // Get the publication type (use the first publication type). The value may
    // be a list of types or a single type name.
    $type_name = $pub_details['Publication Type'];
    if (is_array($type_name)) {
      $type_name = isset($type_name[0]) ? $type_name[0] : '';
    }
    $identifiers = array(
      'name' => $type_name,
      'cv_id' => array(
        'name' => 'tripal_pub',
      ),
    );
    $pub_type = chado_get_cvterm($identifiers);
    if (!$pub_type) {
      // Report the resolved type name. Indexing the raw value with [0] would
      // yield only the first character when the type was passed as a string.
      tripal_report_error('tripal_pub', TRIPAL_ERROR,
        "chado_publication_exists(): Cannot find publication type: '%type'",
        array('%type' => $type_name));
      return array();
    }

    // Get the series name. The pub.series_name field is only 255 chars so we
    // must truncate to be safe. When several of the keys are present the later
    // one wins: 'Conference Name' over 'Journal Name' over 'Series Name'.
    $series_name = '';
    foreach (array('Series Name', 'Journal Name', 'Conference Name') as $key) {
      if (array_key_exists($key, $pub_details)) {
        $series_name = substr($pub_details[$key], 0, 255);
      }
    }

    // A missing title or year is matched as NULL rather than raising a notice.
    $title = isset($pub_details['Title']) ? $pub_details['Title'] : NULL;
    $pyear = isset($pub_details['Year']) ? $pub_details['Year'] : NULL;

    // Make sure the publication is unique using the preferred import
    // duplication check.
    $import_dups_check = variable_get('tripal_pub_import_duplicate_check', 'title_year_media');
    $pubs = array();
    switch ($import_dups_check) {
      case 'title_year':
        $identifiers = array(
          'title' => $title,
          'pyear' => $pyear
        );
        $pubs = chado_select_record('pub', array('pub_id'), $identifiers);
        break;
      case 'title_year_type':
        $identifiers = array(
          'title'   => $title,
          'pyear'   => $pyear,
          'type_id' => $pub_type->cvterm_id,
        );
        $pubs = chado_select_record('pub', array('pub_id'), $identifiers);
        break;
      case 'title_year_media':
        $identifiers = array(
          'title'       => $title,
          'pyear'       => $pyear,
          'series_name' => $series_name,
        );
        $pubs = chado_select_record('pub', array('pub_id'), $identifiers);
        break;
    }

    // Collect just the pub_ids of the matching records.
    $return = array();
    foreach ($pubs as $pub) {
      $return[] = $pub->pub_id;
    }

    return $return;
  }
  /**
   * Used for autocomplete in forms for identifying publications.
   *
   * Looks up at most 25 publications whose title begins with the provided
   * string (case-insensitive), ordered by title.
   *
   * @param $string
   *   The string to search for. It is matched literally against the start of
   *   the publication title.
   *
   * @return
   *   Nothing is returned. A JSON object is sent using drupal_json_output()
   *   where each key has the form "TITLE [id:PUB_ID]" and each value is the
   *   publication title.
   *
   * @ingroup tripal_pub_api
   */
  function chado_autocomplete_pub($string = '') {
    $items = array();
    $sql = "
      SELECT pub_id, title, uniquename
      FROM {pub}
      WHERE lower(title) like lower(:str)
      ORDER by title
      LIMIT 25 OFFSET 0
    ";
    // Escape the LIKE wildcards (% and _) typed by the user so that they are
    // matched literally; only the trailing % added here acts as a wildcard.
    $pubs = chado_query($sql, array(':str' => db_like($string) . '%'));
    while ($pub = $pubs->fetchObject()) {
      // The pub_id is embedded in the key so that the selected entry can be
      // resolved back to a single record.
      $val = $pub->title . " [id:" . $pub->pub_id . "]";
      $items[$val] = $pub->title;
    }

    drupal_json_output($items);
  }
  /**
   * Imports a single publication specified by a remote database cross reference.
   *
   * The import runs inside a database transaction which is rolled back if an
   * exception is thrown. Progress messages are printed to standard output.
   *
   * @param $pub_dbxref
   *   The unique database ID for the record to update. This value must
   *   be of the format DB_NAME:ACCESSION where DB_NAME is the name of the
   *   database (e.g. PMID or AGL) and the ACCESSION is the unique identifier
   *   for the record in the database. A value that does not have this form is
   *   reported as an error and nothing is imported.
   * @param $do_contact
   *   Set to TRUE if authors should automatically have a contact record added
   *   to Chado.
   * @param $do_update
   *   If set to TRUE then the publication will be updated if it already exists
   *   in the database.
   *
   * @ingroup tripal_pub_api
   */
  function chado_import_pub_by_dbxref($pub_dbxref, $do_contact = FALSE, $do_update = TRUE) {
    // Only the single matching record is requested from the remote database.
    $num_to_retrieve = 1;
    $page = 0;

    module_load_include('inc', 'tripal_chado', 'includes/loaders/tripal_chado.pub_importers');

    print "\nNOTE: Loading of publications is performed using a database transaction. \n" .
      "If the load fails or is terminated prematurely then the entire set of \n" .
      "insertions/updates is rolled back and will not be found in the database\n\n";

    $transaction = db_transaction();
    try {
      // The lazy first group splits on the first colon: DB_NAME:ACCESSION.
      if (preg_match('/^(.*?):(.*?)$/', $pub_dbxref, $matches)) {
        $dbname = $matches[1];
        $accession = $matches[2];

        // A single 'id' scoped criterion asks the remote database for exactly
        // this record.
        $criteria = array(
          'num_criteria' => 1,
          'remote_db' => $dbname,
          'criteria' => array(
            '1' => array(
              'search_terms' => "$dbname:$accession",
              'scope' => 'id',
              'operation' => '',
              'is_phrase' => 0,
            ),
          ),
        );
        $remote_db = $criteria['remote_db'];
        $results = tripal_get_remote_pubs($remote_db, $criteria, $num_to_retrieve, $page);
        tripal_pub_add_publications($results['pubs'], $do_contact, $do_update);
      }
      else {
        // Previously a malformed dbxref was silently ignored.
        tripal_report_error('tripal_pub_api', TRIPAL_ERROR,
          "chado_import_pub_by_dbxref: The dbxref is not correctly formatted. " .
          "It should be of the form DB_NAME:ACCESSION. You passed in %dbxref.",
          array('%dbxref' => print_r($pub_dbxref, TRUE))
        );
      }

      // For backwards compatibility check to see if the legacy pub module
      // is enabled. If so, then sync the nodes.
      if (module_exists('tripal_pub')) {
        // Sync the newly added publications with Drupal.
        print "Syncing publications with Drupal...\n";
        chado_node_sync_records('pub');

        // If any of the importers wanted to create contacts from the authors
        // then sync them.
        if ($do_contact) {
          print "Syncing contacts with Drupal...\n";
          chado_node_sync_records('contact');
        }
      }
    }
    catch (Exception $e) {
      $transaction->rollback();
      print "\n"; // make sure we start errors on new line
      watchdog_exception('T_pub_import', $e);
      print "FAILED: Rolling back database changes...\n";
      return;
    }
  }
  /**
   * Imports all publications for all active import setups.
   *
   * Every enabled record of the tripal_pub_import table is executed in turn,
   * retrieving the remote publications one page at a time. The whole run is
   * wrapped in a database transaction which is rolled back if an exception is
   * thrown. Progress messages are printed to standard output.
   *
   * @param $report_email
   *   A list of email address, separated by commas, that should be notified
   *   once importing has completed. If FALSE no report is sent.
   * @param $do_update
   *   If set to TRUE then publications that already exist in the Chado database
   *   will be updated, whereas if FALSE only new publications will be added.
   *
   * @ingroup tripal_pub_api
   */
  function chado_execute_active_pub_importers($report_email = FALSE, $do_update = FALSE) {
    // Number of publications requested from the remote database per page.
    $num_to_retrieve = 100;

    print "\nNOTE: Loading of publications is performed using a database transaction. \n" .
      "If the load fails or is terminated prematurely then the entire set of \n" .
      "insertions/updates is rolled back and will not be found in the database\n\n";

    // Start the transaction.
    $transaction = db_transaction();

    try {
      // Get all of the loaders.
      $args = array();
      $sql = "SELECT * FROM {tripal_pub_import} WHERE disabled = 0 ";
      $importers = db_query($sql, $args);
      $do_contact = FALSE;
      $reports = array();
      foreach ($importers as $import) {
        $page = 0;
        print "Executing importer: '" . $import->name . "'\n";

        // Keep track if any of the importers want to create contacts from authors.
        if ($import->do_contact == 1) {
          $do_contact = TRUE;
        }

        // NOTE(review): the criteria are unserialized from the
        // tripal_pub_import table; this assumes that table is only written by
        // trusted site code.
        $criteria = unserialize($import->criteria);
        $remote_db = $criteria['remote_db'];

        // Accumulates the report of every page for this importer. Storing the
        // per-page report directly would leave only the last page in the final
        // report.
        $importer_report = array();
        do {
          // Retrieve the pubs for this page. We'll retrieve 100 at a time.
          $remote = tripal_get_remote_pubs($remote_db, $criteria, $num_to_retrieve, $page);
          $pubs = $remote['pubs'];
          $page_report = tripal_pub_add_publications($pubs, $import->do_contact, $do_update);
          if (is_array($page_report)) {
            foreach ($page_report as $key => $entries) {
              if (is_array($entries) and isset($importer_report[$key]) and is_array($importer_report[$key])) {
                $importer_report[$key] = array_merge($importer_report[$key], $entries);
              }
              else {
                $importer_report[$key] = $entries;
              }
            }
          }
          $page++;
        }
        // Continue looping until we have a $pubs array that does not have
        // our requested number of records. This means we've hit the end.
        while (count($pubs) == $num_to_retrieve);

        $reports[$import->name] = $importer_report;
      }

      // Sync the newly added publications with Drupal.
      // For backwards compatibility check to see if the legacy pub module
      // is enabled. If so, then sync the nodes.
      if (module_exists('tripal_pub')) {
        print "Syncing publications with Drupal...\n";
        chado_node_sync_records('pub');
      }

      // Iterate through each of the reports and generate a final report with HTML
      // links.
      if ($report_email) {
        global $base_url;
        $HTML_report = "<html>";
        foreach ($reports as $importer => $report) {
          $inserted = (isset($report['inserted']) and is_array($report['inserted'])) ? $report['inserted'] : array();
          $total = count($inserted);
          $HTML_report .= "<b>$total new publications from importer: " . check_plain($importer) . "</b><br><ol>\n";
          foreach ($inserted as $pub) {
            if (array_key_exists('pub_id', $pub)) {
              // l() escapes the link text itself.
              $item = l($pub['Title'], "$base_url/pub/" . $pub['pub_id']);
            }
            else {
              // Titles come from the remote database, so escape them before
              // they are placed in the HTML report.
              $item = check_plain($pub['Title']);
            }
            $HTML_report .= "<li>$item</li>\n";
          }
          $HTML_report .= "</ol>\n";
        }
        $HTML_report .= "</html>";
        $site_email = variable_get('site_mail', '');
        $params = array(
          'message' => $HTML_report
        );
        drupal_mail('tripal_pub', 'import_report', $report_email, language_default(), $params, $site_email, TRUE);
      }

      // For backwards compatibility check to see if the legacy pub module
      // is enabled. If so, then sync the nodes.
      if (module_exists('tripal_pub')) {
        // If any of the importers wanted to create contacts from the authors then
        // sync them.
        if ($do_contact) {
          print "Syncing contacts with Drupal...\n";
          chado_node_sync_records('contact');
        }
      }
    }
    catch (Exception $e) {
      $transaction->rollback();
      print "\n"; // make sure we start errors on new line
      watchdog_exception('T_pub_import', $e);
      print "FAILED: Rolling back database changes...\n";
      return;
    }
    print "Done.\n";
  }
  /**
   * Updates publication records.
   *
   * Updates publication records that currently exist in the Chado pub table
   * with the most recent data in the remote database. The whole run is wrapped
   * in a database transaction which is rolled back if an exception is thrown.
   * Progress messages are printed to standard output.
   *
   * @param $do_contact
   *   Set to TRUE if authors should automatically have a contact record added
   *   to Chado. Contacts are added using the name provided by the remote
   *   database.
   * @param $dbxref
   *   The unique database ID for the record to update. This value must
   *   be of the format DB_NAME:ACCESSION where DB_NAME is the name of the
   *   database (e.g. PMID or AGL) and the ACCESSION is the unique identifier
   *   for the record in the database.
   * @param $db
   *   The name of the remote database to update. If this value is provided and
   *   no dbxref then all of the publications currently in the Chado database
   *   for this remote database will be updated.
   *
   * @ingroup tripal_pub_api
   */
  function chado_reimport_publications($do_contact = FALSE, $dbxref = NULL, $db = NULL) {

    print "\nNOTE: Loading of publications is performed using a database transaction. \n" .
      "If the load fails or is terminated prematurely then the entire set of \n" .
      "insertions/updates is rolled back and will not be found in the database\n\n";

    $transaction = db_transaction();
    try {

      // Get a list of all publications by their Dbxrefs that have supported
      // databases. The Chado tables are wrapped in curly braces like every
      // other chado_query() call in this file.
      $sql = "
        SELECT DB.name as db_name, DBX.accession
        FROM {pub} P
          INNER JOIN {pub_dbxref} PDBX ON P.pub_id = PDBX.pub_id
          INNER JOIN {dbxref} DBX ON DBX.dbxref_id = PDBX.dbxref_id
          INNER JOIN {db} DB ON DB.db_id = DBX.db_id
      ";
      $args = array();
      // A well-formed dbxref restricts the update to that single record,
      // otherwise a database name restricts it to that database.
      if ($dbxref and preg_match('/^(.*?):(.*?)$/', $dbxref, $matches)) {
        $dbname = $matches[1];
        $accession = $matches[2];
        $sql .= "WHERE DBX.accession = :accession and DB.name = :dbname ";
        $args[':accession'] = $accession;
        $args[':dbname'] = $dbname;
      }
      elseif ($db) {
        $sql .= " WHERE DB.name = :dbname ";
        $args[':dbname'] = $db;
      }
      $sql .= "ORDER BY DB.name, P.pub_id";
      $results = chado_query($sql, $args);

      // Here we need to only update publications for databases we support.
      $supported_dbs = variable_get('tripal_pub_supported_dbs', array());

      // Iterate through the pub IDs.
      while ($pub = $results->fetchObject()) {
        $accession = $pub->accession;
        $remote_db = $pub->db_name;

        if (!in_array($remote_db, $supported_dbs)) {
          continue;
        }

        // A single 'id' scoped criterion asks the remote database for exactly
        // this record.
        $search = array(
          'num_criteria' => 1,
          'remote_db' => $remote_db,
          'criteria' => array(
            '1' => array(
              'search_terms' => "$remote_db:$accession",
              'scope' => 'id',
              'operation' => '',
              'is_phrase' => 0,
            ),
          ),
        );
        // tripal_get_remote_pubs() returns a result set; the publications
        // themselves are under its 'pubs' key, as used by the other importers
        // in this file.
        $remote = tripal_get_remote_pubs($remote_db, $search, 1, 0);
        tripal_pub_add_publications($remote['pubs'], $do_contact, TRUE);
      }

      // For backwards compatibility check to see if the legacy pub module
      // is enabled. If so, then sync the nodes.
      if (module_exists('tripal_pub')) {
        // Sync the newly added publications with Drupal.
        print "Syncing publications with Drupal...\n";
        chado_node_sync_records('pub');

        // If the caller wants to create contacts then we should sync them.
        if ($do_contact) {
          print "Syncing contacts with Drupal...\n";
          chado_node_sync_records('contact');
        }
      }
    }
    catch (Exception $e) {
      $transaction->rollback();
      print "\n"; // make sure we start errors on new line
      watchdog_exception('T_pub_import', $e);
      print "FAILED: Rolling back database changes...\n";
      return;
    }
    print "Done.\n";
  }
  /**
   * Launch the Tripal job to generate citations.
   *
   * This function will recreate citations for all publications currently
   * loaded into Tripal. This is useful to create a consistent format for
   * all citations. Each generated citation and a final summary are printed to
   * standard output.
   *
   * @param $options
   *   A string indicating what publications to generate citations for.
   *   One of the following must be present:
   *    - all: Create and replace citation for all pubs.
   *    - new: Create citation for pubs that don't already have one.
   *   Any other value is reported as an error and nothing is changed.
   *
   * @ingroup tripal_pub_api
   */
  function chado_pub_create_citations($options) {
    // Look up the cvterm used as the type of the citation property.
    $sql = "
      SELECT cvterm_id
      FROM {cvterm}
      WHERE
        name = 'Citation' AND
        cv_id = (SELECT cv_id FROM {cv} WHERE name = 'tripal_pub')
    ";
    $citation_type_id = chado_query($sql)->fetchField();

    // Only bind the placeholders the selected query actually contains.
    $args = array();

    // NOTE(review): both queries skip pub_id 1; presumably that is a
    // placeholder publication record -- confirm.
    // Create and replace citation for all pubs.
    if ($options == 'all') {
      $sql = "SELECT pub_id FROM {pub} P WHERE pub_id <> 1";
      $skip_existing = FALSE;
    }
    // Create citation for pubs that don't already have one.
    elseif ($options == 'new') {
      $sql = "
        SELECT pub_id
        FROM {pub} P
        WHERE
          (SELECT value
           FROM {pubprop} PB
           WHERE type_id = :type_id AND P.pub_id = PB.pub_id AND rank = 0) IS NULL
          AND pub_id <> 1
      ";
      $args[':type_id'] = $citation_type_id;
      $skip_existing = TRUE;
    }
    // Anything else would otherwise re-run the cvterm lookup above as if it
    // were the publication query.
    else {
      tripal_report_error('tripal_pub_api', TRIPAL_ERROR,
        "chado_pub_create_citations: The options argument must be either 'all' or 'new'. You passed in %options.",
        array('%options' => print_r($options, TRUE))
      );
      return;
    }

    $result = chado_query($sql, $args);
    $counter_updated = 0;
    $counter_generated = 0;
    while ($pub = $result->fetchObject()) {
      $pub_arr = tripal_pub_get_publication_array($pub->pub_id, $skip_existing);
      if (!$pub_arr) {
        continue;
      }
      $citation = chado_pub_create_citation($pub_arr);
      print $citation . "\n\n";

      // Replace if citation exists. This condition is never TRUE if
      // $skip_existing is TRUE.
      if (!empty($pub_arr['Citation'])) {
        $sql = "
          UPDATE {pubprop} SET value = :value
          WHERE pub_id = :pub_id AND type_id = :type_id AND rank = :rank
        ";
        chado_query($sql, array(':value' => $citation, ':pub_id' => $pub->pub_id,
          ':type_id' => $citation_type_id, ':rank' => 0));
        $counter_updated++;
      }
      // Generate a new citation.
      else {
        $sql = "
          INSERT INTO {pubprop} (pub_id, type_id, value, rank)
          VALUES (:pub_id, :type_id, :value, :rank)
        ";
        chado_query($sql, array(':pub_id' => $pub->pub_id, ':type_id' => $citation_type_id,
          ':value' => $citation, ':rank' => 0));
        $counter_generated++;
      }
    }
    print "$counter_generated citations generated. $counter_updated citations updated.\n";
  }
  649. /**
  650. * This function generates citations for publications. It requires
  651. * an array structure with keys being the terms in the Tripal
  652. * publication ontology. This function is intended to be used
  653. * for any function that needs to generate a citation.
  654. *
  655. * @param $pub
  656. * An array structure containing publication details where the keys
  657. * are the publication ontology term names and values are the
  658. * corresponding details. The pub array can contain the following
  659. * keys with corresponding values:
  660. * - Publication Type: an array of publication types. a publication can
  661. * have more than one type.
  662. * - Authors: a string containing all of the authors of a publication.
  663. * - Journal Name: a string containing the journal name.
  664. * - Journal Abbreviation: a string containing the journal name abbreviation.
  665. * - Series Name: a string containing the series (e.g. conference
  666. * proceedings) name.
  667. * - Series Abbreviation: a string containing the series name abbreviation
  668. * - Volume: the series volume number.
  669. * - Issue: the series issue number.
  670. * - Pages: the page numbers for the publication.
  671. * - Publication Date: A date in the format "Year Month Day".
  672. *
  673. * @return
  674. * A text string containing the citation.
  675. *
  676. * @ingroup tripal_pub_api
  677. */
  function chado_pub_create_citation($pub) {
    // The citation always has the shape
    //   [Authors. ][Title. ][Venue. ][Date][; Volume(Issue):Pages].
    // Only two things depend on the publication type: which keys may supply
    // the venue (the first one that holds a value wins) and whether the
    // volume/issue/pages locator is printed after the date.
    //
    // A key only counts as present when it holds a non-empty scalar value.
    // Callers such as chado_get_minimal_pub_info() always pass the 'Volume',
    // 'Issue' and 'Pages' keys, even when the underlying values are NULL or
    // empty; testing with array_key_exists() alone turned those into stray
    // punctuation such as "; ():.".
    $journal_layout = array(
      'venue' => array(
        'Journal Name',
        'Journal Abbreviation',
        'Series Name',
        'Series Abbreviation',
      ),
      'locator' => TRUE,
    );
    $layouts = array(
      'Journal Article' => $journal_layout,
      'Review' => $journal_layout,
      'Letter' => $journal_layout,
      "Research Support, Non-U.S. Gov't" => array(
        'venue' => array('Journal Name'),
        'locator' => FALSE,
      ),
      'Conference Proceedings' => array(
        'venue' => array('Conference Name', 'Series Name', 'Series Abbreviation'),
        'locator' => TRUE,
      ),
    );
    // Every other type (including 'Book' and 'Book Chapter') uses this layout.
    $default_layout = array(
      'venue' => array('Series Name', 'Series Abbreviation'),
      'locator' => TRUE,
    );

    $pub_type = _chado_pub_citation_select_type($pub);
    $layout = $default_layout;
    if (is_string($pub_type) && isset($layouts[$pub_type])) {
      $layout = $layouts[$pub_type];
    }

    $citation = '';
    if (_chado_pub_citation_has_value($pub, 'Authors')) {
      $citation .= $pub['Authors'] . '. ';
    }
    if (_chado_pub_citation_has_value($pub, 'Title')) {
      $citation .= $pub['Title'] . '. ';
    }

    // Venue: only the first populated key is used.
    foreach ($layout['venue'] as $venue_key) {
      if (_chado_pub_citation_has_value($pub, $venue_key)) {
        $citation .= $pub[$venue_key] . '. ';
        break;
      }
    }

    // Date: the full publication date is preferred over the bare year.
    if (_chado_pub_citation_has_value($pub, 'Publication Date')) {
      $citation .= $pub['Publication Date'];
    }
    elseif (_chado_pub_citation_has_value($pub, 'Year')) {
      $citation .= $pub['Year'];
    }

    // Locator: "; Volume(Issue):Pages". The colon is only printed when a
    // volume precedes the pages.
    if ($layout['locator']) {
      $has_volume = _chado_pub_citation_has_value($pub, 'Volume');
      $has_issue = _chado_pub_citation_has_value($pub, 'Issue');
      $has_pages = _chado_pub_citation_has_value($pub, 'Pages');
      if ($has_volume || $has_issue || $has_pages) {
        $citation .= '; ';
      }
      if ($has_volume) {
        $citation .= $pub['Volume'];
      }
      if ($has_issue) {
        $citation .= '(' . $pub['Issue'] . ')';
      }
      if ($has_pages) {
        if ($has_volume) {
          $citation .= ':';
        }
        $citation .= $pub['Pages'];
      }
    }

    $citation .= '.';
    return $citation;
  }

  /**
   * Picks the publication type that drives the citation layout.
   *
   * A publication may carry several types (e.g. 'Journal Article' and
   * 'Clinical Trial'). The first type from the recognized list wins.
   * "Research Support, Non-U.S. Gov't" is only remembered as a fallback so
   * that a recognized type appearing later in the list is still preferred.
   * If nothing is recognized, the first listed type is used.
   *
   * @param $pub
   *   The publication details array passed to chado_pub_create_citation().
   *
   * @return
   *   The selected type, or an empty string when 'Publication Type' is
   *   missing or is an empty array. A non-array 'Publication Type' value is
   *   returned unchanged.
   */
  function _chado_pub_citation_select_type($pub) {
    if (!isset($pub['Publication Type'])) {
      return '';
    }
    $types = $pub['Publication Type'];
    if (!is_array($types)) {
      return $types;
    }

    $recognized = array(
      'Journal Article',
      'Conference Proceedings',
      'Review',
      'Book',
      'Letter',
      'Book Chapter',
    );
    $fallback = '';
    foreach ($types as $ptype) {
      if (in_array($ptype, $recognized, TRUE)) {
        return $ptype;
      }
      if ($ptype === "Research Support, Non-U.S. Gov't") {
        // Keep looking: a recognized type later in the list is preferred.
        $fallback = $ptype;
      }
    }
    if ($fallback !== '') {
      return $fallback;
    }

    // No recognized type: use the first one listed, if there is one.
    $first = reset($types);
    return ($first === FALSE) ? '' : $first;
  }

  /**
   * Tells whether a publication detail holds a value worth printing.
   *
   * @param $pub
   *   The publication details array.
   * @param $key
   *   The detail to look up (e.g. 'Volume').
   *
   * @return
   *   TRUE when the key is set to a scalar whose string form is not blank
   *   (so '0' counts as a value); FALSE for missing, NULL, empty or
   *   whitespace-only values and for non-scalar values.
   */
  function _chado_pub_citation_has_value($pub, $key) {
    return isset($pub[$key])
      && is_scalar($pub[$key])
      && trim((string) $pub[$key]) !== '';
  }
  /**
   * Retrieves the minimal information to uniquely describe any publication.
   *
   * The returned array is an associative array where the keys are
   * the controlled vocabulary terms in the form [vocab]:[accession].
   *
   * @param $pub
   *   A publication object as created by chado_generate_var().
   *
   * @return
   *   An empty array when $pub is empty or is Chado's default 'null'
   *   publication. Otherwise an array with the following keys:
   *   - TPUB:0000039: the publication title.
   *   - TPUB:0000003: the citation. The stored 'Citation' property is used
   *     when there is one; otherwise a citation is built with
   *     chado_pub_create_citation().
   *   - TPUB:0000050: the abstract, escaped with htmlspecialchars(), or an
   *     empty string when there is none.
   *   - TPUB:0000047: the author list, or 'N/A' when there is none.
   *   - TPUB:0000052: the value of the first 'URL' property, or an empty
   *     string when there is none.
   *   - SBO:0000554: an array of database cross references, each in the
   *     form "[db name]:[accession]".
   *
   * @ingroup tripal_pub_api
   */
  function chado_get_minimal_pub_info($pub) {
    if (!$pub) {
      return array();
    }

    // Chado has a null pub as default. We don't return anything for this.
    if (isset($pub->uniquename) && $pub->uniquename == 'null') {
      return array();
    }

    // Expand the title.
    $pub = chado_expand_var($pub, 'field', 'pub.title');
    $pub = chado_expand_var($pub, 'field', 'pub.volumetitle');

    // Get the abstract. The property is only expanded when the lookup
    // actually returned something, matching how the URL and Citation
    // properties are handled below.
    $values = array(
      'pub_id' => $pub->pub_id,
      'type_id' => array(
        'name' => 'Abstract',
      ),
    );
    $options = array(
      'include_fk' => array(),
    );
    $abstract = chado_generate_var('pubprop', $values, $options);
    $abstract_text = '';
    if ($abstract) {
      $abstract = chado_expand_var($abstract, 'field', 'pubprop.value');
      $abstract_text = htmlspecialchars($abstract->value);
    }

    // Get the author list, with the same guard as for the abstract.
    $values = array(
      'pub_id' => $pub->pub_id,
      'type_id' => array(
        'name' => 'Authors',
      ),
    );
    $options = array(
      'include_fk' => array(),
    );
    $authors = chado_generate_var('pubprop', $values, $options);
    $authors_list = 'N/A';
    if ($authors) {
      $authors = chado_expand_var($authors, 'field', 'pubprop.value');
      $authors_list = $authors->value;
    }

    // Expand the publication's database cross-references.
    // NOTE(review): nothing below reads $pub->pub_dbxref (the loop that used
    // to scan it only filled a variable that was never read, so it was
    // removed). The expansion itself is kept because it may change the $pub
    // object the caller holds -- confirm whether any caller relies on that
    // before dropping it.
    $options = array('return_array' => 1);
    $pub = chado_expand_var($pub, 'table', 'pub_dbxref', $options);

    // Get the URL.
    $values = array(
      'pub_id' => $pub->pub_id,
      'type_id' => array(
        'name' => 'URL',
      ),
    );
    $options = array(
      'return_array' => 1,
      'include_fk' => array(),
    );
    $url = '';
    $urls = chado_generate_var('pubprop', $values, $options);
    if ($urls) {
      $urls = chado_expand_var($urls, 'field', 'pubprop.value');
      if (count($urls) > 0) {
        $url = $urls[0]->value;
      }
    }

    // Get the list of database cross references.
    $values = array(
      'pub_id' => $pub->pub_id,
    );
    $options = array(
      'return_array' => 1,
    );
    $pub_dbxrefs = chado_generate_var('pub_dbxref', $values, $options);
    $dbxrefs = array();
    // Guard against a non-array result so the loop cannot warn.
    if (is_array($pub_dbxrefs)) {
      foreach ($pub_dbxrefs as $pub_dbxref) {
        $dbxrefs[] = $pub_dbxref->dbxref_id->db_id->name . ':' . $pub_dbxref->dbxref_id->accession;
      }
    }

    // Get the citation. A stored 'Citation' property wins; otherwise one is
    // generated from the publication record.
    $values = array(
      'pub_id' => $pub->pub_id,
      'type_id' => array(
        'name' => 'Citation',
      ),
    );
    $options = array(
      'include_fk' => array(),
    );
    $citation = chado_generate_var('pubprop', $values, $options);
    if ($citation) {
      $citation = chado_expand_var($citation, 'field', 'pubprop.value');
      $citation = $citation->value;
    }
    else {
      $pub_info = array(
        'Title' => $pub->title,
        'Publication Type' => $pub->type_id->name,
        'Authors' => $authors_list,
        'Series Name' => $pub->series_name,
        'Volume' => $pub->volume,
        'Issue' => $pub->issue,
        'Pages' => $pub->pages,
        'Publication Date' => $pub->pyear,
      );
      $citation = chado_pub_create_citation($pub_info);
    }

    return array(
      'TPUB:0000039' => $pub->title,
      'TPUB:0000003' => $citation,
      'TPUB:0000050' => $abstract_text,
      'TPUB:0000047' => $authors_list,
      'TPUB:0000052' => $url,
      'SBO:0000554' => $dbxrefs,
    );
  }