tripal_chado.pub.api.inc 38 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144
  1. <?php
  2. /**
  3. * @file
  4. * Provides an application programming interface (API) to manage chado publications
  5. */
  6. /**
  7. * @defgroup tripal_pub_api Chado Publication
  8. * @ingroup tripal_chado_api
  9. * @{
  10. * @}
  11. */
  12. /**
  13. * Retrieves a chado publication array
  14. *
  15. * @param $identifier
  16. * An array used to uniquely identify a publication. This array has the same
  17. * format as that used by the chado_generate_var(). The following keys can be
  18. * useful for uniquely identifying a publication as they should be unique:
  19. * - pub_id: the chado pub.pub_id primary key
  20. * - nid: the drupal nid of the publication
  21. * - uniquename: A value to matach with the pub.uniquename field
  22. * There are also some specially handled keys. They are:
  23. * - property: An array describing the property to select records for. It
  24. * should at least have either a 'type_name' key (if unique across cvs) or
  25. * 'type_id' key. Other supported keys include: 'cv_id', 'cv_name' (of the type),
  26. * 'value' and 'rank'
  27. * - dbxref: The database cross reference accession. It should be in the form
  28. * DB:ACCESSION, where DB is the database name and ACCESSION is the
  29. * unique publication identifier (e.g. PMID:4382934)
  30. * - dbxref_id: The dbxref.dbxref_id of the publication.
  31. * @param $options
  32. * An array of options. Supported keys include:
  33. * - Any keys supported by chado_generate_var(). See that function definition for
  34. * additional details.
  35. *
  36. * NOTE: the $identifier parameter can really be any array similar to $values passed into
  37. * chado_select_record(). It should fully specify the pub record to be returned.
  38. *
  39. * @return
  40. * If a singe publication is retreived using the identifiers, then a publication
  41. * array will be returned. The array is of the same format returned by the
  42. * chado_generate_var() function. Otherwise, FALSE will be returned.
  43. *
  44. * @ingroup tripal_pub_api
  45. */
  46. function tripal_get_publication($identifiers, $options = array()) {
  47. // Error Checking of parameters
  48. if (!is_array($identifiers)) {
  49. tripal_report_error('tripal_pub_api', TRIPAL_ERROR,
  50. "chado_get_publication: The identifier passed in is expected to be an array with the key
  51. matching a column name in the pub table (ie: pub_id or name). You passed in %identifier.",
  52. array('%identifier'=> print_r($identifiers, TRUE))
  53. );
  54. }
  55. elseif (empty($identifiers)) {
  56. tripal_report_error('tripal_pub_api', TRIPAL_ERROR,
  57. "chado_get_publication: You did not pass in anything to identify the publication you want. The identifier
  58. is expected to be an array with the key matching a column name in the pub table
  59. (ie: pub_id or name). You passed in %identifier.",
  60. array('%identifier'=> print_r($identifiers, TRUE))
  61. );
  62. }
  63. // If one of the identifiers is property then use chado_get_record_with_property()
  64. if (array_key_exists('property', $identifiers)) {
  65. $property = $identifiers['property'];
  66. unset($identifiers['property']);
  67. $pub = chado_get_record_with_property(
  68. array('table' => 'pub', 'base_records' => $identifiers),
  69. array('type_name' => $property),
  70. $options
  71. );
  72. }
  73. elseif (array_key_exists('dbxref', $identifiers)) {
  74. if(preg_match('/^(.*?):(.*?)$/', $identifiers['dbxref'], $matches)) {
  75. $dbname = $matches[1];
  76. $accession = $matches[2];
  77. // First make sure the dbxref is present.
  78. $values = array(
  79. 'accession' => $accession,
  80. 'db_id' => array(
  81. 'name' => $dbname
  82. ),
  83. );
  84. $dbxref = chado_select_record('dbxref', array('dbxref_id'), $values);
  85. if (count($dbxref) == 0) {
  86. return FALSE;
  87. }
  88. $pub_dbxref = chado_select_record('pub_dbxref', array('pub_id'), array('dbxref_id' => $dbxref[0]->dbxref_id));
  89. if (count($pub_dbxref) == 0) {
  90. return FALSE;
  91. }
  92. $pub = chado_generate_var('pub', array('pub_id' => $pub_dbxref[0]->pub_id), $options);
  93. }
  94. else {
  95. tripal_report_error('tripal_pub_api', TRIPAL_ERROR,
  96. "chado_get_publication: The dbxref identifier is not correctly formatted.",
  97. array('%identifier'=> print_r($identifiers, TRUE))
  98. );
  99. }
  100. }
  101. elseif (array_key_exists('dbxref_id', $identifiers)) {
  102. // first get the pub_dbxref record
  103. $values = array('dbxref_id' => $identifiers['dbxref_id']);
  104. $pub_dbxref = chado_select_record('pub_dbxref', array('pub_id'), $values);
  105. // now get the pub
  106. if (count($pub_dbxref) > 0) {
  107. $pub = chado_generate_var('pub', array('pub_id' => $pub_dbxref[0]->pub_id), $options);
  108. }
  109. else {
  110. return FALSE;
  111. }
  112. }
  113. // Else we have a simple case and we can just use chado_generate_var to get the pub
  114. else {
  115. // Try to get the pub
  116. $pub = chado_generate_var('pub', $identifiers, $options);
  117. }
  118. // Ensure the pub is singular. If it's an array then it is not singular
  119. if (is_array($pub)) {
  120. tripal_report_error('tripal_pub_api', TRIPAL_ERROR,
  121. "chado_get_publication: The identifiers did not find a single unique record. Identifiers passed: %identifier.",
  122. array('%identifier'=> print_r($identifiers, TRUE))
  123. );
  124. }
  125. // Report an error if $pub is FALSE since then chado_generate_var has failed
  126. elseif ($pub === FALSE) {
  127. tripal_report_error('tripal_pub_api', TRIPAL_ERROR,
  128. "chado_get_publication: Could not find a publication using the identifiers
  129. provided. Check that the identifiers are correct. Identifiers passed: %identifier.",
  130. array('%identifier'=> print_r($identifiers, TRUE))
  131. );
  132. }
  133. // Else, as far we know, everything is fine so give them their pub :)
  134. else {
  135. return $pub;
  136. }
  137. }
  138. /**
  139. * The publication table of Chado only has a unique constraint for the
  140. * uniquename of the publiation, but in reality a publication can be considered
  141. * unique by a combination of the title, publication type, published year and
  142. * series name (e.g. journal name or conference name). The site administrator
  143. * can configure how publications are determined to be unique. This function
  144. * uses the configuration specified by the administrator to look for publications
  145. * that match the details specified by the $pub_details argument
  146. * and indicates if one ore more publications match the criteria.
  147. *
  148. * @param $pub_details
  149. * An associative array with details about the publications. The expected keys
  150. * are:
  151. * 'Title': The title of the publication
  152. * 'Year': The published year of the publication
  153. * 'Publication Type': An array of publication types. A publication can have more than one type.
  154. * 'Series Name': The series name of the publication
  155. * 'Journal Name': An alternative to 'Series Name'
  156. * 'Conference Name': An alternative to 'Series Name'
  157. * 'Citation': The publication citation (this is the value saved in the pub.uniquename field and must be unique)
  158. * If this key is present it will also be checked
  159. * 'Publication Dbxref': A database cross reference of the form DB:ACCESSION where DB is the name
  160. * of the database and ACCESSION is the unique identifier (e.g PMID:3483139)
  161. *
  162. * @return
  163. * An array containing the pub_id's of matching publications. Returns an
  164. * empty array if no pubs match
  165. *
  166. * @ingroup tripal_pub_api
  167. */
  168. function tripal_publication_exists($pub_details) {
  169. // first try to find the publication using the accession number if that key exists in the details array
  170. if (array_key_exists('Publication Dbxref', $pub_details)) {
  171. $pub = tripal_get_publication(array('dbxref' => $pub_details['Publication Dbxref']));
  172. if($pub) {
  173. return array($pub->pub_id);
  174. }
  175. }
  176. // make sure the citation is unique
  177. if (array_key_exists('Citation', $pub_details)) {
  178. $pub = tripal_get_publication(array('uniquename' => $pub_details['Citation']));
  179. if($pub) {
  180. return array($pub->pub_id);
  181. }
  182. }
  183. // get the publication type (use the first publication type)
  184. if (array_key_exists('Publication Type', $pub_details)) {
  185. $type_name = '';
  186. if(is_array($pub_details['Publication Type'])) {
  187. $type_name = $pub_details['Publication Type'][0];
  188. }
  189. else {
  190. $type_name = $pub_details['Publication Type'];
  191. }
  192. $identifiers = array(
  193. 'name' => $type_name,
  194. 'cv_id' => array(
  195. 'name' => 'tripal_pub',
  196. ),
  197. );
  198. $pub_type = tripal_get_cvterm($identifiers);
  199. }
  200. else {
  201. tripal_report_error('tripal_pub', TRIPAL_ERROR,
  202. "tripal_publication_exists(): The Publication Type is a " .
  203. "required property but is missing", array());
  204. return array();
  205. }
  206. if (!$pub_type) {
  207. tripal_report_error('tripal_pub', TRIPAL_ERROR,
  208. "tripal_publication_exists(): Cannot find publication type: '%type'",
  209. array('%type' => $pub_details['Publication Type'][0]));
  210. return array();
  211. }
  212. // get the series name. The pub.series_name field is only 255 chars so we must truncate to be safe
  213. $series_name = '';
  214. if (array_key_exists('Series Name', $pub_details)) {
  215. $series_name = substr($pub_details['Series Name'], 0, 255);
  216. }
  217. if (array_key_exists('Journal Name', $pub_details)) {
  218. $series_name = substr($pub_details['Journal Name'], 0, 255);
  219. }
  220. if (array_key_exists('Conference Name', $pub_details)) {
  221. $series_name = substr($pub_details['Conference Name'], 0, 255);
  222. }
  223. // make sure the publication is unique using the prefereed import duplication check
  224. $import_dups_check = variable_get('tripal_pub_import_duplicate_check', 'title_year_media');
  225. $pubs = array();
  226. switch ($import_dups_check) {
  227. case 'title_year':
  228. $identifiers = array(
  229. 'title' => $pub_details['Title'],
  230. 'pyear' => $pub_details['Year']
  231. );
  232. $pubs = chado_select_record('pub', array('pub_id'), $identifiers);
  233. break;
  234. case 'title_year_type':
  235. $identifiers = array(
  236. 'title' => $pub_details['Title'],
  237. 'pyear' => $pub_details['Year'],
  238. 'type_id' => $pub_type->cvterm_id,
  239. );
  240. $pubs = chado_select_record('pub', array('pub_id'), $identifiers);
  241. break;
  242. case 'title_year_media':
  243. $identifiers = array(
  244. 'title' => $pub_details['Title'],
  245. 'pyear' => $pub_details['Year'],
  246. 'series_name' => $series_name,
  247. );
  248. $pubs = chado_select_record('pub', array('pub_id'), $identifiers);
  249. break;
  250. }
  251. $return = array();
  252. foreach ($pubs as $pub) {
  253. $return[] = $pub->pub_id;
  254. }
  255. return $return;
  256. }
  257. /**
  258. * Used for autocomplete in forms for identifying for publications.
  259. *
  260. * @param $field
  261. * The field in the publication to search on.
  262. * @param $string
  263. * The string to search for
  264. *
  265. * @return
  266. * A json array of terms that begin with the provided string
  267. *
  268. * @ingroup tripal_chado_api
  269. */
  270. function tripal_autocomplete_pub($string = '') {
  271. $items = array();
  272. $sql = "
  273. SELECT pub_id, title, uniquename
  274. FROM {pub}
  275. WHERE lower(title) like lower(:str)
  276. ORDER by title
  277. LIMIT 25 OFFSET 0
  278. ";
  279. $pubs = chado_query($sql, array(':str' => $string . '%'));
  280. while ($pub = $pubs->fetchObject()) {
  281. $val = $pub->title . " [id:" . $pub->pub_id . "]";
  282. $items[$val] = $pub->title;
  283. }
  284. drupal_json_output($items);
  285. }
  286. /**
  287. * Imports a singe publication specified by a remote database cross reference.
  288. *
  289. * @param $pub_dbxref
  290. * The unique database ID for the record to update. This value must
  291. * be of the format DB_NAME:ACCESSION where DB_NAME is the name of the
  292. * database (e.g. PMID or AGL) and the ACCESSION is the unique identifier
  293. * for the record in the database.
  294. * @param $do_contact
  295. * Set to TRUE if authors should automatically have a contact record added
  296. * to Chado.
  297. * @param $do_update
  298. * If set to TRUE then the publication will be updated if it already exists
  299. * in the database.
  300. *
  301. * @ingroup tripal_pub
  302. */
  303. function tripal_import_pub_by_dbxref($pub_dbxref, $do_contact = FALSE, $do_update = TRUE) {
  304. $num_to_retrieve = 1;
  305. $pager_id = 0;
  306. $page = 0;
  307. $num_pubs = 0;
  308. $pub_id = NULL;
  309. module_load_include('inc', 'tripal_chado', 'includes/loaders/tripal_chado.pub_importers');
  310. print "\nNOTE: Loading of publications is performed using a database transaction. \n" .
  311. "If the load fails or is terminated prematurely then the entire set of \n" .
  312. "insertions/updates is rolled back and will not be found in the database\n\n";
  313. $transaction = db_transaction();
  314. try {
  315. if(preg_match('/^(.*?):(.*?)$/', $pub_dbxref, $matches)) {
  316. $dbname = $matches[1];
  317. $accession = $matches[2];
  318. $criteria = array(
  319. 'num_criteria' => 1,
  320. 'remote_db' => $dbname,
  321. 'criteria' => array(
  322. '1' => array(
  323. 'search_terms' => "$dbname:$accession",
  324. 'scope' => 'id',
  325. 'operation' => '',
  326. 'is_phrase' => 0,
  327. ),
  328. ),
  329. );
  330. $remote_db = $criteria['remote_db'];
  331. $results = tripal_get_remote_pubs($remote_db, $criteria, $num_to_retrieve, $page);
  332. $pubs = $results['pubs'];
  333. $search_str = $results['search_str'];
  334. $total_records = $results['total_records'];
  335. tripal_pub_add_publications($pubs, $do_contact, $do_update);
  336. }
  337. // For backwards compatibility check to see if the legacy pub module
  338. // is enabled. If so, then sync the nodes.
  339. if (module_exists('tripal_pub')) {
  340. // sync the newly added publications with Drupal
  341. print "Syncing publications with Drupal...\n";
  342. chado_node_sync_records('pub');
  343. // if any of the importers wanted to create contacts from the authors then sync them
  344. if($do_contact) {
  345. print "Syncing contacts with Drupal...\n";
  346. chado_node_sync_records('contact');
  347. }
  348. }
  349. }
  350. catch (Exception $e) {
  351. $transaction->rollback();
  352. print "\n"; // make sure we start errors on new line
  353. watchdog_exception('T_pub_import', $e);
  354. print "FAILED: Rolling back database changes...\n";
  355. return;
  356. }
  357. }
  358. /**
  359. * Imports all publications for all active import setups.
  360. *
  361. * @param $report_email
  362. * A list of email address, separated by commas, that should be notified
  363. * once importing has completed
  364. * @param $do_update
  365. * If set to TRUE then publications that already exist in the Chado database
  366. * will be updated, whereas if FALSE only new publications will be added
  367. *
  368. * @ingroup tripal_pub
  369. */
  370. function tripal_execute_active_pub_importers($report_email = FALSE, $do_update = FALSE) {
  371. $num_to_retrieve = 100;
  372. $page = 0;
  373. print "\nNOTE: Loading of publications is performed using a database transaction. \n" .
  374. "If the load fails or is terminated prematurely then the entire set of \n" .
  375. "insertions/updates is rolled back and will not be found in the database\n\n";
  376. // start the transaction
  377. $transaction = db_transaction();
  378. try {
  379. // get all of the loaders
  380. $args = array();
  381. $sql = "SELECT * FROM {tripal_pub_import} WHERE disabled = 0 ";
  382. $results = db_query($sql, $args);
  383. $do_contact = FALSE;
  384. $reports = array();
  385. foreach ($results as $import) {
  386. $page = 0;
  387. print "Executing importer: '" . $import->name . "'\n";
  388. // keep track if any of the importers want to create contacts from authors
  389. if ($import->do_contact == 1) {
  390. $do_contact = TRUE;
  391. }
  392. $criteria = unserialize($import->criteria);
  393. $remote_db = $criteria['remote_db'];
  394. do {
  395. // retrieve the pubs for this page. We'll retreive 100 at a time
  396. $results = tripal_get_remote_pubs($remote_db, $criteria, $num_to_retrieve, $page);
  397. $pubs = $results['pubs'];
  398. $reports[$import->name] = tripal_pub_add_publications($pubs, $import->do_contact, $do_update);
  399. $page++;
  400. }
  401. // continue looping until we have a $pubs array that does not have
  402. // our requested numer of records. This means we've hit the end
  403. while (count($pubs) == $num_to_retrieve);
  404. }
  405. // sync the newly added publications with Drupal. If the user
  406. // requested a report then we don't want to print any syncing information
  407. // so pass 'FALSE' to the sync call
  408. // For backwards compatibility check to see if the legacy pub module
  409. // is enabled. If so, then sync the nodes.
  410. if (module_exists('tripal_pub')) {
  411. print "Syncing publications with Drupal...\n";
  412. chado_node_sync_records('pub');
  413. }
  414. // iterate through each of the reports and generate a final report with HTML links
  415. $HTML_report = '';
  416. if ($report_email) {
  417. $HTML_report .= "<html>";
  418. global $base_url;
  419. foreach ($reports as $importer => $report) {
  420. $total = count($report['inserted']);
  421. $HTML_report .= "<b>$total new publications from importer: $importer</b><br><ol>\n";
  422. foreach ($report['inserted'] as $pub) {
  423. $item = $pub['Title'];
  424. if (array_key_exists('pub_id', $pub)) {
  425. $item = l($pub['Title'], "$base_url/pub/" . $pub['pub_id']);
  426. }
  427. $HTML_report .= "<li>$item</li>\n";
  428. }
  429. $HTML_report .= "</ol>\n";
  430. }
  431. $HTML_report .= "</html>";
  432. $site_email = variable_get('site_mail', '');
  433. $params = array(
  434. 'message' => $HTML_report
  435. );
  436. drupal_mail('tripal_pub', 'import_report', $report_email, language_default(), $params, $site_email, TRUE);
  437. }
  438. // For backwards compatibility check to see if the legacy pub module
  439. // is enabled. If so, then sync the nodes.
  440. if (module_exists('tripal_pub')) {
  441. // if any of the importers wanted to create contacts from the authors then sync them
  442. if($do_contact) {
  443. print "Syncing contacts with Drupal...\n";
  444. chado_node_sync_records('contact');
  445. }
  446. }
  447. }
  448. catch (Exception $e) {
  449. $transaction->rollback();
  450. print "\n"; // make sure we start errors on new line
  451. watchdog_exception('T_pub_import', $e);
  452. print "FAILED: Rolling back database changes...\n";
  453. return;
  454. }
  455. print "Done.\n";
  456. }
  457. /**
  458. * Updates publication records.
  459. *
  460. * Updates publication records that currently exist in the Chado pub table
  461. * with the most recent data in the remote database.
  462. *
  463. * @param $do_contact
  464. * Set to TRUE if authors should automatically have a contact record added
  465. * to Chado. Contacts are added using the name provided by the remote
  466. * database.
  467. * @param $dbxref
  468. * The unique database ID for the record to update. This value must
  469. * be of the format DB_NAME:ACCESSION where DB_NAME is the name of the
  470. * database (e.g. PMID or AGL) and the ACCESSION is the unique identifier
  471. * for the record in the database.
  472. * @param $db
  473. * The name of the remote database to update. If this value is provided and
  474. * no dbxref then all of the publications currently in the Chado database
  475. * for this remote database will be updated.
  476. *
  477. * @ingroup tripal_pub
  478. */
  479. function tripal_reimport_publications($do_contact = FALSE, $dbxref = NULL, $db = NULL) {
  480. print "\nNOTE: Loading of publications is performed using a database transaction. \n" .
  481. "If the load fails or is terminated prematurely then the entire set of \n" .
  482. "insertions/updates is rolled back and will not be found in the database\n\n";
  483. $transaction = db_transaction();
  484. try {
  485. // get a list of all publications by their Dbxrefs that have supported databases
  486. $sql = "
  487. SELECT DB.name as db_name, DBX.accession
  488. FROM pub P
  489. INNER JOIN pub_dbxref PDBX ON P.pub_id = PDBX.pub_id
  490. INNER JOIN dbxref DBX ON DBX.dbxref_id = PDBX.dbxref_id
  491. INNER JOIN db DB ON DB.db_id = DBX.db_id
  492. ";
  493. $args = array();
  494. if ($dbxref and preg_match('/^(.*?):(.*?)$/', $dbxref, $matches)) {
  495. $dbname = $matches[1];
  496. $accession = $matches[2];
  497. $sql .= "WHERE DBX.accession = :accession and DB.name = :dbname ";
  498. $args[':accession'] = $accession;
  499. $args[':dbname'] = $dbname;
  500. }
  501. elseif ($db) {
  502. $sql .= " WHERE DB.name = :dbname ";
  503. $args[':dbname'] = $db;
  504. }
  505. $sql .= "ORDER BY DB.name, P.pub_id";
  506. $results = chado_query($sql, $args);
  507. $num_to_retrieve = 100;
  508. $i = 0; // count the number of IDs. When we hit $num_to_retrieve we'll do the query
  509. $curr_db = ''; // keeps track of the current current database
  510. $ids = array(); // the list of IDs for the database
  511. $search = array(); // the search array passed to the search function
  512. // iterate through the pub IDs
  513. while ($pub = $results->fetchObject()) {
  514. $accession = $pub->accession;
  515. $remote_db = $pub->db_name;
  516. // here we need to only update publications for databases we support
  517. $supported_dbs = variable_get('tripal_pub_supported_dbs', array());
  518. if(!in_array($remote_db, $supported_dbs)) {
  519. continue;
  520. }
  521. $search = array(
  522. 'num_criteria' => 1,
  523. 'remote_db' => $remote_db,
  524. 'criteria' => array(
  525. '1' => array(
  526. 'search_terms' => "$remote_db:$accession",
  527. 'scope' => 'id',
  528. 'operation' => '',
  529. 'is_phrase' => 0,
  530. ),
  531. ),
  532. );
  533. $pubs = tripal_get_remote_pubs($remote_db, $search, 1, 0);
  534. tripal_pub_add_publications($pubs, $do_contact, TRUE);
  535. $i++;
  536. }
  537. // For backwards compatibility check to see if the legacy pub module
  538. // is enabled. If so, then sync the nodes.
  539. if (module_exists('tripal_pub')) {
  540. // sync the newly added publications with Drupal
  541. print "Syncing publications with Drupal...\n";
  542. chado_node_sync_records('pub');
  543. // if the caller wants to create contacts then we should sync them
  544. if ($do_contact) {
  545. print "Syncing contacts with Drupal...\n";
  546. chado_node_sync_records('contact');
  547. }
  548. }
  549. }
  550. catch (Exception $e) {
  551. $transaction->rollback();
  552. print "\n"; // make sure we start errors on new line
  553. watchdog_exception('T_pub_import', $e);
  554. print "FAILED: Rolling back database changes...\n";
  555. return;
  556. }
  557. print "Done.\n";
  558. }
  559. /**
  560. * Launch the Tripal job to generate citations.
  561. *
  562. * This function will recreate citations for all publications currently
  563. * loaded into Tripal. This is useful to create a consistent format for
  564. * all citations.
  565. *
  566. * @param $options
  567. * Options pertaining to what publications to generate citations for.
  568. * One of the following must be present:
  569. * - all: Create and replace citation for all pubs
  570. * - new: Create citation for pubs that don't already have one
  571. *
  572. * @ingroup tripal_pub
  573. */
  574. function tripal_pub_create_citations($options) {
  575. $skip_existing = TRUE;
  576. $sql = "
  577. SELECT cvterm_id
  578. FROM {cvterm}
  579. WHERE
  580. name = 'Citation' AND
  581. cv_id = (SELECT cv_id FROM {cv} WHERE name = 'tripal_pub')
  582. ";
  583. $citation_type_id = chado_query($sql)->fetchField();
  584. // Create and replace citation for all pubs
  585. if ($options == 'all') {
  586. $sql = "SELECT pub_id FROM {pub} P WHERE pub_id <> 1";
  587. $skip_existing = FALSE;
  588. }
  589. // Create citation for pubs that don't already have one
  590. else if ($options == 'new') {
  591. $sql = "
  592. SELECT pub_id
  593. FROM {pub} P
  594. WHERE
  595. (SELECT value
  596. FROM {pubprop} PB
  597. WHERE type_id = :type_id AND P.pub_id = PB.pub_id AND rank = 0) IS NULL
  598. AND pub_id <> 1
  599. ";
  600. $skip_existing = TRUE;
  601. }
  602. $result = chado_query($sql, array(':type_id' => $citation_type_id));
  603. $counter_updated = 0;
  604. $counter_generated = 0;
  605. while ($pub = $result->fetchObject()) {
  606. $pub_arr = tripal_pub_get_publication_array($pub->pub_id, $skip_existing);
  607. if ($pub_arr) {
  608. $citation = tripal_pub_create_citation($pub_arr);
  609. print $citation . "\n\n";
  610. // Replace if citation exists. This condition is never TRUE if $skip_existing is TRUE
  611. if ($pub_arr['Citation']) {
  612. $sql = "
  613. UPDATE {pubprop} SET value = :value
  614. WHERE pub_id = :pub_id AND type_id = :type_id AND rank = :rank
  615. ";
  616. chado_query($sql, array(':value' => $citation, ':pub_id' => $pub->pub_id,
  617. ':type_id' => $citation_type_id, ':rank' => 0));
  618. $counter_updated ++;
  619. // Generate a new citation
  620. } else {
  621. $sql = "
  622. INSERT INTO {pubprop} (pub_id, type_id, value, rank)
  623. VALUES (:pub_id, :type_id, :value, :rank)
  624. ";
  625. chado_query($sql, array(':pub_id' => $pub->pub_id, ':type_id' => $citation_type_id,
  626. ':value' => $citation, ':rank' => 0));
  627. $counter_generated ++;
  628. }
  629. }
  630. }
  631. print "$counter_generated citations generated. $counter_updated citations updated.\n";
  632. }
  633. /**
  634. * This function generates citations for publications. It requires
  635. * an array structure with keys being the terms in the Tripal
  636. * publication ontology. This function is intended to be used
  637. * for any function that needs to generate a citation.
  638. *
  639. * @param $pub
  640. * An array structure containing publication details where the keys
  641. * are the publication ontology term names and values are the
  642. * corresponding details. The pub array can contain the following
  643. * keys with corresponding values:
  644. * - Publication Type: an array of publication types. a publication can have more than one type
  645. * - Authors: a string containing all of the authors of a publication
  646. * - Journal Name: a string containing the journal name
  647. * - Journal Abbreviation: a string containing the journal name abbreviation
  648. * - Series Name: a string containing the series (e.g. conference proceedings) name
  649. * - Series Abbreviation: a string containing the series name abbreviation
  650. * - Volume: the series volume number
  651. * - Issue: the series issue number
  652. * - Pages: the page numbers for the publication
  653. * - Publication Date: A date in the format "Year Month Day"
  654. *
  655. * @return
  656. * A text string containing the citation
  657. *
  658. * @ingroup tripal_pub
  659. */
  660. function tripal_pub_create_citation($pub) {
  661. $citation = '';
  662. $pub_type = '';
  663. // An article may have more than one publication type. For example,
  664. // a publication type can be 'Journal Article' but also a 'Clinical Trial'.
  665. // Therefore, we need to select the type that makes most sense for
  666. // construction of the citation. Here we'll iterate through them all
  667. // and select the one that matches best.
  668. if (is_array($pub['Publication Type'])) {
  669. foreach ($pub['Publication Type'] as $ptype) {
  670. if ($ptype == 'Journal Article' ) {
  671. $pub_type = $ptype;
  672. break;
  673. }
  674. else if ($ptype == 'Conference Proceedings'){
  675. $pub_type = $ptype;
  676. break;
  677. }
  678. else if ($ptype == 'Review') {
  679. $pub_type = $ptype;
  680. break;
  681. }
  682. else if ($ptype == 'Book') {
  683. $pub_type = $ptype;
  684. break;
  685. }
  686. else if ($ptype == 'Letter') {
  687. $pub_type = $ptype;
  688. break;
  689. }
  690. else if ($ptype == 'Book Chapter') {
  691. $pub_type = $ptype;
  692. break;
  693. }
  694. else if ($ptype == "Research Support, Non-U.S. Gov't") {
  695. $pub_type = $ptype;
  696. // we don't break because if the article is also a Journal Article
  697. // we prefer that type
  698. }
  699. }
  700. // If we don't have a recognized publication type, then just use the
  701. // first one in the list.
  702. if (!$pub_type) {
  703. $pub_type = $pub['Publication Type'][0];
  704. }
  705. }
  706. else {
  707. $pub_type = $pub['Publication Type'];
  708. }
  709. //----------------------
  710. // Journal Article
  711. //----------------------
  712. if ($pub_type == 'Journal Article') {
  713. if (array_key_exists('Authors', $pub)) {
  714. $citation = $pub['Authors'] . '. ';
  715. }
  716. $citation .= $pub['Title'] . '. ';
  717. if (array_key_exists('Journal Name', $pub)) {
  718. $citation .= $pub['Journal Name'] . '. ';
  719. }
  720. elseif (array_key_exists('Journal Abbreviation', $pub)) {
  721. $citation .= $pub['Journal Abbreviation'] . '. ';
  722. }
  723. elseif (array_key_exists('Series Name', $pub)) {
  724. $citation .= $pub['Series Name'] . '. ';
  725. }
  726. elseif (array_key_exists('Series Abbreviation', $pub)) {
  727. $citation .= $pub['Series Abbreviation'] . '. ';
  728. }
  729. if (array_key_exists('Publication Date', $pub)) {
  730. $citation .= $pub['Publication Date'];
  731. }
  732. elseif (array_key_exists('Year', $pub)) {
  733. $citation .= $pub['Year'];
  734. }
  735. if (array_key_exists('Volume', $pub) or array_key_exists('Issue', $pub) or array_key_exists('Pages',$pub)) {
  736. $citation .= '; ';
  737. }
  738. if (array_key_exists('Volume', $pub)) {
  739. $citation .= $pub['Volume'];
  740. }
  741. if (array_key_exists('Issue', $pub)) {
  742. $citation .= '(' . $pub['Issue'] . ')';
  743. }
  744. if (array_key_exists('Pages', $pub)) {
  745. if (array_key_exists('Volume', $pub)) {
  746. $citation .= ':';
  747. }
  748. $citation .= $pub['Pages'];
  749. }
  750. $citation .= '.';
  751. }
  752. //----------------------
  753. // Review
  754. //----------------------
  755. else if ($pub_type == 'Review') {
  756. if (array_key_exists('Authors', $pub)) {
  757. $citation = $pub['Authors'] . '. ';
  758. }
  759. $citation .= $pub['Title'] . '. ';
  760. if (array_key_exists('Journal Name', $pub)) {
  761. $citation .= $pub['Journal Name'] . '. ';
  762. }
  763. elseif (array_key_exists('Journal Abbreviation', $pub)) {
  764. $citation .= $pub['Journal Abbreviation'] . '. ';
  765. }
  766. elseif (array_key_exists('Series Name', $pub)) {
  767. $citation .= $pub['Series Name'] . '. ';
  768. }
  769. elseif (array_key_exists('Series Abbreviation', $pub)) {
  770. $citation .= $pub['Series Abbreviation'] . '. ';
  771. }
  772. if (array_key_exists('Publication Date', $pub)) {
  773. $citation .= $pub['Publication Date'];
  774. }
  775. elseif (array_key_exists('Year', $pub)) {
  776. $citation .= $pub['Year'];
  777. }
  778. if (array_key_exists('Volume', $pub) or array_key_exists('Issue', $pub) or array_key_exists('Pages',$pub)) {
  779. $citation .= '; ';
  780. }
  781. if (array_key_exists('Volume', $pub)) {
  782. $citation .= $pub['Volume'];
  783. }
  784. if (array_key_exists('Issue', $pub)) {
  785. $citation .= '(' . $pub['Issue'] . ')';
  786. }
  787. if (array_key_exists('Pages', $pub)) {
  788. if (array_key_exists('Volume', $pub)) {
  789. $citation .= ':';
  790. }
  791. $citation .= $pub['Pages'];
  792. }
  793. $citation .= '.';
  794. }
  795. //----------------------
  796. // Research Support, Non-U.S. Gov't
  797. //----------------------
  798. elseif ($pub_type == "Research Support, Non-U.S. Gov't") {
  799. if (array_key_exists('Authors', $pub)) {
  800. $citation = $pub['Authors'] . '. ';
  801. }
  802. $citation .= $pub['Title'] . '. ';
  803. if (array_key_exists('Journal Name', $pub)) {
  804. $citation .= $pub['Journal Name'] . '. ';
  805. }
  806. if (array_key_exists('Publication Date', $pub)) {
  807. $citation .= $pub['Publication Date'];
  808. }
  809. elseif (array_key_exists('Year', $pub)) {
  810. $citation .= $pub['Year'];
  811. }
  812. $citation .= '.';
  813. }
  814. //----------------------
  815. // Letter
  816. //----------------------
  817. elseif ($pub_type == 'Letter') {
  818. if (array_key_exists('Authors', $pub)) {
  819. $citation = $pub['Authors'] . '. ';
  820. }
  821. $citation .= $pub['Title'] . '. ';
  822. if (array_key_exists('Journal Name', $pub)) {
  823. $citation .= $pub['Journal Name'] . '. ';
  824. }
  825. elseif (array_key_exists('Journal Abbreviation', $pub)) {
  826. $citation .= $pub['Journal Abbreviation'] . '. ';
  827. }
  828. elseif (array_key_exists('Series Name', $pub)) {
  829. $citation .= $pub['Series Name'] . '. ';
  830. }
  831. elseif (array_key_exists('Series Abbreviation', $pub)) {
  832. $citation .= $pub['Series Abbreviation'] . '. ';
  833. }
  834. if (array_key_exists('Publication Date', $pub)) {
  835. $citation .= $pub['Publication Date'];
  836. }
  837. elseif (array_key_exists('Year', $pub)) {
  838. $citation .= $pub['Year'];
  839. }
  840. if (array_key_exists('Volume', $pub) or array_key_exists('Issue', $pub) or array_key_exists('Pages',$pub)) {
  841. $citation .= '; ';
  842. }
  843. if (array_key_exists('Volume', $pub)) {
  844. $citation .= $pub['Volume'];
  845. }
  846. if (array_key_exists('Issue', $pub)) {
  847. $citation .= '(' . $pub['Issue'] . ')';
  848. }
  849. if (array_key_exists('Pages', $pub)) {
  850. if (array_key_exists('Volume', $pub)) {
  851. $citation .= ':';
  852. }
  853. $citation .= $pub['Pages'];
  854. }
  855. $citation .= '.';
  856. }
  857. //-----------------------
  858. // Conference Proceedings
  859. //-----------------------
  860. elseif ($pub_type == 'Conference Proceedings') {
  861. if (array_key_exists('Authors', $pub)) {
  862. $citation = $pub['Authors'] . '. ';
  863. }
  864. $citation .= $pub['Title'] . '. ';
  865. if (array_key_exists('Conference Name', $pub)) {
  866. $citation .= $pub['Conference Name'] . '. ';
  867. }
  868. elseif (array_key_exists('Series Name', $pub)) {
  869. $citation .= $pub['Series Name'] . '. ';
  870. }
  871. elseif (array_key_exists('Series Abbreviation', $pub)) {
  872. $citation .= $pub['Series Abbreviation'] . '. ';
  873. }
  874. if (array_key_exists('Publication Date', $pub)) {
  875. $citation .= $pub['Publication Date'];
  876. }
  877. elseif (array_key_exists('Year', $pub)) {
  878. $citation .= $pub['Year'];
  879. }
  880. if (array_key_exists('Volume', $pub) or array_key_exists('Issue', $pub) or array_key_exists('Pages',$pub)) {
  881. $citation .= '; ';
  882. }
  883. if (array_key_exists('Volume', $pub)) {
  884. $citation .= $pub['Volume'];
  885. }
  886. if (array_key_exists('Issue', $pub)) {
  887. $citation .= '(' . $pub['Issue'] . ')';
  888. }
  889. if (array_key_exists('Pages', $pub)) {
  890. if (array_key_exists('Volume', $pub)) {
  891. $citation .= ':';
  892. }
  893. $citation .= $pub['Pages'];
  894. }
  895. $citation .= '.';
  896. }
  897. //-----------------------
  898. // Default
  899. //-----------------------
  900. else {
  901. if (array_key_exists('Authors', $pub)) {
  902. $citation = $pub['Authors'] . '. ';
  903. }
  904. $citation .= $pub['Title'] . '. ';
  905. if (array_key_exists('Series Name', $pub)) {
  906. $citation .= $pub['Series Name'] . '. ';
  907. }
  908. elseif (array_key_exists('Series Abbreviation', $pub)) {
  909. $citation .= $pub['Series Abbreviation'] . '. ';
  910. }
  911. if (array_key_exists('Publication Date', $pub)) {
  912. $citation .= $pub['Publication Date'];
  913. }
  914. elseif (array_key_exists('Year', $pub)) {
  915. $citation .= $pub['Year'];
  916. }
  917. if (array_key_exists('Volume', $pub) or array_key_exists('Issue', $pub) or array_key_exists('Pages',$pub)) {
  918. $citation .= '; ';
  919. }
  920. if (array_key_exists('Volume', $pub)) {
  921. $citation .= $pub['Volume'];
  922. }
  923. if (array_key_exists('Issue', $pub)) {
  924. $citation .= '(' . $pub['Issue'] . ')';
  925. }
  926. if (array_key_exists('Pages', $pub)) {
  927. if (array_key_exists('Volume', $pub)) {
  928. $citation .= ':';
  929. }
  930. $citation .= $pub['Pages'];
  931. }
  932. $citation .= '.';
  933. }
  934. return $citation;
  935. }
  936. /**
  937. * Retrieves the minimal information to uniquely describe any publication.
  938. *
  939. * The returned array is an associative array where the keys are
  940. * the controlled vocabulary terms in the form [vocab]:[accession]
  941. *
  942. * @param $pub
  943. * A publication object as created by chado_generate_var()
  944. *
  945. * @return
  946. * An array with the following keys: 'Citation', 'Abstract', 'Authors',
  947. * 'URL'. All keys are term names in the Tripal Publication Ontology :TPUB.
  948. */
  949. function tripal_get_minimal_pub_info($pub) {
  950. if (!$pub) {
  951. return array();
  952. }
  953. // Chado has a null pub as default. We don't return anything for this.
  954. if (isset($pub->uniquename) && $pub->uniquename == 'null') {
  955. return array();
  956. }
  957. // expand the title
  958. $pub = chado_expand_var($pub, 'field', 'pub.title');
  959. $pub = chado_expand_var($pub, 'field', 'pub.volumetitle');
  960. // get the abstract
  961. $values = array(
  962. 'pub_id' => $pub->pub_id,
  963. 'type_id' => array(
  964. 'name' => 'Abstract',
  965. ),
  966. );
  967. $options = array(
  968. 'include_fk' => array(
  969. ),
  970. );
  971. $abstract = chado_generate_var('pubprop', $values, $options);
  972. $abstract = chado_expand_var($abstract, 'field', 'pubprop.value');
  973. $abstract_text = '';
  974. if ($abstract) {
  975. $abstract_text = htmlspecialchars($abstract->value);
  976. }
  977. // Get the author list.
  978. $values = array(
  979. 'pub_id' => $pub->pub_id,
  980. 'type_id' => array(
  981. 'name' => 'Authors',
  982. ),
  983. );
  984. $options = array(
  985. 'include_fk' => array(
  986. ),
  987. );
  988. $authors = chado_generate_var('pubprop', $values, $options);
  989. $authors = chado_expand_var($authors, 'field', 'pubprop.value');
  990. $authors_list = 'N/A';
  991. if ($authors) {
  992. $authors_list = $authors->value;
  993. }
  994. // get the first database cross-reference with a url.
  995. $options = array('return_array' => 1);
  996. $pub = chado_expand_var($pub, 'table', 'pub_dbxref', $options);
  997. $dbxref = NULL;
  998. if ($pub->pub_dbxref) {
  999. foreach ($pub->pub_dbxref as $index => $pub_dbxref) {
  1000. if ($pub_dbxref->dbxref_id->db_id->urlprefix) {
  1001. $dbxref = $pub_dbxref->dbxref_id;
  1002. }
  1003. }
  1004. }
  1005. // get the URL.
  1006. $values = array(
  1007. 'pub_id' => $pub->pub_id,
  1008. 'type_id' => array(
  1009. 'name' => 'URL',
  1010. ),
  1011. );
  1012. $options = array(
  1013. 'return_array' => 1,
  1014. 'include_fk' => array(),
  1015. );
  1016. $url = '';
  1017. $urls = chado_generate_var('pubprop', $values, $options);
  1018. if ($urls) {
  1019. $urls = chado_expand_var($urls, 'field', 'pubprop.value');
  1020. if (count($urls) > 0) {
  1021. $url = $urls[0]->value;
  1022. }
  1023. }
  1024. // Get the list of database cross references.
  1025. $values = array(
  1026. 'pub_id' => $pub->pub_id,
  1027. );
  1028. $options = array(
  1029. 'return_array' => 1,
  1030. );
  1031. $pub_dbxrefs = chado_generate_var('pub_dbxref', $values, $options);
  1032. $dbxrefs = array();
  1033. foreach ($pub_dbxrefs as $pub_dbxref) {
  1034. $dbxrefs[] = $pub_dbxref->dbxref_id->db_id->name . ':' . $pub_dbxref->dbxref_id->accession;
  1035. }
  1036. // get the citation
  1037. $values = array(
  1038. 'pub_id' => $pub->pub_id,
  1039. 'type_id' => array(
  1040. 'name' => 'Citation',
  1041. ),
  1042. );
  1043. $options = array(
  1044. 'include_fk' => array(
  1045. ),
  1046. );
  1047. $citation = chado_generate_var('pubprop', $values, $options);
  1048. if ($citation) {
  1049. $citation = chado_expand_var($citation, 'field', 'pubprop.value');
  1050. $citation = $citation->value;
  1051. }
  1052. else {
  1053. $pub_info = array(
  1054. 'Title' => $pub->title,
  1055. 'Publication Type' => $pub->type_id->name,
  1056. 'Authors' => $authors_list,
  1057. 'Series Name' => $pub->series_name,
  1058. 'Volume' => $pub->volume,
  1059. 'Issue' => $pub->issue,
  1060. 'Pages' => $pub->pages,
  1061. 'Publication Date' => $pub->pyear,
  1062. );
  1063. $citation = tripal_pub_create_citation($pub_info);
  1064. }
  1065. return array(
  1066. 'TPUB:0000039' => $pub->title,
  1067. 'TPUB:0000003' => $citation,
  1068. 'TPUB:0000050' => $abstract_text,
  1069. 'TPUB:0000047' => $authors_list,
  1070. 'TPUB:0000052' => $url,
  1071. 'SBO:0000554' => $dbxrefs,
  1072. );
  1073. }