tripal_chado.pub.api.inc 37 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132
  1. <?php
  2. /**
  3. * @file
  4. * Provides an application programming interface (API) to manage chado publications
  5. */
  6. /**
  7. * @defgroup tripal_pub_api Publication Module API
  8. * @ingroup tripal_api
  9. * @{
  10. * Provides an application programming interface (API) to manage chado publications
  11. *
  12. * @stephen add documentation here for how to add a new importer.
  13. *
  14. * @}
  15. */
  16. /**
  17. * Retrieves a chado publication array
  18. *
  19. * @param $identifier
  20. * An array used to uniquely identify a publication. This array has the same
  21. * format as that used by the chado_generate_var(). The following keys can be
  22. * useful for uniquely identifying a publication as they should be unique:
  23. * - pub_id: the chado pub.pub_id primary key
  24. * - nid: the drupal nid of the publication
  25. * - uniquename: A value to matach with the pub.uniquename field
  26. * There are also some specially handled keys. They are:
  27. * - property: An array describing the property to select records for. It
  28. * should at least have either a 'type_name' key (if unique across cvs) or
  29. * 'type_id' key. Other supported keys include: 'cv_id', 'cv_name' (of the type),
  30. * 'value' and 'rank'
  31. * - dbxref: The database cross reference accession. It should be in the form
  32. * DB:ACCESSION, where DB is the database name and ACCESSION is the
  33. * unique publication identifier (e.g. PMID:4382934)
  34. * - dbxref_id: The dbxref.dbxref_id of the publication.
  35. * @param $options
  36. * An array of options. Supported keys include:
  37. * - Any keys supported by chado_generate_var(). See that function definition for
  38. * additional details.
  39. *
  40. * NOTE: the $identifier parameter can really be any array similar to $values passed into
  41. * chado_select_record(). It should fully specify the pub record to be returned.
  42. *
  43. * @return
  44. * If a singe publication is retreived using the identifiers, then a publication
  45. * array will be returned. The array is of the same format returned by the
  46. * chado_generate_var() function. Otherwise, FALSE will be returned.
  47. *
  48. * @ingroup tripal_pub_api
  49. */
  50. function tripal_get_publication($identifiers, $options = array()) {
  51. // Error Checking of parameters
  52. if (!is_array($identifiers)) {
  53. tripal_report_error('tripal_pub_api', TRIPAL_ERROR,
  54. "chado_get_publication: The identifier passed in is expected to be an array with the key
  55. matching a column name in the pub table (ie: pub_id or name). You passed in %identifier.",
  56. array('%identifier'=> print_r($identifiers, TRUE))
  57. );
  58. }
  59. elseif (empty($identifiers)) {
  60. tripal_report_error('tripal_pub_api', TRIPAL_ERROR,
  61. "chado_get_publication: You did not pass in anything to identify the publication you want. The identifier
  62. is expected to be an array with the key matching a column name in the pub table
  63. (ie: pub_id or name). You passed in %identifier.",
  64. array('%identifier'=> print_r($identifiers, TRUE))
  65. );
  66. }
  67. // If one of the identifiers is property then use chado_get_record_with_property()
  68. if (array_key_exists('property', $identifiers)) {
  69. $property = $identifiers['property'];
  70. unset($identifiers['property']);
  71. $pub = chado_get_record_with_property(
  72. array('table' => 'pub', 'base_records' => $identifiers),
  73. array('type_name' => $property),
  74. $options
  75. );
  76. }
  77. elseif (array_key_exists('dbxref', $identifiers)) {
  78. if(preg_match('/^(.*?):(.*?)$/', $identifiers['dbxref'], $matches)) {
  79. $dbname = $matches[1];
  80. $accession = $matches[2];
  81. // First make sure the dbxref is present.
  82. $values = array(
  83. 'accession' => $accession,
  84. 'db_id' => array(
  85. 'name' => $dbname
  86. ),
  87. );
  88. $dbxref = chado_select_record('dbxref', array('dbxref_id'), $values);
  89. if (count($dbxref) == 0) {
  90. return FALSE;
  91. }
  92. $pub_dbxref = chado_select_record('pub_dbxref', array('pub_id'), array('dbxref_id' => $dbxref[0]->dbxref_id));
  93. if (count($pub_dbxref) == 0) {
  94. return FALSE;
  95. }
  96. $pub = chado_generate_var('pub', array('pub_id' => $pub_dbxref[0]->pub_id), $options);
  97. }
  98. else {
  99. tripal_report_error('tripal_pub_api', TRIPAL_ERROR,
  100. "chado_get_publication: The dbxref identifier is not correctly formatted.",
  101. array('%identifier'=> print_r($identifiers, TRUE))
  102. );
  103. }
  104. }
  105. elseif (array_key_exists('dbxref_id', $identifiers)) {
  106. // first get the pub_dbxref record
  107. $values = array('dbxref_id' => $identifiers['dbxref_id']);
  108. $pub_dbxref = chado_select_record('pub_dbxref', array('pub_id'), $values);
  109. // now get the pub
  110. if (count($pub_dbxref) > 0) {
  111. $pub = chado_generate_var('pub', array('pub_id' => $pub_dbxref[0]->pub_id), $options);
  112. }
  113. else {
  114. return FALSE;
  115. }
  116. }
  117. // Else we have a simple case and we can just use chado_generate_var to get the pub
  118. else {
  119. // Try to get the pub
  120. $pub = chado_generate_var('pub', $identifiers, $options);
  121. }
  122. // Ensure the pub is singular. If it's an array then it is not singular
  123. if (is_array($pub)) {
  124. tripal_report_error('tripal_pub_api', TRIPAL_ERROR,
  125. "chado_get_publication: The identifiers did not find a single unique record. Identifiers passed: %identifier.",
  126. array('%identifier'=> print_r($identifiers, TRUE))
  127. );
  128. }
  129. // Report an error if $pub is FALSE since then chado_generate_var has failed
  130. elseif ($pub === FALSE) {
  131. tripal_report_error('tripal_pub_api', TRIPAL_ERROR,
  132. "chado_get_publication: Could not find a publication using the identifiers
  133. provided. Check that the identifiers are correct. Identifiers passed: %identifier.",
  134. array('%identifier'=> print_r($identifiers, TRUE))
  135. );
  136. }
  137. // Else, as far we know, everything is fine so give them their pub :)
  138. else {
  139. return $pub;
  140. }
  141. }
  142. /**
  143. * The publication table of Chado only has a unique constraint for the
  144. * uniquename of the publiation, but in reality a publication can be considered
  145. * unique by a combination of the title, publication type, published year and
  146. * series name (e.g. journal name or conference name). The site administrator
  147. * can configure how publications are determined to be unique. This function
  148. * uses the configuration specified by the administrator to look for publications
  149. * that match the details specified by the $pub_details argument
  150. * and indicates if one ore more publications match the criteria.
  151. *
  152. * @param $pub_details
  153. * An associative array with details about the publications. The expected keys
  154. * are:
  155. * 'Title': The title of the publication
  156. * 'Year': The published year of the publication
  157. * 'Publication Type': An array of publication types. A publication can have more than one type.
  158. * 'Series Name': The series name of the publication
  159. * 'Journal Name': An alternative to 'Series Name'
  160. * 'Conference Name': An alternative to 'Series Name'
  161. * 'Citation': The publication citation (this is the value saved in the pub.uniquename field and must be unique)
  162. * If this key is present it will also be checked
  163. * 'Publication Dbxref': A database cross reference of the form DB:ACCESSION where DB is the name
  164. * of the database and ACCESSION is the unique identifier (e.g PMID:3483139)
  165. *
  166. * @return
  167. * An array containing the pub_id's of matching publications. Returns an
  168. * empty array if no pubs match
  169. *
  170. * @ingroup tripal_pub_api
  171. */
  172. function tripal_publication_exists($pub_details) {
  173. // first try to find the publication using the accession number if that key exists in the details array
  174. if (array_key_exists('Publication Dbxref', $pub_details)) {
  175. $pub = tripal_get_publication(array('dbxref' => $pub_details['Publication Dbxref']));
  176. if($pub) {
  177. return array($pub->pub_id);
  178. }
  179. }
  180. // make sure the citation is unique
  181. if (array_key_exists('Citation', $pub_details)) {
  182. $pub = tripal_get_publication(array('uniquename' => $pub_details['Citation']));
  183. if($pub) {
  184. return array($pub->pub_id);
  185. }
  186. }
  187. // get the publication type (use the first publication type)
  188. if (array_key_exists('Publication Type', $pub_details)) {
  189. $type_name = '';
  190. if(is_array($pub_details['Publication Type'])) {
  191. $type_name = $pub_details['Publication Type'][0];
  192. }
  193. else {
  194. $type_name = $pub_details['Publication Type'];
  195. }
  196. $identifiers = array(
  197. 'name' => $type_name,
  198. 'cv_id' => array(
  199. 'name' => 'tripal_pub',
  200. ),
  201. );
  202. $pub_type = tripal_get_cvterm($identifiers);
  203. }
  204. else {
  205. tripal_report_error('tripal_pub', TRIPAL_ERROR,
  206. "tripal_publication_exists(): The Publication Type is a " .
  207. "required property but is missing", array());
  208. return array();
  209. }
  210. if (!$pub_type) {
  211. tripal_report_error('tripal_pub', TRIPAL_ERROR,
  212. "tripal_publication_exists(): Cannot find publication type: '%type'",
  213. array('%type' => $pub_details['Publication Type'][0]));
  214. return array();
  215. }
  216. // get the series name. The pub.series_name field is only 255 chars so we must truncate to be safe
  217. $series_name = '';
  218. if (array_key_exists('Series Name', $pub_details)) {
  219. $series_name = substr($pub_details['Series Name'], 0, 255);
  220. }
  221. if (array_key_exists('Journal Name', $pub_details)) {
  222. $series_name = substr($pub_details['Journal Name'], 0, 255);
  223. }
  224. if (array_key_exists('Conference Name', $pub_details)) {
  225. $series_name = substr($pub_details['Conference Name'], 0, 255);
  226. }
  227. // make sure the publication is unique using the prefereed import duplication check
  228. $import_dups_check = variable_get('tripal_pub_import_duplicate_check', 'title_year_media');
  229. $pubs = array();
  230. switch ($import_dups_check) {
  231. case 'title_year':
  232. $identifiers = array(
  233. 'title' => $pub_details['Title'],
  234. 'pyear' => $pub_details['Year']
  235. );
  236. $pubs = chado_select_record('pub', array('pub_id'), $identifiers);
  237. break;
  238. case 'title_year_type':
  239. $identifiers = array(
  240. 'title' => $pub_details['Title'],
  241. 'pyear' => $pub_details['Year'],
  242. 'type_id' => $pub_type->cvterm_id,
  243. );
  244. $pubs = chado_select_record('pub', array('pub_id'), $identifiers);
  245. break;
  246. case 'title_year_media':
  247. $identifiers = array(
  248. 'title' => $pub_details['Title'],
  249. 'pyear' => $pub_details['Year'],
  250. 'series_name' => $series_name,
  251. );
  252. $pubs = chado_select_record('pub', array('pub_id'), $identifiers);
  253. break;
  254. }
  255. $return = array();
  256. foreach ($pubs as $pub) {
  257. $return[] = $pub->pub_id;
  258. }
  259. return $return;
  260. }
  261. /**
  262. * Used for autocomplete in forms for identifying for publications.
  263. *
  264. * @param $field
  265. * The field in the publication to search on.
  266. * @param $string
  267. * The string to search for
  268. *
  269. * @return
  270. * A json array of terms that begin with the provided string
  271. *
  272. * @ingroup tripal_chado_api
  273. */
  274. function tripal_autocomplete_pub($string = '') {
  275. $items = array();
  276. $sql = "
  277. SELECT pub_id, title, uniquename
  278. FROM {pub}
  279. WHERE lower(title) like lower(:str)
  280. ORDER by title
  281. LIMIT 25 OFFSET 0
  282. ";
  283. $pubs = chado_query($sql, array(':str' => $string . '%'));
  284. while ($pub = $pubs->fetchObject()) {
  285. $items[$pub->uniquename] = $pub->uniquename;
  286. }
  287. drupal_json_output($items);
  288. }
  289. /**
  290. * Imports a singe publication specified by a remote database cross reference.
  291. *
  292. * @param $pub_dbxref
  293. * The unique database ID for the record to update. This value must
  294. * be of the format DB_NAME:ACCESSION where DB_NAME is the name of the
  295. * database (e.g. PMID or AGL) and the ACCESSION is the unique identifier
  296. * for the record in the database.
  297. * @param $do_contact
  298. * Set to TRUE if authors should automatically have a contact record added
  299. * to Chado.
  300. * @param $do_update
  301. * If set to TRUE then the publication will be updated if it already exists
  302. * in the database.
  303. *
  304. * @ingroup tripal_pub
  305. */
  306. function tripal_import_pub_by_dbxref($pub_dbxref, $do_contact = FALSE, $do_update = TRUE) {
  307. $num_to_retrieve = 1;
  308. $pager_id = 0;
  309. $page = 0;
  310. $num_pubs = 0;
  311. $pub_id = NULL;
  312. module_load_include('inc', 'tripal_chado', 'includes/loaders/tripal_chado.pub_importers');
  313. print "\nNOTE: Loading of publications is performed using a database transaction. \n" .
  314. "If the load fails or is terminated prematurely then the entire set of \n" .
  315. "insertions/updates is rolled back and will not be found in the database\n\n";
  316. $transaction = db_transaction();
  317. try {
  318. if(preg_match('/^(.*?):(.*?)$/', $pub_dbxref, $matches)) {
  319. $dbname = $matches[1];
  320. $accession = $matches[2];
  321. $criteria = array(
  322. 'num_criteria' => 1,
  323. 'remote_db' => $dbname,
  324. 'criteria' => array(
  325. '1' => array(
  326. 'search_terms' => "$dbname:$accession",
  327. 'scope' => 'id',
  328. 'operation' => '',
  329. 'is_phrase' => 0,
  330. ),
  331. ),
  332. );
  333. $remote_db = $criteria['remote_db'];
  334. $results = tripal_get_remote_pubs($remote_db, $criteria, $num_to_retrieve, $page);
  335. $pubs = $results['pubs'];
  336. $search_str = $results['search_str'];
  337. $total_records = $results['total_records'];
  338. tripal_pub_add_publications($pubs, $do_contact, $do_update);
  339. }
  340. // For backwards compatibility check to see if the legacy pub module
  341. // is enabled. If so, then sync the nodes.
  342. if (module_exists('tripal_pub')) {
  343. // sync the newly added publications with Drupal
  344. print "Syncing publications with Drupal...\n";
  345. chado_node_sync_records('pub');
  346. // if any of the importers wanted to create contacts from the authors then sync them
  347. if($do_contact) {
  348. print "Syncing contacts with Drupal...\n";
  349. chado_node_sync_records('contact');
  350. }
  351. }
  352. }
  353. catch (Exception $e) {
  354. $transaction->rollback();
  355. print "\n"; // make sure we start errors on new line
  356. watchdog_exception('T_pub_import', $e);
  357. print "FAILED: Rolling back database changes...\n";
  358. return;
  359. }
  360. }
  361. /**
  362. * Imports all publications for all active import setups.
  363. *
  364. * @param $report_email
  365. * A list of email address, separated by commas, that should be notified
  366. * once importing has completed
  367. * @param $do_update
  368. * If set to TRUE then publications that already exist in the Chado database
  369. * will be updated, whereas if FALSE only new publications will be added
  370. *
  371. * @ingroup tripal_pub
  372. */
  373. function tripal_execute_active_pub_importers($report_email = FALSE, $do_update = FALSE) {
  374. $num_to_retrieve = 100;
  375. $page = 0;
  376. print "\nNOTE: Loading of publications is performed using a database transaction. \n" .
  377. "If the load fails or is terminated prematurely then the entire set of \n" .
  378. "insertions/updates is rolled back and will not be found in the database\n\n";
  379. // start the transaction
  380. $transaction = db_transaction();
  381. try {
  382. // get all of the loaders
  383. $args = array();
  384. $sql = "SELECT * FROM {tripal_pub_import} WHERE disabled = 0 ";
  385. $results = db_query($sql, $args);
  386. $do_contact = FALSE;
  387. $reports = array();
  388. foreach ($results as $import) {
  389. $page = 0;
  390. print "Executing importer: '" . $import->name . "'\n";
  391. // keep track if any of the importers want to create contacts from authors
  392. if ($import->do_contact == 1) {
  393. $do_contact = TRUE;
  394. }
  395. $criteria = unserialize($import->criteria);
  396. $remote_db = $criteria['remote_db'];
  397. do {
  398. // retrieve the pubs for this page. We'll retreive 100 at a time
  399. $results = tripal_get_remote_pubs($remote_db, $criteria, $num_to_retrieve, $page);
  400. $pubs = $results['pubs'];
  401. $reports[$import->name] = tripal_pub_add_publications($pubs, $import->do_contact, $do_update);
  402. $page++;
  403. }
  404. // continue looping until we have a $pubs array that does not have
  405. // our requested numer of records. This means we've hit the end
  406. while (count($pubs) == $num_to_retrieve);
  407. }
  408. // sync the newly added publications with Drupal. If the user
  409. // requested a report then we don't want to print any syncing information
  410. // so pass 'FALSE' to the sync call
  411. // For backwards compatibility check to see if the legacy pub module
  412. // is enabled. If so, then sync the nodes.
  413. if (module_exists('tripal_pub')) {
  414. print "Syncing publications with Drupal...\n";
  415. chado_node_sync_records('pub');
  416. }
  417. // iterate through each of the reports and generate a final report with HTML links
  418. $HTML_report = '';
  419. if ($report_email) {
  420. $HTML_report .= "<html>";
  421. global $base_url;
  422. foreach ($reports as $importer => $report) {
  423. $total = count($report['inserted']);
  424. $HTML_report .= "<b>$total new publications from importer: $importer</b><br><ol>\n";
  425. foreach ($report['inserted'] as $pub) {
  426. $item = $pub['Title'];
  427. if (array_key_exists('pub_id', $pub)) {
  428. $item = l($pub['Title'], "$base_url/pub/" . $pub['pub_id']);
  429. }
  430. $HTML_report .= "<li>$item</li>\n";
  431. }
  432. $HTML_report .= "</ol>\n";
  433. }
  434. $HTML_report .= "</html>";
  435. $site_email = variable_get('site_mail', '');
  436. $params = array(
  437. 'message' => $HTML_report
  438. );
  439. drupal_mail('tripal_pub', 'import_report', $report_email, language_default(), $params, $site_email, TRUE);
  440. }
  441. // For backwards compatibility check to see if the legacy pub module
  442. // is enabled. If so, then sync the nodes.
  443. if (module_exists('tripal_pub')) {
  444. // if any of the importers wanted to create contacts from the authors then sync them
  445. if($do_contact) {
  446. print "Syncing contacts with Drupal...\n";
  447. chado_node_sync_records('contact');
  448. }
  449. }
  450. }
  451. catch (Exception $e) {
  452. $transaction->rollback();
  453. print "\n"; // make sure we start errors on new line
  454. watchdog_exception('T_pub_import', $e);
  455. print "FAILED: Rolling back database changes...\n";
  456. return;
  457. }
  458. print "Done.\n";
  459. }
  460. /**
  461. * Updates publication records.
  462. *
  463. * Updates publication records that currently exist in the Chado pub table
  464. * with the most recent data in the remote database.
  465. *
  466. * @param $do_contact
  467. * Set to TRUE if authors should automatically have a contact record added
  468. * to Chado. Contacts are added using the name provided by the remote
  469. * database.
  470. * @param $dbxref
  471. * The unique database ID for the record to update. This value must
  472. * be of the format DB_NAME:ACCESSION where DB_NAME is the name of the
  473. * database (e.g. PMID or AGL) and the ACCESSION is the unique identifier
  474. * for the record in the database.
  475. * @param $db
  476. * The name of the remote database to update. If this value is provided and
  477. * no dbxref then all of the publications currently in the Chado database
  478. * for this remote database will be updated.
  479. *
  480. * @ingroup tripal_pub
  481. */
  482. function tripal_reimport_publications($do_contact = FALSE, $dbxref = NULL, $db = NULL) {
  483. print "\nNOTE: Loading of publications is performed using a database transaction. \n" .
  484. "If the load fails or is terminated prematurely then the entire set of \n" .
  485. "insertions/updates is rolled back and will not be found in the database\n\n";
  486. $transaction = db_transaction();
  487. try {
  488. // get a list of all publications by their Dbxrefs that have supported databases
  489. $sql = "
  490. SELECT DB.name as db_name, DBX.accession
  491. FROM pub P
  492. INNER JOIN pub_dbxref PDBX ON P.pub_id = PDBX.pub_id
  493. INNER JOIN dbxref DBX ON DBX.dbxref_id = PDBX.dbxref_id
  494. INNER JOIN db DB ON DB.db_id = DBX.db_id
  495. ";
  496. $args = array();
  497. if ($dbxref and preg_match('/^(.*?):(.*?)$/', $dbxref, $matches)) {
  498. $dbname = $matches[1];
  499. $accession = $matches[2];
  500. $sql .= "WHERE DBX.accession = :accession and DB.name = :dbname ";
  501. $args[':accession'] = $accession;
  502. $args[':dbname'] = $dbname;
  503. }
  504. elseif ($db) {
  505. $sql .= " WHERE DB.name = :dbname ";
  506. $args[':dbname'] = $db;
  507. }
  508. $sql .= "ORDER BY DB.name, P.pub_id";
  509. $results = chado_query($sql, $args);
  510. $num_to_retrieve = 100;
  511. $i = 0; // count the number of IDs. When we hit $num_to_retrieve we'll do the query
  512. $curr_db = ''; // keeps track of the current current database
  513. $ids = array(); // the list of IDs for the database
  514. $search = array(); // the search array passed to the search function
  515. // iterate through the pub IDs
  516. while ($pub = $results->fetchObject()) {
  517. $accession = $pub->accession;
  518. $remote_db = $pub->db_name;
  519. // here we need to only update publications for databases we support
  520. $supported_dbs = variable_get('tripal_pub_supported_dbs', array());
  521. if(!in_array($remote_db, $supported_dbs)) {
  522. continue;
  523. }
  524. $search = array(
  525. 'num_criteria' => 1,
  526. 'remote_db' => $remote_db,
  527. 'criteria' => array(
  528. '1' => array(
  529. 'search_terms' => "$remote_db:$accession",
  530. 'scope' => 'id',
  531. 'operation' => '',
  532. 'is_phrase' => 0,
  533. ),
  534. ),
  535. );
  536. $pubs = tripal_get_remote_pubs($remote_db, $search, 1, 0);
  537. tripal_pub_add_publications($pubs, $do_contact, TRUE);
  538. $i++;
  539. }
  540. // For backwards compatibility check to see if the legacy pub module
  541. // is enabled. If so, then sync the nodes.
  542. if (module_exists('tripal_pub')) {
  543. // sync the newly added publications with Drupal
  544. print "Syncing publications with Drupal...\n";
  545. chado_node_sync_records('pub');
  546. // if the caller wants to create contacts then we should sync them
  547. if ($do_contact) {
  548. print "Syncing contacts with Drupal...\n";
  549. chado_node_sync_records('contact');
  550. }
  551. }
  552. }
  553. catch (Exception $e) {
  554. $transaction->rollback();
  555. print "\n"; // make sure we start errors on new line
  556. watchdog_exception('T_pub_import', $e);
  557. print "FAILED: Rolling back database changes...\n";
  558. return;
  559. }
  560. print "Done.\n";
  561. }
  562. /**
  563. * Launch the Tripal job to generate citations.
  564. *
  565. * This function will recreate citations for all publications currently
  566. * loaded into Tripal. This is useful to create a consistent format for
  567. * all citations.
  568. *
  569. * @param $options
  570. * Options pertaining to what publications to generate citations for.
  571. * One of the following must be present:
  572. * - all: Create and replace citation for all pubs
  573. * - new: Create citation for pubs that don't already have one
  574. *
  575. * @ingroup tripal_pub
  576. */
  577. function tripal_pub_create_citations($options) {
  578. $skip_existing = TRUE;
  579. $sql = "
  580. SELECT cvterm_id
  581. FROM {cvterm}
  582. WHERE
  583. name = 'Citation' AND
  584. cv_id = (SELECT cv_id FROM {cv} WHERE name = 'tripal_pub')
  585. ";
  586. $citation_type_id = chado_query($sql)->fetchField();
  587. // Create and replace citation for all pubs
  588. if ($options == 'all') {
  589. $sql = "SELECT pub_id FROM {pub} P WHERE pub_id <> 1";
  590. $skip_existing = FALSE;
  591. }
  592. // Create citation for pubs that don't already have one
  593. else if ($options == 'new') {
  594. $sql = "
  595. SELECT pub_id
  596. FROM {pub} P
  597. WHERE
  598. (SELECT value
  599. FROM {pubprop} PB
  600. WHERE type_id = :type_id AND P.pub_id = PB.pub_id AND rank = 0) IS NULL
  601. AND pub_id <> 1
  602. ";
  603. $skip_existing = TRUE;
  604. }
  605. $result = chado_query($sql, array(':type_id' => $citation_type_id));
  606. $counter_updated = 0;
  607. $counter_generated = 0;
  608. while ($pub = $result->fetchObject()) {
  609. $pub_arr = tripal_pub_get_publication_array($pub->pub_id, $skip_existing);
  610. if ($pub_arr) {
  611. $citation = tripal_pub_create_citation($pub_arr);
  612. print $citation . "\n\n";
  613. // Replace if citation exists. This condition is never TRUE if $skip_existing is TRUE
  614. if ($pub_arr['Citation']) {
  615. $sql = "
  616. UPDATE {pubprop} SET value = :value
  617. WHERE pub_id = :pub_id AND type_id = :type_id AND rank = :rank
  618. ";
  619. chado_query($sql, array(':value' => $citation, ':pub_id' => $pub->pub_id,
  620. ':type_id' => $citation_type_id, ':rank' => 0));
  621. $counter_updated ++;
  622. // Generate a new citation
  623. } else {
  624. $sql = "
  625. INSERT INTO {pubprop} (pub_id, type_id, value, rank)
  626. VALUES (:pub_id, :type_id, :value, :rank)
  627. ";
  628. chado_query($sql, array(':pub_id' => $pub->pub_id, ':type_id' => $citation_type_id,
  629. ':value' => $citation, ':rank' => 0));
  630. $counter_generated ++;
  631. }
  632. }
  633. }
  634. print "$counter_generated citations generated. $counter_updated citations updated.\n";
  635. }
  636. /**
  637. * This function generates citations for publications. It requires
  638. * an array structure with keys being the terms in the Tripal
  639. * publication ontology. This function is intended to be used
  640. * for any function that needs to generate a citation.
  641. *
  642. * @param $pub
  643. * An array structure containing publication details where the keys
  644. * are the publication ontology term names and values are the
  645. * corresponding details. The pub array can contain the following
  646. * keys with corresponding values:
  647. * - Publication Type: an array of publication types. a publication can have more than one type
  648. * - Authors: a string containing all of the authors of a publication
  649. * - Journal Name: a string containing the journal name
  650. * - Journal Abbreviation: a string containing the journal name abbreviation
  651. * - Series Name: a string containing the series (e.g. conference proceedings) name
  652. * - Series Abbreviation: a string containing the series name abbreviation
  653. * - Volume: the series volume number
  654. * - Issue: the series issue number
  655. * - Pages: the page numbers for the publication
  656. * - Publication Date: A date in the format "Year Month Day"
  657. *
  658. * @return
  659. * A text string containing the citation
  660. *
  661. * @ingroup tripal_pub
  662. */
  663. function tripal_pub_create_citation($pub) {
  664. $citation = '';
  665. $pub_type = '';
  666. // An article may have more than one publication type. For example,
  667. // a publication type can be 'Journal Article' but also a 'Clinical Trial'.
  668. // Therefore, we need to select the type that makes most sense for
  669. // construction of the citation. Here we'll iterate through them all
  670. // and select the one that matches best.
  671. if (is_array($pub['Publication Type'])) {
  672. foreach ($pub['Publication Type'] as $ptype) {
  673. if ($ptype == 'Journal Article' ) {
  674. $pub_type = $ptype;
  675. break;
  676. }
  677. else if ($ptype == 'Conference Proceedings'){
  678. $pub_type = $ptype;
  679. break;
  680. }
  681. else if ($ptype == 'Review') {
  682. $pub_type = $ptype;
  683. break;
  684. }
  685. else if ($ptype == 'Book') {
  686. $pub_type = $ptype;
  687. break;
  688. }
  689. else if ($ptype == 'Letter') {
  690. $pub_type = $ptype;
  691. break;
  692. }
  693. else if ($ptype == 'Book Chapter') {
  694. $pub_type = $ptype;
  695. break;
  696. }
  697. else if ($ptype == "Research Support, Non-U.S. Gov't") {
  698. $pub_type = $ptype;
  699. // we don't break because if the article is also a Journal Article
  700. // we prefer that type
  701. }
  702. }
  703. // If we don't have a recognized publication type, then just use the
  704. // first one in the list.
  705. if (!$pub_type) {
  706. $pub_type = $pub['Publication Type'][0];
  707. }
  708. }
  709. else {
  710. $pub_type = $pub['Publication Type'];
  711. }
  712. //----------------------
  713. // Journal Article
  714. //----------------------
  715. if ($pub_type == 'Journal Article') {
  716. if (array_key_exists('Authors', $pub)) {
  717. $citation = $pub['Authors'] . '. ';
  718. }
  719. $citation .= $pub['Title'] . '. ';
  720. if (array_key_exists('Journal Name', $pub)) {
  721. $citation .= $pub['Journal Name'] . '. ';
  722. }
  723. elseif (array_key_exists('Journal Abbreviation', $pub)) {
  724. $citation .= $pub['Journal Abbreviation'] . '. ';
  725. }
  726. elseif (array_key_exists('Series Name', $pub)) {
  727. $citation .= $pub['Series Name'] . '. ';
  728. }
  729. elseif (array_key_exists('Series Abbreviation', $pub)) {
  730. $citation .= $pub['Series Abbreviation'] . '. ';
  731. }
  732. if (array_key_exists('Publication Date', $pub)) {
  733. $citation .= $pub['Publication Date'];
  734. }
  735. elseif (array_key_exists('Year', $pub)) {
  736. $citation .= $pub['Year'];
  737. }
  738. if (array_key_exists('Volume', $pub) or array_key_exists('Issue', $pub) or array_key_exists('Pages',$pub)) {
  739. $citation .= '; ';
  740. }
  741. if (array_key_exists('Volume', $pub)) {
  742. $citation .= $pub['Volume'];
  743. }
  744. if (array_key_exists('Issue', $pub)) {
  745. $citation .= '(' . $pub['Issue'] . ')';
  746. }
  747. if (array_key_exists('Pages', $pub)) {
  748. if (array_key_exists('Volume', $pub)) {
  749. $citation .= ':';
  750. }
  751. $citation .= $pub['Pages'];
  752. }
  753. $citation .= '.';
  754. }
  755. //----------------------
  756. // Review
  757. //----------------------
  758. if ($pub_type == 'Review') {
  759. if (array_key_exists('Authors', $pub)) {
  760. $citation = $pub['Authors'] . '. ';
  761. }
  762. $citation .= $pub['Title'] . '. ';
  763. if (array_key_exists('Journal Name', $pub)) {
  764. $citation .= $pub['Journal Name'] . '. ';
  765. }
  766. elseif (array_key_exists('Journal Abbreviation', $pub)) {
  767. $citation .= $pub['Journal Abbreviation'] . '. ';
  768. }
  769. elseif (array_key_exists('Series Name', $pub)) {
  770. $citation .= $pub['Series Name'] . '. ';
  771. }
  772. elseif (array_key_exists('Series Abbreviation', $pub)) {
  773. $citation .= $pub['Series Abbreviation'] . '. ';
  774. }
  775. if (array_key_exists('Publication Date', $pub)) {
  776. $citation .= $pub['Publication Date'];
  777. }
  778. elseif (array_key_exists('Year', $pub)) {
  779. $citation .= $pub['Year'];
  780. }
  781. if (array_key_exists('Volume', $pub) or array_key_exists('Issue', $pub) or array_key_exists('Pages',$pub)) {
  782. $citation .= '; ';
  783. }
  784. if (array_key_exists('Volume', $pub)) {
  785. $citation .= $pub['Volume'];
  786. }
  787. if (array_key_exists('Issue', $pub)) {
  788. $citation .= '(' . $pub['Issue'] . ')';
  789. }
  790. if (array_key_exists('Pages', $pub)) {
  791. if (array_key_exists('Volume', $pub)) {
  792. $citation .= ':';
  793. }
  794. $citation .= $pub['Pages'];
  795. }
  796. $citation .= '.';
  797. }
  798. //----------------------
  799. // Research Support, Non-U.S. Gov't
  800. //----------------------
  801. elseif ($pub_type == "Research Support, Non-U.S. Gov't") {
  802. if (array_key_exists('Authors', $pub)) {
  803. $citation = $pub['Authors'] . '. ';
  804. }
  805. $citation .= $pub['Title'] . '. ';
  806. if (array_key_exists('Journal Name', $pub)) {
  807. $citation .= $pub['Journal Name'] . '. ';
  808. }
  809. if (array_key_exists('Publication Date', $pub)) {
  810. $citation .= $pub['Publication Date'];
  811. }
  812. elseif (array_key_exists('Year', $pub)) {
  813. $citation .= $pub['Year'];
  814. }
  815. $citation .= '.';
  816. }
  817. //----------------------
  818. // Letter
  819. //----------------------
  820. elseif ($pub_type == 'Letter') {
  821. if (array_key_exists('Authors', $pub)) {
  822. $citation = $pub['Authors'] . '. ';
  823. }
  824. $citation .= $pub['Title'] . '. ';
  825. if (array_key_exists('Journal Name', $pub)) {
  826. $citation .= $pub['Journal Name'] . '. ';
  827. }
  828. elseif (array_key_exists('Journal Abbreviation', $pub)) {
  829. $citation .= $pub['Journal Abbreviation'] . '. ';
  830. }
  831. elseif (array_key_exists('Series Name', $pub)) {
  832. $citation .= $pub['Series Name'] . '. ';
  833. }
  834. elseif (array_key_exists('Series Abbreviation', $pub)) {
  835. $citation .= $pub['Series Abbreviation'] . '. ';
  836. }
  837. if (array_key_exists('Publication Date', $pub)) {
  838. $citation .= $pub['Publication Date'];
  839. }
  840. elseif (array_key_exists('Year', $pub)) {
  841. $citation .= $pub['Year'];
  842. }
  843. if (array_key_exists('Volume', $pub) or array_key_exists('Issue', $pub) or array_key_exists('Pages',$pub)) {
  844. $citation .= '; ';
  845. }
  846. if (array_key_exists('Volume', $pub)) {
  847. $citation .= $pub['Volume'];
  848. }
  849. if (array_key_exists('Issue', $pub)) {
  850. $citation .= '(' . $pub['Issue'] . ')';
  851. }
  852. if (array_key_exists('Pages', $pub)) {
  853. if (array_key_exists('Volume', $pub)) {
  854. $citation .= ':';
  855. }
  856. $citation .= $pub['Pages'];
  857. }
  858. $citation .= '.';
  859. }
  860. //-----------------------
  861. // Conference Proceedings
  862. //-----------------------
  863. elseif ($pub_type == 'Conference Proceedings') {
  864. if (array_key_exists('Authors', $pub)) {
  865. $citation = $pub['Authors'] . '. ';
  866. }
  867. $citation .= $pub['Title'] . '. ';
  868. if (array_key_exists('Conference Name', $pub)) {
  869. $citation .= $pub['Conference Name'] . '. ';
  870. }
  871. elseif (array_key_exists('Series Name', $pub)) {
  872. $citation .= $pub['Series Name'] . '. ';
  873. }
  874. elseif (array_key_exists('Series Abbreviation', $pub)) {
  875. $citation .= $pub['Series Abbreviation'] . '. ';
  876. }
  877. if (array_key_exists('Publication Date', $pub)) {
  878. $citation .= $pub['Publication Date'];
  879. }
  880. elseif (array_key_exists('Year', $pub)) {
  881. $citation .= $pub['Year'];
  882. }
  883. if (array_key_exists('Volume', $pub) or array_key_exists('Issue', $pub) or array_key_exists('Pages',$pub)) {
  884. $citation .= '; ';
  885. }
  886. if (array_key_exists('Volume', $pub)) {
  887. $citation .= $pub['Volume'];
  888. }
  889. if (array_key_exists('Issue', $pub)) {
  890. $citation .= '(' . $pub['Issue'] . ')';
  891. }
  892. if (array_key_exists('Pages', $pub)) {
  893. if (array_key_exists('Volume', $pub)) {
  894. $citation .= ':';
  895. }
  896. $citation .= $pub['Pages'];
  897. }
  898. $citation .= '.';
  899. }
  900. //-----------------------
  901. // Default
  902. //-----------------------
  903. else {
  904. if (array_key_exists('Authors', $pub)) {
  905. $citation = $pub['Authors'] . '. ';
  906. }
  907. $citation .= $pub['Title'] . '. ';
  908. if (array_key_exists('Series Name', $pub)) {
  909. $citation .= $pub['Series Name'] . '. ';
  910. }
  911. elseif (array_key_exists('Series Abbreviation', $pub)) {
  912. $citation .= $pub['Series Abbreviation'] . '. ';
  913. }
  914. if (array_key_exists('Publication Date', $pub)) {
  915. $citation .= $pub['Publication Date'];
  916. }
  917. elseif (array_key_exists('Year', $pub)) {
  918. $citation .= $pub['Year'];
  919. }
  920. if (array_key_exists('Volume', $pub) or array_key_exists('Issue', $pub) or array_key_exists('Pages',$pub)) {
  921. $citation .= '; ';
  922. }
  923. if (array_key_exists('Volume', $pub)) {
  924. $citation .= $pub['Volume'];
  925. }
  926. if (array_key_exists('Issue', $pub)) {
  927. $citation .= '(' . $pub['Issue'] . ')';
  928. }
  929. if (array_key_exists('Pages', $pub)) {
  930. if (array_key_exists('Volume', $pub)) {
  931. $citation .= ':';
  932. }
  933. $citation .= $pub['Pages'];
  934. }
  935. $citation .= '.';
  936. }
  937. return $citation;
  938. }
  939. /**
  940. * Retrieves the minimal information to uniquely describe any publication.
  941. *
  942. * The returned array is an associative array where the keys are
  943. * the controlled vocabulary terms in the form [vocab]:[accession]
  944. *
  945. * @param $pub
  946. * A publication object as created by chado_generate_var()
  947. *
  948. * @return
  949. * An array with the following keys: 'Citation', 'Abstract', 'Authors',
  950. * 'URL'. All keys are term names in the Tripal Publication Ontology :TPUB.
  951. */
  952. function tripal_get_minimal_pub_info($pub) {
  953. if (!$pub) {
  954. return array();
  955. }
  956. // Chado has a null pub as default. We don't return anything for this.
  957. if ($pub->uniquename == 'null') {
  958. return array();
  959. }
  960. // expand the title
  961. $pub = chado_expand_var($pub, 'field', 'pub.title');
  962. $pub = chado_expand_var($pub, 'field', 'pub.volumetitle');
  963. // get the citation
  964. $values = array(
  965. 'pub_id' => $pub->pub_id,
  966. 'type_id' => array(
  967. 'name' => 'Citation',
  968. ),
  969. );
  970. $options = array(
  971. 'include_fk' => array(
  972. ),
  973. );
  974. $citation = chado_generate_var('pubprop', $values, $options);
  975. $citation = chado_expand_var($citation, 'field', 'pubprop.value');
  976. $citation = $citation->value;
  977. // get the abstract
  978. $values = array(
  979. 'pub_id' => $pub->pub_id,
  980. 'type_id' => array(
  981. 'name' => 'Abstract',
  982. ),
  983. );
  984. $options = array(
  985. 'include_fk' => array(
  986. ),
  987. );
  988. $abstract = chado_generate_var('pubprop', $values, $options);
  989. $abstract = chado_expand_var($abstract, 'field', 'pubprop.value');
  990. $abstract_text = '';
  991. if ($abstract) {
  992. $abstract_text = htmlspecialchars($abstract->value);
  993. }
  994. // Get the author list.
  995. $values = array(
  996. 'pub_id' => $pub->pub_id,
  997. 'type_id' => array(
  998. 'name' => 'Authors',
  999. ),
  1000. );
  1001. $options = array(
  1002. 'include_fk' => array(
  1003. ),
  1004. );
  1005. $authors = chado_generate_var('pubprop', $values, $options);
  1006. $authors = chado_expand_var($authors, 'field', 'pubprop.value');
  1007. $authors_list = 'N/A';
  1008. if ($authors) {
  1009. $authors_list = $authors->value;
  1010. }
  1011. // get the first database cross-reference with a url.
  1012. $options = array('return_array' => 1);
  1013. $pub = chado_expand_var($pub, 'table', 'pub_dbxref', $options);
  1014. $dbxref = NULL;
  1015. if ($pub->pub_dbxref) {
  1016. foreach ($pub->pub_dbxref as $index => $pub_dbxref) {
  1017. if ($pub_dbxref->dbxref_id->db_id->urlprefix) {
  1018. $dbxref = $pub_dbxref->dbxref_id;
  1019. }
  1020. }
  1021. }
  1022. // get the URL.
  1023. $values = array(
  1024. 'pub_id' => $pub->pub_id,
  1025. 'type_id' => array(
  1026. 'name' => 'URL',
  1027. ),
  1028. );
  1029. $options = array(
  1030. 'return_array' => 1,
  1031. 'include_fk' => array(),
  1032. );
  1033. $url = '';
  1034. $urls = chado_generate_var('pubprop', $values, $options);
  1035. if ($urls) {
  1036. $urls = chado_expand_var($urls, 'field', 'pubprop.value');
  1037. if (count($urls) > 0) {
  1038. $url = $urls[0]->value;
  1039. }
  1040. }
  1041. // Get the list of database cross references.
  1042. $values = array(
  1043. 'pub_id' => $pub->pub_id,
  1044. );
  1045. $options = array(
  1046. 'return_array' => 1,
  1047. );
  1048. $pub_dbxrefs = chado_generate_var('pub_dbxref', $values, $options);
  1049. $dbxrefs = array();
  1050. foreach ($pub_dbxrefs as $pub_dbxref) {
  1051. $dbxrefs[] = $pub_dbxref->dbxref_id->db_id->name . ':' . $pub_dbxref->dbxref_id->accession;
  1052. }
  1053. return array(
  1054. 'TPUB:0000039' => $pub->title,
  1055. 'TPUB:0000003' => $citation,
  1056. 'TPUB:0000050' => $abstract_text,
  1057. 'TPUB:0000047' => $authors_list,
  1058. 'TPUB:0000052' => $url,
  1059. 'SBO:0000554' => $dbxrefs,
  1060. );
  1061. }