tripal_chado.pub.api.inc 38 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148
  1. <?php
  2. /**
  3. * @file
  4. * Provides an application programming interface (API) to manage chado publications
  5. */
  6. /**
  7. * @defgroup tripal_pub_api Publication Module API
  8. * @ingroup tripal_api
  9. * @{
  10. * Provides an application programming interface (API) to manage chado publications
  11. *
  12. * @stephen add documentation here for how to add a new importer.
  13. *
  14. * @}
  15. */
  /**
   * Retrieves a chado publication array.
   *
   * @param $identifiers
   *   An array used to uniquely identify a publication. This array has the same
   *   format as that used by the chado_generate_var(). The following keys can be
   *   useful for uniquely identifying a publication as they should be unique:
   *    - pub_id: the chado pub.pub_id primary key
   *    - nid: the drupal nid of the publication
   *    - uniquename: A value to match with the pub.uniquename field
   *   There are also some specially handled keys. They are:
   *    - property: An array describing the property to select records for. It
   *      should at least have either a 'type_name' key (if unique across cvs) or
   *      'type_id' key. Other supported keys include: 'cv_id', 'cv_name' (of the
   *      type), 'value' and 'rank'
   *    - dbxref: The database cross reference accession. It should be in the
   *      form DB:ACCESSION, where DB is the database name and ACCESSION is the
   *      unique publication identifier (e.g. PMID:4382934)
   *    - dbxref_id: The dbxref.dbxref_id of the publication.
   * @param $options
   *   An array of options. Supported keys include:
   *    - Any keys supported by chado_generate_var(). See that function
   *      definition for additional details.
   *
   * NOTE: the $identifiers parameter can really be any array similar to $values
   * passed into chado_select_record(). It should fully specify the pub record
   * to be returned.
   *
   * @return
   *   If a single publication is retrieved using the identifiers, then a
   *   publication array will be returned. The array is of the same format
   *   returned by the chado_generate_var() function. Otherwise (invalid
   *   identifiers, no match, or more than one match), FALSE will be returned.
   *
   * @ingroup tripal_pub_api
   */
  function tripal_get_publication($identifiers, $options = array()) {

    // Error checking of parameters. We must stop here on failure: every lookup
    // below indexes into $identifiers and cannot work with a non-array or an
    // empty array.
    if (!is_array($identifiers)) {
      tripal_report_error('tripal_pub_api', TRIPAL_ERROR,
        "chado_get_publication: The identifier passed in is expected to be an array with the key
          matching a column name in the pub table (ie: pub_id or name). You passed in %identifier.",
        array('%identifier'=> print_r($identifiers, TRUE))
      );
      return FALSE;
    }
    if (empty($identifiers)) {
      tripal_report_error('tripal_pub_api', TRIPAL_ERROR,
        "chado_get_publication: You did not pass in anything to identify the publication you want. The identifier
          is expected to be an array with the key matching a column name in the pub table
          (ie: pub_id or name). You passed in %identifier.",
        array('%identifier'=> print_r($identifiers, TRUE))
      );
      return FALSE;
    }

    // If one of the identifiers is property then use
    // chado_get_record_with_property().
    if (array_key_exists('property', $identifiers)) {
      $property = $identifiers['property'];
      unset($identifiers['property']);
      // NOTE(review): the docblock describes 'property' as an array of keys, yet
      // it is handed over here as the 'type_name' value. Confirm against
      // chado_get_record_with_property() before changing.
      $pub = chado_get_record_with_property(
        array('table' => 'pub', 'base_records' => $identifiers),
        array('type_name' => $property),
        $options
      );
    }
    // A dbxref of the form DB:ACCESSION is resolved dbxref -> pub_dbxref -> pub.
    elseif (array_key_exists('dbxref', $identifiers)) {
      if (!preg_match('/^(.*?):(.*?)$/', $identifiers['dbxref'], $matches)) {
        tripal_report_error('tripal_pub_api', TRIPAL_ERROR,
          "chado_get_publication: The dbxref identifier is not correctly formatted.",
          array('%identifier'=> print_r($identifiers, TRUE))
        );
        // Without a usable dbxref there is nothing to look up.
        return FALSE;
      }
      $dbname = $matches[1];
      $accession = $matches[2];

      // First make sure the dbxref is present.
      $values = array(
        'accession' => $accession,
        'db_id' => array(
          'name' => $dbname
        ),
      );
      $dbxref = chado_select_record('dbxref', array('dbxref_id'), $values);
      if (count($dbxref) == 0) {
        return FALSE;
      }
      $pub_dbxref = chado_select_record('pub_dbxref', array('pub_id'), array('dbxref_id' => $dbxref[0]->dbxref_id));
      if (count($pub_dbxref) == 0) {
        return FALSE;
      }
      $pub = chado_generate_var('pub', array('pub_id' => $pub_dbxref[0]->pub_id), $options);
    }
    elseif (array_key_exists('dbxref_id', $identifiers)) {
      // First get the pub_dbxref record.
      $values = array('dbxref_id' => $identifiers['dbxref_id']);
      $pub_dbxref = chado_select_record('pub_dbxref', array('pub_id'), $values);
      if (count($pub_dbxref) == 0) {
        return FALSE;
      }
      // Now get the pub.
      $pub = chado_generate_var('pub', array('pub_id' => $pub_dbxref[0]->pub_id), $options);
    }
    // Else we have a simple case and we can just use chado_generate_var to get
    // the pub.
    else {
      $pub = chado_generate_var('pub', $identifiers, $options);
    }

    // Ensure the pub is singular. If it's an array then it is not singular.
    if (is_array($pub)) {
      tripal_report_error('tripal_pub_api', TRIPAL_ERROR,
        "chado_get_publication: The identifiers did not find a single unique record. Identifiers passed: %identifier.",
        array('%identifier'=> print_r($identifiers, TRUE))
      );
      return FALSE;
    }

    // Report an error if $pub is FALSE since then the lookup has failed.
    if ($pub === FALSE) {
      tripal_report_error('tripal_pub_api', TRIPAL_ERROR,
        "chado_get_publication: Could not find a publication using the identifiers
          provided. Check that the identifiers are correct. Identifiers passed: %identifier.",
        array('%identifier'=> print_r($identifiers, TRUE))
      );
      return FALSE;
    }

    // As far we know, everything is fine so give them their pub :)
    return $pub;
  }
  /**
   * Checks whether a publication already exists in Chado.
   *
   * The publication table of Chado only has a unique constraint for the
   * uniquename of the publication, but in reality a publication can be
   * considered unique by a combination of the title, publication type, published
   * year and series name (e.g. journal name or conference name). The site
   * administrator can configure how publications are determined to be unique.
   * This function uses the configuration specified by the administrator to look
   * for publications that match the details specified by the $pub_details
   * argument and indicates if one or more publications match the criteria.
   *
   * @param $pub_details
   *   An associative array with details about the publications. The expected
   *   keys are:
   *    - 'Title': The title of the publication
   *    - 'Year': The published year of the publication
   *    - 'Publication Type': An array of publication types (a publication can
   *      have more than one type) or a single type name. Only the first type is
   *      used. This key is required.
   *    - 'Series Name': The series name of the publication
   *    - 'Journal Name': An alternative to 'Series Name'
   *    - 'Conference Name': An alternative to 'Series Name'
   *    - 'Citation': The publication citation (this is the value saved in the
   *      pub.uniquename field and must be unique). If this key is present it
   *      will also be checked
   *    - 'Publication Dbxref': A database cross reference of the form
   *      DB:ACCESSION where DB is the name of the database and ACCESSION is the
   *      unique identifier (e.g PMID:3483139)
   *
   * @return
   *   An array containing the pub_id's of matching publications. Returns an
   *   empty array if no pubs match, or if the publication type is missing or
   *   unknown.
   *
   * @ingroup tripal_pub_api
   */
  function tripal_publication_exists($pub_details) {

    // First try to find the publication using the accession number if that key
    // exists in the details array.
    if (array_key_exists('Publication Dbxref', $pub_details)) {
      $pub = tripal_get_publication(array('dbxref' => $pub_details['Publication Dbxref']));
      if ($pub) {
        return array($pub->pub_id);
      }
    }

    // Make sure the citation is unique.
    if (array_key_exists('Citation', $pub_details)) {
      $pub = tripal_get_publication(array('uniquename' => $pub_details['Citation']));
      if ($pub) {
        return array($pub->pub_id);
      }
    }

    // The publication type is required.
    if (!array_key_exists('Publication Type', $pub_details)) {
      tripal_report_error('tripal_pub', TRIPAL_ERROR,
        "tripal_publication_exists(): The Publication Type is a " .
        "required property but is missing", array());
      return array();
    }

    // Get the publication type (use the first publication type). The type may
    // be given as a list or as a plain string; an empty list yields ''.
    $type_name = $pub_details['Publication Type'];
    if (is_array($type_name)) {
      $first_type = reset($type_name);
      $type_name = ($first_type === FALSE) ? '' : $first_type;
    }
    $identifiers = array(
      'name' => $type_name,
      'cv_id' => array(
        'name' => 'tripal_pub',
      ),
    );
    $pub_type = tripal_get_cvterm($identifiers);
    if (!$pub_type) {
      // Report the resolved type name. Indexing the raw value with [0] would
      // print only the first character when the type was given as a string.
      tripal_report_error('tripal_pub', TRIPAL_ERROR,
        "tripal_publication_exists(): Cannot find publication type: '%type'",
        array('%type' => $type_name));
      return array();
    }

    // Get the series name. The pub.series_name field is only 255 chars so we
    // must truncate to be safe. When several of these keys are present the last
    // one checked wins (Conference Name over Journal Name over Series Name).
    $series_name = '';
    if (array_key_exists('Series Name', $pub_details)) {
      $series_name = substr($pub_details['Series Name'], 0, 255);
    }
    if (array_key_exists('Journal Name', $pub_details)) {
      $series_name = substr($pub_details['Journal Name'], 0, 255);
    }
    if (array_key_exists('Conference Name', $pub_details)) {
      $series_name = substr($pub_details['Conference Name'], 0, 255);
    }

    // Make sure the publication is unique using the preferred import
    // duplication check.
    $import_dups_check = variable_get('tripal_pub_import_duplicate_check', 'title_year_media');
    $pubs = array();
    switch ($import_dups_check) {
      case 'title_year':
        $identifiers = array(
          'title' => $pub_details['Title'],
          'pyear' => $pub_details['Year']
        );
        $pubs = chado_select_record('pub', array('pub_id'), $identifiers);
        break;

      case 'title_year_type':
        $identifiers = array(
          'title' => $pub_details['Title'],
          'pyear' => $pub_details['Year'],
          'type_id' => $pub_type->cvterm_id,
        );
        $pubs = chado_select_record('pub', array('pub_id'), $identifiers);
        break;

      case 'title_year_media':
        $identifiers = array(
          'title' => $pub_details['Title'],
          'pyear' => $pub_details['Year'],
          'series_name' => $series_name,
        );
        $pubs = chado_select_record('pub', array('pub_id'), $identifiers);
        break;
    }

    // Collect the IDs of every matching publication.
    $return = array();
    foreach ($pubs as $pub) {
      $return[] = $pub->pub_id;
    }
    return $return;
  }
  /**
   * Used for autocomplete in forms for identifying publications.
   *
   * Looks up at most 25 publications whose title begins with the provided
   * string (case-insensitive), ordered by title, and prints them as JSON.
   *
   * @param $string
   *   The string to search for.
   *
   * @return
   *   Nothing is returned. A JSON object is output via drupal_json_output()
   *   whose keys are of the form "TITLE [id:PUB_ID]" and whose values are the
   *   publication titles.
   *
   * @ingroup tripal_chado_api
   */
  function tripal_autocomplete_pub($string = '') {
    $items = array();
    $sql = "
      SELECT pub_id, title, uniquename
      FROM {pub}
      WHERE lower(title) like lower(:str)
      ORDER by title
      LIMIT 25 OFFSET 0
    ";
    // Escape LIKE wildcards typed by the user so that '%' and '_' are matched
    // literally and only the trailing '%' added here acts as a wildcard.
    $pubs = chado_query($sql, array(':str' => db_like($string) . '%'));
    while ($pub = $pubs->fetchObject()) {
      // The pub_id is part of the key so that identical titles stay distinct.
      $val = $pub->title . " [id:" . $pub->pub_id . "]";
      $items[$val] = $pub->title;
    }

    drupal_json_output($items);
  }
  /**
   * Imports a single publication specified by a remote database cross reference.
   *
   * The import runs inside a database transaction which is rolled back if an
   * exception is thrown. Progress messages are printed to standard output.
   *
   * @param $pub_dbxref
   *   The unique database ID for the record to import. This value must
   *   be of the format DB_NAME:ACCESSION where DB_NAME is the name of the
   *   database (e.g. PMID or AGL) and the ACCESSION is the unique identifier
   *   for the record in the database. If the value does not have this format
   *   an error is reported and nothing is imported.
   * @param $do_contact
   *   Set to TRUE if authors should automatically have a contact record added
   *   to Chado.
   * @param $do_update
   *   If set to TRUE then the publication will be updated if it already exists
   *   in the database.
   *
   * @ingroup tripal_pub
   */
  function tripal_import_pub_by_dbxref($pub_dbxref, $do_contact = FALSE, $do_update = TRUE) {

    // A dbxref identifies exactly one record, so a single result from the
    // first page is all that is requested.
    $num_to_retrieve = 1;
    $page = 0;

    module_load_include('inc', 'tripal_chado', 'includes/loaders/tripal_chado.pub_importers');

    print "\nNOTE: Loading of publications is performed using a database transaction. \n" .
          "If the load fails or is terminated prematurely then the entire set of \n" .
          "insertions/updates is rolled back and will not be found in the database\n\n";

    $transaction = db_transaction();
    try {
      if (preg_match('/^(.*?):(.*?)$/', $pub_dbxref, $matches)) {
        $dbname = $matches[1];
        $accession = $matches[2];

        // Build a one-criterion search that looks the record up by its ID.
        $criteria = array(
          'num_criteria' => 1,
          'remote_db' => $dbname,
          'criteria' => array(
            '1' => array(
              'search_terms' => "$dbname:$accession",
              'scope' => 'id',
              'operation' => '',
              'is_phrase' => 0,
            ),
          ),
        );
        $results = tripal_get_remote_pubs($dbname, $criteria, $num_to_retrieve, $page);
        tripal_pub_add_publications($results['pubs'], $do_contact, $do_update);
      }
      else {
        // Make a badly formatted dbxref visible instead of silently importing
        // nothing.
        tripal_report_error('tripal_pub', TRIPAL_ERROR,
          "tripal_import_pub_by_dbxref(): The dbxref '%dbxref' is not of the form DB_NAME:ACCESSION. Nothing was imported.",
          array('%dbxref' => print_r($pub_dbxref, TRUE))
        );
      }

      // For backwards compatibility check to see if the legacy pub module
      // is enabled. If so, then sync the nodes.
      if (module_exists('tripal_pub')) {
        // Sync the newly added publications with Drupal.
        print "Syncing publications with Drupal...\n";
        chado_node_sync_records('pub');

        // If the caller wanted to create contacts from the authors then sync
        // them too.
        if ($do_contact) {
          print "Syncing contacts with Drupal...\n";
          chado_node_sync_records('contact');
        }
      }
    }
    catch (Exception $e) {
      $transaction->rollback();
      print "\n"; // make sure we start errors on new line
      watchdog_exception('T_pub_import', $e);
      print "FAILED: Rolling back database changes...\n";
      return;
    }
  }
  /**
   * Imports all publications for all active import setups.
   *
   * Every enabled record of the tripal_pub_import table is executed. Remote
   * publications are fetched page by page (100 per page) and added to Chado
   * inside a single database transaction which is rolled back if an exception
   * is thrown. Progress messages are printed to standard output.
   *
   * @param $report_email
   *   A list of email address, separated by commas, that should be notified
   *   once importing has completed. If FALSE no report is sent.
   * @param $do_update
   *   If set to TRUE then publications that already exist in the Chado database
   *   will be updated, whereas if FALSE only new publications will be added
   *
   * @ingroup tripal_pub
   */
  function tripal_execute_active_pub_importers($report_email = FALSE, $do_update = FALSE) {
    $num_to_retrieve = 100;

    print "\nNOTE: Loading of publications is performed using a database transaction. \n" .
          "If the load fails or is terminated prematurely then the entire set of \n" .
          "insertions/updates is rolled back and will not be found in the database\n\n";

    // Start the transaction.
    $transaction = db_transaction();
    try {
      // Get all of the enabled loaders.
      $sql = "SELECT * FROM {tripal_pub_import} WHERE disabled = 0 ";
      $importers = db_query($sql, array());
      $do_contact = FALSE;
      $reports = array();
      foreach ($importers as $import) {
        $page = 0;
        print "Executing importer: '" . $import->name . "'\n";

        // Keep track if any of the importers want to create contacts from
        // authors.
        if ($import->do_contact == 1) {
          $do_contact = TRUE;
        }

        // The criteria are read from this site's own tripal_pub_import table.
        $criteria = unserialize($import->criteria);
        $remote_db = $criteria['remote_db'];

        // The report of an importer covers all of its pages, so start empty
        // and merge in the report of every page as it is processed.
        $reports[$import->name] = array();
        do {
          // Retrieve the pubs for this page. We'll retrieve 100 at a time.
          $page_results = tripal_get_remote_pubs($remote_db, $criteria, $num_to_retrieve, $page);
          $pubs = $page_results['pubs'];
          $page_report = tripal_pub_add_publications($pubs, $import->do_contact, $do_update);
          if (is_array($page_report)) {
            foreach ($page_report as $status => $entries) {
              $have_list = isset($reports[$import->name][$status]) && is_array($reports[$import->name][$status]);
              if ($have_list && is_array($entries)) {
                $reports[$import->name][$status] = array_merge($reports[$import->name][$status], $entries);
              }
              else {
                $reports[$import->name][$status] = $entries;
              }
            }
          }
          $page++;
        }
        // Continue looping until we have a $pubs array that does not have
        // our requested number of records. This means we've hit the end.
        while (count($pubs) == $num_to_retrieve);
      }

      // Sync the newly added publications with Drupal.
      // For backwards compatibility check to see if the legacy pub module
      // is enabled. If so, then sync the nodes.
      if (module_exists('tripal_pub')) {
        print "Syncing publications with Drupal...\n";
        chado_node_sync_records('pub');
      }

      // Iterate through each of the reports and generate a final report with
      // HTML links.
      if ($report_email) {
        global $base_url;
        $HTML_report = "<html>";
        foreach ($reports as $importer => $report) {
          $inserted = (isset($report['inserted']) && is_array($report['inserted'])) ? $report['inserted'] : array();
          $total = count($inserted);
          $HTML_report .= "<b>$total new publications from importer: " . check_plain($importer) . "</b><br><ol>\n";
          foreach ($inserted as $pub) {
            if (array_key_exists('pub_id', $pub)) {
              // l() escapes the link text itself.
              $item = l($pub['Title'], "$base_url/pub/" . $pub['pub_id']);
            }
            else {
              // Titles come from remote databases, so escape them before they
              // are placed into the HTML message.
              $item = check_plain($pub['Title']);
            }
            $HTML_report .= "<li>$item</li>\n";
          }
          $HTML_report .= "</ol>\n";
        }
        $HTML_report .= "</html>";
        $site_email = variable_get('site_mail', '');
        $params = array(
          'message' => $HTML_report
        );
        drupal_mail('tripal_pub', 'import_report', $report_email, language_default(), $params, $site_email, TRUE);
      }

      // For backwards compatibility check to see if the legacy pub module
      // is enabled. If so, then sync the nodes.
      if (module_exists('tripal_pub')) {
        // If any of the importers wanted to create contacts from the authors
        // then sync them.
        if ($do_contact) {
          print "Syncing contacts with Drupal...\n";
          chado_node_sync_records('contact');
        }
      }
    }
    catch (Exception $e) {
      $transaction->rollback();
      print "\n"; // make sure we start errors on new line
      watchdog_exception('T_pub_import', $e);
      print "FAILED: Rolling back database changes...\n";
      return;
    }
    print "Done.\n";
  }
  /**
   * Updates publication records.
   *
   * Updates publication records that currently exist in the Chado pub table
   * with the most recent data in the remote database. Only publications whose
   * database is listed in the 'tripal_pub_supported_dbs' variable are updated.
   * The update runs inside a database transaction which is rolled back if an
   * exception is thrown. Progress messages are printed to standard output.
   *
   * @param $do_contact
   *   Set to TRUE if authors should automatically have a contact record added
   *   to Chado. Contacts are added using the name provided by the remote
   *   database.
   * @param $dbxref
   *   The unique database ID for the record to update. This value must
   *   be of the format DB_NAME:ACCESSION where DB_NAME is the name of the
   *   database (e.g. PMID or AGL) and the ACCESSION is the unique identifier
   *   for the record in the database.
   * @param $db
   *   The name of the remote database to update. If this value is provided and
   *   no dbxref then all of the publications currently in the Chado database
   *   for this remote database will be updated.
   *
   * @ingroup tripal_pub
   */
  function tripal_reimport_publications($do_contact = FALSE, $dbxref = NULL, $db = NULL) {

    print "\nNOTE: Loading of publications is performed using a database transaction. \n" .
          "If the load fails or is terminated prematurely then the entire set of \n" .
          "insertions/updates is rolled back and will not be found in the database\n\n";

    $transaction = db_transaction();
    try {
      // Get a list of all publications by their Dbxrefs. Table names are
      // wrapped in braces like every other chado_query() call in this file.
      $sql = "
        SELECT DB.name as db_name, DBX.accession
        FROM {pub} P
          INNER JOIN {pub_dbxref} PDBX ON P.pub_id = PDBX.pub_id
          INNER JOIN {dbxref} DBX ON DBX.dbxref_id = PDBX.dbxref_id
          INNER JOIN {db} DB ON DB.db_id = DBX.db_id
      ";
      $args = array();
      // Restrict to a single record when a well-formed dbxref is given,
      // otherwise to a single database when one is named.
      if ($dbxref and preg_match('/^(.*?):(.*?)$/', $dbxref, $matches)) {
        $dbname = $matches[1];
        $accession = $matches[2];
        $sql .= "WHERE DBX.accession = :accession and DB.name = :dbname ";
        $args[':accession'] = $accession;
        $args[':dbname'] = $dbname;
      }
      elseif ($db) {
        $sql .= " WHERE DB.name = :dbname ";
        $args[':dbname'] = $db;
      }
      $sql .= "ORDER BY DB.name, P.pub_id";
      $results = chado_query($sql, $args);

      // We only update publications for databases we support. The list does
      // not change while looping, so read it once.
      $supported_dbs = variable_get('tripal_pub_supported_dbs', array());

      // Iterate through the pub IDs and re-fetch each one from its remote
      // database.
      while ($pub = $results->fetchObject()) {
        $accession = $pub->accession;
        $remote_db = $pub->db_name;

        if (!in_array($remote_db, $supported_dbs)) {
          continue;
        }

        // Build a one-criterion search that looks the record up by its ID.
        $search = array(
          'num_criteria' => 1,
          'remote_db' => $remote_db,
          'criteria' => array(
            '1' => array(
              'search_terms' => "$remote_db:$accession",
              'scope' => 'id',
              'operation' => '',
              'is_phrase' => 0,
            ),
          ),
        );
        // tripal_get_remote_pubs() returns an array holding the publications
        // under the 'pubs' key (see the other importers in this file); only
        // that list must be passed on.
        $remote = tripal_get_remote_pubs($remote_db, $search, 1, 0);
        tripal_pub_add_publications($remote['pubs'], $do_contact, TRUE);
      }

      // For backwards compatibility check to see if the legacy pub module
      // is enabled. If so, then sync the nodes.
      if (module_exists('tripal_pub')) {
        // Sync the newly added publications with Drupal.
        print "Syncing publications with Drupal...\n";
        chado_node_sync_records('pub');

        // If the caller wants to create contacts then we should sync them.
        if ($do_contact) {
          print "Syncing contacts with Drupal...\n";
          chado_node_sync_records('contact');
        }
      }
    }
    catch (Exception $e) {
      $transaction->rollback();
      print "\n"; // make sure we start errors on new line
      watchdog_exception('T_pub_import', $e);
      print "FAILED: Rolling back database changes...\n";
      return;
    }
    print "Done.\n";
  }
  /**
   * Launch the Tripal job to generate citations.
   *
   * This function will recreate citations for publications currently
   * loaded into Tripal. This is useful to create a consistent format for
   * all citations. Each generated citation is printed, followed by a summary
   * of how many citations were generated and updated.
   *
   * @param $options
   *   Options pertaining to what publications to generate citations for.
   *   One of the following must be present:
   *    - all: Create and replace citation for all pubs
   *    - new: Create citation for pubs that don't already have one
   *   Any other value is reported as an error and nothing is changed.
   *
   * @ingroup tripal_pub
   */
  function tripal_pub_create_citations($options) {

    // Refuse anything but the two documented modes; there is no query to run
    // for other values.
    if ($options != 'all' && $options != 'new') {
      tripal_report_error('tripal_pub', TRIPAL_ERROR,
        "tripal_pub_create_citations(): Unsupported option '%option'. Expecting 'all' or 'new'.",
        array('%option' => print_r($options, TRUE))
      );
      return;
    }

    // Look up the cvterm used to store citations as publication properties.
    $sql = "
      SELECT cvterm_id
      FROM {cvterm}
      WHERE
        name = 'Citation' AND
        cv_id = (SELECT cv_id FROM {cv} WHERE name = 'tripal_pub')
    ";
    $citation_type_id = chado_query($sql)->fetchField();
    if (!$citation_type_id) {
      // Without the term no citation property can be read or written.
      tripal_report_error('tripal_pub', TRIPAL_ERROR,
        "tripal_pub_create_citations(): Cannot find the 'Citation' term in the 'tripal_pub' vocabulary.",
        array()
      );
      return;
    }

    // In both modes the publication with pub_id 1 is excluded.
    if ($options == 'all') {
      // Create and replace citation for all pubs.
      $sql = "SELECT pub_id FROM {pub} P WHERE pub_id <> 1";
      $args = array();
      $skip_existing = FALSE;
    }
    else {
      // Create citation for pubs that don't already have one.
      $sql = "
        SELECT pub_id
        FROM {pub} P
        WHERE
          (SELECT value
           FROM {pubprop} PB
           WHERE type_id = :type_id AND P.pub_id = PB.pub_id AND rank = 0) IS NULL
          AND pub_id <> 1
      ";
      $args = array(':type_id' => $citation_type_id);
      $skip_existing = TRUE;
    }

    // Only pass the arguments the selected query actually has placeholders for.
    $result = chado_query($sql, $args);

    $counter_updated = 0;
    $counter_generated = 0;
    while ($pub = $result->fetchObject()) {
      $pub_arr = tripal_pub_get_publication_array($pub->pub_id, $skip_existing);
      if (!$pub_arr) {
        continue;
      }
      $citation = tripal_pub_create_citation($pub_arr);
      print $citation . "\n\n";

      // Replace the citation if the publication array already carries one,
      // otherwise add a new one.
      if (!empty($pub_arr['Citation'])) {
        $sql = "
          UPDATE {pubprop} SET value = :value
          WHERE pub_id = :pub_id AND type_id = :type_id AND rank = :rank
        ";
        chado_query($sql, array(':value' => $citation, ':pub_id' => $pub->pub_id,
          ':type_id' => $citation_type_id, ':rank' => 0));
        $counter_updated++;
      }
      else {
        $sql = "
          INSERT INTO {pubprop} (pub_id, type_id, value, rank)
          VALUES (:pub_id, :type_id, :value, :rank)
        ";
        chado_query($sql, array(':pub_id' => $pub->pub_id, ':type_id' => $citation_type_id,
          ':value' => $citation, ':rank' => 0));
        $counter_generated++;
      }
    }
    print "$counter_generated citations generated. $counter_updated citations updated.\n";
  }
  637. /**
  638. * This function generates citations for publications. It requires
  639. * an array structure with keys being the terms in the Tripal
  640. * publication ontology. This function is intended to be used
  641. * for any function that needs to generate a citation.
  642. *
  643. * @param $pub
  644. * An array structure containing publication details where the keys
  645. * are the publication ontology term names and values are the
  646. * corresponding details. The pub array can contain the following
  647. * keys with corresponding values:
  648. * - Publication Type: an array of publication types. a publication can have more than one type
  649. * - Authors: a string containing all of the authors of a publication
  650. * - Journal Name: a string containing the journal name
  651. * - Journal Abbreviation: a string containing the journal name abbreviation
  652. * - Series Name: a string containing the series (e.g. conference proceedings) name
  653. * - Series Abbreviation: a string containing the series name abbreviation
  654. * - Volume: the series volume number
  655. * - Issue: the series issue number
  656. * - Pages: the page numbers for the publication
  657. * - Publication Date: A date in the format "Year Month Day"
  658. *
  659. * @return
  660. * A text string containing the citation
  661. *
  662. * @ingroup tripal_pub
  663. */
  function tripal_pub_create_citation($pub) {
    // Assembles "Authors. Title. Venue. Date; Volume(Issue):Pages." from the
    // entries of $pub. An entry whose value is NULL or an empty string is
    // treated exactly like a missing key, so callers that always populate
    // every key (possibly with empty values) do not get stray punctuation
    // such as "; ():" in the result.

    // Citation layout per publication type. 'venue' lists, in order of
    // preference, the keys that may supply the journal/series/conference
    // name; 'locator' says whether volume, issue and pages are appended.
    $journal_layout = array(
      'venue' => array(
        'Journal Name',
        'Journal Abbreviation',
        'Series Name',
        'Series Abbreviation',
      ),
      'locator' => TRUE,
    );
    $layouts = array(
      'Journal Article' => $journal_layout,
      'Review' => $journal_layout,
      'Letter' => $journal_layout,
      "Research Support, Non-U.S. Gov't" => array(
        'venue' => array('Journal Name'),
        'locator' => FALSE,
      ),
      'Conference Proceedings' => array(
        'venue' => array(
          'Conference Name',
          'Series Name',
          'Series Abbreviation',
        ),
        'locator' => TRUE,
      ),
    );
    // Every other type (including 'Book' and 'Book Chapter') uses this layout.
    $default_layout = array(
      'venue' => array('Series Name', 'Series Abbreviation'),
      'locator' => TRUE,
    );

    $pub_type = _tripal_pub_citation_select_type($pub);
    $layout = $default_layout;
    if (is_string($pub_type) && isset($layouts[$pub_type])) {
      $layout = $layouts[$pub_type];
    }

    $citation = '';

    // Leading parts, each terminated by ". " when present.
    $leading_parts = array(
      _tripal_pub_citation_first_value($pub, array('Authors')),
      _tripal_pub_citation_first_value($pub, array('Title')),
      _tripal_pub_citation_first_value($pub, $layout['venue']),
    );
    foreach ($leading_parts as $part) {
      if ($part !== '') {
        $citation .= $part . '. ';
      }
    }

    // The full publication date is preferred over the bare year.
    $citation .= _tripal_pub_citation_first_value($pub, array('Publication Date', 'Year'));

    if ($layout['locator']) {
      $volume = _tripal_pub_citation_first_value($pub, array('Volume'));
      $issue = _tripal_pub_citation_first_value($pub, array('Issue'));
      $pages = _tripal_pub_citation_first_value($pub, array('Pages'));

      if ($volume !== '' || $issue !== '' || $pages !== '') {
        $citation .= '; ';
      }
      $citation .= $volume;
      if ($issue !== '') {
        $citation .= '(' . $issue . ')';
      }
      if ($pages !== '') {
        // The colon separates the volume from the pages, so it is only
        // emitted when a volume precedes it.
        if ($volume !== '') {
          $citation .= ':';
        }
        $citation .= $pages;
      }
    }

    return $citation . '.';
  }

  /**
   * Picks the publication type that drives the citation layout.
   *
   * @param $pub
   *   The publication details array passed to tripal_pub_create_citation().
   *
   * @return
   *   The 'Publication Type' value itself when it is not an array. For an
   *   array: the first entry that is one of the preferred types; otherwise
   *   "Research Support, Non-U.S. Gov't" if that is listed; otherwise the
   *   first entry. An empty string when no type is available.
   */
  function _tripal_pub_citation_select_type($pub) {
    if (!isset($pub['Publication Type'])) {
      return '';
    }
    $types = $pub['Publication Type'];
    if (!is_array($types)) {
      return $types;
    }

    $preferred = array(
      'Journal Article',
      'Conference Proceedings',
      'Review',
      'Book',
      'Letter',
      'Book Chapter',
    );
    $selected = '';
    foreach ($types as $type) {
      if (in_array($type, $preferred, TRUE)) {
        return $type;
      }
      // Remember this type but keep scanning: a preferred type that appears
      // later in the list (e.g. 'Journal Article') still wins.
      if ($type === "Research Support, Non-U.S. Gov't") {
        $selected = $type;
      }
    }

    // No recognized type: fall back to the first one listed, if any.
    if ($selected === '' && !empty($types)) {
      $selected = reset($types);
    }
    return $selected;
  }

  /**
   * Returns the first non-empty value found in $pub under the given keys.
   *
   * @param $pub
   *   The publication details array.
   * @param $keys
   *   Keys to try, in order of preference.
   *
   * @return
   *   The value as a string, or an empty string when every key is missing,
   *   NULL or empty.
   */
  function _tripal_pub_citation_first_value($pub, $keys) {
    foreach ($keys as $key) {
      // isset() is FALSE for both a missing key and a NULL value.
      if (isset($pub[$key])) {
        $value = (string) $pub[$key];
        if ($value !== '') {
          return $value;
        }
      }
    }
    return '';
  }
  /**
   * Retrieves the minimal information to uniquely describe any publication.
   *
   * The returned array is an associative array where the keys are
   * the controlled vocabulary terms in the form [vocab]:[accession].
   *
   * @param $pub
   *   A publication object as created by chado_generate_var().
   *
   * @return
   *   An empty array when $pub is empty or is the Chado 'null' publication.
   *   Otherwise an array with the following keys:
   *   - TPUB:0000039: the publication title.
   *   - TPUB:0000003: the stored 'Citation' property, or a citation built
   *     with tripal_pub_create_citation() when none is stored.
   *   - TPUB:0000050: the HTML-escaped 'Abstract' property, or an empty
   *     string.
   *   - TPUB:0000047: the 'Authors' property, or 'N/A'.
   *   - TPUB:0000052: the first 'URL' property, or an empty string.
   *   - SBO:0000554: an array of "[db name]:[accession]" strings, one per
   *     database cross-reference of the publication.
   */
  function tripal_get_minimal_pub_info($pub) {
    if (!$pub) {
      return array();
    }

    // Chado has a null pub as default. We don't return anything for this.
    if ($pub->uniquename == 'null') {
      return array();
    }

    // Expand the title fields.
    $pub = chado_expand_var($pub, 'field', 'pub.title');
    $pub = chado_expand_var($pub, 'field', 'pub.volumetitle');

    // Options shared by the single-record pubprop lookups below.
    $single_options = array(
      'include_fk' => array(),
    );

    // Get the abstract. The record is only expanded when one was found.
    $values = array(
      'pub_id' => $pub->pub_id,
      'type_id' => array(
        'name' => 'Abstract',
      ),
    );
    $abstract_text = '';
    $abstract = chado_generate_var('pubprop', $values, $single_options);
    if ($abstract) {
      $abstract = chado_expand_var($abstract, 'field', 'pubprop.value');
      $abstract_text = htmlspecialchars($abstract->value);
    }

    // Get the author list.
    $values = array(
      'pub_id' => $pub->pub_id,
      'type_id' => array(
        'name' => 'Authors',
      ),
    );
    $authors_list = 'N/A';
    $authors = chado_generate_var('pubprop', $values, $single_options);
    if ($authors) {
      $authors = chado_expand_var($authors, 'field', 'pubprop.value');
      $authors_list = $authors->value;
    }

    // Attach the pub_dbxref records to the publication object. Nothing below
    // reads them; the call is kept because it updates the object handed in
    // by the caller.
    // NOTE(review): confirm whether any caller relies on this expansion.
    $pub = chado_expand_var($pub, 'table', 'pub_dbxref', array('return_array' => 1));

    // Get the URL: the first 'URL' property, if any.
    $values = array(
      'pub_id' => $pub->pub_id,
      'type_id' => array(
        'name' => 'URL',
      ),
    );
    $options = array(
      'return_array' => 1,
      'include_fk' => array(),
    );
    $url = '';
    $urls = chado_generate_var('pubprop', $values, $options);
    if ($urls) {
      $urls = chado_expand_var($urls, 'field', 'pubprop.value');
      if (count($urls) > 0) {
        $url = $urls[0]->value;
      }
    }

    // Get the list of database cross references.
    $values = array(
      'pub_id' => $pub->pub_id,
    );
    $options = array(
      'return_array' => 1,
    );
    $dbxrefs = array();
    $pub_dbxrefs = chado_generate_var('pub_dbxref', $values, $options);
    if (is_array($pub_dbxrefs)) {
      foreach ($pub_dbxrefs as $pub_dbxref) {
        $dbxrefs[] = $pub_dbxref->dbxref_id->db_id->name . ':' . $pub_dbxref->dbxref_id->accession;
      }
    }

    // Get the citation: use the stored one, otherwise build one.
    $values = array(
      'pub_id' => $pub->pub_id,
      'type_id' => array(
        'name' => 'Citation',
      ),
    );
    $citation = chado_generate_var('pubprop', $values, $single_options);
    if ($citation) {
      $citation = chado_expand_var($citation, 'field', 'pubprop.value');
      $citation = $citation->value;
    }
    else {
      $pub_info = array(
        'Title' => $pub->title,
        'Publication Type' => $pub->type_id->name,
      );
      // Only hand over optional fields that hold a value. The citation
      // builder keys its punctuation off the presence of these entries, so
      // an empty volume/issue/pages would otherwise leave "; ():" behind.
      $optional = array(
        'Authors' => $authors_list,
        'Series Name' => $pub->series_name,
        'Volume' => $pub->volume,
        'Issue' => $pub->issue,
        'Pages' => $pub->pages,
        'Publication Date' => $pub->pyear,
      );
      foreach ($optional as $key => $value) {
        if ($value !== NULL && $value !== '') {
          $pub_info[$key] = $value;
        }
      }
      $citation = tripal_pub_create_citation($pub_info);
    }

    return array(
      'TPUB:0000039' => $pub->title,
      'TPUB:0000003' => $citation,
      'TPUB:0000050' => $abstract_text,
      'TPUB:0000047' => $authors_list,
      'TPUB:0000052' => $url,
      'SBO:0000554' => $dbxrefs,
    );
  }