tripal_chado.pub.api.inc 37 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109
  1. <?php
  2. /**
  3. * @file
  4. * Provides an application programming interface (API) to manage chado publications
  5. */
  6. /**
  7. * @defgroup tripal_pub_api Publication Module API
  8. * @ingroup tripal_api
  9. * @{
  10. * Provides an application programming interface (API) to manage chado publications
  11. *
  12. * @stephen add documentation here for how to add a new importer.
  13. *
  14. * @}
  15. */
  /**
   * Retrieves a chado publication array.
   *
   * @param $identifiers
   *   An array used to uniquely identify a publication. This array has the same
   *   format as that used by chado_generate_var(). The following keys can be
   *   useful for uniquely identifying a publication as they should be unique:
   *    - pub_id: the chado pub.pub_id primary key.
   *    - nid: the drupal nid of the publication.
   *    - uniquename: a value to match with the pub.uniquename field.
   *   There are also some specially handled keys. They are:
   *    - property: describes the property to select records for. The value is
   *      removed from the identifiers and handed to
   *      chado_get_record_with_property() under its 'type_name' key.
   *    - dbxref: the database cross reference accession. It should be in the
   *      form DB:ACCESSION, where DB is the database name and ACCESSION is the
   *      unique publication identifier (e.g. PMID:4382934).
   *    - dbxref_id: the dbxref.dbxref_id of the publication.
   * @param $options
   *   An array of options. Supported keys include any keys supported by
   *   chado_generate_var(). See that function definition for additional
   *   details.
   *
   * NOTE: the $identifiers parameter can really be any array similar to the
   * $values passed into chado_select_record(). It should fully specify the pub
   * record to be returned.
   *
   * @return
   *   If a single publication is retrieved using the identifiers, then a
   *   publication array will be returned. The array is of the same format
   *   returned by the chado_generate_var() function. Otherwise, FALSE will be
   *   returned.
   *
   * @ingroup tripal_pub_api
   */
  function tripal_get_publication($identifiers, $options = array()) {

    // Validate the parameters. Stop here on bad input: the lookups below
    // call array functions on $identifiers and would fail on a non-array.
    if (!is_array($identifiers)) {
      tripal_report_error('tripal_pub_api', TRIPAL_ERROR,
        "chado_get_publication: The identifier passed in is expected to be an array with the key " .
        "matching a column name in the pub table (ie: pub_id or name). You passed in %identifier.",
        array('%identifier' => print_r($identifiers, TRUE))
      );
      return FALSE;
    }
    if (empty($identifiers)) {
      tripal_report_error('tripal_pub_api', TRIPAL_ERROR,
        "chado_get_publication: You did not pass in anything to identify the publication you want. The identifier " .
        "is expected to be an array with the key matching a column name in the pub table " .
        "(ie: pub_id or name). You passed in %identifier.",
        array('%identifier' => print_r($identifiers, TRUE))
      );
      return FALSE;
    }

    // If one of the identifiers is a property then use
    // chado_get_record_with_property().
    if (array_key_exists('property', $identifiers)) {
      $property = $identifiers['property'];
      unset($identifiers['property']);
      $pub = chado_get_record_with_property(
        array('table' => 'pub', 'base_records' => $identifiers),
        array('type_name' => $property),
        $options
      );
    }
    // A DB:ACCESSION cross reference: resolve dbxref -> pub_dbxref -> pub.
    elseif (array_key_exists('dbxref', $identifiers)) {
      $matches = array();
      if (!preg_match('/^(.*?):(.*?)$/', $identifiers['dbxref'], $matches)) {
        tripal_report_error('tripal_pub_api', TRIPAL_ERROR,
          "chado_get_publication: The dbxref identifier is not correctly formatted. Identifiers passed: %identifier.",
          array('%identifier' => print_r($identifiers, TRUE))
        );
        return FALSE;
      }
      $dbname = $matches[1];
      $accession = $matches[2];

      // First make sure the dbxref is present.
      $values = array(
        'accession' => $accession,
        'db_id' => array(
          'name' => $dbname,
        ),
      );
      $dbxref = chado_select_record('dbxref', array('dbxref_id'), $values);
      // empty() covers both "no rows" and a failed (non-array) select.
      if (empty($dbxref)) {
        return FALSE;
      }
      $pub_dbxref = chado_select_record('pub_dbxref', array('pub_id'), array('dbxref_id' => $dbxref[0]->dbxref_id));
      if (empty($pub_dbxref)) {
        return FALSE;
      }
      $pub = chado_generate_var('pub', array('pub_id' => $pub_dbxref[0]->pub_id), $options);
    }
    // A dbxref_id: resolve pub_dbxref -> pub.
    elseif (array_key_exists('dbxref_id', $identifiers)) {
      $values = array('dbxref_id' => $identifiers['dbxref_id']);
      $pub_dbxref = chado_select_record('pub_dbxref', array('pub_id'), $values);
      if (empty($pub_dbxref)) {
        return FALSE;
      }
      $pub = chado_generate_var('pub', array('pub_id' => $pub_dbxref[0]->pub_id), $options);
    }
    // Else we have a simple case and we can just use chado_generate_var() to
    // get the pub.
    else {
      $pub = chado_generate_var('pub', $identifiers, $options);
    }

    // Ensure the pub is singular. If it's an array then it is not singular.
    if (is_array($pub)) {
      tripal_report_error('tripal_pub_api', TRIPAL_ERROR,
        "chado_get_publication: The identifiers did not find a single unique record. Identifiers passed: %identifier.",
        array('%identifier' => print_r($identifiers, TRUE))
      );
      return FALSE;
    }

    // Report an error if $pub is FALSE since then the lookup has failed.
    if ($pub === FALSE) {
      tripal_report_error('tripal_pub_api', TRIPAL_ERROR,
        "chado_get_publication: Could not find a publication using the identifiers " .
        "provided. Check that the identifiers are correct. Identifiers passed: %identifier.",
        array('%identifier' => print_r($identifiers, TRUE))
      );
      return FALSE;
    }

    // As far as we know, everything is fine so give them their pub.
    return $pub;
  }
  /**
   * Looks for publications that match the supplied publication details.
   *
   * The publication table of Chado only has a unique constraint for the
   * uniquename of the publication, but in reality a publication can be
   * considered unique by a combination of the title, publication type,
   * published year and series name (e.g. journal name or conference name). The
   * site administrator can configure how publications are determined to be
   * unique. This function uses that configuration (the
   * 'tripal_pub_import_duplicate_check' variable) to look for publications
   * that match the details specified by the $pub_details argument and
   * indicates if one or more publications match the criteria.
   *
   * @param $pub_details
   *   An associative array with details about the publications. The expected
   *   keys are:
   *    - 'Title': The title of the publication.
   *    - 'Year': The published year of the publication.
   *    - 'Publication Type': An array of publication types (or a single type
   *      name). A publication can have more than one type; only the first is
   *      used. This key is required.
   *    - 'Series Name': The series name of the publication.
   *    - 'Journal Name': An alternative to 'Series Name'.
   *    - 'Conference Name': An alternative to 'Series Name'.
   *    - 'Citation': The publication citation (this is the value saved in the
   *      pub.uniquename field and must be unique). If this key is present it
   *      will also be checked.
   *    - 'Publication Dbxref': A database cross reference of the form
   *      DB:ACCESSION where DB is the name of the database and ACCESSION is
   *      the unique identifier (e.g PMID:3483139).
   *
   * @return
   *   An array containing the pub_id's of matching publications. Returns an
   *   empty array if no pubs match.
   *
   * @ingroup tripal_pub_api
   */
  function tripal_publication_exists($pub_details) {

    // First try to find the publication using the accession number if that
    // key exists in the details array.
    if (array_key_exists('Publication Dbxref', $pub_details)) {
      $pub = tripal_get_publication(array('dbxref' => $pub_details['Publication Dbxref']));
      if ($pub) {
        return array($pub->pub_id);
      }
    }

    // Make sure the citation is unique.
    if (array_key_exists('Citation', $pub_details)) {
      $pub = tripal_get_publication(array('uniquename' => $pub_details['Citation']));
      if ($pub) {
        return array($pub->pub_id);
      }
    }

    // The publication type is required.
    if (!array_key_exists('Publication Type', $pub_details)) {
      tripal_report_error('tripal_pub', TRIPAL_ERROR,
        "tripal_publication_exists(): The Publication Type is a " .
        "required property but is missing", array());
      return array();
    }

    // Get the publication type (use the first publication type when several
    // are given).
    $type_name = $pub_details['Publication Type'];
    if (is_array($type_name)) {
      $type_name = isset($type_name[0]) ? $type_name[0] : '';
    }
    $pub_type = tripal_get_cvterm(array(
      'name' => $type_name,
      'cv_id' => array(
        'name' => 'tripal_pub',
      ),
    ));
    if (!$pub_type) {
      // Report the resolved type name: indexing the raw value with [0] would
      // yield only the first character when the type was given as a string.
      tripal_report_error('tripal_pub', TRIPAL_ERROR,
        "tripal_publication_exists(): Cannot find publication type: '%type'",
        array('%type' => $type_name));
      return array();
    }

    // Get the series name. The pub.series_name field is only 255 chars so we
    // must truncate to be safe. When several of these keys are present the
    // last one checked wins.
    $series_name = '';
    foreach (array('Series Name', 'Journal Name', 'Conference Name') as $series_key) {
      if (array_key_exists($series_key, $pub_details)) {
        $series_name = substr($pub_details[$series_key], 0, 255);
      }
    }

    // Missing title/year are looked up as NULL rather than raising a notice.
    $title = isset($pub_details['Title']) ? $pub_details['Title'] : NULL;
    $year = isset($pub_details['Year']) ? $pub_details['Year'] : NULL;

    // Make sure the publication is unique using the preferred import
    // duplication check.
    $import_dups_check = variable_get('tripal_pub_import_duplicate_check', 'title_year_media');
    $pubs = array();
    switch ($import_dups_check) {
      case 'title_year':
        $identifiers = array(
          'title' => $title,
          'pyear' => $year,
        );
        $pubs = chado_select_record('pub', array('pub_id'), $identifiers);
        break;

      case 'title_year_type':
        $identifiers = array(
          'title' => $title,
          'pyear' => $year,
          'type_id' => $pub_type->cvterm_id,
        );
        $pubs = chado_select_record('pub', array('pub_id'), $identifiers);
        break;

      case 'title_year_media':
        $identifiers = array(
          'title' => $title,
          'pyear' => $year,
          'series_name' => $series_name,
        );
        $pubs = chado_select_record('pub', array('pub_id'), $identifiers);
        break;
    }

    // Collect the matching pub_ids; a failed select contributes nothing.
    $return = array();
    if (is_array($pubs)) {
      foreach ($pubs as $pub) {
        $return[] = $pub->pub_id;
      }
    }
    return $return;
  }
  /**
   * Used for autocomplete in forms for identifying publications.
   *
   * Looks up at most 25 publications whose title begins with the provided
   * string (case-insensitive), ordered by title.
   *
   * @param $string
   *   The string to search for.
   *
   * @return
   *   Nothing is returned. A JSON object is printed via drupal_json_output()
   *   in which both the keys and the values are the uniquenames of the
   *   matching publications.
   *
   * @ingroup tripal_chado_api
   */
  function tripal_autocomplete_pub($string = '') {
    $items = array();
    $sql = "
      SELECT pub_id, title, uniquename
      FROM {pub}
      WHERE lower(title) like lower(:str)
      ORDER by title
      LIMIT 25 OFFSET 0
    ";
    // Escape LIKE wildcards typed by the user so that '%' and '_' are matched
    // literally; only the trailing '%' added here acts as a wildcard.
    $pubs = chado_query($sql, array(':str' => db_like($string) . '%'));
    while ($pub = $pubs->fetchObject()) {
      $items[$pub->uniquename] = $pub->uniquename;
    }
    drupal_json_output($items);
  }
  /**
   * Imports a single publication specified by a remote database cross reference.
   *
   * The import runs inside a database transaction which is rolled back if an
   * exception is thrown. Progress messages are printed to standard output.
   *
   * @param $pub_dbxref
   *   The unique database ID for the record to import. This value must
   *   be of the format DB_NAME:ACCESSION where DB_NAME is the name of the
   *   database (e.g. PMID or AGL) and the ACCESSION is the unique identifier
   *   for the record in the database.
   * @param $do_contact
   *   Set to TRUE if authors should automatically have a contact record added
   *   to Chado.
   * @param $do_update
   *   If set to TRUE then the publication will be updated if it already exists
   *   in the database. Defaults to FALSE, matching
   *   tripal_execute_active_pub_importers(). The default exists because a
   *   required parameter after an optional one is deprecated in PHP 8.
   *
   * @ingroup tripal_pub
   */
  function tripal_import_pub_by_dbxref($pub_dbxref, $do_contact = FALSE, $do_update = FALSE) {
    $num_to_retrieve = 1;
    $page = 0;

    // Validate the cross reference before touching the database so that a
    // malformed value is reported instead of silently importing nothing.
    $matches = array();
    if (!preg_match('/^(.*?):(.*?)$/', $pub_dbxref, $matches)) {
      tripal_report_error('tripal_pub', TRIPAL_ERROR,
        "tripal_import_pub_by_dbxref(): The publication dbxref '%dbxref' is not of the form DB_NAME:ACCESSION.",
        array('%dbxref' => print_r($pub_dbxref, TRUE)));
      return;
    }
    $dbname = $matches[1];
    $accession = $matches[2];

    print "\nNOTE: Loading of publications is performed using a database transaction. \n" .
      "If the load fails or is terminated prematurely then the entire set of \n" .
      "insertions/updates is rolled back and will not be found in the database\n\n";

    $transaction = db_transaction();
    try {
      // Build a single-criterion search that asks the remote database for
      // exactly this record by its ID.
      $criteria = array(
        'num_criteria' => 1,
        'remote_db' => $dbname,
        'criteria' => array(
          '1' => array(
            'search_terms' => "$dbname:$accession",
            'scope' => 'id',
            'operation' => '',
            'is_phrase' => 0,
          ),
        ),
      );
      $results = tripal_get_remote_pubs($dbname, $criteria, $num_to_retrieve, $page);
      tripal_pub_add_publications($results['pubs'], $do_contact, $do_update);

      // For backwards compatibility check to see if the legacy pub module
      // is enabled. If so, then sync the nodes.
      if (module_exists('tripal_pub')) {
        // Sync the newly added publications with Drupal.
        print "Syncing publications with Drupal...\n";
        chado_node_sync_records('pub');

        // If contacts were created from the authors then sync them too.
        if ($do_contact) {
          print "Syncing contacts with Drupal...\n";
          chado_node_sync_records('contact');
        }
      }
    }
    catch (Exception $e) {
      $transaction->rollback();
      // Make sure we start errors on a new line.
      print "\n";
      watchdog_exception('T_pub_import', $e);
      print "FAILED: Rolling back database changes...\n";
      return;
    }
    print "Done.\n";
  }
  /**
   * Imports all publications for all active import setups.
   *
   * Every row of the tripal_pub_import table that is not disabled is executed
   * in turn, retrieving the remote publications one page (100 records) at a
   * time. The whole run happens inside a database transaction which is rolled
   * back if an exception is thrown. Progress messages are printed to standard
   * output.
   *
   * @param $report_email
   *   A list of email address, separated by commas, that should be notified
   *   once importing has completed. If FALSE no report is sent.
   * @param $do_update
   *   If set to TRUE then publications that already exist in the Chado database
   *   will be updated, whereas if FALSE only new publications will be added.
   *
   * @ingroup tripal_pub
   */
  function tripal_execute_active_pub_importers($report_email = FALSE, $do_update = FALSE) {
    $num_to_retrieve = 100;

    print "\nNOTE: Loading of publications is performed using a database transaction. \n" .
      "If the load fails or is terminated prematurely then the entire set of \n" .
      "insertions/updates is rolled back and will not be found in the database\n\n";

    // Start the transaction.
    $transaction = db_transaction();
    try {
      // Get all of the enabled importers.
      $sql = "SELECT * FROM {tripal_pub_import} WHERE disabled = 0 ";
      $importers = db_query($sql, array());

      $do_contact = FALSE;
      $reports = array();
      foreach ($importers as $import) {
        $page = 0;
        print "Executing importer: '" . $import->name . "'\n";

        // Keep track if any of the importers want to create contacts from
        // authors.
        if ($import->do_contact == 1) {
          $do_contact = TRUE;
        }
        $criteria = unserialize($import->criteria);
        $remote_db = $criteria['remote_db'];
        if (!isset($reports[$import->name])) {
          $reports[$import->name] = array();
        }

        do {
          // Retrieve the pubs for this page. We'll retrieve 100 at a time.
          $remote = tripal_get_remote_pubs($remote_db, $criteria, $num_to_retrieve, $page);
          $pubs = $remote['pubs'];
          $page_report = tripal_pub_add_publications($pubs, $import->do_contact, $do_update);

          // Accumulate the per-page report instead of overwriting it, so the
          // final report covers every page and not only the last one.
          // NOTE(review): assumes the report is an array of lists keyed by
          // status (the code below reads its 'inserted' key) - confirm
          // against tripal_pub_add_publications().
          if (is_array($page_report)) {
            foreach ($page_report as $status => $entries) {
              if (is_array($entries) && isset($reports[$import->name][$status]) && is_array($reports[$import->name][$status])) {
                $reports[$import->name][$status] = array_merge($reports[$import->name][$status], $entries);
              }
              else {
                $reports[$import->name][$status] = $entries;
              }
            }
          }
          $page++;
        }
        // Continue looping until we have a $pubs array that does not have
        // our requested number of records. This means we've hit the end.
        while (is_array($pubs) && count($pubs) == $num_to_retrieve);
      }

      // Sync the newly added publications with Drupal. For backwards
      // compatibility check to see if the legacy pub module is enabled. If
      // so, then sync the nodes.
      if (module_exists('tripal_pub')) {
        print "Syncing publications with Drupal...\n";
        chado_node_sync_records('pub');
      }

      // Iterate through each of the reports and generate a final report with
      // HTML links.
      $HTML_report = '';
      if ($report_email) {
        $HTML_report .= "<html>";
        global $base_url;
        foreach ($reports as $importer => $report) {
          $inserted = array();
          if (isset($report['inserted']) && is_array($report['inserted'])) {
            $inserted = $report['inserted'];
          }
          $total = count($inserted);
          $HTML_report .= "<b>$total new publications from importer: $importer</b><br><ol>\n";
          foreach ($inserted as $pub) {
            $item = $pub['Title'];
            // Link to the publication page when the pub_id is known.
            if (array_key_exists('pub_id', $pub)) {
              $item = l($pub['Title'], "$base_url/pub/" . $pub['pub_id']);
            }
            $HTML_report .= "<li>$item</li>\n";
          }
          $HTML_report .= "</ol>\n";
        }
        $HTML_report .= "</html>";
        $site_email = variable_get('site_mail', '');
        $params = array(
          'message' => $HTML_report,
        );
        drupal_mail('tripal_pub', 'import_report', $report_email, language_default(), $params, $site_email, TRUE);
      }

      // For backwards compatibility check to see if the legacy pub module
      // is enabled. If so, then sync the nodes.
      if (module_exists('tripal_pub')) {
        // If any of the importers wanted to create contacts from the authors
        // then sync them.
        if ($do_contact) {
          print "Syncing contacts with Drupal...\n";
          chado_node_sync_records('contact');
        }
      }
    }
    catch (Exception $e) {
      $transaction->rollback();
      // Make sure we start errors on a new line.
      print "\n";
      watchdog_exception('T_pub_import', $e);
      print "FAILED: Rolling back database changes...\n";
      return;
    }
    print "Done.\n";
  }
  /**
   * Updates publication records.
   *
   * Updates publication records that currently exist in the Chado pub table
   * with the most recent data in the remote database. Only publications whose
   * database is listed in the 'tripal_pub_supported_dbs' variable are
   * re-imported. The whole run happens inside a database transaction which is
   * rolled back if an exception is thrown.
   *
   * @param $do_contact
   *   Set to TRUE if authors should automatically have a contact record added
   *   to Chado. Contacts are added using the name provided by the remote
   *   database.
   * @param $dbxref
   *   The unique database ID for the record to update. This value must
   *   be of the format DB_NAME:ACCESSION where DB_NAME is the name of the
   *   database (e.g. PMID or AGL) and the ACCESSION is the unique identifier
   *   for the record in the database.
   * @param $db
   *   The name of the remote database to update. If this value is provided and
   *   no dbxref then all of the publications currently in the Chado database
   *   for this remote database will be updated.
   *
   * @ingroup tripal_pub
   */
  function tripal_reimport_publications($do_contact = FALSE, $dbxref = NULL, $db = NULL) {

    print "\nNOTE: Loading of publications is performed using a database transaction. \n" .
      "If the load fails or is terminated prematurely then the entire set of \n" .
      "insertions/updates is rolled back and will not be found in the database\n\n";

    $transaction = db_transaction();
    try {
      // Get a list of all publications by their Dbxrefs. Table names are
      // wrapped in {} like every other chado_query() statement in this file.
      $sql = "
        SELECT DB.name as db_name, DBX.accession
        FROM {pub} P
          INNER JOIN {pub_dbxref} PDBX ON P.pub_id = PDBX.pub_id
          INNER JOIN {dbxref} DBX ON DBX.dbxref_id = PDBX.dbxref_id
          INNER JOIN {db} DB ON DB.db_id = DBX.db_id
      ";
      $args = array();
      $matches = array();
      if ($dbxref and preg_match('/^(.*?):(.*?)$/', $dbxref, $matches)) {
        // Restrict to the single requested record.
        $sql .= "WHERE DBX.accession = :accession and DB.name = :dbname ";
        $args[':accession'] = $matches[2];
        $args[':dbname'] = $matches[1];
      }
      elseif ($db) {
        // Restrict to every publication of the requested database.
        $sql .= " WHERE DB.name = :dbname ";
        $args[':dbname'] = $db;
      }
      $sql .= "ORDER BY DB.name, P.pub_id";
      $results = chado_query($sql, $args);

      // We only update publications for databases we support. The list does
      // not change while looping, so read it once.
      $supported_dbs = variable_get('tripal_pub_supported_dbs', array());

      // Iterate through the pub IDs.
      while ($pub = $results->fetchObject()) {
        $accession = $pub->accession;
        $remote_db = $pub->db_name;
        if (!in_array($remote_db, $supported_dbs)) {
          continue;
        }

        // Ask the remote database for exactly this record by its ID.
        $search = array(
          'num_criteria' => 1,
          'remote_db' => $remote_db,
          'criteria' => array(
            '1' => array(
              'search_terms' => "$remote_db:$accession",
              'scope' => 'id',
              'operation' => '',
              'is_phrase' => 0,
            ),
          ),
        );
        $remote = tripal_get_remote_pubs($remote_db, $search, 1, 0);

        // tripal_get_remote_pubs() returns a result set whose publications
        // live under the 'pubs' key (see the other callers in this file), so
        // hand over only that list rather than the whole result.
        $pubs = (is_array($remote) && isset($remote['pubs'])) ? $remote['pubs'] : array();
        tripal_pub_add_publications($pubs, $do_contact, TRUE);
      }

      // For backwards compatibility check to see if the legacy pub module
      // is enabled. If so, then sync the nodes.
      if (module_exists('tripal_pub')) {
        // Sync the newly added publications with Drupal.
        print "Syncing publications with Drupal...\n";
        chado_node_sync_records('pub');

        // If the caller wants to create contacts then we should sync them.
        if ($do_contact) {
          print "Syncing contacts with Drupal...\n";
          chado_node_sync_records('contact');
        }
      }
    }
    catch (Exception $e) {
      $transaction->rollback();
      // Make sure we start errors on a new line.
      print "\n";
      watchdog_exception('T_pub_import', $e);
      print "FAILED: Rolling back database changes...\n";
      return;
    }
    print "Done.\n";
  }
  /**
   * Launch the Tripal job to generate citations.
   *
   * This function will recreate citations for publications currently
   * loaded into Tripal. This is useful to create a consistent format for
   * all citations. Each generated citation is printed and stored as the
   * rank 0 'Citation' property (tripal_pub vocabulary) of the publication.
   * The publication with pub_id 1 is always skipped.
   *
   * @param $options
   *   Options pertaining to what publications to generate citations for.
   *   One of the following must be present:
   *    - all: Create and replace citation for all pubs.
   *    - new: Create citation for pubs that don't already have one.
   *
   * @ingroup tripal_pub
   */
  function tripal_pub_create_citations($options) {

    // Look up the cvterm used to store citations as publication properties.
    $sql = "
      SELECT cvterm_id
      FROM {cvterm}
      WHERE
        name = 'Citation' AND
        cv_id = (SELECT cv_id FROM {cv} WHERE name = 'tripal_pub')
    ";
    $citation_type_id = chado_query($sql)->fetchField();
    if (!$citation_type_id) {
      tripal_report_error('tripal_pub', TRIPAL_ERROR,
        "tripal_pub_create_citations(): Cannot find the 'Citation' term in the 'tripal_pub' vocabulary.",
        array());
      return;
    }

    // Create and replace citation for all pubs.
    if ($options == 'all') {
      $sql = "SELECT pub_id FROM {pub} P WHERE pub_id <> 1";
      // This query has no placeholders, and PDO fails a statement that is
      // given more values than it declares, so no arguments are passed.
      $args = array();
      $skip_existing = FALSE;
    }
    // Create citation for pubs that don't already have one.
    elseif ($options == 'new') {
      $sql = "
        SELECT pub_id
        FROM {pub} P
        WHERE
          (SELECT value
           FROM {pubprop} PB
           WHERE type_id = :type_id AND P.pub_id = PB.pub_id AND rank = 0) IS NULL
          AND pub_id <> 1
      ";
      $args = array(':type_id' => $citation_type_id);
      $skip_existing = TRUE;
    }
    // Anything else is unsupported: there is no publication query to run.
    else {
      tripal_report_error('tripal_pub', TRIPAL_ERROR,
        "tripal_pub_create_citations(): Unsupported option '%option'. Expected 'all' or 'new'.",
        array('%option' => print_r($options, TRUE)));
      return;
    }

    $result = chado_query($sql, $args);
    $counter_updated = 0;
    $counter_generated = 0;
    while ($pub = $result->fetchObject()) {
      $pub_arr = tripal_pub_get_publication_array($pub->pub_id, $skip_existing);
      if (!$pub_arr) {
        continue;
      }
      $citation = tripal_pub_create_citation($pub_arr);
      print $citation . "\n\n";

      // Replace if a citation exists. The key may be absent when existing
      // citations are skipped, so test it without raising a notice.
      if (!empty($pub_arr['Citation'])) {
        $sql = "
          UPDATE {pubprop} SET value = :value
          WHERE pub_id = :pub_id AND type_id = :type_id AND rank = :rank
        ";
        chado_query($sql, array(
          ':value' => $citation,
          ':pub_id' => $pub->pub_id,
          ':type_id' => $citation_type_id,
          ':rank' => 0,
        ));
        $counter_updated++;
      }
      // Otherwise store a new citation.
      else {
        $sql = "
          INSERT INTO {pubprop} (pub_id, type_id, value, rank)
          VALUES (:pub_id, :type_id, :value, :rank)
        ";
        chado_query($sql, array(
          ':pub_id' => $pub->pub_id,
          ':type_id' => $citation_type_id,
          ':value' => $citation,
          ':rank' => 0,
        ));
        $counter_generated++;
      }
    }
    print "$counter_generated citations generated. $counter_updated citations updated.\n";
  }
  635. /**
  636. * This function generates citations for publications. It requires
  637. * an array structure with keys being the terms in the Tripal
  638. * publication ontology. This function is intended to be used
  639. * for any function that needs to generate a citation.
  640. *
  641. * @param $pub
  642. * An array structure containing publication details where the keys
  643. * are the publication ontology term names and values are the
  644. * corresponding details. The pub array can contain the following
  645. * keys with corresponding values:
  646. * - Publication Type: an array of publication types. a publication can have more than one type
  647. * - Authors: a string containing all of the authors of a publication
  648. * - Journal Name: a string containing the journal name
  649. * - Journal Abbreviation: a string containing the journal name abbreviation
  650. * - Series Name: a string containing the series (e.g. conference proceedings) name
  651. * - Series Abbreviation: a string containing the series name abbreviation
  652. * - Volume: the series volume number
  653. * - Issue: the series issue number
  654. * - Pages: the page numbers for the publication
  655. * - Publication Date: A date in the format "Year Month Day"
  656. *
  657. * @return
  658. * A text string containing the citation
  659. *
  660. * @ingroup tripal_pub
  661. */
  function tripal_pub_create_citation($pub) {
    // Builds one citation string of the form
    //   "Authors. Title. Venue. Date; Volume(Issue):Pages."
    // from the keys of the $pub array. Which keys may supply the venue, and
    // whether the volume/issue/pages suffix is included, depends on the
    // publication type selected below.

    // Venue keys tried, in order of preference, for journal-style types.
    $journal_venues = array(
      'Journal Name',
      'Journal Abbreviation',
      'Series Name',
      'Series Abbreviation',
    );

    // Citation layout per publication type. 'venue' lists the keys that may
    // supply the venue (the first one present in $pub wins); 'locator' says
    // whether the "; Volume(Issue):Pages" suffix is appended.
    $layouts = array(
      'Journal Article' => array(
        'venue' => $journal_venues,
        'locator' => TRUE,
      ),
      'Review' => array(
        'venue' => $journal_venues,
        'locator' => TRUE,
      ),
      'Letter' => array(
        'venue' => $journal_venues,
        'locator' => TRUE,
      ),
      "Research Support, Non-U.S. Gov't" => array(
        'venue' => array('Journal Name'),
        'locator' => FALSE,
      ),
      'Conference Proceedings' => array(
        'venue' => array('Conference Name', 'Series Name', 'Series Abbreviation'),
        'locator' => TRUE,
      ),
    );
    // Used for every type without an entry above (including 'Book' and
    // 'Book Chapter').
    $default_layout = array(
      'venue' => array('Series Name', 'Series Abbreviation'),
      'locator' => TRUE,
    );

    // An article may have more than one publication type. For example,
    // a publication type can be 'Journal Article' but also a 'Clinical Trial'.
    // Therefore, we need to select the type that makes most sense for
    // construction of the citation: the first recognized type in the list wins.
    $preferred_types = array(
      'Journal Article',
      'Conference Proceedings',
      'Review',
      'Book',
      'Letter',
      'Book Chapter',
    );
    $weak_type = "Research Support, Non-U.S. Gov't";

    $pub_type = '';
    $types = array_key_exists('Publication Type', $pub) ? $pub['Publication Type'] : '';
    if (is_array($types)) {
      foreach ($types as $ptype) {
        if (in_array($ptype, $preferred_types, TRUE)) {
          $pub_type = $ptype;
          break;
        }
        if ($ptype === $weak_type) {
          // We don't break because if the article is also one of the
          // preferred types (e.g. a Journal Article) we prefer that type.
          $pub_type = $ptype;
        }
      }
      // If we don't have a recognized publication type, then just use the
      // first one in the list (if there is one).
      if ($pub_type === '' && count($types) > 0) {
        $pub_type = reset($types);
      }
    }
    else {
      $pub_type = $types;
    }

    // Exactly one layout is applied per publication. Previously the
    // 'Journal Article' branch was not part of the if/elseif chain, so journal
    // articles were additionally run through the default layout, which either
    // overwrote the citation (dropping the journal name) or duplicated it.
    $layout = $default_layout;
    if (is_string($pub_type) && isset($layouts[$pub_type])) {
      $layout = $layouts[$pub_type];
    }

    $citation = '';
    if (array_key_exists('Authors', $pub)) {
      $citation = $pub['Authors'] . '. ';
    }
    $citation .= (isset($pub['Title']) ? $pub['Title'] : '') . '. ';

    // Venue: only the first available key is used.
    foreach ($layout['venue'] as $venue_key) {
      if (array_key_exists($venue_key, $pub)) {
        $citation .= $pub[$venue_key] . '. ';
        break;
      }
    }

    // Date: the full publication date is preferred over the bare year.
    if (array_key_exists('Publication Date', $pub)) {
      $citation .= $pub['Publication Date'];
    }
    elseif (array_key_exists('Year', $pub)) {
      $citation .= $pub['Year'];
    }

    if ($layout['locator']) {
      $has_volume = array_key_exists('Volume', $pub);
      $has_issue = array_key_exists('Issue', $pub);
      $has_pages = array_key_exists('Pages', $pub);

      if ($has_volume || $has_issue || $has_pages) {
        $citation .= '; ';
      }
      if ($has_volume) {
        $citation .= $pub['Volume'];
      }
      if ($has_issue) {
        $citation .= '(' . $pub['Issue'] . ')';
      }
      if ($has_pages) {
        // The colon separator is only written when a volume precedes the pages.
        if ($has_volume) {
          $citation .= ':';
        }
        $citation .= $pub['Pages'];
      }
    }

    $citation .= '.';

    return $citation;
  }
  /**
   * Retrieves the minimal information to uniquely describe any publication.
   *
   * The 'Citation', 'Abstract', 'Authors' and 'URL' values are looked up as
   * pubprop records of the publication via chado_generate_var() and
   * chado_expand_var().
   *
   * @param $pub
   *   A publication object as created by chado_generate_var()
   *
   * @return
   *   An empty array when the publication's uniquename is 'null'. Otherwise an
   *   array with the following keys: 'Title', 'Citation', 'Abstract',
   *   'Authors', 'URL'. 'Abstract' is escaped with htmlspecialchars().
   *   'Citation', 'Abstract' and 'URL' are an empty string and 'Authors' is
   *   'N/A' when no matching property is found. All keys are term names in
   *   the Tripal Publication Ontology :TPUB.
   */
  function tripal_get_minimal_pub_info($pub) {
    // Chado has a null pub as default. We don't return anything for this.
    if ($pub->uniquename === 'null') {
      return array();
    }

    // Expand the title fields.
    $pub = chado_expand_var($pub, 'field', 'pub.title');
    $pub = chado_expand_var($pub, 'field', 'pub.volumetitle');

    // Look up the single-valued properties, in the same order as before:
    // citation, abstract, author list.
    $props = array();
    foreach (array('Citation', 'Abstract', 'Authors') as $prop_name) {
      $values = array(
        'pub_id' => $pub->pub_id,
        'type_id' => array(
          'name' => $prop_name,
        ),
      );
      $options = array(
        'include_fk' => array(),
      );
      $prop = chado_generate_var('pubprop', $values, $options);
      $props[$prop_name] = chado_expand_var($prop, 'field', 'pubprop.value');
    }

    // Each property is only dereferenced when a record was actually found.
    // The citation lookup previously read ->value unconditionally.
    $citation = '';
    if ($props['Citation']) {
      $citation = $props['Citation']->value;
    }

    $abstract_text = '';
    if ($props['Abstract']) {
      $abstract_text = htmlspecialchars($props['Abstract']->value);
    }

    $authors_list = 'N/A';
    if ($props['Authors']) {
      $authors_list = $props['Authors']->value;
    }

    // Expand the publication's database cross-references. The expanded records
    // are not used to build the return value.
    // NOTE(review): the call is retained in case callers rely on pub_dbxref
    // being expanded on the object they passed in -- confirm before removing.
    $options = array('return_array' => 1);
    $pub = chado_expand_var($pub, 'table', 'pub_dbxref', $options);

    // Get the URL: the first 'URL' property, if any.
    $values = array(
      'pub_id' => $pub->pub_id,
      'type_id' => array(
        'name' => 'URL',
      ),
    );
    $options = array(
      'return_array' => 1,
      'include_fk' => array(),
    );
    $urls = chado_generate_var('pubprop', $values, $options);
    $urls = chado_expand_var($urls, 'field', 'pubprop.value');
    $url = '';
    // Guard against a non-array result before indexing into it.
    if (is_array($urls) && isset($urls[0])) {
      $url = $urls[0]->value;
    }

    return array(
      'Title' => $pub->title,
      'Citation' => $citation,
      'Abstract' => $abstract_text,
      'Authors' => $authors_list,
      'URL' => $url,
    );
  }