tripal_pub.api.inc 32 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987
  1. <?php
  2. /**
  3. * @file
  4. * The Tripal Pub API
  5. *
  6. * @defgroup tripal_pub_api Publication Module API
  7. * @ingroup tripal_api
  8. */
  9. /*
  10. * Retrieves a list of publications as an associated array where
  11. * keys correspond directly with Tripal Pub CV terms.
  12. *
  13. * @param remote_db
  14. * The name of the remote publication database to query. These names should
  15. * match the name of the databases in the Chado 'db' table. Currently
  16. * supported databass include
  17. * 'PMID': PubMed
  18. *
  19. * @param search_array
  20. * An associate array containing the search criteria. The following key
  21. * are expected
  22. * 'remote_db': Specifies the name of the remote publication database
  23. * 'num_criteria': Specifies the number of criteria present in the search array
  24. * 'days': The number of days to include in the search starting from today
  25. * 'criteria': An associate array containing the search critiera. There should
  26. * be no less than 'num_criteria' elements in this array.
  27. *
  28. * The following keys are expected in the 'criteria' array
  29. * 'search_terms': A list of terms to search on, separated by spaces.
  30. * 'scope': The fields to search in the remote database. Valid values
  31. * include: 'title', 'abstract', 'author' and 'any'
  32. * 'operation': The logical operation to use for this criteria. Valid
  33. * values include: 'AND', 'OR' and 'NOT'.
  34. * @param $num_to_retrieve
  35. * The number of records to retrieve. In cases with large numbers of
  36. * records to retrieve, the remote database may limit the size of each
  37. * retrieval.
  38. * @param $pager_id
  39. * Optional. This function uses the 'tripal_pager_callback' function
  40. * to page a set of results. This is helpful when generating results to
  41. * be view online. The pager works identical to the pager_query function
  42. * of drupal. Simply provide a unique integer value for this argument. Each
  43. * form on a single page should have a unique $pager_id.
  44. * @param $page
  45. * Optional. If this function is called where the
  46. * page for the pager cannot be set using the $_GET variable, use this
  47. * argument to specify the page to retrieve.
  48. *
  49. * @return
  50. * Returns an array of pubs where each element is
  51. * an associative array where the keys are Tripal Pub CV terms.
  52. *
  53. * @ingroup tripal_pub_api
  54. */
  55. function tripal_pub_get_remote_search_results($remote_db, $search_array,
  56. $num_to_retrieve, $pager_id = 0, $page = 0) {
  57. // manually set the $_GET['page'] parameter to trick the pager
  58. // into giving us the requested page
  59. if (is_int($page) and $page > 0) {
  60. $_GET['page'] = $page;
  61. }
  62. // now call the callback function to get the results
  63. $callback = "tripal_pub_remote_search_$remote_db";
  64. $pubs = array();
  65. if (function_exists($callback)) {
  66. $pubs = call_user_func($callback, $search_array, $num_to_retrieve, $pager_id);
  67. }
  68. return $pubs;
  69. }
  70. /*
  71. * @ingroup tripal_pub_api
  72. */
  73. function tripal_pub_get_raw_data($dbxref) {
  74. if(preg_match('/^(.*?):(.*?)$/', $dbxref, $matches)) {
  75. $remote_db = $matches[1];
  76. $accession = $matches[2];
  77. // check that the database is supported
  78. $supported_dbs = variable_get('tripal_pub_supported_dbs', array());
  79. if(!in_array($remote_db, $supported_dbs)) {
  80. return "Unsupported database: $dbxref";
  81. }
  82. $search = array(
  83. 'num_criteria' => 1,
  84. 'remote_db' => $remote_db,
  85. 'criteria' => array(
  86. '1' => array(
  87. 'search_terms' => "$remote_db:$accession",
  88. 'scope' => 'id',
  89. 'operation' => '',
  90. 'is_phrase' => 0,
  91. ),
  92. ),
  93. );
  94. $pubs = tripal_pub_get_remote_search_results($remote_db, $search, 1, 0);
  95. return '<textarea cols=80 rows=20>' . $pubs[0]['raw'] . '</textarea>';
  96. }
  97. return 'Invalid DB xref';
  98. }
  99. /*
  100. * @ingroup tripal_pub_api
  101. */
  102. function tripal_pub_update_publications($do_contact = FALSE, $dbxref = NULL, $db = NULL) {
  103. // get a persistent connection
  104. $connection = tripal_db_persistent_chado();
  105. if (!$connection) {
  106. print "A persistant connection was not obtained. Loading will be slow\n";
  107. }
  108. // if we cannot get a connection then let the user know the loading will be slow
  109. tripal_db_start_transaction();
  110. if ($connection) {
  111. print "\nNOTE: Loading of publications is performed using a database transaction. \n" .
  112. "If the load fails or is terminated prematurely then the entire set of \n" .
  113. "insertions/updates is rolled back and will not be found in the database\n\n";
  114. }
  115. // get a list of all publications by their Dbxrefs that have supported databases
  116. $sql = "
  117. SELECT DB.name as db_name, DBX.accession
  118. FROM pub P
  119. INNER JOIN pub_dbxref PDBX ON P.pub_id = PDBX.pub_id
  120. INNER JOIN dbxref DBX ON DBX.dbxref_id = PDBX.dbxref_id
  121. INNER JOIN db DB ON DB.db_id = DBX.db_id
  122. ";
  123. $args = array();
  124. if ($dbxref and preg_match('/^(.*?):(.*?)$/', $dbxref, $matches)) {
  125. $dbname = $matches[1];
  126. $accession = $matches[2];
  127. $sql .= "WHERE DBX.accession = '%s' and DB.name = '%s' ";
  128. $args[] = $accession;
  129. $args[] = $dbname;
  130. }
  131. elseif ($db) {
  132. $sql .= " WHERE DB.name = '%s' ";
  133. $args[] = $db;
  134. }
  135. $sql .= "ORDER BY DB.name, P.pub_id";
  136. $results = chado_query($sql, $args);
  137. $num_to_retrieve = 100;
  138. $i = 0; // count the number of IDs. When we hit $num_to_retrieve we'll do the query
  139. $curr_db = ''; // keeps track of the current current database
  140. $ids = array(); // the list of IDs for the database
  141. $search = array(); // the search array passed to the search function
  142. // iterate through the pub IDs
  143. while ($pub = db_fetch_object($results)) {
  144. $accession = $pub->accession;
  145. $remote_db = $pub->db_name;
  146. // here we need to only update publications for databases we support
  147. $supported_dbs = variable_get('tripal_pub_supported_dbs', array());
  148. if(!in_array($remote_db, $supported_dbs)) {
  149. continue;
  150. }
  151. $search = array(
  152. 'num_criteria' => 1,
  153. 'remote_db' => $remote_db,
  154. 'criteria' => array(
  155. '1' => array(
  156. 'search_terms' => "$remote_db:$accession",
  157. 'scope' => 'id',
  158. 'operation' => '',
  159. 'is_phrase' => 0,
  160. ),
  161. ),
  162. );
  163. $pubs = tripal_pub_get_remote_search_results($remote_db, $search, 1, 0);
  164. tripal_pub_add_publications($pubs, $do_contact, TRUE);
  165. $i++;
  166. }
  167. // transaction is complete
  168. tripal_db_commit_transaction();
  169. print "Transaction Complete\n";
  170. // sync the newly added publications with Drupal
  171. print "Syncing publications with Drupal...\n";
  172. tripal_pub_sync_pubs();
  173. // if the caller wants to create contacts then we should sync them
  174. if ($do_contact) {
  175. print "Syncing contacts with Drupal...\n";
  176. tripal_contact_sync_contacts();
  177. }
  178. print "Done.\n";
  179. }
  180. /*
  181. * @ingroup tripal_pub_api
  182. */
  183. function tripal_pub_import_publications($report_email = FALSE, $do_update = FALSE) {
  184. $num_to_retrieve = 100;
  185. $pager_id = 0;
  186. $page = 0;
  187. $num_pubs = 0;
  188. // get a persistent connection
  189. $connection = tripal_db_persistent_chado();
  190. if (!$connection) {
  191. print "A persistant connection was not obtained. Loading will be slow\n";
  192. }
  193. // if we cannot get a connection then let the user know the loading will be slow
  194. tripal_db_start_transaction();
  195. if ($connection) {
  196. print "\nNOTE: Loading of publications is performed using a database transaction. \n" .
  197. "If the load fails or is terminated prematurely then the entire set of \n" .
  198. "insertions/updates is rolled back and will not be found in the database\n\n";
  199. }
  200. // get all of the loaders
  201. $args = array();
  202. $sql = "SELECT * FROM {tripal_pub_import} WHERE disabled = 0 ";
  203. $results = db_query($sql, $args);
  204. $do_contact = FALSE;
  205. $reports = array();
  206. while ($import = db_fetch_object($results)) {
  207. print "Importing: " . $import->name . "\n";
  208. // keep track if any of the importers want to create contacts from authors
  209. if ($import->do_contact == 1) {
  210. $do_contact = TRUE;
  211. }
  212. $criteria = unserialize($import->criteria);
  213. $remote_db = $criteria['remote_db'];
  214. do {
  215. // retrieve the pubs for this page. We'll retreive 10 at a time
  216. $pubs = tripal_pub_get_remote_search_results($remote_db, $criteria, $num_to_retrieve, $pager_id, $page);
  217. $reports[$import->name] = tripal_pub_add_publications($pubs, $import->do_contact, $do_update);
  218. $page++;
  219. }
  220. // continue looping until we have a $pubs array that does not have
  221. // our requested numer of records. This means we've hit the end
  222. while (count($pubs) == $num_to_retrieve);
  223. }
  224. // transaction is complete
  225. tripal_db_commit_transaction();
  226. print "Transaction Complete\n";
  227. // sync the newly added publications with Drupal. If the user
  228. // requested a report then we don't want to print any syncing information
  229. // so pass 'FALSE' to the sync call
  230. print "Syncing publications with Drupal...\n";
  231. tripal_pub_sync_pubs();
  232. // iterate through each of the reports and generate a final report with HTML links
  233. $HTML_report = '';
  234. if ($report_email) {
  235. $HTML_report .= "<html>";
  236. global $base_url;
  237. foreach ($reports as $importer => $report) {
  238. $total = count($report['inserted']);
  239. $HTML_report .= "<b>$total new publications from importer: $importer</b><br><ol>\n";
  240. foreach ($report['inserted'] as $pub) {
  241. $item = $pub['Title'];
  242. if ($pub['pub_id']) {
  243. $item = l($pub['Title'], "$base_url/pub/" . $pub['pub_id']);
  244. }
  245. $HTML_report .= "<li>$item</li>\n";
  246. }
  247. $HTML_report .= "</ol>\n";
  248. }
  249. $HTML_report .= "</html>";
  250. $site_email = variable_get('site_mail', '');
  251. $params = array(
  252. 'message' => $HTML_report
  253. );
  254. drupal_mail('tripal_pub', 'import_report', $report_email, language_default(), $params, $site_email, TRUE);
  255. }
  256. // if any of the importers wanted to create contacts from the authors then sync them
  257. if($do_contact) {
  258. print "Syncing contacts with Drupal...\n";
  259. tripal_contact_sync_contacts();
  260. }
  261. print "Done.\n";
  262. }
  263. /*
  264. * @ingroup tripal_pub_api
  265. */
  266. function tripal_pub_import_by_dbxref($pub_dbxref, $do_contact = FALSE, $do_update) {
  267. $num_to_retrieve = 1;
  268. $pager_id = 0;
  269. $page = 0;
  270. $num_pubs = 0;
  271. // get a persistent connection
  272. $connection = tripal_db_persistent_chado();
  273. if (!$connection) {
  274. print "A persistant connection was not obtained. Loading will be slow\n";
  275. }
  276. // if we cannot get a connection then let the user know the loading will be slow
  277. tripal_db_start_transaction();
  278. if ($connection) {
  279. print "\nNOTE: Loading of the publication is performed using a database transaction. \n" .
  280. "If the load fails or is terminated prematurely then the entire set of \n" .
  281. "insertions/updates is rolled back and will not be found in the database\n\n";
  282. }
  283. if(preg_match('/^(.*?):(.*?)$/', $pub_dbxref, $matches)) {
  284. $dbname = $matches[1];
  285. $accession = $matches[2];
  286. $criteria = array(
  287. 'num_criteria' => 1,
  288. 'remote_db' => $dbname,
  289. 'criteria' => array(
  290. '1' => array(
  291. 'search_terms' => "$dbname:$accession",
  292. 'scope' => 'id',
  293. 'operation' => '',
  294. 'is_phrase' => 0,
  295. ),
  296. ),
  297. );
  298. $remote_db = $criteria['remote_db'];
  299. $pubs = tripal_pub_get_remote_search_results($remote_db, $criteria, $num_to_retrieve, $pager_id, $page);
  300. $pub_id = tripal_pub_add_publications($pubs, $do_contact, $do_update);
  301. }
  302. // transaction is complete
  303. tripal_db_commit_transaction();
  304. print "Transaction Complete\n";
  305. // sync the newly added publications with Drupal
  306. print "Syncing publications with Drupal...\n";
  307. tripal_pub_sync_pubs();
  308. // if any of the importers wanted to create contacts from the authors then sync them
  309. if($do_contact) {
  310. print "Syncing contacts with Drupal...\n";
  311. tripal_contact_sync_contacts();
  312. }
  313. print "Done.\n";
  314. }
  315. /*
  316. *
  317. */
  318. function tripal_pub_add_publications($pubs, $do_contact, $update = FALSE) {
  319. $report = array();
  320. $report['error'] = 0;
  321. $report['inserted'] = array();
  322. $report['skipped'] = array();
  323. $total_pubs = count($pubs);
  324. // iterate through the publications and add each one
  325. $i = 1;
  326. foreach ($pubs as $pub) {
  327. $memory = number_format(memory_get_usage()) . " bytes";
  328. print "Processing $i of $total_pubs. Memory usage: $memory.\r";
  329. // add the publication to Chado
  330. $action = '';
  331. $pub_id = tripal_pub_add_publication($pub, $action, $do_contact, $update);
  332. if ($pub_id){
  333. // add the publication cross reference (e.g. to PubMed)
  334. if ($pub_id and $pub['Publication Dbxref']) {
  335. $pub_dbxref = tripal_pub_add_pub_dbxref($pub_id, $pub['Publication Dbxref']);
  336. }
  337. $pub['pub_id'] = $pub_id;
  338. }
  339. switch ($action) {
  340. case 'error':
  341. $report['error']++;
  342. break;
  343. case 'inserted':
  344. $report['inserted'][] = $pub;
  345. break;
  346. case 'updated':
  347. $report['updated'][] = $pub;
  348. break;
  349. case 'skipped':
  350. $report['skipped'][] = $pub;
  351. break;
  352. }
  353. $i++;
  354. }
  355. print "\n";
  356. return $report;
  357. }
  358. /*
  359. *
  360. */
  361. function tripal_pub_add_pub_dbxref($pub_id, $pub_dbxref) {
  362. // break apart the dbxref
  363. $dbname = '';
  364. $accession = '';
  365. if(preg_match('/^(.*?):(.*?)$/', $pub_dbxref, $matches)) {
  366. $dbname = $matches[1];
  367. $accession = $matches[2];
  368. }
  369. else {
  370. return FALSE;
  371. }
  372. // check to see if the pub_dbxref record already exist
  373. $values = array(
  374. 'dbxref_id' => array(
  375. 'accession' => $accession,
  376. 'db_id' => array(
  377. 'name' => $dbname,
  378. ),
  379. ),
  380. 'pub_id' => $pub_id,
  381. );
  382. $options = array('statement_name' => 'sel_pubdbxref_dbpu');
  383. $results = tripal_core_chado_select('pub_dbxref', array('*'), $values, $options);
  384. // if the pub_dbxref record exist then we don't need to re-add it.
  385. if(count($results) > 0) {
  386. return $results[0];
  387. }
  388. // make sure our database already exists
  389. $db = tripal_db_add_db($dbname);
  390. // get the database cross-reference
  391. $dbxvalues = array(
  392. 'accession' => $accession,
  393. 'db_id' => $db->db_id,
  394. );
  395. $dbxoptions = array('statement_name' => 'sel_dbxref_acdb');
  396. $results = tripal_core_chado_select('dbxref', array('dbxref_id'), $dbxvalues, $dbxoptions);
  397. // if the accession doesn't exist then add it
  398. if(count($results) == 0){
  399. $dbxref = tripal_db_add_dbxref($db->db_id, $accession);
  400. }
  401. else {
  402. $dbxref = $results[0];
  403. }
  404. // now add the record
  405. $options = array('statement_name' => 'ins_pubdbxref_dbpu');
  406. $results = tripal_core_chado_insert('pub_dbxref', $values, $options);
  407. if (!$results) {
  408. watchdog('tripal_pub', "Cannot add publication dbxref: %db:%accession.",
  409. array('%db' => $dbname, '%accession' => $accession). WATCHDOG_ERROR);
  410. return FALSE;
  411. }
  412. return $results;
  413. }
  414. /**
  415. * Returns the list of publications that are assigned the database
  416. * cross-reference provided
  417. *
  418. * @param $pub_dbxref
  419. * The database cross reference accession. It should be in the form
  420. * DB:ACCESSION, where DB is the database name and ACCESSION is the
  421. * unique publication identifier (e.g. PMID:4382934)
  422. *
  423. * @return
  424. * Returns an array of all the publications that have the provided
  425. * cross reference. If no publications match, then an empty array
  426. * is returned.
  427. *
  428. * @ingroup tripal_pub_api
  429. *
  430. */
  431. function tripal_pub_get_pubs_by_dbxref($pub_dbxref) {
  432. $return = array();
  433. if(preg_match('/^(.*?):(.*?)$/', $pub_dbxref, $matches)) {
  434. $dbname = $matches[1];
  435. $accession = $matches[2];
  436. $values = array(
  437. 'dbxref_id' => array (
  438. 'accession' => $accession,
  439. 'db_id' => array(
  440. 'name' => $dbname
  441. ),
  442. ),
  443. );
  444. $options = array('statement_name' => 'sel_pubdbxref_db');
  445. $results = tripal_core_chado_select('pub_dbxref', array('pub_id'), $values, $options);
  446. foreach ($results as $index => $pub) {
  447. $return[] = $pub->pub_id;
  448. }
  449. }
  450. return $return;
  451. }
  452. /**
  453. * Returns the list of publications that match a given title, type and year
  454. *
  455. * @param title
  456. * The title of the publication to look for
  457. * @param type
  458. * Optional. The publication type. The value of this field should come from
  459. * the Tripal Pub vocabulary. This should be the type name (e.g. cvterm.name)
  460. * @param year
  461. * Optional. The year the publication was published.
  462. * @param series_name
  463. * Optional. The name of the series (e.g. Journal name)
  464. *
  465. * @return
  466. * Returns an array of all the publications that have the provided
  467. * cross reference. If no publications match, then an empty array
  468. * is returned.
  469. *
  470. * @ingroup tripal_pub_api
  471. *
  472. */
  473. function tripal_pub_get_pubs_by_title_type_pyear_series($title, $type = NULL, $pyear = NULL, $series_name = NULL) {
  474. $return = array();
  475. // build the values array for the query.
  476. $values = array(
  477. 'title' => $title,
  478. );
  479. $stmnt_suffix = 'ti';
  480. if ($type) {
  481. $values['type_id'] = array(
  482. 'name' => $type,
  483. 'cv_id' => array(
  484. 'name' => 'tripal_pub'
  485. )
  486. );
  487. $stmnt_suffix .= 'ty';
  488. }
  489. if ($pyear) {
  490. $values['pyear'] = $pyear;
  491. $stmnt_suffix .= 'py';
  492. }
  493. if ($series_name) {
  494. $values['series_name'] = strtolower($series_name);
  495. $stmnt_suffix .= 'se';
  496. }
  497. $options = array(
  498. 'statement_name' => 'sel_pub_' . $stmnt_suffix,
  499. 'case_insensitive_columns' => array('title', 'series_name'),
  500. );
  501. $results = tripal_core_chado_select('pub', array('pub_id'), $values, $options);
  502. // iterate through any matches and pull out the pub_id
  503. foreach ($results as $index => $pub) {
  504. $return[] = $pub->pub_id;
  505. }
  506. return $return;
  507. }
  508. /**
  509. * Returns the list of publications that match a given title, type and year
  510. *
  511. * @param title
  512. * The title of the publication to look for
  513. * @param type
  514. * Optional. The publication type. The value of this field should come from
  515. * the Tripal Pub vocabulary. This should be the type name (e.g. cvterm.name)
  516. * @param year
  517. * Optional. The year the publication was published.
  518. * @param series_name
  519. * Optional. The name of the series (e.g. Journal name)
  520. *
  521. * @return
  522. * Returns an array of all the publications that have the provided
  523. * cross reference. If no publications match, then an empty array
  524. * is returned.
  525. *
  526. * @ingroup tripal_pub_api
  527. *
  528. */
  529. function tripal_pub_get_pub_by_uniquename($name) {
  530. $return = array();
  531. // build the values array for the query.
  532. $values = array(
  533. 'uniquename' => $name,
  534. );
  535. $options = array(
  536. 'statement_name' => 'sel_pub_un',
  537. 'case_insensitive_columns' => array('uniquename'),
  538. );
  539. $results = tripal_core_chado_select('pub', array('pub_id'), $values, $options);
  540. // iterate through any matches and pull out the pub_id
  541. foreach ($results as $index => $pub) {
  542. $return[] = $pub->pub_id;
  543. }
  544. return $return;
  545. }
  546. /**
  547. * Adds a new publication to the Chado, along with all properties and
  548. * database cross-references. If the publication does not already exist
  549. * in Chado then it is added. If it does exist nothing is done. If
  550. * the $update parameter is TRUE then the publication is updated if it exists.
  551. *
  552. * @param $pub_details
  553. * An associative array containing all of the details about the publication.
  554. * @param $action
  555. * This variable will get set to a text value indicating the action that was
  556. * performed. The values include 'skipped', 'inserted', 'updated' or 'error'.
  557. * @param $do_contact
  558. * Optional. Set to TRUE if a contact entry should be added to the Chado contact table
  559. * for authors of the publication.
  560. * @param $update_if_exists
  561. * Optional. If the publication already exists then this function will return
  562. * without adding a new publication. However, set this value to TRUE to force
  563. * the function to pudate the publication using the $pub_details that are provided.
  564. *
  565. * @return
  566. * If the publication already exists, is inserted or updated then the publication
  567. * ID is returned, otherwise FALSE is returned. If the publication already exists
  568. * and $update_if_exists is not TRUE then the $action variable is set to 'skipped'.
  569. * If the publication already exists and $update_if_exists is TRUE and if the update
  570. * was successful then $action is set to 'updated'. Otherwise on successful insert
  571. * the $action variable is set to 'inserted'. If the function failes then the
  572. * $action variable is set to 'error'
  573. *
  574. */
  575. function tripal_pub_add_publication($pub_details, &$action, $do_contact = FALSE, $update_if_exists = FALSE) {
  576. $pub_id = 0;
  577. // first try to find the publication using the accession number. It will have
  578. // one if the pub has already been loaded for the publication database
  579. if ($pub_details['Publication Dbxref']) {
  580. $results = tripal_pub_get_pubs_by_dbxref($pub_details['Publication Dbxref']);
  581. if(count($results) == 1) {
  582. $pub_id = $results[0];
  583. }
  584. elseif (count($results) > 1) {
  585. watchdog('tripal_pub', "There are two publications with this accession: %db:%accession. Cannot determine which to update.",
  586. array('%db' => $dbname, '%accession' => $accession), WATCHDOG_ERROR);
  587. $action = 'error';
  588. return FALSE;
  589. }
  590. }
  591. // if we couldn't find a publication by the accession (which means it doesn't
  592. // yet exist or it has been added using a different publication database) then
  593. // try to find it using the title and publication year.
  594. if (!$pub_id and $pub_details['Title']) {
  595. $results = tripal_pub_get_pubs_by_title_type_pyear_series($pub_details['Title'], NULL, $pub_details['Year']);
  596. if (count($results) == 1) {
  597. $pub_id = $results[0];
  598. }
  599. elseif (count($results) > 1) {
  600. watchdog('tripal_pub', "The publication with the same title, type and year is present multiple times. Cannot ".
  601. "determine which to use. Title: '%title'. Type: '%type'. Year: '%year'",
  602. array('%title' => $pub_details['Title'], '%type' => $pub_details['Publication Type'], '%year' => $pub_details['Year']), WATCHDOG_ERROR);
  603. $action = 'error';
  604. return FALSE;
  605. }
  606. }
  607. print "PUB ID: $pub_id\n";
  608. // if there is a pub id and we've been told not to update then return
  609. if ($pub_id and !$update_if_exists) {
  610. $action = 'skipped';
  611. return $pub_id;
  612. }
  613. // get the publication type (use the first publication type, any others will get stored as properties)
  614. if (is_array($pub_details['Publication Type'])) {
  615. $pub_type = tripal_cv_get_cvterm_by_name($pub_details['Publication Type'][0], NULL, 'tripal_pub');
  616. }
  617. elseif ($pub_details['Publication Type']) {
  618. $pub_type = tripal_cv_get_cvterm_by_name($pub_details['Publication Type'], NULL, 'tripal_pub');
  619. }
  620. else {
  621. watchdog('tripal_pub', "The Publication Type is a required property but is missing", array(), WATCHDOG_ERROR);
  622. $action = 'error';
  623. return FALSE;
  624. }
  625. if (!$pub_type) {
  626. watchdog('tripal_pub', "Cannot find publication type: '%type'",
  627. array('%type' => $pub_details['Publication Type'][0]), WATCHDOG_ERROR);
  628. $action = 'error';
  629. return FALSE;
  630. }
  631. // build the values array for inserting or updating
  632. $values = array(
  633. 'title' => $pub_details['Title'],
  634. 'volume' => $pub_details['Volume'],
  635. 'series_name' => $pub_details['Journal Name'],
  636. 'issue' => $pub_details['Issue'],
  637. 'pyear' => $pub_details['Year'],
  638. 'pages' => $pub_details['Pages'],
  639. 'uniquename' => $pub_details['Citation'],
  640. 'type_id' => $pub_type->cvterm_id,
  641. );
  642. // if there is no pub_id then we need to do an insert.
  643. if (!$pub_id) {
  644. $options = array('statement_name' => 'ins_pub_tivoseispypaunty');
  645. $pub = tripal_core_chado_insert('pub', $values, $options);
  646. if (!$pub) {
  647. watchdog('tripal_pub', "Cannot insert the publication with title: %title",
  648. array('%title' => $pub_details['Title']), WATCHDOG_ERROR);
  649. $action = 'error';
  650. return FALSE;
  651. }
  652. $pub_id = $pub['pub_id'];
  653. $action = 'inserted';
  654. }
  655. // if there is a pub_id and we've been told to update, then do the update
  656. if ($pub_id and $update_if_exists) {
  657. $match = array('pub_id' => $pub_id);
  658. $options = array('statement_name' => 'up_pub_tivoseispypaunty');
  659. $success = tripal_core_chado_update('pub', $match, $values, $options);
  660. if (!$success) {
  661. watchdog('tripal_pub', "Cannot update the publication with title: %title",
  662. array('%title' => $pub_details['Title']), WATCHDOG_ERROR);
  663. $action = 'error';
  664. return FALSE;
  665. }
  666. $action = 'updated';
  667. }
  668. // before we add any new properties we need to remove those that are there if this
  669. // is an update. The only thing we don't want to remove are the 'Publication Dbxref'
  670. if ($update_if_exists) {
  671. $sql = "
  672. DELETE FROM {pubprop}
  673. WHERE
  674. pub_id = %d AND
  675. NOT type_id in (
  676. SELECT cvterm_id
  677. FROM {cvterm}
  678. WHERE name = 'Publication Dbxref'
  679. )
  680. ";
  681. chado_query($sql, $pub_id);
  682. }
  683. // iterate through the properties and add them
  684. foreach ($pub_details as $key => $value) {
  685. // the pub_details may have the raw search data (e.g. in XML from PubMed. We'll irgnore this for now
  686. if($key == 'raw') {
  687. continue;
  688. }
  689. // get the cvterm by name or synonym
  690. $cvterm = tripal_cv_get_cvterm_by_name($key, NULL, 'tripal_pub');
  691. if (!$cvterm) {
  692. $cvterm = tripal_cv_get_cvterm_by_synonym($key, NULL, 'tripal_pub');
  693. }
  694. if (!$cvterm) {
  695. watchdog('tripal_pub', "Cannot find term: '%prop'. Skipping.", array('%prop' => $key), WATCHDOG_ERROR);
  696. continue;
  697. }
  698. // skip details that won't be stored as properties
  699. if ($key == 'Author List') {
  700. tripal_pub_add_authors($pub_id, $value, $do_contact);
  701. continue;
  702. }
  703. if ($key == 'Title' or $key == 'Volume' or $key == 'Journal Name' or $key == 'Issue' or
  704. $key == 'Year' or $key == 'Pages') {
  705. continue;
  706. }
  707. $success = 0;
  708. if (is_array($value)) {
  709. foreach ($value as $subkey => $subvalue) {
  710. // if the key is an integer then this array is a simple list and
  711. // we will insert using the primary key. Otheriwse, use the new key
  712. if(is_int($subkey)) {
  713. $success = tripal_core_insert_property('pub', $pub_id, $key, 'tripal_pub', $subvalue, FALSE);
  714. }
  715. else {
  716. $success = tripal_core_insert_property('pub', $pub_id, $subkey, 'tripal_pub', $subvalue, FALSE);
  717. }
  718. }
  719. }
  720. else {
  721. $success = tripal_core_insert_property('pub', $pub_id, $key, 'tripal_pub', $value, TRUE);
  722. }
  723. if (!$success) {
  724. watchdog('tripal_pub', "Cannot add property '%prop' to publication. Skipping.",
  725. array('%prop' => $key), WATCHDOG_ERROR);
  726. continue;
  727. }
  728. }
  729. return $pub_id;
  730. }
  731. /*
  732. *
  733. */
  734. function tripal_pub_add_authors($pub_id, $authors, $do_contact) {
  735. $rank = 0;
  736. // first remove any of the existing pubauthor entires
  737. $sql = "DELETE FROM {pubauthor} WHERE pub_id = %d";
  738. chado_query($sql, $pub_id);
  739. // iterate through the authors and add them to the pubauthors and contact
  740. // tables of chado, then link them through the custom pubauthors_contact table
  741. foreach ($authors as $author) {
  742. // skip invalid author entires
  743. if ($author['valid'] == 'N') {
  744. continue;
  745. }
  746. // remove the 'valid' property as we don't have a CV term for it
  747. unset($author['valid']);
  748. // construct the contact.name field using the author information
  749. $name = '';
  750. $type = 'Person';
  751. if ($author['Given Name']) {
  752. $name .= $author['Given Name'];
  753. }
  754. if ($author['Surname']) {
  755. $name .= ' ' . $author['Surname'];
  756. }
  757. if ($author['Suffix']) {
  758. $name .= ' ' . $author['Suffix'];
  759. }
  760. if ($author['Collective']) {
  761. $name = $author['Collective'];
  762. $type = 'Collective';
  763. }
  764. $name = trim($name);
  765. // add an entry to the pubauthors table
  766. $values = array(
  767. 'pub_id' => $pub_id,
  768. 'rank' => $rank,
  769. 'surname' => $author['Surname'] ? $author['Surname'] : $author['Collective'],
  770. 'givennames' => $author['Given Name'],
  771. 'suffix' => $author['Suffix'],
  772. );
  773. $options = array('statement_name' => 'ins_pubauthor_idrasugisu');
  774. $pubauthor = tripal_core_chado_insert('pubauthor', $values, $options);
  775. // if the user wants us to create a contact for each author then do it.
  776. if ($do_contact) {
  777. // Add the contact
  778. $contact = tripal_contact_add_contact($name, '', $type, $author);
  779. // if we have succesfully added the contact and the pubauthor entries then we want to
  780. // link them together
  781. if ($contact and $pubauthor) {
  782. // link the pubauthor entry to the contact
  783. $values = array(
  784. 'pubauthor_id' => $pubauthor['pubauthor_id'],
  785. 'contact_id' => $contact['contact_id'],
  786. );
  787. $options = array('statement_name' => 'ins_pubauthorcontact_puco');
  788. $pubauthor_contact = tripal_core_chado_insert('pubauthor_contact', $values, $options);
  789. if (!$pubauthor_contact) {
  790. watchdog('tripal_pub', "Cannot link pub authro and contact.", array(), WATCHDOG_ERROR);
  791. }
  792. }
  793. }
  794. $rank++;
  795. }
  796. }
  797. /**
  798. * Retrieve properties of a given type for a given pub
  799. *
  800. * @param $pub_id
  801. * The pub_id of the properties you would like to retrieve
  802. * @param $property
  803. * The cvterm name of the properties to retrieve
  804. *
  805. * @return
  806. * An pub chado variable with the specified properties expanded
  807. *
  808. * @ingroup tripal_pub_api
  809. */
  810. function tripal_pub_get_property($pub_id, $property) {
  811. return tripal_core_get_property('pub', $pub_id, $property, 'tripal_pub');
  812. }
  813. /**
  814. * Insert a given property
  815. *
  816. * @param $pub_id
  817. * The pub_id of the property to insert
  818. * @param $property
  819. * The cvterm name of the property to insert
  820. * @param $value
  821. * The value of the property to insert
  822. * @param $update_if_present
  823. * A boolean indicated whether to update the record if it's already present
  824. *
  825. * @return
  826. * True of success, False otherwise
  827. *
  828. * @ingroup tripal_pub_api
  829. */
  830. function tripal_pub_insert_property($pub_id, $property, $value, $update_if_present = 0) {
  831. return tripal_core_insert_property('pub', $pub_id, $property, 'tripal_pub', $value, $update_if_present);
  832. }
  833. /**
  834. * Update a given property
  835. *
  836. * @param $pub_id
  837. * The pub_id of the property to update
  838. * @param $property
  839. * The cvterm name of the property to update
  840. * @param $value
  841. * The value of the property to update
  842. * @param $insert_if_missing
  843. * A boolean indicated whether to insert the record if it's absent
  844. *
  845. * Note: The property will be identified using the unique combination of the $pub_id and $property
  846. * and then it will be updated with the supplied value
  847. *
  848. * @return
  849. * True of success, False otherwise
  850. *
  851. * @ingroup tripal_pub_api
  852. */
  853. function tripal_pub_update_property($pub_id, $property, $value, $insert_if_missing = 0) {
  854. return tripal_core_update_property('pub', $pub_id, $property, 'tripal_pub', $value, $insert_if_missing);
  855. }
  856. /**
  857. * Delete a given property
  858. *
  859. * @param $pub_id
  860. * The pub_id of the property to delete
  861. * @param $property
  862. * The cvterm name of the property to delete
  863. *
  864. * Note: The property will be identified using the unique combination of the $pub_id and $property
  865. * and then it will be deleted
  866. *
  867. * @return
  868. * True of success, False otherwise
  869. *
  870. * @ingroup tripal_pub_api
  871. */
  872. function tripal_pub_delete_property($pub_id, $property) {
  873. return tripal_core_delete_property('pub', $pub_id, $property, 'tripal_pub');
  874. }
  875. /*
  876. *
  877. */
  878. function tripal_pub_create_citation($pub) {
  879. $citation = $pub['Authors'] . '. ' . $pub['Title'] . '. ';
  880. if ($pub['Journal Name']) {
  881. $citation .= $pub['Journal Name'] . '. ';
  882. }
  883. elseif ($pub['Journal Abbreviation']) {
  884. $citation .= $pub['Journal Abbreviation'] . '. ';
  885. }
  886. $citation .= $pub['Publication Date'];
  887. if ($pub['Volume'] or $pub['Issue'] or $pub['Pages']) {
  888. $citation .= '; ';
  889. }
  890. if ($pub['Volume']) {
  891. $citation .= $pub['Volume'];
  892. }
  893. if ($pub['Issue']) {
  894. $citation .= '(' . $pub['Issue'] . ')';
  895. }
  896. if ($pub['Pages']) {
  897. if($pub['Volume']) {
  898. $citation .= ':';
  899. }
  900. $citation .= $pub['Pages'];
  901. }
  902. $citation .= '.';
  903. return $citation;
  904. }