tripal_pub.api.inc 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293
  1. <?php
  2. /**
  3. * @file
  4. * The Tripal Pub API
  5. *
  6. * @defgroup tripal_pub_api Publication Module API
  7. * @ingroup tripal_api
  8. */
  /**
   * Retrieves a list of publications as an associative array where
   * keys correspond directly with Tripal Pub CV terms.
   *
   * The search itself is delegated to a callback function whose name is
   * 'tripal_pub_remote_search_' followed by the lower-cased $remote_db value.
   * If no such function exists an empty array is returned.
   *
   * Side effect: when $page is greater than zero, $_GET['page'] is overwritten
   * with that page number and is not restored afterwards.
   *
   * @param $remote_db
   *   The name of the remote publication database to query. Valid values
   *   include: 'pubmed'.
   * @param $search_array
   *   An associative array containing the search criteria. The following keys
   *   are expected:
   *   - 'remote_db': Specifies the name of the remote publication database
   *   - 'num_criteria': Specifies the number of criteria present in the search
   *     array
   *   - 'days': The number of days to include in the search starting from today
   *   - 'criteria': An associative array containing the search criteria. There
   *     should be no less than 'num_criteria' elements in this array.
   *
   *   The following keys are expected in the 'criteria' array:
   *   - 'search_terms': A list of terms to search on, separated by spaces.
   *   - 'scope': The fields to search in the remote database. Valid values
   *     include: 'title', 'abstract', 'author' and 'any'
   *   - 'operation': The logical operation to use for this criteria. Valid
   *     values include: 'AND', 'OR' and 'NOT'.
   * @param $num_to_retrieve
   *   The number of records to retrieve. In cases with large numbers of
   *   records to retrieve, the remote database may limit the size of each
   *   retrieval.
   * @param $pager_id
   *   Optional. This function uses the 'tripal_pager_callback' function
   *   to page a set of results. This is helpful when generating results to
   *   be viewed online. The pager works identically to the pager_query function
   *   of Drupal. Simply provide a unique integer value for this argument. Each
   *   form on a single page should have a unique $pager_id.
   * @param $page
   *   Optional. If this function is called where the page for the pager cannot
   *   be set using the $_GET variable, use this argument to specify the page
   *   to retrieve. Integers and numeric strings are both accepted. A value of
   *   zero (the default) leaves $_GET['page'] untouched.
   *
   * @return
   *   Whatever the remote database callback returns, which is expected to be
   *   an array of pubs where each element is an associative array whose keys
   *   are Tripal Pub CV terms. An empty array when no callback exists.
   *
   * @ingroup tripal_pub_api
   */
  function tripal_pub_get_remote_search_results($remote_db, $search_array,
    $num_to_retrieve, $pager_id = 0, $page = 0) {

    // construct the callback function using the remote database name
    $callback = 'tripal_pub_remote_search_' . strtolower($remote_db);

    // normalize the requested page. is_numeric() is used instead of is_int()
    // so that a page number supplied as a numeric string (e.g. taken from a
    // URL or a command-line argument) is honored rather than silently ignored
    $page = is_numeric($page) ? (int) $page : 0;

    // manually set the $_GET['page'] parameter to trick the pager
    // into giving us the requested page
    if ($page > 0) {
      $_GET['page'] = $page;
    }

    // now call the callback function to get the results
    $pubs = array();
    if (function_exists($callback)) {
      $pubs = call_user_func($callback, $search_array, $num_to_retrieve, $pager_id);
    }

    return $pubs;
  }
  /**
   * Imports publications for every enabled publication importer.
   *
   * Each record of the {tripal_pub_import} table with disabled = 0 holds a
   * serialized search criteria array. For every such importer the remote
   * database named by the criteria's 'remote_db' key is queried page by page
   * (10 records per page) and each returned publication is added along with
   * its database cross-reference. Progress is printed to standard output.
   *
   * The whole load runs between tripal_db_start_transaction() and
   * tripal_db_commit_transaction().
   *
   * @ingroup tripal_pub_api
   */
  function tripal_pub_import_publications() {
    $num_to_retrieve = 10;
    $pager_id = 0;
    $num_pubs = 0;

    // get a persistent connection. If we cannot get a connection then let
    // the user know the loading will be slow
    $connection = tripal_db_persistent_chado();
    if (!$connection) {
      print "A persistant connection was not obtained. Loading will be slow\n";
    }

    tripal_db_start_transaction();
    if ($connection) {
      print "\nNOTE: Loading of publications is performed using a database transaction. \n" .
            "If the load fails or is terminated prematurely then the entire set of \n" .
            "insertions/updates is rolled back and will not be found in the database\n\n";
    }

    // get all of the loaders
    $sql = "SELECT * FROM {tripal_pub_import} WHERE disabled = 0";
    $results = db_query($sql);
    while ($import = db_fetch_object($results)) {

      // an importer whose criteria cannot be unserialized, or that names no
      // remote database, has nothing to search so it is skipped
      $criteria = unserialize($import->criteria);
      if (!is_array($criteria) || empty($criteria['remote_db'])) {
        watchdog('tripal_pub', "Skipping a publication importer because its search criteria could not be read.",
          array(), WATCHDOG_ERROR);
        continue;
      }
      $remote_db = $criteria['remote_db'];

      // every importer must start at its own first page. The search function
      // only overrides $_GET['page'] for pages greater than zero, so the value
      // left behind by the previous importer is cleared as well
      $page = 0;
      unset($_GET['page']);

      do {
        // retrieve the pubs for this page. We'll retrieve 10 at a time
        $pubs = tripal_pub_get_remote_search_results($remote_db, $criteria, $num_to_retrieve, $pager_id, $page);
        if (!is_array($pubs)) {
          // a failed search is treated as an empty page, which ends this importer
          $pubs = array();
        }

        // now add the publications
        foreach ($pubs as $pub) {
          $pub_id = tripal_pub_add_publication($pub);
          if ($pub_id) {
            tripal_pub_add_pub_dbxref($pub_id, $pub);
          }
          $num_pubs++;
          print $num_pubs . ". " . $pub['Publication Database'] . ' ' . $pub['Pub Accession'] . "\n";
        }
        $page++;
      }
      // continue looping until we have a $pubs array that does not have
      // our requested number of records. This means we've hit the end
      while (count($pubs) == $num_to_retrieve);
    }

    // transaction is complete
    tripal_db_commit_transaction();

    print "Done.\n";
  }
  /**
   * Links a publication to its accession in a publication database.
   *
   * If a pub_dbxref record for the publication and accession already exists it
   * is returned. Otherwise the database and the database cross-reference are
   * created when missing and a new pub_dbxref record is inserted.
   *
   * @param $pub_id
   *   The pub_id of the publication to link.
   * @param $pub
   *   An associative array of publication details. The 'Pub Accession' and
   *   'Publication Database' keys are read.
   *
   * @return
   *   The first matching record from tripal_core_chado_select() when the link
   *   already exists, the result of tripal_core_chado_insert() when a new link
   *   is created, or FALSE on failure.
   */
  function tripal_pub_add_pub_dbxref($pub_id, $pub) {

    // check to see if the pub_dbxref record already exists. The nested arrays
    // identify the dbxref by accession and database name
    $values = array(
      'dbxref_id' => array(
        'accession' => $pub['Pub Accession'],
        'db_id' => array(
          'name' => $pub['Publication Database'],
        ),
      ),
      'pub_id' => $pub_id,
    );
    $options = array('statement_name' => 'sel_pubdbxref_dbpu');
    $results = tripal_core_chado_select('pub_dbxref', array('*'), $values, $options);

    // if the pub_dbxref record exists then we don't need to re-add it.
    if (is_array($results) && count($results) > 0) {
      return $results[0];
    }

    // make sure our database already exists
    $db = tripal_db_add_db($pub['Publication Database']);
    if (!$db) {
      // without a database record there is no db_id to look the accession up with
      watchdog('tripal_pub', "Cannot add publication database: %db.",
        array('%db' => $pub['Publication Database']), WATCHDOG_ERROR);
      return FALSE;
    }

    // get the database cross-reference
    $dbxvalues = array(
      'accession' => $pub['Pub Accession'],
      'db_id' => $db->db_id,
    );
    $dbxoptions = array('statement_name' => 'sel_dbxref_acdb');
    $results = tripal_core_chado_select('dbxref', array('dbxref_id'), $dbxvalues, $dbxoptions);

    // if the accession doesn't exist then add it so that the nested lookup
    // used by the insert below can find it
    if (!is_array($results) || count($results) == 0) {
      tripal_db_add_dbxref($db->db_id, $pub['Pub Accession']);
    }

    // now add the record
    $options = array('statement_name' => 'ins_pubdbxref_dbpu');
    $results = tripal_core_chado_insert('pub_dbxref', $values, $options);
    if (!$results) {
      watchdog('tripal_pub', "Cannot add publication dbxref: %db:%accession.",
        array('%db' => $pub['Publication Database'], '%accession' => $pub['Pub Accession']), WATCHDOG_ERROR);
      return FALSE;
    }

    return $results;
  }
  /**
   * Adds a publication and its properties, reusing an existing record if found.
   *
   * A publication is looked up by title and year. When exactly one match
   * exists its pub_id is reused; when none exists a new pub record is
   * inserted; when several exist the publication is rejected. The remaining
   * details are then stored as properties of the publication.
   *
   * @param $pub_details
   *   An associative array of publication details keyed by term name. The
   *   keys 'Title', 'Year', 'Volume', 'Journal Name', 'Issue', 'Pages',
   *   'Citation' and 'Publication Type' (a list whose first element is used as
   *   the record type) are read when inserting the pub record. The 'Authors'
   *   value is handed to tripal_pub_add_authors(). Every other key is stored
   *   as a property; array values are stored one property per element.
   *
   * @return
   *   The pub_id of the existing or newly inserted publication, or FALSE if
   *   the publication could not be looked up, is ambiguous, has an unknown
   *   type or could not be inserted.
   */
  function tripal_pub_add_publication($pub_details) {

    // check to see if the publication already exists
    $pub_id = 0;
    $values = array(
      'title' => $pub_details['Title'],
      'pyear' => $pub_details['Year'],
    );
    $options = array('statement_name' => 'sel_pub_tipy');
    $results = tripal_core_chado_select('pub', array('*'), $values, $options);

    // anything other than an array cannot be counted or indexed below.
    // NOTE(review): presumably the select reports failure with a non-array
    // value such as FALSE -- confirm against tripal_core_chado_select()
    if (!is_array($results)) {
      watchdog('tripal_pub', "Cannot look up the publication with title: %title",
        array('%title' => $pub_details['Title']), WATCHDOG_ERROR);
      return FALSE;
    }

    $num_found = count($results);
    if ($num_found > 1) {
      watchdog('tripal_pub', "The publication with the same title is present multiple times. Cannot ".
        "determine which to use. Title: %title", array('%title' => $pub_details['Title']), WATCHDOG_ERROR);
      return FALSE;
    }
    if ($num_found == 1) {
      $pub_id = $results[0]->pub_id;
    }
    // add the publication if it doesn't exist
    else {
      // get the publication type (use the first publication type, any others
      // will get stored as properties)
      $type_name = NULL;
      if (isset($pub_details['Publication Type']) && is_array($pub_details['Publication Type']) &&
          isset($pub_details['Publication Type'][0])) {
        $type_name = $pub_details['Publication Type'][0];
      }
      $pub_type = $type_name ? tripal_cv_get_cvterm_by_name($type_name, NULL, 'tripal_pub') : FALSE;
      if (!$pub_type) {
        // report the name that was searched for, not the failed lookup result
        watchdog('tripal_pub', "Cannot find publication type: %type",
          array('%type' => $type_name), WATCHDOG_ERROR);
        return FALSE;
      }

      // if the publication does not exist then create it.
      $values = array(
        'title' => $pub_details['Title'],
        'volume' => $pub_details['Volume'],
        'series_name' => $pub_details['Journal Name'],
        'issue' => $pub_details['Issue'],
        'pyear' => $pub_details['Year'],
        'pages' => $pub_details['Pages'],
        'uniquename' => $pub_details['Citation'],
        'type_id' => $pub_type->cvterm_id,
      );
      $options = array('statement_name' => 'ins_pub_tivoseispypaunty');
      $pub = tripal_core_chado_insert('pub', $values, $options);
      if (!$pub) {
        watchdog('tripal_pub', "Cannot insert the publication with title: %title",
          array('%title' => $pub_details['Title']), WATCHDOG_ERROR);
        return FALSE;
      }
      $pub_id = $pub['pub_id'];
    }

    // these details live in columns of the pub record and are never stored
    // as properties
    $column_keys = array('Title', 'Volume', 'Journal Name', 'Issue', 'Year', 'Pages');

    // now add in any other items that remain as properties of the publication
    foreach ($pub_details as $key => $value) {

      // skip the column-backed details before doing any term lookup for them
      if (in_array($key, $column_keys, TRUE)) {
        continue;
      }

      // get the cvterm by name or synonym
      $cvterm = tripal_cv_get_cvterm_by_name($key, NULL, 'tripal_pub');
      if (!$cvterm) {
        $cvterm = tripal_cv_get_cvterm_by_synonym($key, NULL, 'tripal_pub');
      }
      if (!$cvterm) {
        watchdog('tripal_pub', "Cannot find term: '%prop'. Skipping.",
          array('%prop' => $key), WATCHDOG_ERROR);
        continue;
      }

      // authors are handled separately rather than stored as properties
      if ($key === 'Authors') {
        tripal_pub_add_authors($pub_id, $value);
        continue;
      }

      $success = TRUE;
      if (is_array($value)) {
        foreach ($value as $subkey => $subvalue) {
          // if the key is an integer then this array is a simple list and
          // we will insert using the primary key. Otherwise, use the new key
          $term = is_int($subkey) ? $key : $subkey;
          if (!tripal_core_insert_property('pub', $pub_id, $term, 'tripal_pub', $subvalue, TRUE)) {
            // remember the failure but keep adding the remaining elements
            $success = FALSE;
          }
        }
      }
      else {
        $success = tripal_core_insert_property('pub', $pub_id, $key, 'tripal_pub', $value, TRUE);
      }

      if (!$success) {
        watchdog('tripal_pub', "Cannot add property '%prop' to publication. Skipping.",
          array('%prop' => $key), WATCHDOG_ERROR);
      }
    }

    return $pub_id;
  }
  /**
   * Placeholder for storing the authors of a publication.
   *
   * Currently nothing is stored: the function only prints a human-readable
   * dump of the authors it was given.
   *
   * @param $pub_id
   *   The pub_id of the publication the authors belong to. Not used yet.
   * @param $authors
   *   The authors value taken from the publication details.
   */
  function tripal_pub_add_authors($pub_id, $authors) {
    // build the dump as a string first, then write it out
    $dump = print_r($authors, TRUE);
    echo $dump;
  }
  263. }