pubmed.inc 8.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297
  1. <?php
  2. /**
  3. * @file
  4. * Tripal Pub PubMed Interface
  5. *
  6. * @defgroup tripal_pub_pubmed PubMed Interface
  7. * @ingroup tripal_pub
  8. */
  /**
   * Searches PubMed for publications matching a set of search terms.
   *
   * The terms string is split on whitespace, each word is URL-encoded and the
   * words are joined with '+'. The result is handed to tripal_pager_callback()
   * together with the names of the range and count callbacks defined in this
   * file.
   *
   * @param $terms_str
   *   A whitespace-separated string of search terms.
   * @param $num_to_retrieve
   *   Passed through to tripal_pager_callback(); presumably the number of
   *   records per page -- confirm against that function.
   * @param $pager_id
   *   Passed through to tripal_pager_callback(); presumably the pager element
   *   id -- confirm against that function.
   *
   * @return
   *   Whatever tripal_pager_callback() returns for the current page.
   */
  function tripal_pub_remote_search_pubmed($terms_str, $num_to_retrieve, $pager_id) {
    // Convert the terms provided by the caller into a string of words separated
    // by a '+' symbol. Each word is URL-encoded because the string is placed
    // directly into the query string of the E-utilities request, where a raw
    // '&', '#' or quote character would otherwise corrupt the URL.
    $words = preg_split('/\s+/', trim($terms_str));
    $search_terms = implode('+', array_map('urlencode', $words));

    // Get the list of pubs for the search terms using a Drupal-style pager.
    return tripal_pager_callback('tripal_pub_remote_search_pubmed_range',
      'tripal_pub_remote_search_pubmed_count', $num_to_retrieve, $pager_id, $search_terms);
  }
  /**
   * Pager count callback: returns the total number of matching records.
   *
   * This function is used as the count callback with tripal_pager_callback().
   * It runs the search once, stores the returned WebEnv and QueryKey in the
   * session so that tripal_pub_remote_search_pubmed_range() can reuse them,
   * and returns the record count reported by the search.
   *
   * @param $terms
   *   The search terms, already formatted for use in the query URL.
   *
   * @return
   *   The number of matching records as an integer, or 0 if the query failed
   *   or did not report a count.
   */
  function tripal_pub_remote_search_pubmed_count($terms) {
    $results = tripal_pub_remote_search_pubmed_query($terms);

    // The query helper returns 0 (not an array) when it cannot connect.
    if (!is_array($results)) {
      return 0;
    }

    // Remember the history handles so the range callback can reuse this search.
    $_SESSION['tripal_pub_pubmed_query']['WebEnv'] = isset($results['WebEnv']) ? $results['WebEnv'] : '';
    $_SESSION['tripal_pub_pubmed_query']['QueryKey'] = isset($results['QueryKey']) ? $results['QueryKey'] : '';

    return isset($results['Count']) ? (int) $results['Count'] : 0;
  }
  /**
   * Pager range callback: returns the publications within the given range.
   *
   * This function is used as the range callback with tripal_pager_callback().
   * It reuses the WebEnv and QueryKey stored in the session by
   * tripal_pub_remote_search_pubmed_count() to fetch the list of PMIDs in the
   * requested range, then looks up and parses each PMID individually.
   *
   * @param $terms
   *   The search terms, already formatted for use in the query URL.
   * @param $start
   *   The zero-based offset of the first record to return.
   * @param $limit
   *   The maximum number of records to return.
   *
   * @return
   *   An array of publication arrays as produced by
   *   tripal_pub_remote_search_pubmed_parse_pubxml(). Empty if no stored query
   *   is available or nothing could be retrieved.
   */
  function tripal_pub_remote_search_pubmed_range($terms, $start = 0, $limit = 10) {
    $pubs = array();

    // Get the query_key and the web_env from the previous count query. Without
    // them there is no stored search to page through.
    if (!isset($_SESSION['tripal_pub_pubmed_query']['QueryKey']) ||
        !isset($_SESSION['tripal_pub_pubmed_query']['WebEnv'])) {
      return $pubs;
    }
    $query_key = $_SESSION['tripal_pub_pubmed_query']['QueryKey'];
    $web_env = $_SESSION['tripal_pub_pubmed_query']['WebEnv'];

    // Repeat the search performed previously (using WebEnv & QueryKey) to
    // retrieve the PMIDs within the range specified. The PMIDs are returned as
    // a text list, one per line.
    $pmids_txt = tripal_pub_remote_search_pubmed_fetch($terms, $query_key, $web_env, 'uilist', $start, $limit);

    // Iterate through each PMID and get the publication record. This requires
    // a new search and a new fetch per PMID.
    foreach (explode("\n", trim($pmids_txt)) as $line) {
      $pmid = trim($line);
      // Skip blank lines and anything that is not a PMID (e.g. an error
      // message returned in place of the list).
      if (!preg_match('/^\d+$/', $pmid)) {
        continue;
      }

      // First initialize a search for this single PMID. This gives us a new
      // query key and WebEnv that refer to exactly one record.
      $term = urlencode($pmid . '[uid]');
      $query = tripal_pub_remote_search_pubmed_query($term);
      if (!is_array($query) || !isset($query['QueryKey']) || !isset($query['WebEnv'])) {
        continue;
      }

      // Second, retrieve and parse the individual record.
      $pub_xml = tripal_pub_remote_search_pubmed_fetch($term, $query['QueryKey'], $query['WebEnv'], 'xml', 0, 1);
      $pubs[] = tripal_pub_remote_search_pubmed_parse_pubxml($pub_xml);
    }

    return $pubs;
  }
  /**
   * Runs an ESearch request against PubMed and returns the history handles.
   *
   * The search asks for a single result with history enabled so that the
   * number of matching records is reported and the result set can be fetched
   * later using the returned WebEnv and QueryKey.
   *
   * @param $terms
   *   The search terms. They are inserted into the URL as-is, so they must
   *   already be URL-safe (words joined with '+').
   *
   * @return
   *   An associative array that may contain the keys 'Count', 'WebEnv' and
   *   'QueryKey' (whichever were present in the response), or 0 if the
   *   connection to Entrez could not be opened.
   */
  function tripal_pub_remote_search_pubmed_query($terms){
    $query_url = "http://www.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=Pubmed&retmax=1&usehistory=y&term=$terms";
    $rfh = fopen($query_url, "r");
    if (!$rfh) {
      drupal_set_message('Could not perform Pubmed query. Cannot connect to Entrez.', 'error');
      return 0;
    }

    // Retrieve the XML results.
    $query_xml = stream_get_contents($rfh);
    fclose($rfh);

    $result = array();
    if (!is_string($query_xml) || trim($query_xml) === '') {
      return $result;
    }

    $xml = new XMLReader();
    $xml->xml($query_xml);

    // Iterate through the direct children of the root element and pick out the
    // count, WebEnv and query key. Only depth 1 is considered because <Count>
    // also occurs deeper in the response (inside the translation stack), and
    // those nested values must not overwrite the total. readString() is used
    // because it does not advance the reader, so no following element is
    // skipped when tags are not separated by whitespace.
    $wanted = array('Count', 'WebEnv', 'QueryKey');
    while ($xml->read()) {
      if ($xml->nodeType != XMLReader::ELEMENT || $xml->depth != 1) {
        continue;
      }
      $element = $xml->name;
      if (in_array($element, $wanted, TRUE)) {
        $result[$element] = $xml->readString();
      }
    }
    $xml->close();

    return $result;
  }
  /**
   * Runs an EFetch request against PubMed for a previously stored search.
   *
   * @param $terms
   *   The search terms. Not used by the request (the stored search is
   *   identified by $query_key and $web_env); kept for interface compatibility.
   * @param $query_key
   *   The QueryKey returned by a previous ESearch request.
   * @param $web_env
   *   The WebEnv returned by a previous ESearch request.
   * @param $rettype
   *   The value for the 'rettype' URL parameter (e.g. 'xml' or 'uilist').
   * @param $start
   *   The zero-based offset of the first record to retrieve.
   * @param $limit
   *   The maximum number of records to retrieve.
   *
   * @return
   *   The raw response body as a string, or an empty string if the connection
   *   to Entrez could not be opened or nothing could be read.
   */
  function tripal_pub_remote_search_pubmed_fetch($terms, $query_key, $web_env, $rettype = 'xml', $start = 0, $limit = 10){
    // Escape every value placed into the URL so that an unexpected character
    // cannot break the query string.
    $rettype = urlencode($rettype);
    $query_key = urlencode($query_key);
    $web_env = urlencode($web_env);
    $start = (int) $start;
    $limit = (int) $limit;

    // Repeat the search performed previously (using WebEnv & QueryKey) to
    // retrieve the records within the range specified.
    $fetch_url = "http://www.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?rettype=$rettype&retmode=text&retstart=$start&retmax=$limit&db=Pubmed&query_key=$query_key&WebEnv=$web_env";
    $rfh = fopen($fetch_url, "r");
    if (!$rfh) {
      // Without this check the read loop would run against an invalid handle.
      drupal_set_message('Could not retrieve Pubmed records. Cannot connect to Entrez.', 'error');
      return '';
    }

    $results = stream_get_contents($rfh);
    fclose($rfh);

    return is_string($results) ? $results : '';
  }
  /**
   * Parses the XML of a single publication into an associative array.
   *
   * The first <PMID> element found becomes the publication accession, and each
   * <Article> element is handed to
   * tripal_pub_remote_search_pubmed_parse_article() to fill in the remaining
   * values. The XML should contain only a single publication record.
   *
   * @param $pub_xml
   *   A string of XML describing one publication.
   *
   * @return
   *   An associative array of publication details. It contains
   *   'pub_accession' and 'pub_database' when a PMID is present, plus any keys
   *   added by the article parser. Empty if $pub_xml is empty.
   */
  function tripal_pub_remote_search_pubmed_parse_pubxml($pub_xml) {
    $pub = array();

    // Nothing to parse; XMLReader rejects an empty source.
    if (!is_string($pub_xml) || trim($pub_xml) === '') {
      return $pub;
    }

    // Read the XML and iterate through it.
    $xml = new XMLReader();
    $xml->xml($pub_xml);
    while ($xml->read()) {
      if ($xml->nodeType != XMLReader::ELEMENT) {
        continue;
      }
      switch ($xml->name) {
        case 'PMID':
          // A record can contain further <PMID> elements that refer to other
          // publications (e.g. in comment/correction lists). Only the first
          // one identifies this record, so later ones are ignored.
          if (!isset($pub['pub_accession'])) {
            $pub['pub_accession'] = $xml->readString();
            $pub['pub_database'] = 'PMID';
          }
          break;

        case 'Article':
          tripal_pub_remote_search_pubmed_parse_article($xml, $pub);
          break;
      }
    }
    $xml->close();

    return $pub;
  }
  /**
   * Parses the contents of an <Article> element into the publication array.
   *
   * The reader is expected to be positioned on the opening <Article> element.
   * On return it is positioned on the matching </Article> end element (or at
   * the end of the document if no such element exists).
   *
   * @param $xml
   *   An XMLReader positioned on the <Article> element.
   * @param $pub
   *   The publication array, passed by reference. The keys 'title', 'abstract'
   *   and 'language' are set when the corresponding elements are present.
   *   <Journal> and <AuthorList> are delegated to their own parsers.
   */
  function tripal_pub_remote_search_pubmed_parse_article($xml, &$pub) {
    // An empty <Article/> has no end element to wait for.
    if ($xml->nodeType == XMLReader::ELEMENT && $xml->name == 'Article' && $xml->isEmptyElement) {
      return;
    }

    // A structured abstract is split over several <AbstractText> sections;
    // collect them all rather than keeping only the last one.
    $abstract_parts = array();

    while ($xml->read()) {
      $element = $xml->name;

      // If we're at the </Article> element then we're done with the article.
      if ($xml->nodeType == XMLReader::END_ELEMENT && $element == 'Article') {
        return;
      }
      if ($xml->nodeType != XMLReader::ELEMENT) {
        continue;
      }

      // readString() returns the complete text of the element, including text
      // inside inline markup, and does not advance the reader.
      switch ($element) {
        case 'Journal':
          tripal_pub_remote_search_pubmed_parse_journal($xml, $pub);
          break;

        case 'ArticleTitle':
          $pub['title'] = $xml->readString();
          break;

        case 'AbstractText':
          $abstract_parts[] = $xml->readString();
          $pub['abstract'] = implode(' ', $abstract_parts);
          break;

        case 'AuthorList':
          tripal_pub_remote_search_pubmed_parse_authorlist($xml, $pub);
          break;

        case 'Language':
          $pub['language'] = $xml->readString();
          break;

        // 'Affiliation', 'ArticleDate' and all other elements are not stored.
        default:
          break;
      }
    }
  }
  /**
   * Consumes a <Journal> element from the reader.
   *
   * The reader is expected to be positioned on the opening <Journal> element.
   * On return it is positioned on the matching </Journal> end element (or at
   * the end of the document if no such element exists).
   *
   * No journal details are stored in $pub yet; the elements of interest
   * (ISSN, Volume, Year, Title, ISOAbbreviation) are recognized by name only.
   *
   * @param $xml
   *   An XMLReader positioned on the <Journal> element.
   * @param $pub
   *   The publication array, passed by reference. Currently left unchanged.
   */
  function tripal_pub_remote_search_pubmed_parse_journal($xml, &$pub) {
    // An empty <Journal/> has no end element to wait for.
    if ($xml->nodeType == XMLReader::ELEMENT && $xml->name == 'Journal' && $xml->isEmptyElement) {
      return;
    }

    // Advance one node at a time. Reading an extra node after each element
    // could step over </Journal> itself and consume the rest of the document.
    while ($xml->read()) {
      // If we're at the </Journal> element then we're done with the journal.
      if ($xml->nodeType == XMLReader::END_ELEMENT && $xml->name == 'Journal') {
        return;
      }
    }
  }
  /**
   * Consumes an <AuthorList> element from the reader.
   *
   * The reader is expected to be positioned on the opening <AuthorList>
   * element. On return it is positioned on the matching </AuthorList> end
   * element (or at the end of the document if no such element exists).
   *
   * No author details are stored in $pub yet; the elements of interest
   * (Author, LastName, ForeName, Initials) are recognized by name only.
   *
   * @param $xml
   *   An XMLReader positioned on the <AuthorList> element.
   * @param $pub
   *   The publication array, passed by reference. Currently left unchanged.
   */
  function tripal_pub_remote_search_pubmed_parse_authorlist($xml, &$pub) {
    // An empty <AuthorList/> has no end element to wait for.
    if ($xml->nodeType == XMLReader::ELEMENT && $xml->name == 'AuthorList' && $xml->isEmptyElement) {
      return;
    }

    while ($xml->read()) {
      // If we're at the </AuthorList> element then we're done with the list.
      // Closing tags of individual authors and their child elements must not
      // end the loop, otherwise the remaining authors are left for the caller.
      if ($xml->nodeType == XMLReader::END_ELEMENT && $xml->name == 'AuthorList') {
        return;
      }
    }
  }