parseInterpro.inc 26 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584
  1. <?php
  2. /*******************************************************************************
  3. * Parse Interpro HTML Output file into analysisfeatureprop table
  4. */
  5. function tripal_analysis_interpro_parseHTMLFile ($analysis_id, $interprofile, $parsego, $job_id) {
  6. // Prepare log
  7. $filename = preg_replace("/.*\/(.*)/", "$1", $interprofile);
  8. $logfile = tempnam(sys_get_temp_dir(),"tripal_analysis_interpro_import");
  9. $log = fopen($logfile, 'a'); // append parsing results to log file
  10. if(!$log){
  11. print "ERROR: cannot open log file: $logfile\n";
  12. exit;
  13. }
  14. // Parsing started
  15. print "Parsing File:".$interprofile." ...\n";
  16. fwrite($log, date("D M j G:i:s Y").". Loading $interprofile\n");
  17. // Get cvterm_id for 'analysis_interpro_output_iteration_hits' which is required
  18. // for inserting into the analysisfeatureprop table
  19. $previous_db = tripal_db_set_active('chado'); // use chado database
  20. $sql = "SELECT CVT.cvterm_id FROM {cvterm} CVT ".
  21. "INNER JOIN cv ON cv.cv_id = CVT.cv_id ".
  22. "WHERE CVT.name = 'analysis_interpro_output_hit' ".
  23. "AND CV.name = 'tripal'";
  24. $type_id = db_result(db_query($sql));
  25. print "cvterm_id for analysis_interpro_output_iteration_hits is $type_id\n";
  26. // Load the HTML file and convert it into XML for loading
  27. $dom = new domDocument;
  28. $dom->loadHTMLFile($interprofile);
  29. $xml = $dom->saveXML();
  30. $interproput = simplexml_load_string($xml);
  31. // Get html tables for parsing
  32. $tables = $interproput->children()->children();
  33. // Count the number of tables to be processed
  34. $no_iterations = 0;
  35. foreach($tables as $tmp) {
  36. if ($tmp->getName() == 'table') {
  37. $no_iterations ++;
  38. }
  39. }
  40. print "$no_iterations html tables to be processed.\n";
  41. $interval = intval($no_iterations * 0.01);
  42. $idx_iterations = 0;
  43. // Processed the tables
  44. foreach ($tables as $table) {
  45. //if (preg_match('/No hits reported/', $table->asXML()) ) {
  46. //print "skipping this table b/c no hits are reported\n";
  47. //}
  48. // make sure we are looking at a table and its not an empty table
  49. if ($table->getName() == 'table' && !preg_match('/No hits reported/', $table->asXML()) ) {
  50. $idx_iterations ++;
  51. if ($idx_iterations % $interval == 0) {
  52. $percentage = (int) ($idx_iterations / $no_iterations * 100);
  53. tripal_db_set_active($previous_db);
  54. tripal_job_set_progress($job_id, $percentage);
  55. $previous_db = tripal_db_set_active('chado');
  56. print $percentage."% ";
  57. }
  58. // Set job status
  59. // Get the first row and match its name with the feature name
  60. $firsttd = $table->children()->children()->children();
  61. $feature_id = 0;
  62. foreach($firsttd as $b) {
  63. foreach($b->children() as $a) {
  64. if ($a->getName() == 'a') {
  65. // Remove _ORF from the sequence name
  66. $seqname = preg_replace('/^(.+?)_\d_.+/', "$1", $a);
  67. print "seqname is $seqname\n";
  68. // Find out how many features match this uniquename
  69. $sql = "SELECT count(feature_id) FROM {feature} ".
  70. "WHERE uniquename = '%s' ";
  71. $no_features = db_result(db_query($sql, $seqname));
  72. // If there is only one match, get the feature_id
  73. if ($no_features == 1) {
  74. $sql = "SELECT feature_id FROM {feature} ".
  75. "WHERE uniquename = '%s' ";
  76. $feature_id = db_result(db_query($sql, $seqname));
  77. print "\tfeature id is $feature_id\n";
  78. // If the uniquename matches more than one features then skip and print 'Ambiguous'
  79. } else if ($no_features > 1) {
  80. fwrite($log, "Ambiguous: ".$seqname." matches more than one feature and is not processed.\n");
  81. continue;
  82. // If the uniquename did not match, skip and print 'Failed'
  83. } else {
  84. fwrite($log, "Failed: ".$seqname."\n");
  85. }
  86. }
  87. }
  88. }
  89. // Successfully matched. print 'Succeeded'. Add analysis_id and
  90. // feature_id to analysisfeature. Add the table as XML to analysisfeatureprop
  91. if ($feature_id) {
  92. //------------------------------------
  93. // Clease unwanted rows from the table
  94. //------------------------------------
  95. $parent_row = "/<tr><td valign=\"top\"><b>Parent<\/b><\/td>\s*<td valign=\"top\">\s*no.*?parent<\/td>\s*<\/tr>/";
  96. $children_row = "/<tr><td valign=\"top\"><b>Children<\/b><\/td>\s*<td valign=\"top\">\s*no.*?children<\/td>\s*<\/tr>/";
  97. $found_row = "/<tr><td valign=\"top\"><b>Found.*?in<\/b><\/td>\s*<td valign=\"top\">\s*no.*?entries<\/td>\s*<\/tr>/";
  98. $contains_row = "/<tr><td valign=\"top\"><b>Contains<\/b><\/td>\s*<td valign=\"top\">\s*no.*?entries<\/td>\s*<\/tr>/";
  99. $go_row = "/<tr><td valign=\"top\"><b>GO.*?terms<\/b><\/td>\s*<td valign=\"top\">\s*none<\/td>\s*<\/tr>/";
  100. $table_txt = $table->asXML();
  101. $table_txt = preg_replace($parent_row, "", $table_txt);
  102. $table_txt = preg_replace($children_row, "", $table_txt);
  103. $table_txt = preg_replace($found_row, "", $table_txt);
  104. $table_txt = preg_replace($contains_row, "", $table_txt);
  105. $table_txt = preg_replace($go_row, "", $table_txt);
  106. //------------------------------------
  107. // Clease unwanted ORF link from table
  108. //------------------------------------
  109. $orf_link = "/<b><a href=\"\/iprscan\/wget.*?\">(.*?)<\/a><\/b>/";
  110. $table_txt = preg_replace($orf_link, "$1", $table_txt);
  111. //print "----------------------------\n";
  112. //print "old: ".$table->asXML()."\n\n\n";
  113. //print "----------------------------\n";
  114. //print "Fixed: $table_txt\n";
  115. //print "----------------------------\n";
  116. //------------------------------------
  117. // If this feature has already been associated with this analysis, do not reinsert
  118. // Otherwise, Insert into analysisfeature table
  119. //------------------------------------
  120. $sql = "Select analysisfeature_id as id from {analysisfeature} where feature_id = %d and analysis_id = %d";
  121. $analysisfeature = db_fetch_object(db_query($sql, $feature_id, $analysis_id));
  122. if($analysisfeature){ $analysisfeature_id = $analysisfeature->id; }
  123. if(!$analysisfeature_id){
  124. print "inserting analysisfeature\n";
  125. $sql = "INSERT INTO {analysisfeature} (feature_id, analysis_id) ".
  126. "VALUES (%d, %d)";
  127. db_query ($sql, $feature_id, $analysis_id);
  128. $sql = "Select analysisfeature_id from {analysisfeature} where feature_id = %d and analysis_id = %d";
  129. $analysisfeature = db_fetch_object(db_query($sql, $feature_id, $analysis_id));
  130. $analysisfeature_id = $analysisfeature->id;
  131. }
  132. print "analysisfeature_id is $analysisfeature_id (analysis_id = $analysis_id; feature_id = $feature_id)\n";
  133. // Get the higest rank for this feature_id in analysisfeatureprop table.
  134. // If the value of the inserting content is not duplicate, add it to
  135. // analysisfeaturepro with 'higest_rank + 1'
  136. $sql = "SELECT MAX(rank) FROM {analysisfeatureprop} AFP ".
  137. "INNER JOIN analysisfeature AF ON AF.analysisfeature_id = AFP.analysisfeature_id ".
  138. "WHERE feature_id=%d ".
  139. "AND analysis_id=%d ".
  140. "AND type_id=%d ";
  141. $afp = db_fetch_object(db_query($sql, $feature_id, $analysis_id, $type_id));
  142. $hi_rank = 0;
  143. if ($afp) {
  144. $hi_rank = $afp->max + 1;
  145. }
  146. //------------------------------------------------------------
  147. // Insert interpro html tags into analysisfeatureprop table
  148. //------------------------------------------------------------
  149. // Before inserting, make sure it's not a duplicate
  150. $sql = "SELECT value FROM {analysisfeatureprop} WHERE analysisfeature_id = %d AND type_id = %d";
  151. $result = db_query($sql, $analysisfeature_id, $type_id);
  152. $duplicate = 0;
  153. while ($afp_value = db_fetch_object($result)) {
  154. if ($table_txt == $afp_value->value) {
  155. $duplicate = 1;
  156. }
  157. }
  158. if (!$duplicate) {
  159. $sql = "INSERT INTO {analysisfeatureprop} (analysisfeature_id, type_id, value, rank)".
  160. "VALUES (%d, %d, '%s', %d)";
  161. db_query($sql, $analysisfeature_id, $type_id, $table_txt, $hi_rank);
  162. fwrite($log, " (Insert)\n"); // write to log
  163. print "\twriting table\n";
  164. } else {
  165. fwrite($log, " (Skipped)\n");
  166. print "\tskipping table - dup\n";
  167. }
  168. // Parse GO terms. Make sure GO database schema is installed in chado
  169. $go_db_id = db_result(db_query("SELECT db_id FROM {db} WHERE name='GO'"));
  170. if (!$go_db_id) {
  171. print 'GO schema not installed in chado. GO terms are not processed.';
  172. }
  173. if ($go_db_id && $parsego) {
  174. $trs = $table->children();
  175. foreach ($trs as $tr) {
  176. $tds = $tr->children();
  177. foreach($tds as $td) {
  178. $gotags = $td->children();
  179. foreach ($gotags as $gotag) {
  180. // Look for 'GO:accession#'
  181. if (preg_match("/^.*?GO:(\d+).*$/", $gotag, $matches)) {
  182. // Find cvterm_id for the matched GO term
  183. $sql = "SELECT cvterm_id FROM {cvterm} CVT
  184. INNER JOIN dbxref DBX ON CVT.dbxref_id = DBX.dbxref_id
  185. WHERE DBX.accession = '%s' AND DBX.db_id = %d";
  186. $goterm_id = db_result(db_query($sql, $matches[1], $go_db_id));
  187. //-------------------------------------------
  188. // Insert GO terms into feature_cvterm table
  189. //-------------------------------------------
  190. // Default pub_id = 1 (NULL) was used
  191. $sql = "INSERT INTO {feature_cvterm} (feature_id, cvterm_id, pub_id)
  192. VALUES (%d, %d, 1)";
  193. db_query($sql, $feature_id, $goterm_id);
  194. //------------------------------------------------
  195. // Insert GO terms into analysisfeatureprop table
  196. //------------------------------------------------
  197. $sql = "INSERT INTO {analysisfeatureprop} (analysisfeature_id, type_id, value, rank) ".
  198. "VALUES (%d, %d, '%s', 0)";
  199. db_query($sql, $analysisfeature_id, $goterm_id, $matches[1]);
  200. }
  201. }
  202. }
  203. }
  204. }
  205. }
  206. }
  207. }
  208. tripal_db_set_active ($previous_db); // Use drupal database
  209. print "Done.\nSuccessful and failed entries have been saved in the log file:\n $logfile\n";
  210. fwrite($log, "\n");
  211. fclose($log);
  212. return;
  213. }
  214. /**
  215. *
  216. */
  217. function tripal_analysis_interpro_parseXMLFile ($analysis_id, $interproxmlfile,
  218. $parsego, $query_re, $query_type, $query_uniquename, $job_id)
  219. {
  220. // clear out the anslysisfeature table for this analysis before getting started
  221. tripal_core_chado_delete('analysisfeature',array('analysis_id' => $analysis_id));
  222. // If user input a file (e.g. blast.xml)
  223. if (is_file($interproxmlfile)) {
  224. tripal_analysis_interpro_parseSingleXMLFile ($analysis_id, $interproxmlfile,
  225. $parsego, $query_re, $query_type, $query_uniquename, $job_id);
  226. }
  227. else {
  228. $dir_handle = @opendir($interproxmlfile) or die("Unable to open $interproxmlfile");
  229. $pattern = sql_regcase($interproxmlfile . "/*.xml");
  230. $total_files = count(glob($pattern));
  231. print "$total_files file(s) to be parsed.\n";
  232. $interval = intval($total_files * 0.01);
  233. if($interval == 0){
  234. $interval = 1;
  235. }
  236. $no_file = 0;
  237. // Parsing all files in the directory
  238. while ($file = readdir($dir_handle)) {
  239. if(preg_match("/^.*\.xml/i",$file)){
  240. tripal_analysis_interpro_parseSingleXMLFile ($analysis_id, "$interproxmlfile/$file",
  241. $parsego, $query_re, $query_type, $query_uniquename, $job_id,0);
  242. // Set job status
  243. if ($no_file % $interval == 0) {
  244. $percentage = (int) (($no_file / $total_files) * 100);
  245. tripal_job_set_progress($job_id, $percentage);
  246. print $percentage."% ";
  247. }
  248. }
  249. $no_file ++;
  250. }
  251. }
  252. print "Done.";
  253. }
  254. /**
  255. *
  256. */
  257. function tripal_analysis_interpro_parseSingleXMLFile ($analysis_id, $interproxmlfile,
  258. $parsego, $query_re, $query_type, $query_uniquename, $job_id,$uptate_status = 1)
  259. {
  260. // Parsing started
  261. print "Parsing File:".$interproxmlfile." ...\n";
  262. // Get cvterm_id for 'analysis_interpro_xmloutput_hits' which is required
  263. // for inserting into the analysisfeatureprop table
  264. $previous_db = db_set_active('chado'); // use chado database
  265. $sql = "SELECT CVT.cvterm_id FROM {cvterm} CVT ".
  266. " INNER JOIN cv ON cv.cv_id = CVT.cv_id ".
  267. "WHERE CVT.name = 'analysis_interpro_xmloutput_hit' ".
  268. " AND CV.name = 'tripal'";
  269. $type_id = db_result(db_query($sql));
  270. // Load the XML file
  271. $xml = simplexml_load_file($interproxmlfile);
  272. // If there is an EBI header then we need to skip that
  273. // and set our proteins array to be the second element of the array. This
  274. // occurs if results were generated with the online InterProScan tool.
  275. // if the XML starts in with the results then this happens when InterProScan
  276. // is used command-line and we can just use the object as is
  277. if(preg_match('/^EBIInterProScanResults/',$xml->getname())){
  278. $children = $xml->children();
  279. $header = $children[0];
  280. $proteins = $children[1];
  281. }
  282. // if the XML starts with the <interpro_matches> tag
  283. elseif(preg_match('/^interpro_matches/',$xml->getname())) {
  284. $proteins = $xml;
  285. }
  286. else {
  287. print "ERROR: cannot parse XML file format is not recognized\n";
  288. return;
  289. }
  290. // Count the number of entires to be processed
  291. $no_iterations = 0;
  292. foreach($proteins as $protein) {
  293. $no_iterations ++;
  294. }
  295. print " Found results for $no_iterations sequences\n";
  296. $interval = intval($no_iterations * 0.01);
  297. if($interval == 0){
  298. $interval = 1;
  299. }
  300. $idx_iterations = 0;
  301. // get the DB id for the GO database
  302. $parsego = tripal_analysis_get_property($analysis_id,'analysis_interpro_parsego');
  303. $go_db_id = db_result(db_query("SELECT db_id FROM {db} WHERE name='GO'"));
  304. if ($parsego and !$go_db_id) {
  305. print 'GO schema not installed in chado. GO terms are not processed.';;
  306. }
  307. // Processed each protein
  308. foreach ($proteins as $protein) {
  309. // Set job status
  310. $idx_iterations ++;
  311. if ($idx_iterations % $interval == 0 and $update_status) {
  312. $percentage = (int) ($idx_iterations / $no_iterations * 100);
  313. db_set_active($previous_db);
  314. tripal_job_set_progress($job_id, $percentage);
  315. $previous_db = db_set_active('chado');
  316. print $percentage."% ";
  317. }
  318. // match the protein id with the feature name
  319. $feature_id = 0;
  320. $attr = $protein->attributes();
  321. $seqname =$attr ['id'];
  322. // is the sequence name a generic name (i.e. 'Sequence_1') then the
  323. // blast results do not contain the original sequence names. The only
  324. // option we have is to use the filename. This will work in the case of
  325. // Blast2GO which stores the XML for each sequence in a file with the
  326. // the filename the name of the sequence
  327. if(preg_match('/Sequence_\d+/',$seqname)){
  328. $filename = preg_replace('/^.*\/(.*).xml$/', '$1', $interproxmlfile);
  329. print " Sequence name is not specific, using filename: $filename\n";
  330. $seqname = $filename;
  331. }
  332. // Remove _ORF from the sequence name
  333. $seqname = preg_replace('/^(.+)_\d+_ORF\d+.*/', '$1', $seqname);
  334. // if a regular expression is provided then pick out the portion requested
  335. if ($query_re and preg_match("/$query_re/", $seqname, $matches)) {
  336. $feature = $matches[1];
  337. }
  338. // If no match by the regular expression then get everything up to the first space
  339. else {
  340. if (preg_match('/^(.*?)\s.*$/', $seqname, $matches)) {
  341. $feature = $matches[1];
  342. }
  343. // if no match up to the first space then just use the entire string
  344. else {
  345. $feature = $seqname;
  346. }
  347. }
  348. if(!$feature and $query_re){
  349. print "Failed: Cannot find feature for '$seqname' using the regular expression: $query_re\n";
  350. continue;
  351. }
  352. // now find the feature in chado
  353. $select = array();
  354. if($query_uniquename){
  355. $select['uniquename'] = $feature;
  356. } else {
  357. $select['name'] = $feature;
  358. }
  359. if($query_type){
  360. $select['type_id'] = array(
  361. 'cv_id' => array(
  362. 'name' => 'sequence'
  363. ),
  364. 'name' => $query_type,
  365. );
  366. }
  367. $feature_arr = tripal_core_chado_select('feature',array('feature_id'),$select);
  368. if(count($feature_arr) > 1){
  369. print "Ambiguous: '$feature' matches more than one feature and is being skipped.\n";
  370. continue;
  371. }
  372. if(count($feature_arr) == 0){
  373. print "Failed: cannot find a matching feature for '$feature' in the database.\n";
  374. continue;
  375. }
  376. $feature_id = $feature_arr[0]->feature_id;
  377. // Successfully matched. print 'Succeeded'. Add analysis_id and
  378. // feature_id to analysisfeature. Add the table as XML to analysisfeatureprop
  379. if ($feature_id) {
  380. print " Adding InterPro results for feature '$seqname' ($feature_id)\n";
  381. // Insert into analysisfeature table only if it doesn't already exist
  382. $values = array('feature_id' => $feature_id, 'analysis_id' => $analysis_id);
  383. $analysisfeature = tripal_core_chado_select('analysisfeature',array('*'),$values);
  384. if(sizeof($analysisfeature) == 0){
  385. $analysisfeature = tripal_core_chado_insert('analysisfeature',$values);
  386. $analysisfeature_id = $analysisfeature['analysisfeature_id'];
  387. } else {
  388. $analysisfeature_id = $analysisfeature[0]->analysisfeature_id;
  389. }
  390. // Insert interpro xml results into analysisfeatureprop table
  391. // Check to see if we have an existing entry
  392. $sql = "SELECT analysisfeatureprop_id,rank
  393. FROM {analysisfeatureprop}
  394. WHERE analysisfeature_id = %d AND type_id = %d
  395. ORDER BY rank DESC";
  396. $result = db_fetch_object(db_query($sql, $analysisfeature_id, $type_id));
  397. $rank = 0;
  398. if($result){
  399. $afp_id = $result->analysisfeatureprop_id;
  400. $rank = $result->rank + 1;
  401. }
  402. $sql = "INSERT INTO {analysisfeatureprop} (analysisfeature_id, type_id, value, rank)".
  403. "VALUES (%d, %d, '%s', %d)";
  404. db_query($sql, $analysisfeature_id, $type_id, $protein->asXML(), $rank);
  405. // parse the XML for each protein if GO terms are requested
  406. if($parsego and $go_db_id){
  407. $protein = tripal_analysis_interpro_get_result_object($protein->asXML(),$feature_id);
  408. $goterms = $protein['goterms'];
  409. // cycle through the GO terms and add them to the database
  410. foreach($goterms as $goterm){
  411. // seperate the 'GO:' from the term
  412. if (preg_match("/^.*?GO:(\d+).*$/", $goterm, $matches)) {
  413. // Find cvterm_id for the matched GO term
  414. $sql = "SELECT cvterm_id FROM {cvterm} CVT
  415. INNER JOIN dbxref DBX ON CVT.dbxref_id = DBX.dbxref_id
  416. WHERE DBX.accession = '%s' AND DBX.db_id = %d";
  417. $goterm_id = db_result(db_query($sql, $matches[1], $go_db_id));
  418. // Insert GO terms into feature_cvterm table
  419. // Default pub_id = 1 (NULL) was used
  420. $values = array('feature_id' => $feature_id, 'cvterm_id' => $goterm_id, 'pub_id' => 1);
  421. $feature_cvterm = tripal_core_chado_select('feature_cvterm',array('*'),$values);
  422. if(sizeof($feature_cvterm) == 0){
  423. $feature_cvterm = tripal_core_chado_insert('feature_cvterm',$values);
  424. }
  425. // Insert GO terms into analysisfeatureprop table
  426. $values = array('analysisfeature_id' => $analysisfeature_id,
  427. 'type_id' => $goterm_id,
  428. 'rank' => 0);
  429. $analysisfeatureprop = tripal_core_chado_select('analysisfeatureprop',array('*'),$values);
  430. if(sizeof($analysisfeatureprop) == 0){
  431. $values['value'] = $matches[1];
  432. $analysisfeatureprop = tripal_core_chado_insert('analysisfeatureprop',$values);
  433. }
  434. } // end if preg_match
  435. } // end for each goterm
  436. } // end if($parsego and $go_db_id)
  437. } // end if($feature_id)
  438. } // end foreach ($proteins as $protein)
  439. db_set_active ($previous_db); // Use drupal database
  440. return;
  441. }
  442. /********************************************************************************
  443. *
  444. */
  445. function tripal_analysis_interpro_get_result_object($interpro_xml,$feature_id){
  446. // Load the XML into an object
  447. $xmlObj = simplexml_load_string($interpro_xml);
  448. // iterate through each interpro results for this protein
  449. $results = array();
  450. $terms = array();
  451. $protein = array();
  452. $iprterms = array();
  453. $goterms = array();
  454. $term_count = 0;
  455. $match_count = 0;
  456. // get the properties of this result
  457. $attr = $xmlObj->attributes();
  458. $protein['orf_id'] = (string) $attr["id"];
  459. $protein['orf_length'] = (string) $attr["length"];
  460. $protein['orf_crc64'] = (string) $attr["crc64"];
  461. foreach($xmlObj->children() as $intepro){
  462. // get the interpro term for this match
  463. $attr = $intepro->attributes();
  464. $terms[$term_count]['ipr_id'] = (string) $attr["id"];
  465. $terms[$term_count]['ipr_name'] = (string) $attr["name"];
  466. $terms[$term_count]['ipr_type'] = (string) $attr["type"];
  467. $iprterms[] = array($terms[$term_count]['ipr_id'],$terms[$term_count]['ipr_name']);
  468. // iterate through the elements of the interpro result
  469. $matches[$term_count]['matches'] = array();
  470. $match_count = 0;
  471. foreach($intepro->children() as $level1){
  472. $element_name = $level1->getName();
  473. if($element_name == 'match'){
  474. // get the match name for this match
  475. $attr = $level1->attributes();
  476. $terms[$term_count]['matches'][$match_count]['match_id'] = (string) $attr["id"];
  477. $terms[$term_count]['matches'][$match_count]['match_name'] = (string) $attr["name"];
  478. $terms[$term_count]['matches'][$match_count]['match_dbname'] = (string) $attr["dbname"];
  479. // get the location information for this match
  480. $loc_count = 0;
  481. foreach($level1->children() as $level2){
  482. $element_name = $level2->getName();
  483. if($element_name == 'location'){
  484. $attr = $level2->attributes();
  485. $terms[$term_count]['matches'][$match_count]['locations'][$loc_count]['match_start'] = (string) $attr["start"];
  486. $terms[$term_count]['matches'][$match_count]['locations'][$loc_count]['match_end'] = (string) $attr["end"];
  487. $terms[$term_count]['matches'][$match_count]['locations'][$loc_count]['match_score'] = (string) $attr["score"];
  488. $terms[$term_count]['matches'][$match_count]['locations'][$loc_count]['match_status'] = (string) $attr["status"];
  489. $terms[$term_count]['matches'][$match_count]['locations'][$loc_count]['match_evidence'] = (string) $attr["evidence"];
  490. $loc_count++;
  491. }
  492. }
  493. $match_count++;
  494. }
  495. if($element_name == 'classification'){
  496. $attr = $level1->attributes();
  497. if($attr['class_type'] == 'GO'){
  498. $terms[$term_count]['matches'][$match_count]['go_terms'][] = (string) $attr['id'];
  499. $goterms[] = (string) $attr['id'];
  500. }
  501. }
  502. }
  503. $term_count++;
  504. }
  505. $results['terms'] = $terms;
  506. $results['orf'] = $protein;
  507. $results['iprterms'] = $iprterms;
  508. $results['goterms'] = $goterms;
  509. return $results;
  510. }