parseInterpro.inc 25 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580
  1. <?php
  /**
   * Parses an InterProScan HTML output file and loads the results into Chado.
   *
   * Every HTML <table> found two levels below the document root is treated as
   * the result for one sequence. The sequence name is read from the link in the
   * table's first row and matched against feature.uniquename. For a unique
   * match, the table markup (with empty rows and the ORF link removed) is
   * stored in analysisfeatureprop, and GO accessions found in the table are
   * optionally stored in feature_cvterm and analysisfeatureprop.
   *
   * Per-sequence outcomes are appended to a log file under the Drupal files
   * directory.
   *
   * @param $analysis_id
   *   The analysis_id the results are attached to.
   * @param $interprofile
   *   Path of the InterProScan HTML file to parse.
   * @param $parsego
   *   When true (and a db record named 'GO' exists) GO terms are loaded too.
   * @param $job_id
   *   Job id passed to tripal_job_set_progress() for progress reporting.
   */
  function tripal_analysis_interpro_parseHTMLFile ($analysis_id, $interprofile, $parsego, $job_id) {

    // Prepare the log; results are appended so repeated loads keep a history.
    $filename = preg_replace("/.*\/(.*)/", "$1", $interprofile);
    $logfile = file_directory_path() . "/tripal/tripal_analysis_interpro/load_$filename.log";
    $log = fopen($logfile, 'a');

    // Parsing started
    print "Parsing File:".$interprofile." ...\n";
    fwrite($log, date("D M j G:i:s Y").". Loading $interprofile\n");

    // Get the cvterm_id of 'analysis_interpro_output_hit', which is the type
    // used for rows inserted into the analysisfeatureprop table.
    $previous_db = tripal_db_set_active('chado'); // use chado database
    $sql = "SELECT CVT.cvterm_id FROM {cvterm} CVT ".
           "INNER JOIN cv ON cv.cv_id = CVT.cv_id ".
           "WHERE CVT.name = 'analysis_interpro_output_hit' ".
           "AND CV.name = 'tripal'";
    $type_id = db_result(db_query($sql));
    print "cvterm_id for analysis_interpro_output_iteration_hits is $type_id\n";

    // The GO database id does not change between tables, so look it up once.
    $go_db_id = db_result(db_query("SELECT db_id FROM {db} WHERE name='GO'"));
    if ($parsego && !$go_db_id) {
      print 'GO schema not installed in chado. GO terms are not processed.';
    }

    // Load the HTML file and convert it into XML for loading
    $dom = new domDocument;
    $dom->loadHTMLFile($interprofile);
    $xml = $dom->saveXML();
    $interproput = simplexml_load_string($xml);
    if ($interproput === FALSE) {
      // Nothing usable was read; leave cleanly instead of failing on a
      // method call against FALSE.
      print "ERROR: unable to read $interprofile as HTML\n";
      fwrite($log, "Failed: unable to read $interprofile as HTML\n\n");
      tripal_db_set_active($previous_db);
      fclose($log);
      return;
    }

    // Get html tables for parsing
    $tables = $interproput->children()->children();

    // Count the number of tables to be processed
    $no_iterations = 0;
    foreach ($tables as $tmp) {
      if ($tmp->getName() == 'table') {
        $no_iterations ++;
      }
    }
    print "$no_iterations html tables to be processed.\n";

    // Report progress roughly every 1%. With fewer than 100 tables the
    // interval would be 0, which would make the modulo below fail.
    $interval = intval($no_iterations * 0.01);
    if ($interval == 0) {
      $interval = 1;
    }
    $idx_iterations = 0;

    // Patterns for rows that carry no information ("no parent", "no
    // children", "no entries", "none"); they are stripped before storing.
    $empty_rows = array(
      "/<tr><td valign=\"top\"><b>Parent<\/b><\/td>\s*<td valign=\"top\">\s*no.*?parent<\/td>\s*<\/tr>/",
      "/<tr><td valign=\"top\"><b>Children<\/b><\/td>\s*<td valign=\"top\">\s*no.*?children<\/td>\s*<\/tr>/",
      "/<tr><td valign=\"top\"><b>Found.*?in<\/b><\/td>\s*<td valign=\"top\">\s*no.*?entries<\/td>\s*<\/tr>/",
      "/<tr><td valign=\"top\"><b>Contains<\/b><\/td>\s*<td valign=\"top\">\s*no.*?entries<\/td>\s*<\/tr>/",
      "/<tr><td valign=\"top\"><b>GO.*?terms<\/b><\/td>\s*<td valign=\"top\">\s*none<\/td>\s*<\/tr>/",
    );
    // Pattern for the ORF download link, which is replaced by its plain text.
    $orf_link = "/<b><a href=\"\/iprscan\/wget.*?\">(.*?)<\/a><\/b>/";

    $sql_find_af = "Select analysisfeature_id as id from {analysisfeature} where feature_id = %d and analysis_id = %d";

    // Process the tables
    foreach ($tables as $table) {

      // make sure we are looking at a table and it's not an empty table
      if ($table->getName() != 'table' || preg_match('/No hits reported/', $table->asXML())) {
        continue;
      }

      // Set job status. tripal_job_set_progress() is called with the
      // previous (Drupal) connection active, then chado is re-selected.
      $idx_iterations ++;
      if ($idx_iterations % $interval == 0) {
        $percentage = (int) ($idx_iterations / $no_iterations * 100);
        tripal_db_set_active($previous_db);
        tripal_job_set_progress($job_id, $percentage);
        $previous_db = tripal_db_set_active('chado');
        print $percentage."% ";
      }

      // Get the first row and match its name with the feature name
      $firsttd = $table->children()->children()->children();
      $feature_id = 0;
      foreach ($firsttd as $b) {
        foreach ($b->children() as $a) {
          if ($a->getName() == 'a') {
            // Remove _ORF from the sequence name
            $seqname = preg_replace('/^(.+?)_\d_.+/', "$1", (string) $a);
            print "seqname is $seqname\n";

            // Find out how many features match this uniquename
            $sql = "SELECT count(feature_id) FROM {feature} ".
                   "WHERE uniquename = '%s' ";
            $no_features = db_result(db_query($sql, $seqname));

            // If there is only one match, get the feature_id
            if ($no_features == 1) {
              $sql = "SELECT feature_id FROM {feature} ".
                     "WHERE uniquename = '%s' ";
              $feature_id = db_result(db_query($sql, $seqname));
              print "\tfeature id is $feature_id\n";
            }
            // If the uniquename matches more than one feature, log 'Ambiguous'
            // and move on to the next link.
            else if ($no_features > 1) {
              fwrite($log, "Ambiguous: ".$seqname." matches more than one feature and is not processed.\n");
              continue;
            }
            // If the uniquename did not match, log 'Failed'
            else {
              fwrite($log, "Failed: ".$seqname."\n");
            }
          }
        }
      }

      // Without a matched feature there is nothing to attach the table to.
      if (!$feature_id) {
        continue;
      }

      //------------------------------------
      // Cleanse unwanted rows and the ORF link from the table
      //------------------------------------
      $table_txt = $table->asXML();
      $table_txt = preg_replace($empty_rows, "", $table_txt);
      $table_txt = preg_replace($orf_link, "$1", $table_txt);

      //------------------------------------
      // If this feature has already been associated with this analysis, do
      // not reinsert. Otherwise, insert into the analysisfeature table.
      //------------------------------------
      // Reset for every table so an id from a previous table is never reused.
      $analysisfeature_id = 0;
      $analysisfeature = db_fetch_object(db_query($sql_find_af, $feature_id, $analysis_id));
      if ($analysisfeature) {
        $analysisfeature_id = $analysisfeature->id;
      }
      if (!$analysisfeature_id) {
        print "inserting analysisfeature\n";
        $sql = "INSERT INTO {analysisfeature} (feature_id, analysis_id) ".
               "VALUES (%d, %d)";
        db_query ($sql, $feature_id, $analysis_id);
        // Read the new id back using the same aliased query as above.
        $analysisfeature = db_fetch_object(db_query($sql_find_af, $feature_id, $analysis_id));
        if ($analysisfeature) {
          $analysisfeature_id = $analysisfeature->id;
        }
      }
      if (!$analysisfeature_id) {
        fwrite($log, "Failed: could not create an analysisfeature record for feature $feature_id\n");
        continue;
      }
      print "analysisfeature_id is $analysisfeature_id (analysis_id = $analysis_id; feature_id = $feature_id)\n";

      // Get the highest rank for this feature in analysisfeatureprop. A new
      // value is stored with 'highest rank + 1'. MAX() yields a row holding
      // NULL when nothing is stored yet, in which case the rank stays 0.
      $sql = "SELECT MAX(rank) FROM {analysisfeatureprop} AFP ".
             "INNER JOIN analysisfeature AF ON AF.analysisfeature_id = AFP.analysisfeature_id ".
             "WHERE feature_id=%d ".
             "AND analysis_id=%d ".
             "AND type_id=%d ";
      $afp = db_fetch_object(db_query($sql, $feature_id, $analysis_id, $type_id));
      $hi_rank = 0;
      if ($afp && isset($afp->max)) {
        $hi_rank = $afp->max + 1;
      }

      //------------------------------------------------------------
      // Insert interpro html tags into analysisfeatureprop table
      //------------------------------------------------------------
      // Before inserting, make sure it's not a duplicate
      $sql = "SELECT value FROM {analysisfeatureprop} WHERE analysisfeature_id = %d AND type_id = %d";
      $result = db_query($sql, $analysisfeature_id, $type_id);
      $duplicate = 0;
      while ($afp_value = db_fetch_object($result)) {
        if ($table_txt == $afp_value->value) {
          $duplicate = 1;
          break;
        }
      }
      if (!$duplicate) {
        $sql = "INSERT INTO {analysisfeatureprop} (analysisfeature_id, type_id, value, rank)".
               "VALUES (%d, %d, '%s', %d)";
        db_query($sql, $analysisfeature_id, $type_id, $table_txt, $hi_rank);
        fwrite($log, " (Insert)\n"); // write to log
        print "\twriting table\n";
      }
      else {
        fwrite($log, " (Skipped)\n");
        print "\tskipping table - dup\n";
      }

      // Parse GO terms. Requires the GO database record to exist in chado.
      if ($go_db_id && $parsego) {
        foreach ($table->children() as $tr) {
          foreach ($tr->children() as $td) {
            foreach ($td->children() as $gotag) {

              // Look for 'GO:accession#'
              if (!preg_match("/^.*?GO:(\d+).*$/", (string) $gotag, $matches)) {
                continue;
              }

              // Find cvterm_id for the matched GO term
              $sql = "SELECT cvterm_id FROM {cvterm} CVT ".
                     "INNER JOIN dbxref DBX ON CVT.dbxref_id = DBX.dbxref_id ".
                     "WHERE DBX.accession = '%s' AND DBX.db_id = %d";
              $goterm_id = db_result(db_query($sql, $matches[1], $go_db_id));
              if (!$goterm_id) {
                // The accession is not loaded in chado; nothing to link to.
                continue;
              }

              //-------------------------------------------
              // Insert GO terms into feature_cvterm table
              //-------------------------------------------
              // Default pub_id = 1 (NULL) is used. Skip if already present.
              $sql = "SELECT count(*) FROM {feature_cvterm} ".
                     "WHERE feature_id = %d AND cvterm_id = %d AND pub_id = 1";
              if (!db_result(db_query($sql, $feature_id, $goterm_id))) {
                $sql = "INSERT INTO {feature_cvterm} (feature_id, cvterm_id, pub_id) ".
                       "VALUES (%d, %d, 1)";
                db_query($sql, $feature_id, $goterm_id);
              }

              //------------------------------------------------
              // Insert GO terms into analysisfeatureprop table
              //------------------------------------------------
              $sql = "SELECT count(*) FROM {analysisfeatureprop} ".
                     "WHERE analysisfeature_id = %d AND type_id = %d AND rank = 0";
              if (!db_result(db_query($sql, $analysisfeature_id, $goterm_id))) {
                $sql = "INSERT INTO {analysisfeatureprop} (analysisfeature_id, type_id, value, rank) ".
                       "VALUES (%d, %d, '%s', 0)";
                db_query($sql, $analysisfeature_id, $goterm_id, $matches[1]);
              }
            }
          }
        }
      }
    }

    tripal_db_set_active ($previous_db); // Use drupal database
    print "Done.\nSuccessful and failed entries have been saved in the log file:\n $logfile\n";
    fwrite($log, "\n");
    fclose($log);
    return;
  }
  /**
   * Loads InterProScan XML results for an analysis from a file or a directory.
   *
   * Existing analysisfeature rows for the analysis are deleted first. If
   * $interproxmlfile is a regular file it is parsed directly; otherwise it is
   * opened as a directory and every entry ending in ".xml" (case-insensitive)
   * is parsed, with job progress updated as files complete.
   *
   * @param $analysis_id
   *   The analysis_id the results are attached to.
   * @param $interproxmlfile
   *   Path of an InterProScan XML file, or of a directory of such files.
   * @param $parsego
   *   Passed through to tripal_analysis_interpro_parseSingleXMLFile().
   * @param $query_re
   *   Passed through; regular expression used to extract the feature name.
   * @param $query_type
   *   Passed through; feature type used to narrow the feature lookup.
   * @param $query_uniquename
   *   Passed through; whether to match on uniquename rather than name.
   * @param $job_id
   *   Job id passed to tripal_job_set_progress() for progress reporting.
   */
  function tripal_analysis_interpro_parseXMLFile ($analysis_id, $interproxmlfile,
    $parsego, $query_re, $query_type, $query_uniquename, $job_id)
  {
    // Validate the input before touching the database, so that a bad path
    // does not wipe the existing results for this analysis.
    $is_single_file = is_file($interproxmlfile);
    $dir_handle = NULL;
    if (!$is_single_file) {
      $dir_handle = @opendir($interproxmlfile);
      if (!$dir_handle) {
        die("Unable to open $interproxmlfile");
      }
    }

    // clear out the analysisfeature table for this analysis before getting started
    tripal_core_chado_delete('analysisfeature', array('analysis_id' => $analysis_id));

    // If the user supplied a single file
    if ($is_single_file) {
      tripal_analysis_interpro_parseSingleXMLFile ($analysis_id, $interproxmlfile,
        $parsego, $query_re, $query_type, $query_uniquename, $job_id);
    }
    else {
      // Collect the XML files first so that the total used for the progress
      // calculation is exactly the set of files that gets parsed. The strict
      // comparison keeps an entry named "0" from ending the scan early.
      $xml_files = array();
      while (($file = readdir($dir_handle)) !== FALSE) {
        if (preg_match('/\.xml$/i', $file)) {
          $xml_files[] = $file;
        }
      }
      closedir($dir_handle);

      $total_files = count($xml_files);
      print "$total_files file(s) to be parsed.\n";

      // Report progress roughly every 1% of the files, at least every file.
      $interval = intval($total_files * 0.01);
      if ($interval == 0) {
        $interval = 1;
      }

      // Parse all XML files in the directory. The trailing 0 turns off the
      // per-protein progress updates of the single-file parser; progress is
      // reported per file here instead.
      $no_file = 0;
      foreach ($xml_files as $file) {
        tripal_analysis_interpro_parseSingleXMLFile ($analysis_id, "$interproxmlfile/$file",
          $parsego, $query_re, $query_type, $query_uniquename, $job_id, 0);
        $no_file ++;

        // Set job status
        if ($no_file % $interval == 0) {
          $percentage = (int) (($no_file / $total_files) * 100);
          tripal_job_set_progress($job_id, $percentage);
          print $percentage."% ";
        }
      }
    }
    print "Done.";
  }
  /**
   * Parses one InterProScan XML file and loads its results into Chado.
   *
   * Each child element of the results root is treated as one protein. Its 'id'
   * attribute is reduced to a feature name, the feature is looked up in Chado,
   * and the protein's XML is stored in analysisfeatureprop. GO terms returned
   * by tripal_analysis_interpro_get_result_object() are optionally stored in
   * feature_cvterm and analysisfeatureprop.
   *
   * Accepts files whose root element name starts with 'EBIInterProScanResults'
   * (the proteins are then taken from the second child) or with
   * 'interpro_matches' (the root itself holds the proteins).
   *
   * @param $analysis_id
   *   The analysis_id the results are attached to.
   * @param $interproxmlfile
   *   Path of the InterProScan XML file.
   * @param $parsego
   *   NOTE(review): this argument is overwritten below with the analysis
   *   property 'analysis_interpro_parsego', so the passed value is not used.
   * @param $query_re
   *   Optional regular expression (without delimiters); its first capture
   *   group is used as the feature name.
   * @param $query_type
   *   Optional cvterm name in the 'sequence' cv used to restrict the lookup.
   * @param $query_uniquename
   *   If true the feature is matched on uniquename, otherwise on name.
   * @param $job_id
   *   Job id passed to tripal_job_set_progress() for progress reporting.
   * @param $uptate_status
   *   If true, job progress is updated while proteins are processed. The
   *   misspelled name is kept as-is so the signature does not change.
   */
  function tripal_analysis_interpro_parseSingleXMLFile ($analysis_id, $interproxmlfile,
    $parsego, $query_re, $query_type, $query_uniquename, $job_id,$uptate_status = 1)
  {
    // The body previously read an undefined $update_status, so progress was
    // never reported; bind the correctly spelled name to the parameter.
    $update_status = $uptate_status;

    // Parsing started
    print "Parsing File:".$interproxmlfile." ...\n";

    // Get the cvterm_id of 'analysis_interpro_xmloutput_hit', which is the
    // type used for rows inserted into the analysisfeatureprop table.
    $previous_db = db_set_active('chado'); // use chado database
    $sql = "SELECT CVT.cvterm_id FROM {cvterm} CVT ".
           " INNER JOIN cv ON cv.cv_id = CVT.cv_id ".
           "WHERE CVT.name = 'analysis_interpro_xmloutput_hit' ".
           " AND CV.name = 'tripal'";
    $type_id = db_result(db_query($sql));

    // Load the XML file. Compare strictly against FALSE: an element without
    // children is also "falsy" but is still a successfully loaded document.
    $xml = simplexml_load_file($interproxmlfile);
    if ($xml === FALSE) {
      print "ERROR: cannot load XML file: $interproxmlfile\n";
      db_set_active($previous_db);
      return;
    }

    // If there is an EBI header then skip it and use the second child as the
    // list of proteins. This occurs when results were generated with the
    // online InterProScan tool. If the XML starts directly with the results
    // (command-line InterProScan) the root object is used as is.
    if (preg_match('/^EBIInterProScanResults/', $xml->getname())) {
      $children = $xml->children();
      $proteins = $children[1];
    }
    // if the XML starts with the <interpro_matches> tag
    elseif (preg_match('/^interpro_matches/', $xml->getname())) {
      $proteins = $xml;
    }
    else {
      print "ERROR: cannot parse XML file format is not recognized\n";
      // Restore the previous connection before leaving; otherwise chado
      // would stay active for the caller.
      db_set_active($previous_db);
      return;
    }

    // Count the number of entries to be processed
    $no_iterations = 0;
    foreach ($proteins as $counted) {
      $no_iterations ++;
    }
    print " Found results for $no_iterations sequences\n";
    $interval = intval($no_iterations * 0.01);
    if ($interval == 0) {
      $interval = 1;
    }
    $idx_iterations = 0;

    // get the DB id for the GO database
    $parsego = tripal_analysis_get_property($analysis_id,'analysis_interpro_parsego');
    $go_db_id = db_result(db_query("SELECT db_id FROM {db} WHERE name='GO'"));
    if ($parsego and !$go_db_id) {
      print 'GO schema not installed in chado. GO terms are not processed.';
    }

    // Process each protein
    foreach ($proteins as $protein) {

      // Set job status. tripal_job_set_progress() is called with the
      // previous (Drupal) connection active, then chado is re-selected.
      $idx_iterations ++;
      if ($idx_iterations % $interval == 0 and $update_status) {
        $percentage = (int) ($idx_iterations / $no_iterations * 100);
        db_set_active($previous_db);
        tripal_job_set_progress($job_id, $percentage);
        $previous_db = db_set_active('chado');
        print $percentage."% ";
      }

      // match the protein id with the feature name
      $feature_id = 0;
      $attr = $protein->attributes();
      $seqname = (string) $attr['id'];

      // If the sequence name is a generic name (i.e. 'Sequence_1') then the
      // results do not contain the original sequence names. The only option
      // is to use the filename. This works in the case of Blast2GO, which
      // stores the XML for each sequence in a file named after the sequence.
      if (preg_match('/Sequence_\d+/', $seqname)) {
        $filename = preg_replace('/^.*\/(.*)\.xml$/', '$1', $interproxmlfile);
        print " Sequence name is not specific, using filename: $filename\n";
        $seqname = $filename;
      }

      // Remove _ORF from the sequence name
      $seqname = preg_replace('/^(.+)_\d+_ORF\d+.*/', '$1', $seqname);

      // if a regular expression is provided then pick out the portion requested
      if ($query_re and preg_match("/$query_re/", $seqname, $matches)) {
        // An expression without a capture group yields no feature name.
        $feature = isset($matches[1]) ? $matches[1] : '';
      }
      // If no match by the regular expression then get everything up to the first space
      elseif (preg_match('/^(.*?)\s.*$/', $seqname, $matches)) {
        $feature = $matches[1];
      }
      // if no match up to the first space then just use the entire string
      else {
        $feature = $seqname;
      }
      if (!$feature and $query_re) {
        print "Failed: Cannot find feature for '$seqname' using the regular expression: $query_re\n";
        continue;
      }

      // now find the feature in chado
      $select = array();
      if ($query_uniquename) {
        $select['uniquename'] = $feature;
      }
      else {
        $select['name'] = $feature;
      }
      if ($query_type) {
        $select['type_id'] = array(
          'cv_id' => array(
            'name' => 'sequence'
          ),
          'name' => $query_type,
        );
      }
      $feature_arr = tripal_core_chado_select('feature', array('feature_id'), $select);
      if (count($feature_arr) > 1) {
        print "Ambiguous: '$feature' matches more than one feature and is being skipped.\n";
        continue;
      }
      if (count($feature_arr) == 0) {
        print "Failed: cannot find a matching feature for '$feature' in the database.\n";
        continue;
      }
      $feature_id = $feature_arr[0]->feature_id;
      if (!$feature_id) {
        continue;
      }

      // Successfully matched. Add analysis_id and feature_id to
      // analysisfeature and the protein XML to analysisfeatureprop.
      print " Adding InterPro results for feature '$seqname' ($feature_id)\n";

      // Insert into analysisfeature table only if it doesn't already exist
      $values = array('feature_id' => $feature_id, 'analysis_id' => $analysis_id);
      $analysisfeature = tripal_core_chado_select('analysisfeature', array('*'), $values);
      if (sizeof($analysisfeature) == 0) {
        $analysisfeature = tripal_core_chado_insert('analysisfeature', $values);
        $analysisfeature_id = $analysisfeature['analysisfeature_id'];
      }
      else {
        $analysisfeature_id = $analysisfeature[0]->analysisfeature_id;
      }
      if (!$analysisfeature_id) {
        // Without a parent record the property rows cannot be stored.
        print "Failed: could not create an analysisfeature record for '$feature'.\n";
        continue;
      }

      // Insert interpro xml results into analysisfeatureprop table.
      // Use the rank after the highest existing one for this type, or 0.
      $sql = "SELECT analysisfeatureprop_id,rank ".
             "FROM {analysisfeatureprop} ".
             "WHERE analysisfeature_id = %d AND type_id = %d ".
             "ORDER BY rank DESC";
      $result = db_fetch_object(db_query($sql, $analysisfeature_id, $type_id));
      $rank = 0;
      if ($result) {
        $rank = $result->rank + 1;
      }
      $sql = "INSERT INTO {analysisfeatureprop} (analysisfeature_id, type_id, value, rank)".
             "VALUES (%d, %d, '%s', %d)";
      db_query($sql, $analysisfeature_id, $type_id, $protein->asXML(), $rank);

      // parse the XML for each protein if GO terms are requested
      if (!($parsego and $go_db_id)) {
        continue;
      }
      // Keep the loop variable intact; store the parsed result separately.
      $parsed = tripal_analysis_interpro_get_result_object($protein->asXML(), $feature_id);
      $goterms = $parsed['goterms'];

      // cycle through the GO terms and add them to the database
      foreach ($goterms as $goterm) {

        // separate the 'GO:' from the term
        if (!preg_match("/^.*?GO:(\d+).*$/", $goterm, $matches)) {
          continue;
        }

        // Find cvterm_id for the matched GO term
        $sql = "SELECT cvterm_id FROM {cvterm} CVT ".
               "INNER JOIN dbxref DBX ON CVT.dbxref_id = DBX.dbxref_id ".
               "WHERE DBX.accession = '%s' AND DBX.db_id = %d";
        $goterm_id = db_result(db_query($sql, $matches[1], $go_db_id));
        if (!$goterm_id) {
          // The accession is not loaded in chado; nothing to link to.
          continue;
        }

        // Insert GO terms into feature_cvterm table
        // Default pub_id = 1 (NULL) is used
        $values = array('feature_id' => $feature_id, 'cvterm_id' => $goterm_id, 'pub_id' => 1);
        $feature_cvterm = tripal_core_chado_select('feature_cvterm', array('*'), $values);
        if (sizeof($feature_cvterm) == 0) {
          $feature_cvterm = tripal_core_chado_insert('feature_cvterm', $values);
        }

        // Insert GO terms into analysisfeatureprop table
        $values = array('analysisfeature_id' => $analysisfeature_id,
                        'type_id' => $goterm_id,
                        'rank' => 0);
        $analysisfeatureprop = tripal_core_chado_select('analysisfeatureprop', array('*'), $values);
        if (sizeof($analysisfeatureprop) == 0) {
          $values['value'] = $matches[1];
          $analysisfeatureprop = tripal_core_chado_insert('analysisfeatureprop', $values);
        }
      } // end for each goterm
    } // end foreach ($proteins as $protein)

    db_set_active ($previous_db); // Use drupal database
    return;
  }
  /**
   * Converts the InterProScan XML for one protein into a nested array.
   *
   * @param $interpro_xml
   *   XML string whose root element describes one protein (attributes 'id',
   *   'length' and 'crc64') and whose children are the InterPro entries.
   * @param $feature_id
   *   Not used by this function; kept so existing callers keep working.
   *
   * @return
   *   An array with the keys:
   *   - 'terms': one entry per child of the root, holding 'ipr_id',
   *     'ipr_name', 'ipr_type' and 'matches'. 'matches' is always present
   *     (empty when the entry has no <match>). Each match holds 'match_id',
   *     'match_name', 'match_dbname' and, when <location> children exist,
   *     'locations' (start, end, score, status and evidence per location).
   *   - 'orf': 'orf_id', 'orf_length' and 'orf_crc64' of the protein.
   *   - 'iprterms': list of array(ipr_id, ipr_name) pairs.
   *   - 'goterms': ids of all <classification class_type="GO"> elements.
   *   All four keys hold empty arrays if the XML cannot be loaded.
   */
  function tripal_analysis_interpro_get_result_object($interpro_xml,$feature_id){

    $results  = array();
    $terms    = array();
    $protein  = array();
    $iprterms = array();
    $goterms  = array();

    // Load the XML into an object; hand back an empty result if that fails
    // rather than calling methods on FALSE.
    $xmlObj = simplexml_load_string($interpro_xml);
    if ($xmlObj === FALSE) {
      $results['terms']    = $terms;
      $results['orf']      = $protein;
      $results['iprterms'] = $iprterms;
      $results['goterms']  = $goterms;
      return $results;
    }

    // get the properties of this result
    $attr = $xmlObj->attributes();
    $protein['orf_id']     = (string) $attr["id"];
    $protein['orf_length'] = (string) $attr["length"];
    $protein['orf_crc64']  = (string) $attr["crc64"];

    // iterate through each interpro result for this protein
    $term_count = 0;
    foreach ($xmlObj->children() as $interpro) {

      // get the interpro term for this match
      $attr = $interpro->attributes();
      $terms[$term_count]['ipr_id']   = (string) $attr["id"];
      $terms[$term_count]['ipr_name'] = (string) $attr["name"];
      $terms[$term_count]['ipr_type'] = (string) $attr["type"];
      $iprterms[] = array($terms[$term_count]['ipr_id'], $terms[$term_count]['ipr_name']);

      // Every term gets a 'matches' list, even when it stays empty.
      $terms[$term_count]['matches'] = array();
      $match_count = 0;

      // iterate through the elements of the interpro result
      foreach ($interpro->children() as $level1) {
        // Kept in its own variable so the inner loop cannot overwrite it
        // before the classification test below.
        $level1_name = $level1->getName();

        if ($level1_name == 'match') {
          // get the match name for this match
          $attr = $level1->attributes();
          $terms[$term_count]['matches'][$match_count]['match_id']     = (string) $attr["id"];
          $terms[$term_count]['matches'][$match_count]['match_name']   = (string) $attr["name"];
          $terms[$term_count]['matches'][$match_count]['match_dbname'] = (string) $attr["dbname"];

          // get the location information for this match
          $loc_count = 0;
          foreach ($level1->children() as $level2) {
            if ($level2->getName() == 'location') {
              $attr = $level2->attributes();
              $terms[$term_count]['matches'][$match_count]['locations'][$loc_count]['match_start']    = (string) $attr["start"];
              $terms[$term_count]['matches'][$match_count]['locations'][$loc_count]['match_end']      = (string) $attr["end"];
              $terms[$term_count]['matches'][$match_count]['locations'][$loc_count]['match_score']    = (string) $attr["score"];
              $terms[$term_count]['matches'][$match_count]['locations'][$loc_count]['match_status']   = (string) $attr["status"];
              $terms[$term_count]['matches'][$match_count]['locations'][$loc_count]['match_evidence'] = (string) $attr["evidence"];
              $loc_count++;
            }
          }
          $match_count++;
        }
        elseif ($level1_name == 'classification') {
          $attr = $level1->attributes();
          if ((string) $attr['class_type'] == 'GO') {
            // NOTE(review): $match_count already points past the last match
            // here, so 'go_terms' lands in a separate entry after the real
            // matches. Left unchanged because consumers of this structure
            // are not visible here - confirm before restructuring.
            $terms[$term_count]['matches'][$match_count]['go_terms'][] = (string) $attr['id'];
            $goterms[] = (string) $attr['id'];
          }
        }
      }
      $term_count++;
    }

    $results['terms']    = $terms;
    $results['orf']      = $protein;
    $results['iprterms'] = $iprterms;
    $results['goterms']  = $goterms;
    return $results;
  }