parseInterpro.inc 25 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566
  1. <?php
  2. /*******************************************************************************
  3. * Parse Interpro HTML Output file into analysisfeatureprop table
  4. */
  5. function tripal_analysis_interpro_parseHTMLFile ($analysis_id, $interprofile, $parsego, $job_id) {
  6. // Prepare log
  7. $filename = preg_replace("/.*\/(.*)/", "$1", $interprofile);
  8. $logfile = file_directory_path() . "/tripal/tripal_analysis_interpro/load_$filename.log";
  9. $log = fopen($logfile, 'a'); // append parsing results to log file
  10. // Parsing started
  11. print "Parsing File:".$interprofile." ...\n";
  12. fwrite($log, date("D M j G:i:s Y").". Loading $interprofile\n");
  13. // Get cvterm_id for 'analysis_interpro_output_iteration_hits' which is required
  14. // for inserting into the analysisfeatureprop table
  15. $previous_db = tripal_db_set_active('chado'); // use chado database
  16. $sql = "SELECT CVT.cvterm_id FROM {cvterm} CVT ".
  17. "INNER JOIN cv ON cv.cv_id = CVT.cv_id ".
  18. "WHERE CVT.name = 'analysis_interpro_output_hit' ".
  19. "AND CV.name = 'tripal'";
  20. $type_id = db_result(db_query($sql));
  21. print "cvterm_id for analysis_interpro_output_iteration_hits is $type_id\n";
  22. // Load the HTML file and convert it into XML for loading
  23. $dom = new domDocument;
  24. $dom->loadHTMLFile($interprofile);
  25. $xml = $dom->saveXML();
  26. $interproput = simplexml_load_string($xml);
  27. // Get html tables for parsing
  28. $tables = $interproput->children()->children();
  29. // Count the number of tables to be processed
  30. $no_iterations = 0;
  31. foreach($tables as $tmp) {
  32. if ($tmp->getName() == 'table') {
  33. $no_iterations ++;
  34. }
  35. }
  36. print "$no_iterations html tables to be processed.\n";
  37. $interval = intval($no_iterations * 0.01);
  38. $idx_iterations = 0;
  39. // Processed the tables
  40. foreach ($tables as $table) {
  41. //if (preg_match('/No hits reported/', $table->asXML()) ) {
  42. //print "skipping this table b/c no hits are reported\n";
  43. //}
  44. // make sure we are looking at a table and its not an empty table
  45. if ($table->getName() == 'table' && !preg_match('/No hits reported/', $table->asXML()) ) {
  46. $idx_iterations ++;
  47. if ($idx_iterations % $interval == 0) {
  48. $percentage = (int) ($idx_iterations / $no_iterations * 100);
  49. tripal_db_set_active($previous_db);
  50. tripal_job_set_progress($job_id, $percentage);
  51. $previous_db = tripal_db_set_active('chado');
  52. print $percentage."% ";
  53. }
  54. // Set job status
  55. // Get the first row and match its name with the feature name
  56. $firsttd = $table->children()->children()->children();
  57. $feature_id = 0;
  58. foreach($firsttd as $b) {
  59. foreach($b->children() as $a) {
  60. if ($a->getName() == 'a') {
  61. // Remove _ORF from the sequence name
  62. $seqname = preg_replace('/^(.+?)_\d_.+/', "$1", $a);
  63. print "seqname is $seqname\n";
  64. // Find out how many features match this uniquename
  65. $sql = "SELECT count(feature_id) FROM {feature} ".
  66. "WHERE uniquename = '%s' ";
  67. $no_features = db_result(db_query($sql, $seqname));
  68. // If there is only one match, get the feature_id
  69. if ($no_features == 1) {
  70. $sql = "SELECT feature_id FROM {feature} ".
  71. "WHERE uniquename = '%s' ";
  72. $feature_id = db_result(db_query($sql, $seqname));
  73. print "\tfeature id is $feature_id\n";
  74. // If the uniquename matches more than one features then skip and print 'Ambiguous'
  75. } else if ($no_features > 1) {
  76. fwrite($log, "Ambiguous: ".$seqname." matches more than one feature and is not processed.\n");
  77. continue;
  78. // If the uniquename did not match, skip and print 'Failed'
  79. } else {
  80. fwrite($log, "Failed: ".$seqname."\n");
  81. }
  82. }
  83. }
  84. }
  85. // Successfully matched. print 'Succeeded'. Add analysis_id and
  86. // feature_id to analysisfeature. Add the table as XML to analysisfeatureprop
  87. if ($feature_id) {
  88. //------------------------------------
  89. // Clease unwanted rows from the table
  90. //------------------------------------
  91. $parent_row = "/<tr><td valign=\"top\"><b>Parent<\/b><\/td>\s*<td valign=\"top\">\s*no.*?parent<\/td>\s*<\/tr>/";
  92. $children_row = "/<tr><td valign=\"top\"><b>Children<\/b><\/td>\s*<td valign=\"top\">\s*no.*?children<\/td>\s*<\/tr>/";
  93. $found_row = "/<tr><td valign=\"top\"><b>Found.*?in<\/b><\/td>\s*<td valign=\"top\">\s*no.*?entries<\/td>\s*<\/tr>/";
  94. $contains_row = "/<tr><td valign=\"top\"><b>Contains<\/b><\/td>\s*<td valign=\"top\">\s*no.*?entries<\/td>\s*<\/tr>/";
  95. $go_row = "/<tr><td valign=\"top\"><b>GO.*?terms<\/b><\/td>\s*<td valign=\"top\">\s*none<\/td>\s*<\/tr>/";
  96. $table_txt = $table->asXML();
  97. $table_txt = preg_replace($parent_row, "", $table_txt);
  98. $table_txt = preg_replace($children_row, "", $table_txt);
  99. $table_txt = preg_replace($found_row, "", $table_txt);
  100. $table_txt = preg_replace($contains_row, "", $table_txt);
  101. $table_txt = preg_replace($go_row, "", $table_txt);
  102. //------------------------------------
  103. // Clease unwanted ORF link from table
  104. //------------------------------------
  105. $orf_link = "/<b><a href=\"\/iprscan\/wget.*?\">(.*?)<\/a><\/b>/";
  106. $table_txt = preg_replace($orf_link, "$1", $table_txt);
  107. //print "----------------------------\n";
  108. //print "old: ".$table->asXML()."\n\n\n";
  109. //print "----------------------------\n";
  110. //print "Fixed: $table_txt\n";
  111. //print "----------------------------\n";
  112. //------------------------------------
  113. // If this feature has already been associated with this analysis, do not reinsert
  114. // Otherwise, Insert into analysisfeature table
  115. //------------------------------------
  116. $sql = "Select analysisfeature_id as id from {analysisfeature} where feature_id = %d and analysis_id = %d";
  117. $analysisfeature = db_fetch_object(db_query($sql, $feature_id, $analysis_id));
  118. if($analysisfeature){ $analysisfeature_id = $analysisfeature->id; }
  119. if(!$analysisfeature_id){
  120. print "inserting analysisfeature\n";
  121. $sql = "INSERT INTO {analysisfeature} (feature_id, analysis_id) ".
  122. "VALUES (%d, %d)";
  123. db_query ($sql, $feature_id, $analysis_id);
  124. $sql = "Select analysisfeature_id from {analysisfeature} where feature_id = %d and analysis_id = %d";
  125. $analysisfeature = db_fetch_object(db_query($sql, $feature_id, $analysis_id));
  126. $analysisfeature_id = $analysisfeature->id;
  127. }
  128. print "analysisfeature_id is $analysisfeature_id (analysis_id = $analysis_id; feature_id = $feature_id)\n";
  129. // Get the higest rank for this feature_id in analysisfeatureprop table.
  130. // If the value of the inserting content is not duplicate, add it to
  131. // analysisfeaturepro with 'higest_rank + 1'
  132. $sql = "SELECT MAX(rank) FROM {analysisfeatureprop} AFP ".
  133. "INNER JOIN analysisfeature AF ON AF.analysisfeature_id = AFP.analysisfeature_id ".
  134. "WHERE feature_id=%d ".
  135. "AND analysis_id=%d ".
  136. "AND type_id=%d ";
  137. $afp = db_fetch_object(db_query($sql, $feature_id, $analysis_id, $type_id));
  138. $hi_rank = 0;
  139. if ($afp) {
  140. $hi_rank = $afp->max + 1;
  141. }
  142. //------------------------------------------------------------
  143. // Insert interpro html tags into analysisfeatureprop table
  144. //------------------------------------------------------------
  145. // Before inserting, make sure it's not a duplicate
  146. $sql = "SELECT value FROM {analysisfeatureprop} WHERE analysisfeature_id = %d AND type_id = %d";
  147. $result = db_query($sql, $analysisfeature_id, $type_id);
  148. $duplicate = 0;
  149. while ($afp_value = db_fetch_object($result)) {
  150. if ($table_txt == $afp_value->value) {
  151. $duplicate = 1;
  152. }
  153. }
  154. if (!$duplicate) {
  155. $sql = "INSERT INTO {analysisfeatureprop} (analysisfeature_id, type_id, value, rank)".
  156. "VALUES (%d, %d, '%s', %d)";
  157. db_query($sql, $analysisfeature_id, $type_id, $table_txt, $hi_rank);
  158. fwrite($log, " (Insert)\n"); // write to log
  159. print "\twriting table\n";
  160. } else {
  161. fwrite($log, " (Skipped)\n");
  162. print "\tskipping table - dup\n";
  163. }
  164. // Parse GO terms. Make sure GO database schema is installed in chado
  165. $go_db_id = db_result(db_query("SELECT db_id FROM {db} WHERE name='GO'"));
  166. if (!$go_db_id) {
  167. print 'GO schema not installed in chado. GO terms are not processed.';
  168. }
  169. if ($go_db_id && $parsego) {
  170. $trs = $table->children();
  171. foreach ($trs as $tr) {
  172. $tds = $tr->children();
  173. foreach($tds as $td) {
  174. $gotags = $td->children();
  175. foreach ($gotags as $gotag) {
  176. // Look for 'GO:accession#'
  177. if (preg_match("/^.*?GO:(\d+).*$/", $gotag, $matches)) {
  178. // Find cvterm_id for the matched GO term
  179. $sql = "SELECT cvterm_id FROM {cvterm} CVT
  180. INNER JOIN dbxref DBX ON CVT.dbxref_id = DBX.dbxref_id
  181. WHERE DBX.accession = '%s' AND DBX.db_id = %d";
  182. $goterm_id = db_result(db_query($sql, $matches[1], $go_db_id));
  183. //-------------------------------------------
  184. // Insert GO terms into feature_cvterm table
  185. //-------------------------------------------
  186. // Default pub_id = 1 (NULL) was used
  187. $sql = "INSERT INTO {feature_cvterm} (feature_id, cvterm_id, pub_id)
  188. VALUES (%d, %d, 1)";
  189. db_query($sql, $feature_id, $goterm_id);
  190. //------------------------------------------------
  191. // Insert GO terms into analysisfeatureprop table
  192. //------------------------------------------------
  193. $sql = "INSERT INTO {analysisfeatureprop} (analysisfeature_id, type_id, value, rank) ".
  194. "VALUES (%d, %d, '%s', 0)";
  195. db_query($sql, $analysisfeature_id, $goterm_id, $matches[1]);
  196. }
  197. }
  198. }
  199. }
  200. }
  201. }
  202. }
  203. }
  204. tripal_db_set_active ($previous_db); // Use drupal database
  205. print "Done.\nSuccessful and failed entries have been saved in the log file:\n $logfile\n";
  206. fwrite($log, "\n");
  207. fclose($log);
  208. return;
  209. }
  210. /**
  211. *
  212. */
  213. function tripal_analysis_interpro_parseXMLFile ($analysis_id, $interproxmlfile,
  214. $parsego, $query_re, $query_type, $query_uniquename, $job_id)
  215. {
  216. // clear out the anslysisfeature table for this analysis before getting started
  217. tripal_core_chado_delete('analysisfeature',array('analysis_id' => $analysis_id));
  218. // If user input a file (e.g. blast.xml)
  219. if (is_file($interproxmlfile)) {
  220. tripal_analysis_interpro_parseSingleXMLFile ($analysis_id, $interproxmlfile,
  221. $parsego, $query_re, $query_type, $query_uniquename, $job_id);
  222. }
  223. else {
  224. $dir_handle = @opendir($interproxmlfile) or die("Unable to open $interproxmlfile");
  225. $pattern = sql_regcase($interproxmlfile . "/*.xml");
  226. $total_files = count(glob($pattern));
  227. print "$total_files file(s) to be parsed.\n";
  228. $interval = intval($total_files * 0.01);
  229. if($interval == 0){
  230. $interval = 1;
  231. }
  232. $no_file = 0;
  233. // Parsing all files in the directory
  234. while ($file = readdir($dir_handle)) {
  235. if(preg_match("/^.*\.xml/i",$file)){
  236. tripal_analysis_interpro_parseSingleXMLFile ($analysis_id, "$interproxmlfile/$file",
  237. $parsego, $query_re, $query_type, $query_uniquename, $job_id,0);
  238. // Set job status
  239. if ($no_file % $interval == 0) {
  240. $percentage = (int) (($no_file / $total_files) * 100);
  241. tripal_job_set_progress($job_id, $percentage);
  242. print $percentage."% ";
  243. }
  244. }
  245. $no_file ++;
  246. }
  247. }
  248. print "Done.";
  249. }
  250. /**
  251. *
  252. */
  253. function tripal_analysis_interpro_parseSingleXMLFile ($analysis_id, $interproxmlfile,
  254. $parsego, $query_re, $query_type, $query_uniquename, $job_id,$uptate_status = 1)
  255. {
  256. // Parsing started
  257. print "Parsing File:".$interproxmlfile." ...\n";
  258. // Get cvterm_id for 'analysis_interpro_xmloutput_hits' which is required
  259. // for inserting into the analysisfeatureprop table
  260. $previous_db = db_set_active('chado'); // use chado database
  261. $sql = "SELECT CVT.cvterm_id FROM {cvterm} CVT ".
  262. " INNER JOIN cv ON cv.cv_id = CVT.cv_id ".
  263. "WHERE CVT.name = 'analysis_interpro_xmloutput_hit' ".
  264. " AND CV.name = 'tripal'";
  265. $type_id = db_result(db_query($sql));
  266. // Load the XML file
  267. $interproput = simplexml_load_file($interproxmlfile);
  268. // Get entries parsing
  269. $xml = $interproput->children();
  270. // If there is an EBI header then we need to skip that
  271. // and set our proteins array to be the second element of the array. This
  272. // occurs if results were generated with the online InterProScan tool.
  273. // if the XML starts in with the results then this happens when InterProScan
  274. // is used command-line and we can just use the object as is
  275. if(preg_match('/^Header$/',$xml[0]->getname())){
  276. $proteins = $xml[1];
  277. } else {
  278. $proteins = $xml[0];
  279. }
  280. // Count the number of entires to be processed
  281. $no_iterations = 0;
  282. foreach($proteins as $protein) {
  283. $no_iterations ++;
  284. }
  285. print " Found results for $no_iterations sequences\n";
  286. $interval = intval($no_iterations * 0.01);
  287. $idx_iterations = 0;
  288. // get the DB id for the GO database
  289. $parsego = tripal_analysis_get_property($analysis_id,'analysis_interpro_parsego');
  290. $go_db_id = db_result(db_query("SELECT db_id FROM {db} WHERE name='GO'"));
  291. if ($parsego and !$go_db_id) {
  292. print 'GO schema not installed in chado. GO terms are not processed.';;
  293. }
  294. // Processed each protein
  295. foreach ($proteins as $protein) {
  296. // Set job status
  297. $idx_iterations ++;
  298. if ($idx_iterations % $interval == 0 and $update_status) {
  299. $percentage = (int) ($idx_iterations / $no_iterations * 100);
  300. db_set_active($previous_db);
  301. tripal_job_set_progress($job_id, $percentage);
  302. $previous_db = db_set_active('chado');
  303. print $percentage."% ";
  304. }
  305. // match the protein id with the feature name
  306. $feature_id = 0;
  307. $attr = $protein->attributes();
  308. $seqname =$attr ['id'];
  309. // is the sequence name a generic name (i.e. 'Sequence_1') then the
  310. // blast results do not contain the original sequence names. The only
  311. // option we have is to use the filename. This will work in the case of
  312. // Blast2GO which stores the XML for each sequence in a file with the
  313. // the filename the name of the sequence
  314. if(preg_match('/Sequence_\d+/',$seqname)){
  315. $filename = preg_replace('/^.*\/(.*).xml$/', '$1', $interproxmlfile);
  316. print " Sequence name is not specific, using filename: $filename\n";
  317. $seqname = $filename;
  318. }
  319. // Remove _ORF from the sequence name
  320. $seqname = preg_replace('/^(.+)_\d+_ORF\d+.*/', '$1', $seqname);
  321. // if a regular expression is provided then pick out the portion requested
  322. if ($query_re and preg_match("/$query_re/", $seqname, $matches)) {
  323. $feature = $matches[1];
  324. }
  325. // If no match by the regular expression then get everything up to the first space
  326. else {
  327. if (preg_match('/^(.*?)\s.*$/', $seqname, $matches)) {
  328. $feature = $matches[1];
  329. }
  330. // if no match up to the first space then just use the entire string
  331. else {
  332. $feature = $seqname;
  333. }
  334. }
  335. if(!$feature and $query_re){
  336. print "Failed: Cannot find feature for '$seqname' using the regular expression: $query_re\n";
  337. continue;
  338. }
  339. // now find the feature in chado
  340. $select = array();
  341. if($query_uniquename){
  342. $select['uniquename'] = $feature;
  343. } else {
  344. $select['name'] = $feature;
  345. }
  346. if($query_type){
  347. $select['type_id'] = array(
  348. 'cv_id' => array(
  349. 'name' => 'sequence'
  350. ),
  351. 'name' => $query_type,
  352. );
  353. }
  354. $feature_arr = tripal_core_chado_select('feature',array('feature_id'),$select);
  355. if(count($feature_arr) > 1){
  356. print "Ambiguous: '$feature' matches more than one feature and is being skipped.\n";
  357. continue;
  358. }
  359. if(count($feature_arr) == 0){
  360. print "Failed: '$feature' cannot find a matching feature in the database.\n";
  361. continue;
  362. }
  363. $feature_id = $feature_arr[0]->feature_id;
  364. // Successfully matched. print 'Succeeded'. Add analysis_id and
  365. // feature_id to analysisfeature. Add the table as XML to analysisfeatureprop
  366. if ($feature_id) {
  367. print " Adding InterPro results for feature '$seqname' ($feature_id)\n";
  368. // Insert into analysisfeature table
  369. $sql = "INSERT INTO {analysisfeature} (feature_id, analysis_id) ".
  370. "VALUES (%d, %d)";
  371. db_query ($sql, $feature_id, $analysis_id);
  372. // Get the analysisfeature_id
  373. $sql = "SELECT analysisfeature_id FROM {analysisfeature} WHERE feature_id = %d AND analysis_id = %d";
  374. $analysisfeature_id = db_result(db_query($sql, $feature_id, $analysis_id));
  375. // Insert interpro xml results into analysisfeatureprop table
  376. // Check to see if we have an existing entry
  377. $sql = "SELECT analysisfeatureprop_id,rank
  378. FROM {analysisfeatureprop}
  379. WHERE analysisfeature_id = %d AND type_id = %d
  380. ORDER BY rank DESC";
  381. $result = db_fetch_object(db_query($sql, $analysisfeature_id, $type_id));
  382. $rank = 0;
  383. if($result){
  384. $afp_id = $result->analysisfeatureprop_id;
  385. $rank = $result->rank + 1;
  386. }
  387. $sql = "INSERT INTO {analysisfeatureprop} (analysisfeature_id, type_id, value, rank)".
  388. "VALUES (%d, %d, '%s', %d)";
  389. db_query($sql, $analysisfeature_id, $type_id, $protein->asXML(), $rank);
  390. // parse the XML for each protein if GO terms are requested
  391. if($parsego and $go_db_id){
  392. $protein = tripal_analysis_interpro_get_result_object($protein->asXML(),$feature_id);
  393. $goterms = $protein['goterms'];
  394. // cycle through the GO terms and add them to the database
  395. foreach($goterms as $goterm){
  396. // seperate the 'GO:' from the term
  397. if (preg_match("/^.*?GO:(\d+).*$/", $goterm, $matches)) {
  398. // Find cvterm_id for the matched GO term
  399. $sql = "SELECT cvterm_id FROM {cvterm} CVT
  400. INNER JOIN dbxref DBX ON CVT.dbxref_id = DBX.dbxref_id
  401. WHERE DBX.accession = '%s' AND DBX.db_id = %d";
  402. $goterm_id = db_result(db_query($sql, $matches[1], $go_db_id));
  403. // Insert GO terms into feature_cvterm table
  404. // Default pub_id = 1 (NULL) was used
  405. $sql = "INSERT INTO {feature_cvterm} (feature_id, cvterm_id, pub_id)
  406. VALUES (%d, %d, 1)";
  407. db_query($sql, $feature_id, $goterm_id);
  408. // Insert GO terms into analysisfeatureprop table
  409. $sql = "INSERT INTO {analysisfeatureprop} (analysisfeature_id, type_id, value, rank) ".
  410. "VALUES (%d, %d, '%s', 0)";
  411. db_query($sql, $analysisfeature_id, $goterm_id, $matches[1]);
  412. } // end if preg_match
  413. } // end for each goterm
  414. } // end if($parsego and $go_db_id)
  415. } // end if($feature_id)
  416. } // end foreach ($proteins as $protein)
  417. db_set_active ($previous_db); // Use drupal database
  418. return;
  419. }
  420. /********************************************************************************
  421. *
  422. */
  423. function tripal_analysis_interpro_get_result_object($interpro_xml,$feature_id){
  424. // Load the XML into an object
  425. $xmlObj = simplexml_load_string($interpro_xml);
  426. // iterate through each interpro results for this protein
  427. $results = array();
  428. $terms = array();
  429. $protein = array();
  430. $iprterms = array();
  431. $goterms = array();
  432. $term_count = 0;
  433. $match_count = 0;
  434. // get the properties of this result
  435. $attr = $xmlObj->attributes();
  436. $protein['orf_id'] = (string) $attr["id"];
  437. $protein['orf_length'] = (string) $attr["length"];
  438. $protein['orf_crc64'] = (string) $attr["crc64"];
  439. foreach($xmlObj->children() as $intepro){
  440. // get the interpro term for this match
  441. $attr = $intepro->attributes();
  442. $terms[$term_count]['ipr_id'] = (string) $attr["id"];
  443. $terms[$term_count]['ipr_name'] = (string) $attr["name"];
  444. $terms[$term_count]['ipr_type'] = (string) $attr["type"];
  445. $iprterms[] = array($terms[$term_count]['ipr_id'],$terms[$term_count]['ipr_name']);
  446. // iterate through the elements of the interpro result
  447. $matches[$term_count]['matches'] = array();
  448. $match_count = 0;
  449. foreach($intepro->children() as $level1){
  450. $element_name = $level1->getName();
  451. if($element_name == 'match'){
  452. // get the match name for this match
  453. $attr = $level1->attributes();
  454. $terms[$term_count]['matches'][$match_count]['match_id'] = (string) $attr["id"];
  455. $terms[$term_count]['matches'][$match_count]['match_name'] = (string) $attr["name"];
  456. $terms[$term_count]['matches'][$match_count]['match_dbname'] = (string) $attr["dbname"];
  457. // get the location information for this match
  458. $loc_count = 0;
  459. foreach($level1->children() as $level2){
  460. $element_name = $level2->getName();
  461. if($element_name == 'location'){
  462. $attr = $level2->attributes();
  463. $terms[$term_count]['matches'][$match_count]['locations'][$loc_count]['match_start'] = (string) $attr["start"];
  464. $terms[$term_count]['matches'][$match_count]['locations'][$loc_count]['match_end'] = (string) $attr["end"];
  465. $terms[$term_count]['matches'][$match_count]['locations'][$loc_count]['match_score'] = (string) $attr["score"];
  466. $terms[$term_count]['matches'][$match_count]['locations'][$loc_count]['match_status'] = (string) $attr["status"];
  467. $terms[$term_count]['matches'][$match_count]['locations'][$loc_count]['match_evidence'] = (string) $attr["evidence"];
  468. $loc_count++;
  469. }
  470. }
  471. $match_count++;
  472. }
  473. if($element_name == 'classification'){
  474. $attr = $level1->attributes();
  475. if($attr['class_type'] == 'GO'){
  476. $terms[$term_count]['matches'][$match_count]['go_terms'][] = (string) $attr['id'];
  477. $goterms[] = (string) $attr['id'];
  478. }
  479. }
  480. }
  481. $term_count++;
  482. }
  483. $results['terms'] = $terms;
  484. $results['orf'] = $protein;
  485. $results['iprterms'] = $iprterms;
  486. $results['goterms'] = $goterms;
  487. return $results;
  488. }