obo_loader.php 26 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798
  1. <?php
  2. /**
  3. * @defgroup tripal_obo_loader Tripal Ontology Loader
  4. * @ingroup tripal_cv
  5. */
  6. /**
  7. *
  8. * @ingroup tripal_obo_loader
  9. */
  10. function tripal_cv_load_obo_v1_2_id($obo_id,$jobid = NULL){
  11. // get the OBO reference
  12. $sql = "SELECT * FROM {tripal_cv_obo} WHERE obo_id = %d";
  13. $obo = db_fetch_object(db_query($sql,$obo_id));
  14. // if the reference is for a remote URL then run the URL processing function
  15. if(preg_match("/^http:\/\/",$obo->path) or preg_match("/^ftp:\/\/",$obo->path)){
  16. tripal_cv_load_obo_v1_2_url($obo->name,$obo->path,$jobid,0);
  17. }
  18. // if the reference is for a local file then run the file processing function
  19. else {
  20. // check to see if the file is located local to Drupal
  21. $dfile = $_SERVER['DOCUMENT_ROOT'] . base_path() . $obo->path;
  22. if(file_exists($dfile)){
  23. tripal_cv_load_obo_v1_2_file($obo->name,$dfile,$jobid,0);
  24. }
  25. // if not local to Drupal, the file must be someplace else, just use
  26. // the full path provided
  27. else{
  28. tripal_cv_load_obo_v1_2_file($obo->name,$obo->path,$jobid,0);
  29. }
  30. }
  31. }
  32. /**
  33. *
  34. * @ingroup tripal_obo_loader
  35. */
  36. function tripal_cv_load_obo_v1_2_file($obo_name,$file,$jobid = NULL,$is_new = 1){
  37. $newcvs = array();
  38. tripal_cv_load_obo_v1_2($file,$jobid,$newcvs);
  39. if($is_new){
  40. tripal_cv_load_obo_add_ref($obo_name,$file);
  41. }
  42. // update the cvtermpath table
  43. tripal_cv_load_update_cvtermpath($newcvs,$jobid);
  44. print "Ontology Sucessfully loaded!\n";
  45. }
  46. /**
  47. *
  48. * @ingroup tripal_obo_loader
  49. */
  50. function tripal_cv_load_obo_v1_2_url($obo_name,$url,$jobid = NULL,$is_new = 1){
  51. $newcvs = array();
  52. // first download the OBO
  53. $temp = tempnam(sys_get_temp_dir(),'obo_');
  54. print "Opening URL $url\n";
  55. $url_fh = fopen($url,"r");
  56. $obo_fh = fopen($temp,"w");
  57. while(!feof($url_fh)){
  58. fwrite($obo_fh,fread($url_fh,255),255);
  59. }
  60. fclose($url_fh);
  61. fclose($obo_fh);
  62. // second, parse the OBO
  63. tripal_cv_load_obo_v1_2($temp,$jobid,$newcvs);
  64. // now remove the temp file
  65. unlink($temp);
  66. if($is_new){
  67. tripal_cv_load_obo_add_ref($obo_name,$url);
  68. }
  69. // update the cvtermpath table
  70. tripal_cv_load_update_cvtermpath($newcvs,$jobid);
  71. print "Ontology Sucessfully loaded!\n";
  72. }
  73. /**
  74. *
  75. * @ingroup tripal_obo_loader
  76. */
  77. function tripal_cv_load_update_cvtermpath($newcvs,$jobid){
  78. print "\nUpdating cvtermpath table. This may take a while...\n";
  79. foreach($newcvs as $namespace => $cvid){
  80. tripal_cv_update_cvtermpath($cvid, $jobid);
  81. }
  82. }
  83. /**
  84. *
  85. */
  86. function tripal_cv_load_obo_add_ref($name,$path){
  87. $isql = "INSERT INTO tripal_cv_obo (name,path) VALUES ('%s','%s')";
  88. db_query($isql,$name,$path);
  89. }
  90. /**
  91. *
  92. * @ingroup tripal_obo_loader
  93. */
  94. function tripal_cv_load_obo_v1_2($file,$jobid = NULL,&$newcvs) {
  95. global $previous_db;
  96. $header = array();
  97. $obo = array();
  98. print "Opening File $file\n";
  99. // set the search path
  100. $previous_db = tripal_db_set_active('chado');
  101. // make sure we have an 'internal' and a '_global' database
  102. if(!tripal_cv_obo_add_db('internal')){
  103. tripal_cv_obo_quiterror("Cannot add 'internal' database");
  104. }
  105. if(!tripal_cv_obo_add_db('_global')){
  106. tripal_cv_obo_quiterror("Cannot add '_global' database");
  107. }
  108. // parse the obo file
  109. tripal_cv_obo_parse($file,$obo,$header);
  110. // add the CV for this ontology to the database
  111. $defaultcv = tripal_cv_obo_add_cv($header['default-namespace'][0],'');
  112. if(!$defaultcv){
  113. tripal_cv_obo_quiterror('Cannot add namespace ' . $header['default-namespace'][0]);
  114. }
  115. $newcvs[$header['default-namespace'][0]] = $defaultcv->cv_id;
  116. // add any typedefs to the vocabulary first
  117. $typedefs = $obo['Typedef'];
  118. foreach($typedefs as $typedef){
  119. tripal_cv_obo_process_term($typedef,$defaultcv,$obo,1,$newcvs);
  120. }
  121. // next add terms to the vocabulary
  122. $terms = $obo['Term'];
  123. if(!tripal_cv_obo_process_terms($terms,$defaultcv,$obo,$jobid,$newcvs)){
  124. tripal_cv_obo_quiterror('Cannot add terms from this ontology');
  125. }
  126. return tripal_cv_obo_loader_done();
  127. }
  128. /**
  129. *
  130. * @ingroup tripal_obo_loader
  131. */
  132. function tripal_cv_obo_quiterror ($message){
  133. print "ERROR: $message\n";
  134. db_query("set search_path to public");
  135. exit;
  136. }
  137. /**
  138. *
  139. * @ingroup tripal_obo_loader
  140. */
  141. function tripal_cv_obo_loader_done (){
  142. // return the search path to normal
  143. tripal_db_set_active($previous_db);
  144. db_query("set search_path to public");
  145. return '';
  146. }
  147. /**
  148. *
  149. * @ingroup tripal_obo_loader
  150. */
  151. function tripal_cv_obo_process_terms($terms,$defaultcv,$obo,$jobid=null,&$newcvs){
  152. $i = 0;
  153. $count = sizeof($terms);
  154. $interval = intval($count * 0.01);
  155. foreach ($terms as $term){
  156. // update the job status every 1% terms
  157. if($jobid and $i % $interval == 0){
  158. tripal_job_set_progress($jobid,intval(($i/$count)*50)); // we mulitply by 50 because parsing and loacing cvterms
  159. // is only the first half. The other half is updating
  160. } // the cvtermpath table.
  161. if(!tripal_cv_obo_process_term($term,$defaultcv,$obo,0,$newcvs)){
  162. tripal_cv_obo_quiterror("Failed to process terms from the ontology");
  163. }
  164. $i++;
  165. }
  166. return 1;
  167. }
  168. /**
  169. *
  170. * @ingroup tripal_obo_loader
  171. */
  172. function tripal_cv_obo_process_term($term,$defaultcv,$obo,$is_relationship=0,&$newcvs){
  173. // add the cvterm
  174. $cvterm = tripal_cv_obo_add_cv_term($term,$defaultcv,$is_relationship,1);
  175. if(!$cvterm){
  176. tripal_cv_obo_quiterror("Cannot add the term " . $term['id'][0]);
  177. }
  178. if($term['namespace'][0]){
  179. $newcvs[$term['namespace'][0]] = $cvterm->cv_id;
  180. }
  181. // now handle other properites
  182. if(isset($term['is_anonymous'])){
  183. print "WARNING: unhandled tag: is_anonymous\n";
  184. }
  185. if(isset($term['alt_id'])){
  186. foreach($term['alt_id'] as $alt_id){
  187. if(!tripal_cv_obo_add_cvterm_dbxref($cvterm,$alt_id)){
  188. tripal_cv_obo_quiterror("Cannot add alternate id $alt_id");
  189. }
  190. }
  191. }
  192. if(isset($term['subset'])){
  193. print "WARNING: unhandled tag: subset\n";
  194. }
  195. // add synonyms for this cvterm
  196. if(isset($term['synonym'])){
  197. if(!tripal_cv_obo_add_synonyms($term,$cvterm)){
  198. tripal_cv_obo_quiterror("Cannot add synonyms");
  199. }
  200. }
  201. // reformat the deprecated 'exact_synonym, narrow_synonym, and broad_synonym'
  202. // types to be of the v1.2 standard
  203. if(isset($term['exact_synonym']) or isset($term['narrow_synonym']) or isset($term['broad_synonym'])){
  204. if(isset($term['exact_synonym'])){
  205. foreach ($term['exact_synonym'] as $synonym){
  206. $new = preg_replace('/^\s*(\".+?\")(.*?)$/','$1 EXACT $2',$synonym);
  207. $term['synonym'][] = $new;
  208. }
  209. }
  210. if(isset($term['narrow_synonym'])){
  211. foreach ($term['narrow_synonym'] as $synonym){
  212. $new = preg_replace('/^\s*(\".+?\")(.*?)$/','$1 NARROW $2',$synonym);
  213. $term['synonym'][] = $new;
  214. }
  215. }
  216. if(isset($term['broad_synonym'])){
  217. foreach ($term['broad_synonym'] as $synonym){
  218. $new = preg_replace('/^\s*(\".+?\")(.*?)$/','$1 BROAD $2',$synonym);
  219. $term['synonym'][] = $new;
  220. }
  221. }
  222. if(!tripal_cv_obo_add_synonyms($term,$cvterm)){
  223. tripal_cv_obo_quiterror("Cannot add/update synonyms");
  224. }
  225. }
  226. // add the comment to the cvtermprop table
  227. if(isset($term['comment'])){
  228. $comments = $term['comment'];
  229. $j = 0;
  230. foreach($comments as $comment){
  231. if(!tripal_cv_obo_add_cvterm_prop($cvterm,'comment',$comment,$j)){
  232. tripal_cv_obo_quiterror("Cannot add/update cvterm property");
  233. }
  234. $j++;
  235. }
  236. }
  237. // add any other external dbxrefs
  238. if(isset($term['xref']) or isset($term['xref_analog']) or isset($term['xref_unk'])){
  239. foreach($term['xref'] as $xref){
  240. if(!tripal_cv_obo_add_cvterm_dbxref($cvterm,$xref)){
  241. tripal_cv_obo_quiterror("Cannot add/update cvterm database reference (dbxref).");
  242. }
  243. }
  244. }
  245. // add is_a relationships for this cvterm
  246. if(isset($term['is_a'])){
  247. foreach($term['is_a'] as $is_a){
  248. if(!tripal_cv_obo_add_relationship($cvterm,$defaultcv,$obo,'is_a',$is_a,$is_relationship)){
  249. tripal_cv_obo_quiterror("Cannot add relationship is_a: $is_a");
  250. }
  251. }
  252. }
  253. if(isset($term['intersection_of'])){
  254. print "WARNING: unhandled tag: intersection_of\n";
  255. }
  256. if(isset($term['union_of'])){
  257. print "WARNING: unhandled tag: union_on\n";
  258. }
  259. if(isset($term['disjoint_from'])){
  260. print "WARNING: unhandled tag: disjoint_from\n";
  261. }
  262. if(isset($term['relationship'])){
  263. foreach($term['relationship'] as $value){
  264. $rel = preg_replace('/^(.+?)\s.+?$/','\1',$value);
  265. $object = preg_replace('/^.+?\s(.+?)$/','\1',$value);
  266. if(!tripal_cv_obo_add_relationship($cvterm,$defaultcv,$obo,$rel,$object,$is_relationship)){
  267. tripal_cv_obo_quiterror("Cannot add relationship $rel: $object");
  268. }
  269. }
  270. }
  271. if(isset($term['replaced_by'])){
  272. print "WARNING: unhandled tag: replaced_by\n";
  273. }
  274. if(isset($term['consider'])){
  275. print "WARNING: unhandled tag: consider\n";
  276. }
  277. if(isset($term['use_term'])){
  278. print "WARNING: unhandled tag: user_term\n";
  279. }
  280. if(isset($term['builtin'])){
  281. print "WARNING: unhandled tag: builtin\n";
  282. }
  283. return 1;
  284. }
  285. /**
  286. *
  287. * @ingroup tripal_obo_loader
  288. */
  289. function tripal_cv_obo_add_db($dbname){
  290. $db_sql = "SELECT * FROM {db} WHERE name ='%s'";
  291. $db = db_fetch_object(db_query($db_sql,$dbname));
  292. if(!$db){
  293. if(!db_query("INSERT INTO {db} (name) VALUES ('%s')",$dbname)){
  294. tripal_cv_obo_quiterror("Cannot create '$dbname' db in Chado.");
  295. }
  296. $db = db_fetch_object(db_query($db_sql,$dbname));
  297. }
  298. return $db;
  299. }
  300. /**
  301. *
  302. * @ingroup tripal_obo_loader
  303. */
  304. function tripal_cv_obo_add_cv($name,$comment){
  305. // see if the CV (default-namespace) exists already in the database
  306. $vocab = $name;
  307. $remark = $comment;
  308. $cv_sql = "SELECT * FROM {cv} WHERE name = '%s'";
  309. $cv = db_fetch_object(db_query($cv_sql,$vocab));
  310. // if the CV exists then update it, otherwise insert
  311. if(!$cv){
  312. $sql = "INSERT INTO {cv} (name,definition) VALUES ('%s','%s')";
  313. if(!db_query($sql,$vocab,$remark)){
  314. tripal_cv_obo_quiterror("Failed to create the CV record");
  315. }
  316. $cv = db_fetch_object(db_query($cv_sql,$vocab));
  317. } else {
  318. $sql = "UPDATE {cv} SET definition = '%s' WHERE name ='%s'";
  319. if(!db_query($sql,$remark,$vocab)){
  320. tripal_cv_obo_quiterror("Failed to update the CV record");
  321. }
  322. $cv = db_fetch_object(db_query($cv_sql,$vocab));
  323. }
  324. return $cv;
  325. }
  326. /**
  327. *
  328. * @ingroup tripal_obo_loader
  329. */
  330. function tripal_cv_obo_add_cvterm_prop($cvterm,$property,$value,$rank){
  331. // make sure the 'cvterm_property_type' CV exists
  332. $cv = tripal_cv_obo_add_cv('cvterm_property_type','');
  333. if(!$cv){
  334. tripal_cv_obo_quiterror("Cannot add/find cvterm_property_type cvterm");
  335. }
  336. // get the property type cvterm. If it doesn't exist then we want to add it
  337. $sql = "
  338. SELECT *
  339. FROM {cvterm} CVT INNER JOIN {cv} CV on CVT.cv_id = CV.cv_id
  340. WHERE CVT.name = '%s' and CV.name = '%s'
  341. ";
  342. $cvproptype = db_fetch_object(db_query($sql,$property,'cvterm_property_type'));
  343. if(!$cvproptype){
  344. $term = array(
  345. 'name' => array($property),
  346. 'id' => array("internal:$property"),
  347. 'definition' => array(''),
  348. 'is_obsolete' => array(0),
  349. );
  350. $cvproptype = tripal_cv_obo_add_cv_term($term,$cv,0,0);
  351. if(!$cvproptype){
  352. tripal_cv_obo_quiterror("Cannot add cvterm property: internal:$property");
  353. }
  354. }
  355. // remove any properties that currently exist for this term. We'll reset them
  356. if($rank == 0){
  357. $sql = "DELETE FROM {cvtermprop} WHERE cvterm_id = %d";
  358. db_query($sql,$cvterm->cvterm_id);
  359. }
  360. // now add the property
  361. $sql = "INSERT INTO {cvtermprop} (cvterm_id,type_id,value,rank) ".
  362. "VALUES (%d, %d, '%s',%d)";
  363. if(!db_query($sql,$cvterm->cvterm_id,$cvproptype->cvterm_id,$value,$rank)){
  364. tripal_cv_obo_quiterror("Could not add property $property for term\n");
  365. }
  366. return 1;
  367. }
  368. /**
  369. *
  370. * @ingroup tripal_obo_loader
  371. */
  372. function tripal_cv_obo_add_relationship($cvterm,$defaultcv,$obo,$rel,$objname,$object_is_relationship=0){
  373. // make sure the relationship cvterm exists
  374. $term = array(
  375. 'name' => array($rel),
  376. 'id' => array($rel),
  377. 'definition' => array(''),
  378. 'is_obsolete' => array(0),
  379. );
  380. $relcvterm = tripal_cv_obo_add_cv_term($term,$defaultcv,1,0);
  381. if(!$relcvterm){
  382. tripal_cv_obo_quiterror("Cannot find or insert the relationship term: $rel\n");
  383. }
  384. // get the object term
  385. $objterm = tripal_cv_obo_get_term($obo,$objname);
  386. if(!$objterm) {
  387. tripal_cv_obo_quiterror("Could not find object term $objname\n");
  388. }
  389. $objcvterm = tripal_cv_obo_add_cv_term($objterm,$defaultcv,$object_is_relationship,1);
  390. if(!$objcvterm){
  391. tripal_cv_obo_quiterror("Cannot add/find cvterm");
  392. }
  393. // check to see if the cvterm_relationship already exists, if not add it
  394. $cvrsql = "SELECT * FROM {cvterm_relationship} WHERE type_id = %d and subject_id = %d and object_id = %d";
  395. if(!db_fetch_object(db_query($cvrsql,$relcvterm->cvterm_id,$cvterm->cvterm_id,$objcvterm->cvterm_id))){
  396. $sql = "INSERT INTO {cvterm_relationship} ".
  397. "(type_id,subject_id,object_id) VALUES (%d,%d,%d)";
  398. if(!db_query($sql,$relcvterm->cvterm_id,$cvterm->cvterm_id,$objcvterm->cvterm_id)){
  399. tripal_cv_obo_quiterror("Cannot add term relationship: '$cvterm->name' $rel '$objcvterm->name'");
  400. }
  401. }
  402. return 1;
  403. }
  404. /**
  405. *
  406. * @ingroup tripal_obo_loader
  407. */
  408. function tripal_cv_obo_get_term($obo,$id){
  409. foreach ($obo as $type){
  410. foreach ($type as $term){
  411. $accession = $term['id'][0];
  412. if(strcmp($accession,$id)==0){
  413. return $term;
  414. }
  415. }
  416. }
  417. return;
  418. }
  419. /**
  420. *
  421. * @ingroup tripal_obo_loader
  422. */
  423. function tripal_cv_obo_add_synonyms($term,$cvterm){
  424. // make sure we have a 'synonym_type' vocabulary
  425. $sql = "SELECT * FROM {cv} WHERE name='synonym_type'";
  426. $syncv = db_fetch_object(db_query($sql));
  427. if(!$syncv){
  428. $sql = "INSERT INTO {cv} (name,definition) VALUES ('synonym_type','')";
  429. if(!db_query($sql)){
  430. tripal_cv_obo_quiterror("Failed to add the synonyms type vocabulary");
  431. }
  432. $syncv = db_fetch_object(db_query($sql));
  433. }
  434. // now add the synonyms
  435. if(isset($term['synonym'])){
  436. foreach($term['synonym'] as $synonym){
  437. // separate out the synonym definition and the synonym type
  438. $def = preg_replace('/^\s*"(.*)"\s*.*$/','\1',$synonym);
  439. $type = strtolower(preg_replace('/^.*"\s+(.*?)\s+.*$/','\1',$synonym));
  440. // make sure the synonym type exists in the 'synonym_type' vocabulary
  441. $cvtsql = "
  442. SELECT *
  443. FROM {cvterm} CVT
  444. INNER JOIN {cv} CV ON CVT.cv_id = CV.cv_id
  445. WHERE CVT.name = '%s' and CV.name = '%s'
  446. ";
  447. $syntype = db_fetch_object(db_query($cvtsql,$type,'synonym_type'));
  448. if(!$syntype){
  449. // build a 'term' object so we can add the missing term
  450. $term = array(
  451. 'name' => array($type),
  452. 'id' => array("internal:$type"),
  453. 'definition' => array(''),
  454. 'is_obsolete' => array(0),
  455. );
  456. $syntype = tripal_cv_obo_add_cv_term($term,$syncv,0,1);
  457. if(!$syntype){
  458. tripal_cv_obo_quiterror("Cannot add synonym type: internal:$type");
  459. }
  460. }
  461. // make sure the synonym doesn't already exists
  462. $sql = "
  463. SELECT *
  464. FROM {cvtermsynonym}
  465. WHERE cvterm_id = %d and synonym = '%s' and type_id = %d
  466. ";
  467. $syn = db_fetch_object(db_query($sql,$cvterm->cvterm_id,$def,$syntype->cvterm_id));
  468. if(!$syn){
  469. $sql = "INSERT INTO {cvtermsynonym} (cvterm_id,synonym,type_id)
  470. VALUES(%d,'%s',%d)";
  471. if(!db_query($sql,$cvterm->cvterm_id,$def,$syntype->cvterm_id)){
  472. tripal_cv_obo_quiterror("Failed to insert the synonym for term: $name ($def)");
  473. }
  474. }
  475. // now add the dbxrefs for the synonym if we have a comma in the middle
  476. // of a description then this will cause problems when splitting os lets
  477. // just change it so it won't mess up our splitting and then set it back
  478. // later.
  479. // $synonym = preg_replace('/(".*?),\s(.*?")/','$1,_$2',$synonym);
  480. // $dbxrefs = preg_split("/, /",preg_replace('/^.*\[(.*?)\]$/','\1',$synonym));
  481. // foreach($dbxrefs as $dbxref){
  482. // $dbxref = preg_replace('/,_/',", ",$dbxref);
  483. // if($dbxref){
  484. // tripal_cv_obo_add_cvterm_dbxref($syn,$dbxref);
  485. // }
  486. // }
  487. }
  488. }
  489. return 1;
  490. }
  491. /**
  492. *
  493. * @ingroup tripal_obo_loader
  494. */
  495. function tripal_cv_obo_add_cv_term($term,$defaultcv,$is_relationship = 0,$update = 1){
  496. // get the term properties
  497. $id = $term['id'][0];
  498. $name = $term['name'][0];
  499. $cvname = $term['namespace'][0];
  500. $definition = preg_replace('/^\"(.*)\"/','\1',$term['def'][0]);
  501. $is_obsolete = 0;
  502. if(isset($term['is_obsolete'][0]) and strcmp($term['is_obsolete'][0],'true')==0){
  503. $is_obsolete = 1;
  504. }
  505. if(!$cvname){
  506. $cvname = $defaultcv->name;
  507. }
  508. // make sure the CV name exists
  509. $cv = tripal_cv_obo_add_cv($cvname,'');
  510. if(!$cv){
  511. tripal_cv_obo_quiterror("Cannot find namespace '$cvname' when adding/updating $id");
  512. }
  513. // this SQL statement will be used a lot to find a cvterm so just set it
  514. // here for easy reference below.
  515. $cvtermsql = "SELECT CVT.name, CVT.cvterm_id, DB.name as dbname, DB.db_id
  516. FROM {cvterm} CVT
  517. INNER JOIN {dbxref} DBX on CVT.dbxref_id = DBX.dbxref_id
  518. INNER JOIN {db} DB on DBX.db_id = DB.db_id
  519. INNER JOIN {cv} CV on CV.cv_id = CVT.cv_id
  520. WHERE CVT.name = '%s' and DB.name = '%s'";
  521. // get the accession and the database from the cvterm
  522. if(preg_match('/^.+?:.*$/',$id)){
  523. $accession = preg_replace('/^.+?:(.*)$/','\1',$id);
  524. $dbname = preg_replace('/^(.+?):.*$/','\1',$id);
  525. }
  526. if($is_relationship and !$dbname){
  527. $accession = $id;
  528. // because this is a relationship cvterm first check to see if it
  529. // exists in the relationship ontology. If it does then return the cvterm.
  530. // If not then set the dbname to _global and we'll add it or find it there
  531. $cvterm = db_fetch_object(db_query($cvtermsql,$name,'OBO_REL'));
  532. if($cvterm){
  533. return $cvterm;
  534. } else {
  535. // next check if this term is in the _global ontology. If it is then
  536. // return it no matter what the original CV
  537. $dbname = '_global';
  538. $cvterm = db_fetch_object(db_query($cvtermsql,$name,$dbname));
  539. if($cvterm){
  540. return $cvterm;
  541. }
  542. }
  543. }
  544. if(!$is_relationship and !$dbname){
  545. tripal_cv_obo_quiterror("A database identifier is missing from the term: $id");
  546. }
  547. // check to see if the database exists.
  548. $db = tripal_cv_obo_add_db($dbname);
  549. if(!$db){
  550. tripal_cv_obo_quiterror("Cannot find database '$dbname' in Chado.");
  551. }
  552. // if the cvterm doesn't exist then add it otherwise just update it
  553. $cvterm = db_fetch_object(db_query($cvtermsql,$name,$dbname));
  554. if(!$cvterm){
  555. // check to see if the dbxref exists if not, add it
  556. $dbxref = tripal_cv_obo_add_dbxref($db->db_id,$accession);
  557. if(!$dbxref){
  558. tripal_cv_obo_quiterror("Failed to find or insert the dbxref record for cvterm, $name (id: $accession), for database $dbname");
  559. }
  560. // now add the cvterm
  561. $sql = "
  562. INSERT INTO {cvterm} (cv_id, name, definition, dbxref_id,
  563. is_obsolete, is_relationshiptype)
  564. VALUES (%d,'%s','%s',%d,%d,%d)
  565. ";
  566. if(!db_query($sql,$cv->cv_id,$name,$definition,
  567. $dbxref->dbxref_id,$is_obsolete,$is_relationship)){
  568. if(!$is_relationship){
  569. tripal_cv_obo_quiterror("Failed to insert the term: $name ($dbname)");
  570. } else {
  571. tripal_cv_obo_quiterror("Failed to insert the relationship term: $name (cv: " . $cvname . " db: $dbname)");
  572. }
  573. }
  574. $cvterm = db_fetch_object(db_query($cvtermsql,$name,$dbname));
  575. if(!$is_relationship){
  576. print "Added CV term: $name ($dbname)\n";
  577. } else {
  578. print "Added relationship CV term: $name ($dbname)\n";
  579. }
  580. }
  581. elseif($update) { // update the cvterm
  582. $sql = "
  583. UPDATE {cvterm} SET name='%s', definition='%s',
  584. is_obsolete = %d, is_relationshiptype = %d
  585. WHERE cvterm_id = %d
  586. ";
  587. if(!db_query($sql,$term['name'][0],$definition,
  588. $is_obsolete,$is_relationship,$cvterm->cvterm_id)){
  589. tripal_cv_obo_quiterror("Failed to update the term: $name");
  590. }
  591. $cvterm = db_fetch_object(db_query($cvtermsql,$name,$dbname));
  592. if(!$is_relationship){
  593. print "Updated CV term: $name ($dbname)\n";
  594. } else {
  595. print "Updated relationship CV term: $name ($dbname)\n";
  596. }
  597. }
  598. // return the cvterm
  599. return $cvterm;
  600. }
  601. /**
  602. *
  603. * @ingroup tripal_obo_loader
  604. */
  605. function tripal_cv_obo_add_cvterm_dbxref($cvterm,$xref){
  606. $dbname = preg_replace('/^(.+?):.*$/','$1',$xref);
  607. $accession = preg_replace('/^.+?:\s*(.*?)(\{.+$|\[.+$|\s.+$|\".+$|$)/','$1',$xref);
  608. $description = preg_replace('/^.+?\"(.+?)\".*?$/','$1',$xref);
  609. $dbxrefs = preg_replace('/^.+?\[(.+?)\].*?$/','$1',$xref);
  610. if(!$accession){
  611. tripal_cv_obo_quiterror("Cannot add a dbxref without an accession: '$xref'");
  612. }
  613. // if the xref is a database link, handle that specially
  614. if(strcmp($dbname,'http')==0){
  615. $accession = $xref;
  616. $dbname = 'URL';
  617. }
  618. // check to see if the database exists
  619. $db = tripal_cv_obo_add_db($dbname);
  620. if(!$db){
  621. tripal_cv_obo_quiterror("Cannot find database '$dbname' in Chado.");
  622. }
  623. // now add the dbxref
  624. $dbxref = tripal_cv_obo_add_dbxref($db->db_id,$accession,'',$description);
  625. if(!$dbxref){
  626. tripal_cv_obo_quiterror("Cannot find or add the database reference (dbxref)");
  627. }
  628. // finally add the cvterm_dbxref but first check to make sure it exists
  629. $sql = "SELECT * from {cvterm_dbxref} WHERE cvterm_id = %d and dbxref_id = %d";
  630. if(!db_fetch_object(db_query($sql,$cvterm->cvterm_id,$dbxref->dbxref_id))){
  631. $sql = "INSERT INTO {cvterm_dbxref} (cvterm_id,dbxref_id)".
  632. "VALUES (%d,%d)";
  633. if(!db_query($sql,$cvterm->cvterm_id,$dbxref->dbxref_id)){
  634. tripal_cv_obo_quiterror("Cannot add cvterm_dbxref: $xref");
  635. }
  636. }
  637. return 1;
  638. }
  639. /**
  640. *
  641. * @ingroup tripal_obo_loader
  642. */
  643. function tripal_cv_obo_add_dbxref($db_id,$accession,$version='',$description=''){
  644. // check to see if the dbxref exists if not, add it
  645. $dbxsql = "SELECT dbxref_id FROM {dbxref} WHERE db_id = %d and accession = '%s'";
  646. $dbxref = db_fetch_object(db_query($dbxsql,$db_id,$accession));
  647. if(!$dbxref){
  648. $sql = "
  649. INSERT INTO {dbxref} (db_id, accession, version, description)
  650. VALUES (%d,'%s','%s','%s')
  651. ";
  652. if(!db_query($sql,$db_id,$accession,$version,$description)){
  653. tripal_cv_obo_quiterror("Failed to insert the dbxref record $accession");
  654. }
  655. print "Added Dbxref accession: $accession\n";
  656. $dbxref = db_fetch_object(db_query($dbxsql,$db_id,$accession));
  657. }
  658. return $dbxref;
  659. }
  660. /**
  661. *
  662. * @ingroup tripal_obo_loader
  663. */
  664. function tripal_cv_obo_parse($obo_file,&$obo,&$header){
  665. $i = 0;
  666. $in_header = 1;
  667. $stanza = array();
  668. // iterate through the lines in the OBO file and parse the stanzas
  669. $fh = fopen($obo_file,'r');
  670. while($line = fgets($fh)) {
  671. $i++;
  672. // remove newlines
  673. $line = rtrim($line);
  674. // skip empty lines
  675. if(strcmp($line,'')==0) { continue; }
  676. //remove comments from end of lines
  677. $line = preg_replace('/^(.*?)\!.*$/','\1',$line); // TODO: if the explamation is escaped
  678. if(preg_match('/^\s*\[/',$line)){ // at the first stanza we're out of header
  679. $in_header = 0;
  680. // load the stanza we just finished reading
  681. if(sizeof($stanza) > 0){
  682. if(!isset($obo[$type])){
  683. $obo[$type] = array();
  684. }
  685. if(!isset($obo[$type][$stanza['id'][0]])){
  686. $obo[$type][$stanza['id'][0]] = $stanza;
  687. } else {
  688. array_merge($obo[$type][$stanza['id'][0]],$stanza);
  689. }
  690. }
  691. // get the stanza type: Term, Typedef or Instance
  692. $type = preg_replace('/^\s*\[\s*(.+?)\s*\]\s*$/','\1',$line);
  693. // start fresh with a new array
  694. $stanza = array();
  695. continue;
  696. }
  697. // break apart the line into the tag and value but ignore any escaped colons
  698. preg_replace("/\\:/","|-|-|",$line); // temporarily replace escaped colons
  699. $pair = explode(":",$line,2);
  700. $tag = $pair[0];
  701. $value = ltrim(rtrim($pair[1]));// remove surrounding spaces
  702. $tag = preg_replace("/\|-\|-\|/","\:",$tag); // return the escaped colon
  703. $value = preg_replace("/\|-\|-\|/","\:",$value);
  704. if($in_header){
  705. if(!isset($header[$tag])){
  706. $header[$tag] = array();
  707. }
  708. $header[$tag][] = $value;
  709. } else {
  710. if(!isset($stanza[$tag])){
  711. $stanza[$tag] = array();
  712. }
  713. $stanza[$tag][] = $value;
  714. }
  715. }
  716. // now add the last term in the file
  717. if(sizeof($stanza) > 0){
  718. if(!isset($obo[$type])){
  719. $obo[$type] = array();
  720. }
  721. if(!isset($obo[$type][$stanza['id'][0]])){
  722. $obo[$type][$stanza['id'][0]] = $stanza;
  723. } else {
  724. array_merge($obo[$type][$stanza['id'][0]],$stanza);
  725. }
  726. }
  727. }