obo_loader.php 26 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774
  1. <?php
  2. /*************************************************************************
  3. *
  4. */
  /**
   * Loads the OBO referenced by a record of the tripal_cv_obo table.
   *
   * A path beginning with http://, https:// or ftp:// is handed to the URL
   * loader. Any other path is handed to the file loader, first resolved
   * relative to the Drupal installation and, if no such file exists, used
   * exactly as stored.
   *
   * @param $obo_id
   *   The obo_id of the record in the tripal_cv_obo table.
   * @param $jobid
   *   Optional job identifier, passed through to the loader functions.
   */
  function tripal_cv_load_obo_v1_2_id($obo_id,$jobid = NULL){
    // get the OBO reference
    $sql = "SELECT * FROM {tripal_cv_obo} WHERE obo_id = %d";
    $obo = db_fetch_object(db_query($sql,$obo_id));
    if(!$obo){
      print "ERROR: Cannot find an OBO reference with the id: $obo_id\n";
      return;
    }

    // if the reference is for a remote URL then run the URL processing
    // function. The pattern needs its closing delimiter: without it
    // preg_match() fails and every path is treated as a local file.
    if(preg_match('/^(https?|ftp):\/\//',$obo->path)){
      tripal_cv_load_obo_v1_2_url($obo->name,$obo->path,$jobid,0);
      return;
    }

    // otherwise the reference is for a file. Check to see if the file is
    // located local to Drupal
    $dfile = $_SERVER['DOCUMENT_ROOT'] . base_path() . $obo->path;
    if(file_exists($dfile)){
      tripal_cv_load_obo_v1_2_file($obo->name,$dfile,$jobid,0);
    }
    // if not local to Drupal, the file must be someplace else, just use
    // the full path provided
    else{
      tripal_cv_load_obo_v1_2_file($obo->name,$obo->path,$jobid,0);
    }
  }
  27. /*************************************************************************
  28. *
  29. */
  /**
   * Loads an OBO file and then refreshes the cvtermpath table.
   *
   * @param $obo_name
   *   Name stored with the reference when $is_new is set.
   * @param $file
   *   Path of the OBO file to load.
   * @param $jobid
   *   Optional job identifier, passed through to the loader.
   * @param $is_new
   *   When true, a reference (name and path) is recorded for this ontology.
   */
  function tripal_cv_load_obo_v1_2_file($obo_name,$file,$jobid = NULL,$is_new = 1){
    // filled by the loader with namespace => cv_id pairs
    $loaded_cvs = array();
    tripal_cv_load_obo_v1_2($file,$jobid,$loaded_cvs);

    // record where a newly added ontology came from
    if($is_new){
      tripal_cv_load_obo_add_ref($obo_name,$file);
    }

    // bring the cvtermpath table up to date for the loaded vocabularies
    tripal_cv_load_update_cvtermpath($loaded_cvs,$jobid);
    echo "Ontology Sucessfully loaded!\n";
  }
  40. /*************************************************************************
  41. *
  42. */
  /**
   * Downloads an OBO from a URL into a temporary file, loads it and then
   * refreshes the cvtermpath table.
   *
   * If the download cannot be completed an error is printed, the temporary
   * file is removed and nothing is loaded.
   *
   * @param $obo_name
   *   Name stored with the reference when $is_new is set.
   * @param $url
   *   URL of the OBO file.
   * @param $jobid
   *   Optional job identifier, passed through to the loader.
   * @param $is_new
   *   When true, a reference (name and URL) is recorded for this ontology.
   */
  function tripal_cv_load_obo_v1_2_url($obo_name,$url,$jobid = NULL,$is_new = 1){
    $newcvs = array();

    // first download the OBO
    $temp = tempnam(sys_get_temp_dir(),'obo_');
    if(!$temp){
      print "ERROR: Cannot create a temporary file for downloading $url\n";
      return;
    }
    print "Opening URL $url\n";
    // an unchecked failure here would make the feof() loop below spin forever
    $url_fh = fopen($url,"r");
    if(!$url_fh){
      unlink($temp);
      print "ERROR: Cannot open URL $url\n";
      return;
    }
    $obo_fh = fopen($temp,"w");
    if(!$obo_fh){
      fclose($url_fh);
      unlink($temp);
      print "ERROR: Cannot write to the temporary file $temp\n";
      return;
    }
    $downloaded = TRUE;
    while(!feof($url_fh)){
      $chunk = fread($url_fh,8192);
      if($chunk === FALSE or fwrite($obo_fh,$chunk) === FALSE){
        $downloaded = FALSE;
        break;
      }
    }
    fclose($url_fh);
    fclose($obo_fh);
    if(!$downloaded){
      // do not parse a partial download
      unlink($temp);
      print "ERROR: Failed while downloading $url\n";
      return;
    }

    // second, parse the OBO
    tripal_cv_load_obo_v1_2($temp,$jobid,$newcvs);

    // now remove the temp file
    unlink($temp);

    if($is_new){
      tripal_cv_load_obo_add_ref($obo_name,$url);
    }

    // update the cvtermpath table
    tripal_cv_load_update_cvtermpath($newcvs,$jobid);
    print "Ontology Sucessfully loaded!\n";
  }
  66. /*************************************************************************
  67. *
  68. */
  /**
   * Updates the cvtermpath table for each vocabulary in the given list.
   *
   * @param $newcvs
   *   Array whose values are cv_ids (the keys are not used here).
   * @param $jobid
   *   Job identifier, passed through to tripal_cv_update_cvtermpath().
   */
  function tripal_cv_load_update_cvtermpath($newcvs,$jobid){
    echo "\nUpdating cvtermpath table. This may take a while...\n";
    foreach($newcvs as $cv_id){
      tripal_cv_update_cvtermpath($cv_id, $jobid);
    }
  }
  75. /*************************************************************************
  76. *
  77. */
  /**
   * Records an OBO reference (name and path) in the tripal_cv_obo table.
   *
   * @param $name
   *   The name of the ontology.
   * @param $path
   *   The file path or URL the ontology was loaded from.
   */
  function tripal_cv_load_obo_add_ref($name,$path){
    // the table name is wrapped in braces, as in the SELECT on this table in
    // tripal_cv_load_obo_v1_2_id(), so that a table prefix is applied
    $isql = "INSERT INTO {tripal_cv_obo} (name,path) VALUES ('%s','%s')";
    db_query($isql,$name,$path);
  }
  82. /*************************************************************************
  83. *
  84. */
  /**
   * Parses an OBO file and loads its typedefs and terms.
   *
   * @param $file
   *   Path of the OBO file to load.
   * @param $jobid
   *   Optional job identifier used for progress reporting.
   * @param $newcvs
   *   Array (by reference) that receives namespace => cv_id entries for the
   *   vocabularies touched by the load.
   *
   * @return
   *   The return value of tripal_cv_obo_loader_done().
   */
  function tripal_cv_load_obo_v1_2($file,$jobid = NULL,&$newcvs) {
    // shared with tripal_cv_obo_loader_done(), which switches back
    global $previous_db;

    $header = array();
    $obo = array();

    print "Opening File $file\n";

    // set the search path
    $previous_db = tripal_db_set_active('chado');

    // make sure we have an 'internal' and a '_global' database
    if(!tripal_cv_obo_add_db('internal')){
      tripal_cv_obo_quiterror("Cannot add 'internal' database");
    }
    if(!tripal_cv_obo_add_db('_global')){
      tripal_cv_obo_quiterror("Cannot add '_global' database");
    }

    // parse the obo file
    tripal_cv_obo_parse($file,$obo,$header);

    // add the CV for this ontology to the database. The header tag may be
    // absent, in which case an empty name is used.
    $namespace = isset($header['default-namespace'][0]) ? $header['default-namespace'][0] : '';
    $defaultcv = tripal_cv_obo_add_cv($namespace,'');
    if(!$defaultcv){
      tripal_cv_obo_quiterror('Cannot add namespace ' . $namespace);
    }
    $newcvs[$namespace] = $defaultcv->cv_id;

    // add any typedefs to the vocabulary first; a file need not have any
    $typedefs = isset($obo['Typedef']) ? $obo['Typedef'] : array();
    foreach($typedefs as $typedef){
      tripal_cv_obo_process_term($typedef,$defaultcv,$obo,1,$newcvs);
    }

    // next add terms to the vocabulary
    $terms = isset($obo['Term']) ? $obo['Term'] : array();
    if(!tripal_cv_obo_process_terms($terms,$defaultcv,$obo,$jobid,$newcvs)){
      tripal_cv_obo_quiterror('Cannot add terms from this ontology');
    }
    return tripal_cv_obo_loader_done();
  }
  119. /*************************************************************************
  120. *
  121. */
  /**
   * Reports a fatal loader error and terminates the script.
   *
   * The search path is set back to 'public' before exiting.
   *
   * @param $message
   *   The error text, printed after an "ERROR: " prefix.
   */
  function tripal_cv_obo_quiterror ($message){
    echo "ERROR: $message\n";
    db_query("set search_path to public");
    exit();
  }
  127. /*************************************************************************
  128. *
  129. */
  /**
   * Finishes a load: switches back to the database recorded in the global
   * $previous_db by tripal_cv_load_obo_v1_2() and sets the search path to
   * 'public'.
   *
   * @return
   *   An empty string.
   */
  function tripal_cv_obo_loader_done (){
    // without this declaration $previous_db is an undefined local and the
    // value saved by the loader is never used
    global $previous_db;

    // return the search path to normal
    tripal_db_set_active($previous_db);
    db_query("set search_path to public");
    return '';
  }
  136. /*************************************************************************
  137. *
  138. */
  /**
   * Processes every term of an ontology, reporting job progress on the way.
   *
   * @param $terms
   *   Array of term stanzas.
   * @param $defaultcv
   *   The default CV record, passed to tripal_cv_obo_process_term().
   * @param $obo
   *   The parsed OBO, passed to tripal_cv_obo_process_term().
   * @param $jobid
   *   Optional job identifier; when set, progress is reported for the job.
   * @param $newcvs
   *   Array (by reference) passed to tripal_cv_obo_process_term().
   *
   * @return
   *   Always 1; a term that cannot be processed ends the script.
   */
  function tripal_cv_obo_process_terms($terms,$defaultcv,$obo,$jobid=null,&$newcvs){
    $i = 0;
    $count = sizeof($terms);
    // report roughly every 1% of the terms. The interval must never be 0
    // (fewer than 100 terms) or the modulo below divides by zero.
    $interval = max(1,intval($count * 0.01));

    foreach ($terms as $term){
      if($jobid and $i % $interval == 0){
        // we multiply by 50 because parsing and loading cvterms is only the
        // first half. The other half is updating the cvtermpath table.
        tripal_job_set_progress($jobid,intval(($i/$count)*50));
      }
      if(!tripal_cv_obo_process_term($term,$defaultcv,$obo,0,$newcvs)){
        tripal_cv_obo_quiterror("Failed to process terms from the ontology");
      }
      $i++;
    }
    return 1;
  }
  156. /*************************************************************************
  157. *
  158. */
  /**
   * Adds or updates one term (or typedef) stanza together with its alternate
   * ids, synonyms, comments, xrefs and relationships. Tags that are not
   * handled are reported with a warning.
   *
   * @param $term
   *   The stanza: an array of tag => array of values.
   * @param $defaultcv
   *   The default CV record.
   * @param $obo
   *   The parsed OBO, used to look up the objects of relationships.
   * @param $is_relationship
   *   Set to 1 when the stanza is a typedef (a relationship type).
   * @param $newcvs
   *   Array (by reference) that receives namespace => cv_id when the stanza
   *   declares a namespace.
   *
   * @return
   *   Always 1; any failure ends the script through tripal_cv_obo_quiterror().
   */
  function tripal_cv_obo_process_term($term,$defaultcv,$obo,$is_relationship=0,&$newcvs){

    // add the cvterm
    $cvterm = tripal_cv_obo_add_cv_term($term,$defaultcv,$is_relationship,1);
    if(!$cvterm){
      tripal_cv_obo_quiterror("Cannot add the term " . $term['id'][0]);
    }
    // the namespace tag is optional
    if(isset($term['namespace'][0]) and $term['namespace'][0]){
      $newcvs[$term['namespace'][0]] = $cvterm->cv_id;
    }

    // now handle other properties
    if(isset($term['is_anonymous'])){
      print "WARNING: unhandled tag: is_anonymous\n";
    }
    if(isset($term['alt_id'])){
      foreach($term['alt_id'] as $alt_id){
        if(!tripal_cv_obo_add_cvterm_dbxref($cvterm,$alt_id)){
          tripal_cv_obo_quiterror("Cannot add alternate id $alt_id");
        }
      }
    }
    if(isset($term['subset'])){
      print "WARNING: unhandled tag: subset\n";
    }

    // add synonyms for this cvterm
    if(isset($term['synonym'])){
      if(!tripal_cv_obo_add_synonyms($term,$cvterm)){
        tripal_cv_obo_quiterror("Cannot add synonyms");
      }
    }

    // reformat the deprecated 'exact_synonym, narrow_synonym, and broad_synonym'
    // types to be of the v1.2 standard
    $deprecated = array(
      'exact_synonym'  => 'EXACT',
      'narrow_synonym' => 'NARROW',
      'broad_synonym'  => 'BROAD',
    );
    $has_deprecated = 0;
    foreach($deprecated as $tag => $scope){
      if(!isset($term[$tag])){
        continue;
      }
      $has_deprecated = 1;
      foreach($term[$tag] as $synonym){
        $term['synonym'][] = preg_replace('/^\s*(\".+?\")(.*?)$/','$1 ' . $scope . ' $2',$synonym);
      }
    }
    if($has_deprecated){
      if(!tripal_cv_obo_add_synonyms($term,$cvterm)){
        tripal_cv_obo_quiterror("Cannot add/update synonyms");
      }
    }

    // add the comment to the cvtermprop table
    if(isset($term['comment'])){
      $j = 0;
      foreach($term['comment'] as $comment){
        if(!tripal_cv_obo_add_cvterm_prop($cvterm,'comment',$comment,$j)){
          tripal_cv_obo_quiterror("Cannot add/update cvterm property");
        }
        $j++;
      }
    }

    // add any other external dbxrefs. Each tag is checked on its own so that
    // a term carrying only xref_analog or xref_unk is handled as well.
    foreach(array('xref','xref_analog','xref_unk') as $xref_tag){
      if(!isset($term[$xref_tag])){
        continue;
      }
      foreach($term[$xref_tag] as $xref){
        if(!tripal_cv_obo_add_cvterm_dbxref($cvterm,$xref)){
          tripal_cv_obo_quiterror("Cannot add/update cvterm database reference (dbxref).");
        }
      }
    }

    // add is_a relationships for this cvterm
    if(isset($term['is_a'])){
      foreach($term['is_a'] as $is_a){
        if(!tripal_cv_obo_add_relationship($cvterm,$defaultcv,$obo,'is_a',$is_a,$is_relationship)){
          tripal_cv_obo_quiterror("Cannot add relationship is_a: $is_a");
        }
      }
    }
    foreach(array('intersection_of','union_of','disjoint_from') as $tag){
      if(isset($term[$tag])){
        print "WARNING: unhandled tag: $tag\n";
      }
    }

    // a relationship value is "<type> <object id>"
    if(isset($term['relationship'])){
      foreach($term['relationship'] as $value){
        $rel = preg_replace('/^(.+?)\s.+?$/','\1',$value);
        $object = preg_replace('/^.+?\s(.+?)$/','\1',$value);
        if(!tripal_cv_obo_add_relationship($cvterm,$defaultcv,$obo,$rel,$object,$is_relationship)){
          tripal_cv_obo_quiterror("Cannot add relationship $rel: $object");
        }
      }
    }
    foreach(array('replaced_by','consider','use_term','builtin') as $tag){
      if(isset($term[$tag])){
        print "WARNING: unhandled tag: $tag\n";
      }
    }
    return 1;
  }
  272. /*************************************************************************
  273. *
  274. */
  /**
   * Returns the db record with the given name, creating it when missing.
   *
   * @param $dbname
   *   The name of the database.
   *
   * @return
   *   The db record as fetched by db_fetch_object().
   */
  function tripal_cv_obo_add_db($dbname){
    $lookup = "SELECT * FROM {db} WHERE name ='%s'";

    $record = db_fetch_object(db_query($lookup,$dbname));
    if($record){
      return $record;
    }

    // not present yet: insert it and read the new record back
    $inserted = db_query("INSERT INTO {db} (name) VALUES ('%s')",$dbname);
    if(!$inserted){
      tripal_cv_obo_quiterror("Cannot create '$dbname' db in Chado.");
    }
    return db_fetch_object(db_query($lookup,$dbname));
  }
  286. /*************************************************************************
  287. *
  288. */
  /**
   * Inserts a CV, or updates the definition of an existing CV with that name.
   *
   * @param $name
   *   The name of the CV.
   * @param $comment
   *   The text stored as the definition of the CV.
   *
   * @return
   *   The cv record, read back after the insert or update.
   */
  function tripal_cv_obo_add_cv($name,$comment){
    $lookup = "SELECT * FROM {cv} WHERE name = '%s'";

    // an existing CV gets its definition replaced, a missing one is created
    $existing = db_fetch_object(db_query($lookup,$name));
    if($existing){
      $sql = "UPDATE {cv} SET definition = '%s' WHERE name ='%s'";
      if(!db_query($sql,$comment,$name)){
        tripal_cv_obo_quiterror("Failed to update the CV record");
      }
    } else {
      $sql = "INSERT INTO {cv} (name,definition) VALUES ('%s','%s')";
      if(!db_query($sql,$name,$comment)){
        tripal_cv_obo_quiterror("Failed to create the CV record");
      }
    }

    // either way hand back the record as it is now stored
    return db_fetch_object(db_query($lookup,$name));
  }
  311. /*************************************************************************
  312. *
  313. */
  /**
   * Adds a property to a cvterm.
   *
   * The property type is a cvterm of the 'cvterm_property_type' CV and is
   * created when missing. When $rank is 0 every cvtermprop row of the term is
   * deleted before the new value is inserted.
   *
   * @param $cvterm
   *   The cvterm record the property belongs to.
   * @param $property
   *   The name of the property type.
   * @param $value
   *   The property value.
   * @param $rank
   *   The rank of the value.
   *
   * @return
   *   Always 1; any failure ends the script through tripal_cv_obo_quiterror().
   */
  function tripal_cv_obo_add_cvterm_prop($cvterm,$property,$value,$rank){
    $type_cv_name = 'cvterm_property_type';

    // the CV holding the property types has to exist
    $type_cv = tripal_cv_obo_add_cv($type_cv_name,'');
    if(!$type_cv){
      tripal_cv_obo_quiterror("Cannot add/find cvterm_property_type cvterm");
    }

    // look the property type up and add it when it is not there yet
    $lookup = "
      SELECT *
      FROM {cvterm} CVT INNER JOIN {cv} CV on CVT.cv_id = CV.cv_id
      WHERE CVT.name = '%s' and CV.name = '%s'
    ";
    $prop_type = db_fetch_object(db_query($lookup,$property,$type_cv_name));
    if(!$prop_type){
      $type_term = array(
        'name' => array($property),
        'id' => array("internal:$property"),
        'definition' => array(''),
        'is_obsolete' => array(0),
      );
      $prop_type = tripal_cv_obo_add_cv_term($type_term,$type_cv,0,0);
      if(!$prop_type){
        tripal_cv_obo_quiterror("Cannot add cvterm property: internal:$property");
      }
    }

    // the first value (rank 0) resets the properties stored for the term
    if($rank == 0){
      db_query("DELETE FROM {cvtermprop} WHERE cvterm_id = %d",$cvterm->cvterm_id);
    }

    // store the value
    $insert = "INSERT INTO {cvtermprop} (cvterm_id,type_id,value,rank) ".
      "VALUES (%d, %d, '%s',%d)";
    $stored = db_query($insert,$cvterm->cvterm_id,$prop_type->cvterm_id,$value,$rank);
    if(!$stored){
      tripal_cv_obo_quiterror("Could not add property $property for term\n");
    }
    return 1;
  }
  352. /*************************************************************************
  353. *
  354. */
  /**
   * Adds a relationship between a cvterm (the subject) and another term of
   * the ontology (the object) unless that relationship is already stored.
   *
   * @param $cvterm
   *   The subject cvterm record.
   * @param $defaultcv
   *   The default CV record.
   * @param $obo
   *   The parsed OBO, searched for the object term.
   * @param $rel
   *   The name of the relationship type.
   * @param $objname
   *   The id of the object term.
   * @param $object_is_relationship
   *   Set to 1 when the object is itself a relationship type.
   *
   * @return
   *   Always 1; any failure ends the script through tripal_cv_obo_quiterror().
   */
  function tripal_cv_obo_add_relationship($cvterm,$defaultcv,$obo,$rel,$objname,$object_is_relationship=0){

    // find or create the cvterm of the relationship type
    $rel_term = array(
      'name' => array($rel),
      'id' => array($rel),
      'definition' => array(''),
      'is_obsolete' => array(0),
    );
    $relcvterm = tripal_cv_obo_add_cv_term($rel_term,$defaultcv,1,0);
    if(!$relcvterm){
      tripal_cv_obo_quiterror("Cannot find or insert the relationship term: $rel\n");
    }

    // the object has to be a stanza of the parsed OBO
    $objterm = tripal_cv_obo_get_term($obo,$objname);
    if(!$objterm) {
      tripal_cv_obo_quiterror("Could not find object term $objname\n");
    }
    $objcvterm = tripal_cv_obo_add_cv_term($objterm,$defaultcv,$object_is_relationship,1);
    if(!$objcvterm){
      tripal_cv_obo_quiterror("Cannot add/find cvterm");
    }

    $type_id = $relcvterm->cvterm_id;
    $subject_id = $cvterm->cvterm_id;
    $object_id = $objcvterm->cvterm_id;

    // nothing to do when the relationship is already stored
    $exists_sql = "SELECT * FROM {cvterm_relationship} WHERE type_id = %d and subject_id = %d and object_id = %d";
    if(db_fetch_object(db_query($exists_sql,$type_id,$subject_id,$object_id))){
      return 1;
    }

    $insert_sql = "INSERT INTO {cvterm_relationship} ".
      "(type_id,subject_id,object_id) VALUES (%d,%d,%d)";
    if(!db_query($insert_sql,$type_id,$subject_id,$object_id)){
      tripal_cv_obo_quiterror("Cannot add term relationship: '$cvterm->name' $rel '$objcvterm->name'");
    }
    return 1;
  }
  387. /*************************************************************************
  388. *
  389. */
  /**
   * Searches every stanza type of a parsed OBO for the stanza with the
   * given id.
   *
   * @param $obo
   *   The parsed OBO: stanza type => array of stanzas.
   * @param $id
   *   The id to look for; compared with the first 'id' value of each stanza.
   *
   * @return
   *   The first matching stanza, or NULL when there is none.
   */
  function tripal_cv_obo_get_term($obo,$id){
    foreach ($obo as $stanzas){
      foreach ($stanzas as $candidate){
        if(strcmp($candidate['id'][0],$id) != 0){
          continue;
        }
        return $candidate;
      }
    }
    return;
  }
  401. /*************************************************************************
  402. *
  403. */
  /**
   * Adds the synonyms of a term to the cvtermsynonym table.
   *
   * The 'synonym_type' CV, and a cvterm in it for each synonym scope, are
   * created when missing. A synonym that is already stored for the cvterm
   * with the same type is left alone.
   *
   * @param $term
   *   The stanza; its 'synonym' values are processed.
   * @param $cvterm
   *   The cvterm record the synonyms belong to.
   *
   * @return
   *   Always 1; any failure ends the script through tripal_cv_obo_quiterror().
   */
  function tripal_cv_obo_add_synonyms($term,$cvterm){

    // make sure we have a 'synonym_type' vocabulary
    $syncv_sql = "SELECT * FROM {cv} WHERE name='synonym_type'";
    $syncv = db_fetch_object(db_query($syncv_sql));
    if(!$syncv){
      $sql = "INSERT INTO {cv} (name,definition) VALUES ('synonym_type','')";
      if(!db_query($sql)){
        tripal_cv_obo_quiterror("Failed to add the synonyms type vocabulary");
      }
      // read the new record back with the SELECT (not by repeating the INSERT)
      $syncv = db_fetch_object(db_query($syncv_sql));
    }

    if(!isset($term['synonym'])){
      return 1;
    }

    // used to find the cvterm of a synonym type
    $cvtsql = "
      SELECT *
      FROM {cvterm} CVT
        INNER JOIN {cv} CV ON CVT.cv_id = CV.cv_id
      WHERE CVT.name = '%s' and CV.name = '%s'
    ";

    // now add the synonyms
    foreach($term['synonym'] as $synonym){

      // separate out the synonym definition and the synonym type. The scope
      // after the quoted text is optional; the OBO 1.2 format treats a
      // synonym without one as RELATED.
      $def = preg_replace('/^\s*"(.*)"\s*.*$/','\1',$synonym);
      $type = 'related';
      if(preg_match('/^.*"\s+([^\s\[\{]+)(\s.*)?$/',$synonym,$matches)){
        $type = strtolower($matches[1]);
      }

      // make sure the synonym type exists in the 'synonym_type' vocabulary
      $syntype = db_fetch_object(db_query($cvtsql,$type,'synonym_type'));
      if(!$syntype){
        // build a 'term' object so we can add the missing term. It is kept
        // in its own variable so that the $term argument is not overwritten.
        $type_term = array(
          'name' => array($type),
          'id' => array("internal:$type"),
          'definition' => array(''),
          'is_obsolete' => array(0),
        );
        $syntype = tripal_cv_obo_add_cv_term($type_term,$syncv,0,1);
        if(!$syntype){
          tripal_cv_obo_quiterror("Cannot add synonym type: internal:$type");
        }
      }

      // make sure the synonym doesn't already exist
      $sql = "
        SELECT *
        FROM {cvtermsynonym}
        WHERE cvterm_id = %d and synonym = '%s' and type_id = %d
      ";
      $syn = db_fetch_object(db_query($sql,$cvterm->cvterm_id,$def,$syntype->cvterm_id));
      if(!$syn){
        $sql = "INSERT INTO {cvtermsynonym} (cvterm_id,synonym,type_id)
                VALUES(%d,'%s',%d)";
        if(!db_query($sql,$cvterm->cvterm_id,$def,$syntype->cvterm_id)){
          tripal_cv_obo_quiterror("Failed to insert the synonym for term: $cvterm->name ($def)");
        }
      }

      // NOTE: the dbxrefs listed in the trailing [...] of a synonym are not
      // loaded.
    }
    return 1;
  }
  472. /*************************************************************************
  473. *
  474. */
  /**
   * Finds, inserts or updates the cvterm described by a stanza.
   *
   * The term is looked up by its name and by the database part of its id
   * ("DB:accession"). A relationship term whose id has no database part is
   * looked for in the 'OBO_REL' database and then in '_global', where it is
   * added when missing.
   *
   * @param $term
   *   The stanza: an array of tag => array of values. The 'id', 'name',
   *   'namespace', 'def' and 'is_obsolete' tags are read.
   * @param $defaultcv
   *   The CV record used when the stanza has no namespace.
   * @param $is_relationship
   *   Set to 1 when the term is a relationship type.
   * @param $update
   *   When true an existing cvterm is updated from the stanza.
   *
   * @return
   *   The cvterm record with the fields name, cvterm_id, cv_id, dbname and
   *   db_id.
   */
  function tripal_cv_obo_add_cv_term($term,$defaultcv,$is_relationship = 0,$update = 1){

    // get the term properties; tags that are absent become empty values
    $id = isset($term['id'][0]) ? $term['id'][0] : '';
    $name = isset($term['name'][0]) ? $term['name'][0] : '';
    $cvname = isset($term['namespace'][0]) ? $term['namespace'][0] : '';
    $definition = '';
    if(isset($term['def'][0])){
      $definition = preg_replace('/^\"(.*)\"/','\1',$term['def'][0]);
    }
    $is_obsolete = 0;
    if(isset($term['is_obsolete'][0]) and strcmp($term['is_obsolete'][0],'true')==0){
      $is_obsolete = 1;
    }
    if(!$cvname){
      $cvname = $defaultcv->name;
    }

    // make sure the CV name exists
    $cv = tripal_cv_obo_add_cv($cvname,'');
    if(!$cv){
      tripal_cv_obo_quiterror("Cannot find namespace '$cvname' when adding/updating $id");
    }

    // this SQL statement will be used a lot to find a cvterm so just set it
    // here for easy reference below. cv_id is part of the result because
    // tripal_cv_obo_process_term() reads it from the returned record.
    $cvtermsql = "SELECT CVT.name, CVT.cvterm_id, CVT.cv_id, DB.name as dbname, DB.db_id
                  FROM {cvterm} CVT
                    INNER JOIN {dbxref} DBX on CVT.dbxref_id = DBX.dbxref_id
                    INNER JOIN {db} DB on DBX.db_id = DB.db_id
                    INNER JOIN {cv} CV on CV.cv_id = CVT.cv_id
                  WHERE CVT.name = '%s' and DB.name = '%s'";

    // get the accession and the database from the cvterm id. Both stay empty
    // when the id has no "DB:" part.
    $dbname = '';
    $accession = '';
    if(preg_match('/^(.+?):(.*)$/',$id,$matches)){
      $dbname = $matches[1];
      $accession = $matches[2];
    }
    if($is_relationship and !$dbname){
      $accession = $id;
      // because this is a relationship cvterm first check to see if it
      // exists in the relationship ontology. If it does then return the cvterm.
      // If not then set the dbname to _global and we'll add it or find it there
      $cvterm = db_fetch_object(db_query($cvtermsql,$name,'OBO_REL'));
      if($cvterm){
        return $cvterm;
      }
      // next check if this term is in the _global ontology. If it is then
      // return it no matter what the original CV
      $dbname = '_global';
      $cvterm = db_fetch_object(db_query($cvtermsql,$name,$dbname));
      if($cvterm){
        return $cvterm;
      }
    }
    if(!$is_relationship and !$dbname){
      tripal_cv_obo_quiterror("A database identifier is missing from the term: $id");
    }

    // check to see if the database exists.
    $db = tripal_cv_obo_add_db($dbname);
    if(!$db){
      tripal_cv_obo_quiterror("Cannot find database '$dbname' in Chado.");
    }

    // if the cvterm doesn't exist then add it otherwise just update it
    $cvterm = db_fetch_object(db_query($cvtermsql,$name,$dbname));
    if(!$cvterm){
      // check to see if the dbxref exists if not, add it
      $dbxref = tripal_cv_obo_add_dbxref($db->db_id,$accession);
      if(!$dbxref){
        tripal_cv_obo_quiterror("Failed to find or insert the dbxref record for cvterm, $name (id: $accession), for database $dbname");
      }

      // now add the cvterm
      $sql = "
        INSERT INTO {cvterm} (cv_id, name, definition, dbxref_id,
          is_obsolete, is_relationshiptype)
        VALUES (%d,'%s','%s',%d,%d,%d)
      ";
      if(!db_query($sql,$cv->cv_id,$name,$definition,
        $dbxref->dbxref_id,$is_obsolete,$is_relationship)){
        if(!$is_relationship){
          tripal_cv_obo_quiterror("Failed to insert the term: $name ($dbname)");
        } else {
          tripal_cv_obo_quiterror("Failed to insert the relationship term: $name (cv: " . $cvname . " db: $dbname)");
        }
      }
      $cvterm = db_fetch_object(db_query($cvtermsql,$name,$dbname));
      if(!$is_relationship){
        print "Added CV term: $name ($dbname)\n";
      } else {
        print "Added relationship CV term: $name ($dbname)\n";
      }
    }
    elseif($update) { // update the cvterm
      $sql = "
        UPDATE {cvterm} SET name='%s', definition='%s',
          is_obsolete = %d, is_relationshiptype = %d
        WHERE cvterm_id = %d
      ";
      if(!db_query($sql,$name,$definition,
        $is_obsolete,$is_relationship,$cvterm->cvterm_id)){
        tripal_cv_obo_quiterror("Failed to update the term: $name");
      }
      $cvterm = db_fetch_object(db_query($cvtermsql,$name,$dbname));
      if(!$is_relationship){
        print "Updated CV term: $name ($dbname)\n";
      } else {
        print "Updated relationship CV term: $name ($dbname)\n";
      }
    }

    // return the cvterm
    return $cvterm;
  }
  581. /*************************************************************************
  582. *
  583. */
  /**
   * Links a cvterm to the database reference described by an xref value of
   * the form 'DB:accession "optional description"'.
   *
   * The database and the dbxref are created when missing. An xref that is a
   * web link (http or https) is stored whole as the accession of the 'URL'
   * database.
   *
   * @param $cvterm
   *   The cvterm record to link.
   * @param $xref
   *   The xref value.
   *
   * @return
   *   Always 1; any failure ends the script through tripal_cv_obo_quiterror().
   */
  function tripal_cv_obo_add_cvterm_dbxref($cvterm,$xref){

    $dbname = preg_replace('/^(.+?):.*$/','$1',$xref);
    $accession = preg_replace('/^.+?:\s*(.*?)(\{.+$|\[.+$|\s.+$|\".+$|$)/','$1',$xref);

    // the quoted description is optional. It is matched rather than replaced
    // so that an xref without one gets an empty description instead of the
    // whole xref text.
    $description = '';
    if(preg_match('/^.+?\"(.+?)\".*?$/',$xref,$matches)){
      $description = $matches[1];
    }

    if(!$accession){
      tripal_cv_obo_quiterror("Cannot add a dbxref without an accession: '$xref'");
    }

    // if the xref is a database link, handle that specially
    if(strcmp($dbname,'http')==0 or strcmp($dbname,'https')==0){
      $accession = $xref;
      $dbname = 'URL';
    }

    // check to see if the database exists
    $db = tripal_cv_obo_add_db($dbname);
    if(!$db){
      tripal_cv_obo_quiterror("Cannot find database '$dbname' in Chado.");
    }

    // now add the dbxref
    $dbxref = tripal_cv_obo_add_dbxref($db->db_id,$accession,'',$description);
    if(!$dbxref){
      tripal_cv_obo_quiterror("Cannot find or add the database reference (dbxref)");
    }

    // finally add the cvterm_dbxref but first check to make sure it exists
    $sql = "SELECT * from {cvterm_dbxref} WHERE cvterm_id = %d and dbxref_id = %d";
    if(!db_fetch_object(db_query($sql,$cvterm->cvterm_id,$dbxref->dbxref_id))){
      $sql = "INSERT INTO {cvterm_dbxref} (cvterm_id,dbxref_id) ".
             "VALUES (%d,%d)";
      if(!db_query($sql,$cvterm->cvterm_id,$dbxref->dbxref_id)){
        tripal_cv_obo_quiterror("Cannot add cvterm_dbxref: $xref");
      }
    }
    return 1;
  }
  618. /*************************************************************************
  619. *
  620. */
  /**
   * Returns the dbxref for a database and accession, creating it when missing.
   *
   * @param $db_id
   *   The db_id of the database.
   * @param $accession
   *   The accession within the database.
   * @param $version
   *   Version stored with a newly created dbxref.
   * @param $description
   *   Description stored with a newly created dbxref.
   *
   * @return
   *   A record holding the dbxref_id.
   */
  function tripal_cv_obo_add_dbxref($db_id,$accession,$version='',$description=''){
    $lookup = "SELECT dbxref_id FROM {dbxref} WHERE db_id = %d and accession = '%s'";

    $found = db_fetch_object(db_query($lookup,$db_id,$accession));
    if($found){
      return $found;
    }

    // unknown so far: insert it, report it and read it back
    $insert = "
      INSERT INTO {dbxref} (db_id, accession, version, description)
      VALUES (%d,'%s','%s','%s')
    ";
    if(!db_query($insert,$db_id,$accession,$version,$description)){
      tripal_cv_obo_quiterror("Failed to insert the dbxref record $accession");
    }
    echo "Added Dbxref accession: $accession\n";
    return db_fetch_object(db_query($lookup,$db_id,$accession));
  }
  638. /*************************************************************************
  639. *
  640. */
  /**
   * Parses an OBO file into its header tags and its stanzas.
   *
   * Everything from the first '!' of a line onwards is dropped as a comment.
   * Tags and values are split at the first colon that is not escaped with a
   * backslash. Stanzas of the same type that share an id are merged, the
   * tags of the later stanza replacing those of the earlier one.
   *
   * @param $obo_file
   *   Path of the OBO file.
   * @param $obo
   *   Array (by reference) that receives the stanzas as
   *   stanza type => id => tag => array of values.
   * @param $header
   *   Array (by reference) that receives the header as
   *   tag => array of values.
   */
  function tripal_cv_obo_parse($obo_file,&$obo,&$header){
    $in_header = 1;
    $stanza = array();
    $type = '';

    // iterate through the lines in the OBO file and parse the stanzas
    $fh = fopen($obo_file,'r');
    if(!$fh){
      tripal_cv_obo_quiterror("Cannot open the OBO file: $obo_file");
    }
    // compare with FALSE so that a final line holding just "0" is still read
    while(($line = fgets($fh)) !== FALSE) {
      // remove newlines
      $line = rtrim($line);

      // skip empty lines
      if(strcmp($line,'')==0) { continue; }

      //remove comments from end of lines
      $line = preg_replace('/^(.*?)\!.*$/','\1',$line); // TODO: if the exclamation is escaped

      if(preg_match('/^\s*\[/',$line)){ // at the first stanza we're out of header
        $in_header = 0;
        // load the stanza we just finished reading
        _tripal_cv_obo_parse_add_stanza($obo,$type,$stanza);
        // get the stanza type: Term, Typedef or Instance
        $type = preg_replace('/^\s*\[\s*(.+?)\s*\]\s*$/','\1',$line);
        // start fresh with a new array
        $stanza = array();
        continue;
      }

      // a line that held only a comment has nothing left to parse
      if(strcmp(trim($line),'')==0) { continue; }

      // break apart the line into the tag and value but ignore any escaped
      // colons: they are replaced by a placeholder while the line is split
      $line = str_replace('\\:','|-|-|',$line);
      $pair = explode(":",$line,2);
      $tag = $pair[0];
      $value = isset($pair[1]) ? trim($pair[1]) : ''; // remove surrounding spaces
      $tag = str_replace('|-|-|','\\:',$tag); // return the escaped colon
      $value = str_replace('|-|-|','\\:',$value);

      if($in_header){
        if(!isset($header[$tag])){
          $header[$tag] = array();
        }
        $header[$tag][] = $value;
      } else {
        if(!isset($stanza[$tag])){
          $stanza[$tag] = array();
        }
        $stanza[$tag][] = $value;
      }
    }
    fclose($fh);

    // now add the last term in the file
    _tripal_cv_obo_parse_add_stanza($obo,$type,$stanza);
  }

  /**
   * Stores a finished stanza in the parsed OBO under its type and id; an
   * empty stanza is ignored and a stanza already stored under the same type
   * and id is merged with the new one.
   */
  function _tripal_cv_obo_parse_add_stanza(&$obo,$type,$stanza){
    if(sizeof($stanza) == 0){
      return;
    }
    $id = isset($stanza['id'][0]) ? $stanza['id'][0] : '';
    if(!isset($obo[$type])){
      $obo[$type] = array();
    }
    if(!isset($obo[$type][$id])){
      $obo[$type][$id] = $stanza;
    } else {
      // array_merge() returns the merged array, so it has to be stored
      $obo[$type][$id] = array_merge($obo[$type][$id],$stanza);
    }
  }