obo_loader.php 28 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860
  1. <?php
  2. /**
  3. * @defgroup tripal_obo_loader Tripal Ontology Loader
  4. * @ingroup tripal_cv
  5. */
  /**
   * Load the ontology referenced by a record of the tripal_cv_obo table.
   *
   * The record's path decides which loader runs: a path starting with
   * http://, https:// or ftp:// is downloaded by the URL loader; anything else
   * is treated as a file, first relative to the Drupal installation and then
   * as the path exactly as stored.
   *
   * @param $obo_id
   *   The obo_id of the tripal_cv_obo record to load.
   * @param $jobid
   *   Optional job id, passed through to the loader that is chosen.
   *
   * @ingroup tripal_obo_loader
   */
  function tripal_cv_load_obo_v1_2_id($obo_id,$jobid = NULL){
    // get the OBO reference
    $sql = "SELECT * FROM {tripal_cv_obo} WHERE obo_id = %d";
    $obo = db_fetch_object(db_query($sql,$obo_id));
    if(!$obo){
      // without a record there is no path to inspect
      print "ERROR: could not find an OBO reference with id: '$obo_id'\n";
      return;
    }

    // if the reference is for a remote URL then run the URL processing function
    if(preg_match("/^(https?|ftp):\/\//",$obo->path)){
      tripal_cv_load_obo_v1_2_url($obo->name,$obo->path,$jobid,0);
      return;
    }

    // otherwise the reference is a file: check to see if it is located
    // local to Drupal
    $dfile = $_SERVER['DOCUMENT_ROOT'] . base_path() . $obo->path;
    if(file_exists($dfile)){
      tripal_cv_load_obo_v1_2_file($obo->name,$dfile,$jobid,0);
      return;
    }

    // if not local to Drupal, the file must be someplace else, just use
    // the full path provided
    if(file_exists($obo->path)){
      tripal_cv_load_obo_v1_2_file($obo->name,$obo->path,$jobid,0);
    } else {
      print "ERROR: could not find OBO file: '$obo->path'\n";
    }
  }
  /**
   * Load an OBO file from disk and refresh the cvtermpath table afterwards.
   *
   * @param $obo_name
   *   Name under which the ontology is recorded when $is_new is set.
   * @param $file
   *   Path of the OBO file to parse.
   * @param $jobid
   *   Optional job id handed to the parser and the cvtermpath update.
   * @param $is_new
   *   When true, a reference (name and path) is stored via
   *   tripal_cv_load_obo_add_ref().
   *
   * @ingroup tripal_obo_loader
   */
  function tripal_cv_load_obo_v1_2_file($obo_name,$file,$jobid = NULL,$is_new = 1){
    // filled by the loader: namespace => cv_id of every vocabulary it touched
    $loaded_cvs = array();
    tripal_cv_load_obo_v1_2($file, $jobid, $loaded_cvs);

    // remember the source of a newly added ontology
    if ($is_new) {
      tripal_cv_load_obo_add_ref($obo_name, $file);
    }

    // update the cvtermpath table
    tripal_cv_load_update_cvtermpath($loaded_cvs, $jobid);

    print "Ontology Sucessfully loaded!\n";
  }
  /**
   * Download an OBO file from a URL, load it and refresh cvtermpath.
   *
   * The remote file is copied into a temporary file, parsed from there and
   * the temporary file is removed again. If the download cannot be started
   * an error is printed and nothing is loaded.
   *
   * @param $obo_name
   *   Name under which the ontology is recorded when $is_new is set.
   * @param $url
   *   Location of the remote OBO file.
   * @param $jobid
   *   Optional job id handed to the parser and the cvtermpath update.
   * @param $is_new
   *   When true, a reference (name and URL) is stored via
   *   tripal_cv_load_obo_add_ref().
   *
   * @ingroup tripal_obo_loader
   */
  function tripal_cv_load_obo_v1_2_url($obo_name,$url,$jobid = NULL,$is_new = 1){
    $newcvs = array();

    // first download the OBO
    $temp = tempnam(sys_get_temp_dir(),'obo_');
    if(!$temp){
      print "ERROR: could not create a temporary file for the download\n";
      return;
    }
    print "Downloading URL $url, saving to $temp\n";

    // a failed fopen() returns FALSE; looping on feof(FALSE) would never end
    $url_fh = fopen($url,"r");
    if(!$url_fh){
      print "ERROR: could not open the URL: '$url'\n";
      unlink($temp);
      return;
    }
    $obo_fh = fopen($temp,"w");
    if(!$obo_fh){
      print "ERROR: could not write to the temporary file: '$temp'\n";
      fclose($url_fh);
      unlink($temp);
      return;
    }
    while(!feof($url_fh)){
      $chunk = fread($url_fh,8192);
      if($chunk === FALSE){
        // read error: stop copying rather than spin on a broken stream
        break;
      }
      fwrite($obo_fh,$chunk);
    }
    fclose($url_fh);
    fclose($obo_fh);

    // second, parse the OBO
    tripal_cv_load_obo_v1_2($temp,$jobid,$newcvs);

    // now remove the temp file
    unlink($temp);

    if($is_new){
      tripal_cv_load_obo_add_ref($obo_name,$url);
    }

    // update the cvtermpath table
    tripal_cv_load_update_cvtermpath($newcvs,$jobid);
    print "Ontology Sucessfully loaded!\n";
  }
  /**
   * Rebuild the cvtermpath entries of every vocabulary in $newcvs.
   *
   * @param $newcvs
   *   Array whose values are cv ids; the keys are not used here.
   * @param $jobid
   *   Job id passed on to tripal_cv_update_cvtermpath().
   *
   * @ingroup tripal_obo_loader
   */
  function tripal_cv_load_update_cvtermpath($newcvs,$jobid){
    print "\nUpdating cvtermpath table. This may take a while...\n";

    // only the cv ids matter; the namespace keys are ignored
    foreach ($newcvs as $cv_id) {
      tripal_cv_update_cvtermpath($cv_id, $jobid);
    }
  }
  /**
   * Record the name and location of an ontology in the tripal_cv_obo table.
   *
   * @param $name
   *   Name of the ontology.
   * @param $path
   *   File path or URL the ontology was loaded from.
   *
   * @ingroup tripal_obo_loader
   */
  function tripal_cv_load_obo_add_ref($name,$path){
    // the table name is wrapped in braces like every other query in this
    // file so that Drupal applies its table prefix
    $isql = "INSERT INTO {tripal_cv_obo} (name,path) VALUES ('%s','%s')";
    db_query($isql,$name,$path);
  }
  /**
   * Parse an OBO file and load its typedefs and terms into Chado.
   *
   * Switches the active database to 'chado' (the previous one is kept in the
   * global $previous_db), makes sure the 'internal' and '_global' databases
   * exist, parses the file, adds the default-namespace vocabulary and then
   * processes the Typedef stanzas followed by the Term stanzas.
   *
   * @param $file
   *   Path of the OBO file.
   * @param $jobid
   *   Optional job id used for progress reporting while terms are processed.
   * @param $newcvs
   *   Passed by reference; receives namespace => cv_id entries for the
   *   vocabularies touched by this load.
   *
   * @return
   *   The return value of tripal_cv_obo_loader_done().
   *
   * @ingroup tripal_obo_loader
   */
  function tripal_cv_load_obo_v1_2($file,$jobid = NULL,&$newcvs) {
    global $previous_db;

    $header = array();
    $obo = array();

    print "Opening File $file\n";

    // set the search path
    $previous_db = tripal_db_set_active('chado');

    // make sure we have an 'internal' and a '_global' database
    if(!tripal_cv_obo_add_db('internal')){
      tripal_cv_obo_quiterror("Cannot add 'internal' database");
    }
    if(!tripal_cv_obo_add_db('_global')){
      tripal_cv_obo_quiterror("Cannot add '_global' database");
    }

    // parse the obo file
    tripal_cv_obo_parse($file,$obo,$header);

    // add the CV for this ontology to the database; a header without a
    // default-namespace yields an empty name instead of an undefined index
    $namespace = isset($header['default-namespace'][0]) ? $header['default-namespace'][0] : '';
    $defaultcv = tripal_cv_obo_add_cv($namespace,'');
    if(!$defaultcv){
      tripal_cv_obo_quiterror('Cannot add namespace ' . $namespace);
    }
    $newcvs[$namespace] = $defaultcv->cv_id;

    // add any typedefs to the vocabulary first; a file without Typedef
    // stanzas simply has none to add
    $typedefs = isset($obo['Typedef']) ? $obo['Typedef'] : array();
    foreach($typedefs as $typedef){
      tripal_cv_obo_process_term($typedef,$defaultcv,$obo,1,$newcvs);
    }

    // next add terms to the vocabulary
    $terms = isset($obo['Term']) ? $obo['Term'] : array();
    if(!tripal_cv_obo_process_terms($terms,$defaultcv,$obo,$jobid,$newcvs)){
      tripal_cv_obo_quiterror('Cannot add terms from this ontology');
    }
    return tripal_cv_obo_loader_done();
  }
  /**
   * Print an error message, reset the search path and stop the script.
   *
   * @param $message
   *   Text printed after "ERROR: ". Optional because one caller in this file
   *   invokes the function without an argument.
   *
   * @ingroup tripal_obo_loader
   */
  function tripal_cv_obo_quiterror ($message = ''){
    print "ERROR: $message\n";
    db_query("set search_path to public");
    exit;
  }
  /**
   * Restore the database connection and search path after a load.
   *
   * @return
   *   Always an empty string.
   *
   * @ingroup tripal_obo_loader
   */
  function tripal_cv_obo_loader_done (){
    // $previous_db is set by tripal_cv_load_obo_v1_2(); without the global
    // declaration the name would be an undefined local variable here
    global $previous_db;

    // return the search path to normal
    tripal_db_set_active($previous_db);
    db_query("set search_path to public");
    return '';
  }
  /**
   * Load every term of an ontology, reporting progress to the job.
   *
   * @param $terms
   *   Array of term stanzas to load.
   * @param $defaultcv
   *   The vocabulary record used for terms without their own namespace.
   * @param $obo
   *   The complete parsed OBO data, passed through to the term processor.
   * @param $jobid
   *   Optional job id; when set, progress is reported for it.
   * @param $newcvs
   *   Passed by reference to tripal_cv_obo_process_term().
   *
   * @return
   *   Always 1; a failing term ends the script via tripal_cv_obo_quiterror().
   *
   * @ingroup tripal_obo_loader
   */
  function tripal_cv_obo_process_terms($terms,$defaultcv,$obo,$jobid=null,&$newcvs){
    $i = 0;
    $count = sizeof($terms);

    // report roughly every 1% of the terms. With fewer than 100 terms the
    // computed interval is 0, so it is raised to 1 to keep the modulo below
    // from dividing by zero.
    $interval = intval($count * 0.01);
    if($interval < 1){
      $interval = 1;
    }

    foreach ($terms as $term){
      // we multiply by 50 because parsing and loading cvterms is only the
      // first half. The other half is updating the cvtermpath table.
      if($jobid and $i % $interval == 0){
        tripal_job_set_progress($jobid,intval(($i/$count)*50));
      }
      if(!tripal_cv_obo_process_term($term,$defaultcv,$obo,0,$newcvs)){
        tripal_cv_obo_quiterror("Failed to process terms from the ontology");
      }
      $i++;
    }
    return 1;
  }
  /**
   * Load a single Term or Typedef stanza and the data attached to it.
   *
   * Adds (or updates) the cvterm and then handles alt_id, synonyms (including
   * the deprecated exact/narrow/broad_synonym tags), comments, xrefs, is_a and
   * relationship tags. Tags that are recognised but not loaded only produce a
   * warning.
   *
   * @param $term
   *   The stanza: an array of tag => list of values.
   * @param $defaultcv
   *   The vocabulary record used when the stanza has no namespace.
   * @param $obo
   *   The complete parsed OBO data, needed to resolve relationship objects.
   * @param $is_relationship
   *   Non-zero when the stanza is a relationship type (Typedef).
   * @param $newcvs
   *   Passed by reference; the stanza's namespace is recorded in it.
   *
   * @return
   *   Always 1; failures end the script via tripal_cv_obo_quiterror().
   *
   * @ingroup tripal_obo_loader
   */
  function tripal_cv_obo_process_term($term,$defaultcv,$obo,$is_relationship=0,&$newcvs){

    // add the cvterm
    $cvterm = tripal_cv_obo_add_cvterm($term,$defaultcv,$is_relationship,1);
    if(!$cvterm){
      tripal_cv_obo_quiterror("Cannot add the term " . $term['id'][0]);
    }
    // the namespace tag is optional, so test for it before reading it
    if(isset($term['namespace'][0]) and $term['namespace'][0]){
      $newcvs[$term['namespace'][0]] = $cvterm->cv_id;
    }

    // now handle other properties
    if(isset($term['is_anonymous'])){
      print "WARNING: unhandled tag: is_anonymous\n";
    }
    if(isset($term['alt_id'])){
      foreach($term['alt_id'] as $alt_id){
        if(!tripal_cv_obo_add_cvterm_dbxref($cvterm,$alt_id)){
          tripal_cv_obo_quiterror("Cannot add alternate id $alt_id");
        }
      }
    }
    if(isset($term['subset'])){
      print "WARNING: unhandled tag: subset\n";
    }

    // reformat the deprecated 'exact_synonym, narrow_synonym, and broad_synonym'
    // types to be of the v1.2 standard by appending them to the synonym list
    $deprecated = array(
      'exact_synonym' => 'EXACT',
      'narrow_synonym' => 'NARROW',
      'broad_synonym' => 'BROAD',
    );
    foreach($deprecated as $tag => $scope){
      if(!isset($term[$tag])){
        continue;
      }
      foreach($term[$tag] as $synonym){
        $term['synonym'][] = preg_replace('/^\s*(\".+?\")(.*?)$/','$1 ' . $scope . ' $2',$synonym);
      }
    }

    // add synonyms for this cvterm; one pass covers both the regular and the
    // converted deprecated synonyms
    if(isset($term['synonym'])){
      if(!tripal_cv_obo_add_synonyms($term,$cvterm)){
        tripal_cv_obo_quiterror("Cannot add/update synonyms");
      }
    }

    // add the comment to the cvtermprop table
    if(isset($term['comment'])){
      $j = 0;
      foreach($term['comment'] as $comment){
        if(!tripal_cv_obo_add_cvterm_prop($cvterm,'comment',$comment,$j)){
          tripal_cv_obo_quiterror("Cannot add/update cvterm property");
        }
        $j++;
      }
    }

    // add any other external dbxrefs; the three xref tags are loaded the
    // same way
    foreach(array('xref','xref_analog','xref_unk') as $tag){
      if(!isset($term[$tag])){
        continue;
      }
      foreach($term[$tag] as $xref){
        if(!tripal_cv_obo_add_cvterm_dbxref($cvterm,$xref)){
          tripal_cv_obo_quiterror("Cannot add/update cvterm database reference (dbxref).");
        }
      }
    }

    // add is_a relationships for this cvterm
    if(isset($term['is_a'])){
      foreach($term['is_a'] as $is_a){
        if(!tripal_cv_obo_add_relationship($cvterm,$defaultcv,$obo,'is_a',$is_a,$is_relationship)){
          tripal_cv_obo_quiterror("Cannot add relationship is_a: $is_a");
        }
      }
    }
    if(isset($term['intersection_of'])){
      print "WARNING: unhandled tag: intersection_of\n";
    }
    if(isset($term['union_of'])){
      print "WARNING: unhandled tag: union_of\n";
    }
    if(isset($term['disjoint_from'])){
      print "WARNING: unhandled tag: disjoint_from\n";
    }
    if(isset($term['relationship'])){
      foreach($term['relationship'] as $value){
        // the value is "<relationship> <object id>": split at the first
        // whitespace character
        $rel = preg_replace('/^(.+?)\s.+?$/','\1',$value);
        $object = preg_replace('/^.+?\s(.+?)$/','\1',$value);
        if(!tripal_cv_obo_add_relationship($cvterm,$defaultcv,$obo,$rel,$object,$is_relationship)){
          tripal_cv_obo_quiterror("Cannot add relationship $rel: $object");
        }
      }
    }
    if(isset($term['replaced_by'])){
      print "WARNING: unhandled tag: replaced_by\n";
    }
    if(isset($term['consider'])){
      print "WARNING: unhandled tag: consider\n";
    }
    if(isset($term['use_term'])){
      print "WARNING: unhandled tag: use_term\n";
    }
    if(isset($term['builtin'])){
      print "WARNING: unhandled tag: builtin\n";
    }
    return 1;
  }
  /**
   * Return the db record with the given name, inserting it when missing.
   *
   * @param $dbname
   *   Name of the database record.
   *
   * @return
   *   The record object as fetched from the db table.
   *
   * @ingroup tripal_obo_loader
   */
  function tripal_cv_obo_add_db($dbname){
    $lookup = "SELECT * FROM {db} WHERE name ='%s'";

    $record = db_fetch_object(db_query($lookup, $dbname));
    if ($record) {
      return $record;
    }

    // not there yet: create it and read the new row back
    $inserted = db_query("INSERT INTO {db} (name) VALUES ('%s')", $dbname);
    if (!$inserted) {
      tripal_cv_obo_quiterror("Cannot create '$dbname' db in Chado.");
    }
    return db_fetch_object(db_query($lookup, $dbname));
  }
  /**
   * Insert a vocabulary, or update the definition of an existing one.
   *
   * Note that an existing vocabulary always has its definition replaced by
   * $comment, including when $comment is an empty string.
   *
   * @param $name
   *   Name of the vocabulary (cv.name).
   * @param $comment
   *   Text stored as the vocabulary's definition.
   *
   * @return
   *   The cv record object, read back after the insert or update.
   *
   * @ingroup tripal_obo_loader
   */
  function tripal_cv_obo_add_cv($name,$comment){
    $lookup = "SELECT * FROM {cv} WHERE name = '%s'";

    // see if the CV (default-namespace) exists already in the database
    $existing = db_fetch_object(db_query($lookup, $name));

    if ($existing) {
      $updated = db_query("UPDATE {cv} SET definition = '%s' WHERE name ='%s'", $comment, $name);
      if (!$updated) {
        tripal_cv_obo_quiterror("Failed to update the CV record");
      }
    }
    else {
      $inserted = db_query("INSERT INTO {cv} (name,definition) VALUES ('%s','%s')", $name, $comment);
      if (!$inserted) {
        tripal_cv_obo_quiterror("Failed to create the CV record");
      }
    }

    // either way, hand back the row as it now stands
    return db_fetch_object(db_query($lookup, $name));
  }
  /**
   * Link a cvterm to another term through a relationship type.
   *
   * Makes sure the relationship type and the object term exist as cvterms and
   * adds the cvterm_relationship row unless an identical one is present.
   *
   * @param $cvterm
   *   The subject cvterm record.
   * @param $defaultcv
   *   The vocabulary record used for terms without their own namespace.
   * @param $obo
   *   The complete parsed OBO data, searched for the object term.
   * @param $rel
   *   Name of the relationship type (for example 'is_a').
   * @param $objname
   *   Id of the object term as it appears in the OBO file.
   * @param $object_is_relationship
   *   Non-zero when the object term is itself a relationship type.
   *
   * @return
   *   Always 1; failures end the script via tripal_cv_obo_quiterror().
   *
   * @ingroup tripal_obo_loader
   */
  function tripal_cv_obo_add_relationship($cvterm,$defaultcv,$obo,$rel,$objname,$object_is_relationship=0){

    // make sure the relationship cvterm exists
    $rel_stanza = array(
      'name' => array($rel),
      'id' => array($rel),
      'definition' => array(''),
      'is_obsolete' => array(0),
    );
    $type_term = tripal_cv_obo_add_cvterm($rel_stanza, $defaultcv, 1, 0);
    if (!$type_term) {
      tripal_cv_obo_quiterror("Cannot find or insert the relationship term: $rel\n");
    }

    // get the object term
    $object_stanza = tripal_cv_obo_get_term($obo, $objname);
    if (!$object_stanza) {
      tripal_cv_obo_quiterror("Could not find object term $objname\n");
    }
    $object_term = tripal_cv_obo_add_cvterm($object_stanza, $defaultcv, $object_is_relationship, 1);
    if (!$object_term) {
      tripal_cv_obo_quiterror("Cannot add/find cvterm");
    }

    $type_id = $type_term->cvterm_id;
    $subject_id = $cvterm->cvterm_id;
    $object_id = $object_term->cvterm_id;

    // nothing to do when the cvterm_relationship already exists
    $find = "SELECT * FROM {cvterm_relationship} WHERE type_id = %d and subject_id = %d and object_id = %d";
    if (db_fetch_object(db_query($find, $type_id, $subject_id, $object_id))) {
      return 1;
    }

    $insert = "INSERT INTO {cvterm_relationship} (type_id,subject_id,object_id) VALUES (%d,%d,%d)";
    if (!db_query($insert, $type_id, $subject_id, $object_id)) {
      tripal_cv_obo_quiterror("Cannot add term relationship: '$cvterm->name' $rel '$object_term->name'");
    }
    return 1;
  }
  /**
   * Find the stanza with the given id in the parsed OBO data.
   *
   * @param $obo
   *   Parsed OBO data: stanza type => list of stanzas.
   * @param $id
   *   The id to look for; compared with the first 'id' value of each stanza.
   *
   * @return
   *   The first matching stanza, or NULL when none matches.
   *
   * @ingroup tripal_obo_loader
   */
  function tripal_cv_obo_get_term($obo,$id){
    foreach ($obo as $stanzas) {
      foreach ($stanzas as $candidate) {
        if (strcmp($candidate['id'][0], $id) != 0) {
          continue;
        }
        return $candidate;
      }
    }
    return NULL;
  }
  /**
   * Add the synonyms of a stanza to the cvtermsynonym table.
   *
   * Makes sure the 'synonym_type' vocabulary and a cvterm for each synonym
   * type exist, then inserts every synonym not yet stored for the term.
   * Dbxrefs attached to a synonym are not loaded.
   *
   * @param $term
   *   The stanza; its 'synonym' values are processed.
   * @param $cvterm
   *   The cvterm record the synonyms belong to.
   *
   * @return
   *   Always 1; failures end the script via tripal_cv_obo_quiterror().
   *
   * @ingroup tripal_obo_loader
   */
  function tripal_cv_obo_add_synonyms($term,$cvterm){

    // make sure we have a 'synonym_type' vocabulary
    $cvsql = "SELECT * FROM {cv} WHERE name='synonym_type'";
    $syncv = db_fetch_object(db_query($cvsql));
    if(!$syncv){
      $sql = "INSERT INTO {cv} (name,definition) VALUES ('synonym_type','')";
      if(!db_query($sql)){
        tripal_cv_obo_quiterror("Failed to add the synonyms type vocabulary");
      }
      // read the new record back with the SELECT (not the INSERT again)
      $syncv = db_fetch_object(db_query($cvsql));
    }

    // now add the synonyms
    if(isset($term['synonym'])){
      foreach($term['synonym'] as $synonym){
        // separate out the synonym definition and the synonym type
        $def = preg_replace('/^\s*"(.*)"\s*.*$/','\1',$synonym);
        $type = strtolower(preg_replace('/^.*"\s+(.*?)\s+.*$/','\1',$synonym));

        // make sure the synonym type exists in the 'synonym_type' vocabulary
        $cvtsql = "
          SELECT *
          FROM {cvterm} CVT
            INNER JOIN {cv} CV ON CVT.cv_id = CV.cv_id
          WHERE CVT.name = '%s' and CV.name = '%s'
        ";
        $syntype = db_fetch_object(db_query($cvtsql,$type,'synonym_type'));
        if(!$syntype){
          // build a 'term' object so we can add the missing term; it gets
          // its own variable so the $term parameter is left untouched
          $typeterm = array(
            'name' => array($type),
            'id' => array("internal:$type"),
            'definition' => array(''),
            'is_obsolete' => array(0),
          );
          $syntype = tripal_cv_obo_add_cvterm($typeterm,$syncv,0,1);
          if(!$syntype){
            tripal_cv_obo_quiterror("Cannot add synonym type: internal:$type");
          }
        }

        // make sure the synonym doesn't already exists
        $sql = "
          SELECT *
          FROM {cvtermsynonym}
          WHERE cvterm_id = %d and synonym = '%s'
        ";
        $syn = db_fetch_object(db_query($sql,$cvterm->cvterm_id,$def));
        if(!$syn){
          $sql = "INSERT INTO {cvtermsynonym} (cvterm_id,synonym,type_id)
                  VALUES(%d,'%s',%d)";
          if(!db_query($sql,$cvterm->cvterm_id,$def,$syntype->cvterm_id)){
            tripal_cv_obo_quiterror("Failed to insert the synonym for term: $cvterm->name ($def)");
          }
        }
      }
    }
    return 1;
  }
  /**
   * Parse an OBO file into header tags and stanzas.
   *
   * Lines before the first stanza header are stored in $header as
   * tag => list of values. Each stanza is stored in $obo under its type
   * (the text between the square brackets) and its first 'id' value, again as
   * tag => list of values. A stanza whose id was already seen is merged into
   * the stored one.
   *
   * Everything after a '!' is dropped from a line as a comment.
   * TODO: an escaped or quoted exclamation mark is cut off as well.
   *
   * @param $obo_file
   *   Path of the OBO file.
   * @param $obo
   *   Passed by reference; receives the stanzas.
   * @param $header
   *   Passed by reference; receives the header tags.
   *
   * @ingroup tripal_obo_loader
   */
  function tripal_cv_obo_parse($obo_file,&$obo,&$header){
    $in_header = 1;
    $stanza = array();
    $type = '';

    // iterate through the lines in the OBO file and parse the stanzas
    $fh = fopen($obo_file,'r');
    if(!$fh){
      print "ERROR: could not open the OBO file: '$obo_file'\n";
      return;
    }

    // compare against FALSE so that a line such as "0" does not end the loop
    while(($line = fgets($fh)) !== FALSE) {

      // remove newlines
      $line = rtrim($line);

      // remove any special characters that may be hiding
      $line = preg_replace('/[^(\x20-\x7F)]*/','', $line);

      // skip empty lines
      if(strcmp($line,'')==0) { continue; }

      // remove comments from end of lines
      $line = preg_replace('/^(.*?)\!.*$/','\1',$line);

      // a line that held nothing but a comment has nothing left to parse
      if(strcmp(trim($line),'')==0) { continue; }

      if(preg_match('/^\s*\[/',$line)){ // at the first stanza we're out of header
        $in_header = 0;

        // load the stanza we just finished reading
        _tripal_cv_obo_store_stanza($obo,$type,$stanza);

        // get the stanza type: Term, Typedef or Instance
        $type = preg_replace('/^\s*\[\s*(.+?)\s*\]\s*$/','\1',$line);

        // start fresh with a new array
        $stanza = array();
        continue;
      }

      // break apart the line into the tag and value but ignore any escaped
      // colons: hide them behind a placeholder while splitting
      $line = str_replace('\\:','|-|-|',$line);
      $pair = explode(":",$line,2);
      $tag = $pair[0];
      // a line without a colon has a tag but no value
      $value = isset($pair[1]) ? trim($pair[1]) : '';

      // return the escaped colon
      $tag = str_replace('|-|-|','\\:',$tag);
      $value = str_replace('|-|-|','\\:',$value);

      if($in_header){
        if(!isset($header[$tag])){
          $header[$tag] = array();
        }
        $header[$tag][] = $value;
      } else {
        if(!isset($stanza[$tag])){
          $stanza[$tag] = array();
        }
        $stanza[$tag][] = $value;
      }
    }
    fclose($fh);

    // now add the last term in the file
    _tripal_cv_obo_store_stanza($obo,$type,$stanza);
  }

  /**
   * Helper of tripal_cv_obo_parse(): file a finished stanza under its type/id.
   *
   * @param $obo
   *   Passed by reference; the stanza collection being built.
   * @param $type
   *   The stanza type the stanza belongs to.
   * @param $stanza
   *   The stanza (tag => list of values); an empty stanza is ignored.
   */
  function _tripal_cv_obo_store_stanza(&$obo,$type,$stanza){
    if(sizeof($stanza) == 0){
      return;
    }
    $id = isset($stanza['id'][0]) ? $stanza['id'][0] : '';
    if(!isset($obo[$type])){
      $obo[$type] = array();
    }
    if(!isset($obo[$type][$id])){
      $obo[$type][$id] = $stanza;
    } else {
      // array_merge() returns the merged array, so the result must be stored
      $obo[$type][$id] = array_merge($obo[$type][$id],$stanza);
    }
  }
  /**
   * Attach a database cross reference to a cvterm.
   *
   * The xref is split into database name and accession at the first colon; a
   * quoted description, when present, is stored with the dbxref. References
   * starting with http: or https: are stored whole under the 'URL' database.
   *
   * @param $cvterm
   *   The cvterm record the reference belongs to.
   * @param $xref
   *   The reference as written in the OBO file.
   *
   * @return
   *   1 on success, 0 when the xref has no accession (a warning is logged).
   *
   * @ingroup tripal_obo_loader
   */
  function tripal_cv_obo_add_cvterm_dbxref($cvterm,$xref){

    $dbname = preg_replace('/^(.+?):.*$/','$1',$xref);
    $accession = preg_replace('/^.+?:\s*(.*?)(\{.+$|\[.+$|\s.+$|\".+$|$)/','$1',$xref);

    // only take a description when the xref really has a quoted part;
    // preg_replace() would hand back the whole xref otherwise
    $description = '';
    if(preg_match('/^.+?\"(.+?)\".*?$/',$xref,$matches)){
      $description = $matches[1];
    }

    if(!$accession){
      // log the problem and let the caller decide how to report the failure
      watchdog('tripal_cv',"Cannot add a dbxref without an accession: '$xref'"
        ,NULL,WATCHDOG_WARNING);
      return 0;
    }

    // if the xref is a database link, handle that specially
    if(strcmp($dbname,'http')==0 or strcmp($dbname,'https')==0){
      $accession = $xref;
      $dbname = 'URL';
    }

    // check to see if the database exists
    $db = tripal_cv_obo_add_db($dbname);
    if(!$db){
      tripal_cv_obo_quiterror("Cannot find database '$dbname' in Chado.");
    }

    // now add the dbxref
    $dbxref = tripal_cv_obo_add_dbxref($db->db_id,$accession,'',$description);
    if(!$dbxref){
      tripal_cv_obo_quiterror("Cannot find or add the database reference (dbxref)");
    }

    // finally add the cvterm_dbxref but first check to make sure it exists
    $sql = "SELECT * from {cvterm_dbxref} WHERE cvterm_id = %d and dbxref_id = %d";
    if(!db_fetch_object(db_query($sql,$cvterm->cvterm_id,$dbxref->dbxref_id))){
      $sql = "INSERT INTO {cvterm_dbxref} (cvterm_id,dbxref_id) ".
             "VALUES (%d,%d)";
      if(!db_query($sql,$cvterm->cvterm_id,$dbxref->dbxref_id)){
        tripal_cv_obo_quiterror("Cannot add cvterm_dbxref: $xref");
      }
    }

    return 1;
  }
  /**
   * Store a property value for a cvterm in the cvtermprop table.
   *
   * The property type is a cvterm of the 'cvterm_property_type' vocabulary and
   * is created when missing. When $rank is 0 all cvtermprop rows of the term
   * are deleted before the new value is inserted.
   *
   * @param $cvterm
   *   The cvterm record the property belongs to.
   * @param $property
   *   Name of the property type.
   * @param $value
   *   The property value.
   * @param $rank
   *   Rank of the value.
   *
   * @return
   *   Always 1; failures end the script via tripal_cv_obo_quiterror().
   *
   * @ingroup tripal_obo_loader
   */
  function tripal_cv_obo_add_cvterm_prop($cvterm,$property,$value,$rank){

    // make sure the 'cvterm_property_type' CV exists
    $prop_cv = tripal_cv_obo_add_cv('cvterm_property_type', '');
    if (!$prop_cv) {
      tripal_cv_obo_quiterror("Cannot add/find cvterm_property_type cvterm");
    }

    // get the property type cvterm. If it doesn't exist then we want to add it
    $type_lookup = "
      SELECT *
      FROM {cvterm} CVT INNER JOIN {cv} CV on CVT.cv_id = CV.cv_id
      WHERE CVT.name = '%s' and CV.name = '%s'
    ";
    $prop_type = db_fetch_object(db_query($type_lookup, $property, 'cvterm_property_type'));
    if (!$prop_type) {
      $type_stanza = array(
        'name' => array($property),
        'id' => array("internal:$property"),
        'definition' => array(''),
        'is_obsolete' => array(0),
      );
      $prop_type = tripal_cv_obo_add_cvterm($type_stanza, $prop_cv, 0, 0);
      if (!$prop_type) {
        tripal_cv_obo_quiterror("Cannot add cvterm property: internal:$property");
      }
    }

    // the first value (rank 0) resets the properties stored for this term
    if ($rank == 0) {
      db_query("DELETE FROM {cvtermprop} WHERE cvterm_id = %d", $cvterm->cvterm_id);
    }

    // now add the property
    $insert = "INSERT INTO {cvtermprop} (cvterm_id,type_id,value,rank) VALUES (%d, %d, '%s',%d)";
    $stored = db_query($insert, $cvterm->cvterm_id, $prop_type->cvterm_id, $value, $rank);
    if (!$stored) {
      tripal_cv_obo_quiterror("Could not add property $property for term\n");
    }
    return 1;
  }
  /**
   * Find, insert or update the cvterm described by a stanza.
   *
   * The stanza id is split at the first colon into database name and
   * accession. Relationship terms without a database part are looked up in
   * the 'OBO_REL' and '_global' databases first and are otherwise added to
   * '_global'. Any other term without a database part is an error.
   *
   * @param $term
   *   The stanza: tag => list of values ('id', 'name' and optionally
   *   'namespace', 'def', 'is_obsolete' are read).
   * @param $defaultcv
   *   The vocabulary record used when the stanza has no namespace.
   * @param $is_relationship
   *   Non-zero when the term is a relationship type.
   * @param $update
   *   Non-zero to update an existing cvterm found by name and database.
   *
   * @return
   *   The cvterm record (name, cvterm_id, cv_id, dbname, db_id).
   *
   * @ingroup tripal_obo_loader
   */
  function tripal_cv_obo_add_cvterm($term,$defaultcv,$is_relationship = 0,$update = 1){

    // get the term properties; namespace and def are optional tags
    $id = $term['id'][0];
    $name = $term['name'][0];
    $cvname = isset($term['namespace'][0]) ? $term['namespace'][0] : '';
    $definition = '';
    if(isset($term['def'][0])){
      $definition = preg_replace('/^\"(.*)\"/','\1',$term['def'][0]);
    }
    $is_obsolete = 0;
    if(isset($term['is_obsolete'][0]) and strcmp($term['is_obsolete'][0],'true')==0){
      $is_obsolete = 1;
    }
    if(!$cvname){
      $cvname = $defaultcv->name;
    }

    // make sure the CV name exists
    $cv = tripal_cv_obo_add_cv($cvname,'');
    if(!$cv){
      tripal_cv_obo_quiterror("Cannot find namespace '$cvname' when adding/updating $id");
    }

    // this SQL statement will be used a lot to find a cvterm so just set it
    // here for easy reference below. cv_id is selected because
    // tripal_cv_obo_process_term() reads it from the returned record.
    $cvtermsql = "SELECT CVT.name, CVT.cvterm_id, CVT.cv_id, DB.name as dbname, DB.db_id
                  FROM {cvterm} CVT
                    INNER JOIN {dbxref} DBX on CVT.dbxref_id = DBX.dbxref_id
                    INNER JOIN {db} DB on DBX.db_id = DB.db_id
                    INNER JOIN {cv} CV on CV.cv_id = CVT.cv_id
                  WHERE CVT.name = '%s' and DB.name = '%s'";

    // get the accession and the database from the cvterm; both stay empty
    // when the id has no "db:accession" form
    $accession = '';
    $dbname = '';
    if(preg_match('/^.+?:.*$/',$id)){
      $accession = preg_replace('/^.+?:(.*)$/','\1',$id);
      $dbname = preg_replace('/^(.+?):.*$/','\1',$id);
    }
    if($is_relationship and !$dbname){
      $accession = $id;
      // because this is a relationship cvterm first check to see if it
      // exists in the relationship ontology. If it does then return the cvterm.
      // If not then set the dbname to _global and we'll add it or find it there
      $cvterm = db_fetch_object(db_query($cvtermsql,$name,'OBO_REL'));
      if($cvterm){
        return $cvterm;
      }
      // next check if this term is in the _global ontology. If it is then
      // return it no matter what the original CV
      $dbname = '_global';
      $cvterm = db_fetch_object(db_query($cvtermsql,$name,$dbname));
      if($cvterm){
        return $cvterm;
      }
    }
    if(!$is_relationship and !$dbname){
      tripal_cv_obo_quiterror("A database identifier is missing from the term: $id");
    }

    // check to see if the database exists.
    $db = tripal_cv_obo_add_db($dbname);
    if(!$db){
      tripal_cv_obo_quiterror("Cannot find database '$dbname' in Chado.");
    }

    // if the cvterm doesn't exist then add it otherwise just update it
    $cvterm = db_fetch_object(db_query($cvtermsql,$name,$dbname));
    if(!$cvterm){
      // check to see if the dbxref exists if not, add it
      $dbxref = tripal_cv_obo_add_dbxref($db->db_id,$accession);
      if(!$dbxref){
        tripal_cv_obo_quiterror("Failed to find or insert the dbxref record for cvterm, $name (id: $accession), for database $dbname");
      }

      // check to see if the dbxref already has an entry in the cvterm table
      $sql = "SELECT * FROM {cvterm} WHERE dbxref_id = %d";
      $check = db_fetch_object(db_query($sql,$dbxref->dbxref_id));

      if(!$check){
        // now add the cvterm
        $sql = "
          INSERT INTO {cvterm} (cv_id, name, definition, dbxref_id,
            is_obsolete, is_relationshiptype)
          VALUES (%d,'%s','%s',%d,%d,%d)
        ";
        if(!db_query($sql,$cv->cv_id,$name,$definition,
            $dbxref->dbxref_id,$is_obsolete,$is_relationship)){
          if(!$is_relationship){
            tripal_cv_obo_quiterror("Failed to insert the term: $name ($dbname)");
          } else {
            tripal_cv_obo_quiterror("Failed to insert the relationship term: $name (cv: " . $cvname . " db: $dbname)");
          }
        }
        if(!$is_relationship){
          print "Added CV term: $name ($dbname)\n";
        } else {
          print "Added relationship CV term: $name ($dbname)\n";
        }
      }
      elseif(strcmp($check->name,$name)!=0){
        // this dbxref_id already exists in the database but the name is
        // different. We will trust that the OBO is correct and that there
        // has been a name change for this dbxref, so we'll update
        $sql = "
          UPDATE {cvterm} SET name = '%s', definition = '%s',
            is_obsolete = %d, is_relationshiptype = %d
          WHERE dbxref_id = %d
        ";
        if(!db_query($sql,$name,$definition,$is_obsolete,$is_relationship,$dbxref->dbxref_id)){
          if(!$is_relationship){
            tripal_cv_obo_quiterror("Failed to update the term: $name ($dbname)");
          } else {
            tripal_cv_obo_quiterror("Failed to update the relationship term: $name (cv: " . $cvname . " db: $dbname)");
          }
        }
        if(!$is_relationship){
          print "Updated CV term: $name ($dbname)\n";
        } else {
          print "Updated relationship CV term: $name ($dbname)\n";
        }
      }
      // otherwise the entry exists with the same name: nothing to change

      $cvterm = db_fetch_object(db_query($cvtermsql,$name,$dbname));
    }
    elseif($update) { // update the cvterm
      $sql = "
        UPDATE {cvterm} SET name='%s', definition='%s',
          is_obsolete = %d, is_relationshiptype = %d
        WHERE cvterm_id = %d
      ";
      if(!db_query($sql,$name,$definition,
          $is_obsolete,$is_relationship,$cvterm->cvterm_id)){
        tripal_cv_obo_quiterror("Failed to update the term: $name");
      }
      $cvterm = db_fetch_object(db_query($cvtermsql,$name,$dbname));
      if(!$is_relationship){
        print "Updated CV term: $name ($dbname)\n";
      } else {
        print "Updated relationship CV term: $name ($dbname)\n";
      }
    }

    // return the cvterm
    return $cvterm;
  }
  /**
   * Return the dbxref for a database/accession pair, inserting it if needed.
   *
   * @param $db_id
   *   Id of the db record the accession belongs to.
   * @param $accession
   *   The accession.
   * @param $version
   *   Version stored with a newly inserted dbxref.
   * @param $description
   *   Description stored with a newly inserted dbxref.
   *
   * @return
   *   Record object holding the dbxref_id.
   *
   * @ingroup tripal_obo_loader
   */
  function tripal_cv_obo_add_dbxref($db_id,$accession,$version='',$description=''){
    $lookup = "SELECT dbxref_id FROM {dbxref} WHERE db_id = %d and accession = '%s'";

    // reuse the dbxref when it is already there
    $found = db_fetch_object(db_query($lookup, $db_id, $accession));
    if ($found) {
      return $found;
    }

    $insert = "
      INSERT INTO {dbxref} (db_id, accession, version, description)
      VALUES (%d,'%s','%s','%s')
    ";
    if (!db_query($insert, $db_id, $accession, $version, $description)) {
      tripal_cv_obo_quiterror("Failed to insert the dbxref record $accession");
    }
    print "Added Dbxref accession: $accession\n";

    return db_fetch_object(db_query($lookup, $db_id, $accession));
  }