obo_loader.php 28 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917
  1. <?php
  2. /**
  3. * @file
  4. * Tripal Ontology Loader
  5. *
  6. * @defgroup tripal_obo_loader Tripal Ontology Loader
  7. * @ingroup tripal_cv
  8. */
  /**
   * Loads the ontology described by a row of the tripal_cv_obo table.
   *
   * The row's path decides which loader runs: a path starting with http://,
   * https:// or ftp:// is passed to tripal_cv_load_obo_v1_2_url(); any other
   * path is passed to tripal_cv_load_obo_v1_2_file(), resolved first against
   * the Drupal installation directory and then used exactly as stored.
   *
   * @param $obo_id
   *   The obo_id of the tripal_cv_obo row to load.
   * @param $jobid
   *   Optional job identifier, handed unchanged to the loader that is called.
   *
   * @ingroup tripal_obo_loader
   */
  function tripal_cv_load_obo_v1_2_id($obo_id, $jobid = NULL) {
    // get the OBO reference
    $sql = "SELECT * FROM {tripal_cv_obo} WHERE obo_id = %d";
    $obo = db_fetch_object(db_query($sql, $obo_id));

    // without a matching row there is no name or path to work with
    if (!$obo) {
      print "ERROR: could not find an OBO reference with obo_id: '$obo_id'\n";
      return;
    }

    // if the reference is for a remote URL then run the URL processing function
    if (preg_match('/^(https?|ftp):\/\//', $obo->path)) {
      tripal_cv_load_obo_v1_2_url($obo->name, $obo->path, $jobid, 0);
      return;
    }

    // otherwise the reference is a file: check to see if it is located local
    // to Drupal
    $dfile = $_SERVER['DOCUMENT_ROOT'] . base_path() . $obo->path;
    if (file_exists($dfile)) {
      tripal_cv_load_obo_v1_2_file($obo->name, $dfile, $jobid, 0);
      return;
    }

    // if not local to Drupal, the file must be someplace else, just use
    // the full path provided
    if (file_exists($obo->path)) {
      tripal_cv_load_obo_v1_2_file($obo->name, $obo->path, $jobid, 0);
      return;
    }

    print "ERROR: could not find OBO file: '$obo->path'\n";
  }
  /**
   * Loads an OBO file from disk and refreshes the cvtermpath table.
   *
   * @param $obo_name
   *   Name stored with the file reference when $is_new is set.
   * @param $file
   *   Path of the OBO file handed to tripal_cv_load_obo_v1_2().
   * @param $jobid
   *   Optional job identifier passed on to the parser and the cvtermpath
   *   update.
   * @param $is_new
   *   When true the name/path pair is recorded through
   *   tripal_cv_load_obo_add_ref() after the file has been loaded.
   *
   * @ingroup tripal_obo_loader
   */
  function tripal_cv_load_obo_v1_2_file($obo_name, $file, $jobid = NULL, $is_new = TRUE) {
    // filled by the loader with the vocabularies it touched
    $loaded_cvs = array();
    tripal_cv_load_obo_v1_2($file, $jobid, $loaded_cvs);

    // remember where this ontology came from
    if ($is_new) {
      tripal_cv_load_obo_add_ref($obo_name, $file);
    }

    // update the cvtermpath table
    tripal_cv_load_update_cvtermpath($loaded_cvs, $jobid);

    echo "Ontology Sucessfully loaded!\n";
  }
  /**
   * Downloads an OBO file from a URL, loads it and refreshes cvtermpath.
   *
   * The remote file is copied to a temporary file, parsed with
   * tripal_cv_load_obo_v1_2() and the temporary file is removed again. Any
   * download problem ends the run through tripal_cv_obo_quiterror().
   *
   * @param $obo_name
   *   Name stored with the URL reference when $is_new is set.
   * @param $url
   *   Location of the remote OBO file.
   * @param $jobid
   *   Optional job identifier passed on to the parser and the cvtermpath
   *   update.
   * @param $is_new
   *   When true the name/URL pair is recorded through
   *   tripal_cv_load_obo_add_ref() after the file has been loaded.
   *
   * @ingroup tripal_obo_loader
   */
  function tripal_cv_load_obo_v1_2_url($obo_name, $url, $jobid = NULL, $is_new = TRUE) {
    $newcvs = array();

    // first download the OBO
    $temp = tempnam(sys_get_temp_dir(), 'obo_');
    if ($temp === FALSE) {
      tripal_cv_obo_quiterror("Unable to create a temporary file for downloading $url.");
    }
    print "Downloading URL $url, saving to $temp\n";

    $url_fh = fopen($url, "r");
    if (!$url_fh) {
      // tempnam() already created the file, so do not leave it behind
      unlink($temp);
      tripal_cv_obo_quiterror("Unable to download the remote OBO file at $url. Could a firewall be blocking outgoing connections? ".
        " if you are unable to download the file you may manually download the OBO file and use the web interface to ".
        " specify the location of the file on your server.");
    }

    $obo_fh = fopen($temp, "w");
    if (!$obo_fh) {
      fclose($url_fh);
      unlink($temp);
      tripal_cv_obo_quiterror("Unable to open the temporary file $temp for writing.");
    }

    // copy in chunks; stop on a failed read or write instead of spinning on
    // a stream that never reports end-of-file
    $download_ok = TRUE;
    while (!feof($url_fh)) {
      $chunk = fread($url_fh, 8192);
      if ($chunk === FALSE or fwrite($obo_fh, $chunk) === FALSE) {
        $download_ok = FALSE;
        break;
      }
    }
    fclose($url_fh);
    fclose($obo_fh);
    if (!$download_ok) {
      unlink($temp);
      tripal_cv_obo_quiterror("Failed while downloading the remote OBO file at $url to $temp.");
    }

    // second, parse the OBO
    tripal_cv_load_obo_v1_2($temp, $jobid, $newcvs);

    // now remove the temp file
    unlink($temp);

    if ($is_new) {
      tripal_cv_load_obo_add_ref($obo_name, $url);
    }

    // update the cvtermpath table
    tripal_cv_load_update_cvtermpath($newcvs, $jobid);
    print "Ontology Sucessfully loaded!\n";
  }
  /**
   * Runs the cvtermpath update for every vocabulary in a list.
   *
   * @param $newcvs
   *   Array whose values are the cv ids to update; the keys are not used.
   * @param $jobid
   *   Job identifier passed to tripal_cv_update_cvtermpath() with each id.
   *
   * @ingroup tripal_obo_loader
   */
  function tripal_cv_load_update_cvtermpath($newcvs, $jobid) {
    echo "\nUpdating cvtermpath table. This may take a while...\n";

    foreach ($newcvs as $cv_id) {
      tripal_cv_update_cvtermpath($cv_id, $jobid);
    }
  }
  /**
   * Records an ontology reference in the tripal_cv_obo table.
   *
   * @param $name
   *   Name to store for the ontology.
   * @param $path
   *   File path or URL to store for the ontology.
   */
  function tripal_cv_load_obo_add_ref($name, $path) {
    db_query(
      "INSERT INTO {tripal_cv_obo} (name,path) VALUES ('%s','%s')",
      $name,
      $path
    );
  }
  /**
   * Parses an OBO file and loads its typedefs and terms.
   *
   * The connection returned by tripal_db_set_active('chado') is kept in the
   * global $_tripal_cv_previous_db so that tripal_cv_obo_loader_done() can
   * hand it back when loading has finished.
   *
   * @param $file
   *   Path of the OBO file to parse.
   * @param $jobid
   *   Optional job identifier passed to tripal_cv_obo_process_terms().
   * @param $newcvs
   *   Array passed by reference; receives namespace => cv_id entries for the
   *   default namespace and for whatever the term processors add.
   *
   * @return
   *   The return value of tripal_cv_obo_loader_done().
   *
   * @ingroup tripal_obo_loader
   */
  function tripal_cv_load_obo_v1_2($file, $jobid = NULL, &$newcvs) {
    global $_tripal_cv_previous_db;

    $header = array();
    $obo = array();

    print "Opening File $file\n";

    // set the search path
    $_tripal_cv_previous_db = tripal_db_set_active('chado');

    // make sure we have an 'internal' and a '_global' database
    if (!tripal_cv_obo_add_db('internal')) {
      tripal_cv_obo_quiterror("Cannot add 'internal' database");
    }
    if (!tripal_cv_obo_add_db('_global')) {
      tripal_cv_obo_quiterror("Cannot add '_global' database");
    }

    // parse the obo file
    tripal_cv_obo_parse($file, $obo, $header);

    // a file without a default-namespace header yields an empty name here
    // rather than an undefined-index access
    $namespace = isset($header['default-namespace'][0]) ? $header['default-namespace'][0] : '';

    // add the CV for this ontology to the database
    $defaultcv = tripal_cv_obo_add_cv($namespace, '');
    if (!$defaultcv) {
      tripal_cv_obo_quiterror('Cannot add namespace ' . $namespace);
    }
    $newcvs[$namespace] = $defaultcv->cv_id;

    // add any typedefs to the vocabulary first; a file may have none
    $typedefs = isset($obo['Typedef']) ? $obo['Typedef'] : array();
    foreach ($typedefs as $typedef) {
      tripal_cv_obo_process_term($typedef, $defaultcv, $obo, 1, $newcvs);
    }

    // next add terms to the vocabulary; a file may have none
    $terms = isset($obo['Term']) ? $obo['Term'] : array();
    if (!tripal_cv_obo_process_terms($terms, $defaultcv, $obo, $jobid, $newcvs)) {
      tripal_cv_obo_quiterror('Cannot add terms from this ontology');
    }

    return tripal_cv_obo_loader_done();
  }
  /**
   * Prints an error, resets the search path and stops the script.
   *
   * This function never returns to its caller.
   *
   * @param $message
   *   Text printed after the "ERROR: " prefix.
   *
   * @ingroup tripal_obo_loader
   */
  function tripal_cv_obo_quiterror($message) {
    echo "ERROR: " . $message . "\n";

    // put the search path back before the script ends
    db_query("set search_path to public");

    exit();
  }
  /**
   * Restores the database state that was active before loading started.
   *
   * The connection saved in the global $_tripal_cv_previous_db by
   * tripal_cv_load_obo_v1_2() is passed back to tripal_db_set_active() and
   * the search path is reset to public.
   *
   * @return
   *   Always an empty string.
   *
   * @ingroup tripal_obo_loader
   */
  function tripal_cv_obo_loader_done() {
    // the saved connection lives in a global; without this declaration the
    // variable below would be an undefined local
    global $_tripal_cv_previous_db;

    // return the search path to normal
    tripal_db_set_active($_tripal_cv_previous_db);
    db_query("set search_path to public");
    return '';
  }
  /**
   * Loads every term of an ontology and reports job progress along the way.
   *
   * Progress is scaled to the range 0-50 because parsing and loading cvterms
   * is only the first half of a load; the other half is updating the
   * cvtermpath table.
   *
   * @param $terms
   *   Array of term stanzas as produced by tripal_cv_obo_parse().
   * @param $defaultcv
   *   The cv record passed on to tripal_cv_obo_process_term().
   * @param $obo
   *   The complete parsed OBO structure, passed on for relationship lookups.
   * @param $jobid
   *   Optional job identifier; progress is only reported when it is set.
   * @param $newcvs
   *   Array passed by reference and handed to tripal_cv_obo_process_term().
   *
   * @return
   *   1 once all terms have been processed.
   *
   * @ingroup tripal_obo_loader
   */
  function tripal_cv_obo_process_terms($terms, $defaultcv, $obo, $jobid = NULL, &$newcvs) {
    $i = 0;
    $count = sizeof($terms);

    // report roughly every 1% of the terms; the interval must never drop
    // below 1 or the modulo below would divide by zero for small ontologies
    $interval = intval($count * 0.01);
    if ($interval < 1) {
      $interval = 1;
    }

    foreach ($terms as $term) {
      // update the job status every 1% terms
      if ($jobid and $i % $interval == 0) {
        tripal_job_set_progress($jobid, intval(($i / $count) * 50));
      }

      if (!tripal_cv_obo_process_term($term, $defaultcv, $obo, 0, $newcvs)) {
        tripal_cv_obo_quiterror("Failed to process terms from the ontology");
      }
      $i++;
    }
    return 1;
  }
  /**
   * Loads a single term or typedef stanza together with its details.
   *
   * The cvterm itself is added or updated first. After that the following
   * tags are processed: alt_id, synonym (including the deprecated
   * exact_synonym, narrow_synonym and broad_synonym forms), comment, xref,
   * xref_analog, xref_unk, is_a and relationship.
   *
   * Tags that are recognised but currently ignored: is_anonymous, subset,
   * intersection_of, union_of, disjoint_from, replaced_by, consider,
   * use_term and builtin.
   *
   * @param $term
   *   One stanza: an array mapping each tag to the list of its values.
   * @param $defaultcv
   *   The cv record used when the stanza names no namespace of its own.
   * @param $obo
   *   The complete parsed OBO structure, used to look up related terms.
   * @param $is_relationship
   *   Non-zero when the stanza is a typedef (relationship type).
   * @param $newcvs
   *   Array passed by reference; when the stanza names a namespace, the
   *   cv_id of the resulting cvterm is stored under that namespace.
   *
   * @return
   *   1 on success. Failures end the run through tripal_cv_obo_quiterror().
   *
   * @ingroup tripal_obo_loader
   */
  function tripal_cv_obo_process_term($term, $defaultcv, $obo, $is_relationship = 0, &$newcvs) {
    // add the cvterm
    $cvterm = tripal_cv_obo_add_cvterm($term, $defaultcv, $is_relationship, 1);
    if (!$cvterm) {
      $term_id = isset($term['id'][0]) ? $term['id'][0] : '';
      tripal_cv_obo_quiterror("Cannot add the term " . $term_id);
    }

    // the namespace tag is optional, so test for it before reading it
    if (!empty($term['namespace'][0])) {
      $newcvs[$term['namespace'][0]] = $cvterm->cv_id;
    }

    // alternate ids are stored as dbxrefs of the term
    if (isset($term['alt_id'])) {
      foreach ($term['alt_id'] as $alt_id) {
        if (!tripal_cv_obo_add_cvterm_dbxref($cvterm, $alt_id)) {
          tripal_cv_obo_quiterror("Cannot add alternate id $alt_id");
        }
      }
    }

    // reformat the deprecated 'exact_synonym, narrow_synonym, and
    // broad_synonym' types to be of the v1.2 standard and append them to the
    // regular synonym list
    $deprecated_scopes = array(
      'exact_synonym' => 'EXACT',
      'narrow_synonym' => 'NARROW',
      'broad_synonym' => 'BROAD',
    );
    foreach ($deprecated_scopes as $tag => $scope) {
      if (!isset($term[$tag])) {
        continue;
      }
      foreach ($term[$tag] as $synonym) {
        $term['synonym'][] = preg_replace('/^\s*(\".+?\")(.*?)$/', '$1 ' . $scope . ' $2', $synonym);
      }
    }

    // add synonyms for this cvterm in a single pass
    if (isset($term['synonym'])) {
      if (!tripal_cv_obo_add_synonyms($term, $cvterm)) {
        tripal_cv_obo_quiterror("Cannot add/update synonyms");
      }
    }

    // add the comment to the cvtermprop table, ranked in file order
    if (isset($term['comment'])) {
      $rank = 0;
      foreach ($term['comment'] as $comment) {
        if (!tripal_cv_obo_add_cvterm_prop($cvterm, 'comment', $comment, $rank)) {
          tripal_cv_obo_quiterror("Cannot add/update cvterm property");
        }
        $rank++;
      }
    }

    // add any other external dbxrefs; the three xref tags are handled alike
    foreach (array('xref', 'xref_analog', 'xref_unk') as $xref_tag) {
      if (!isset($term[$xref_tag])) {
        continue;
      }
      foreach ($term[$xref_tag] as $xref) {
        if (!tripal_cv_obo_add_cvterm_dbxref($cvterm, $xref)) {
          tripal_cv_obo_quiterror("Cannot add/update cvterm database reference (dbxref).");
        }
      }
    }

    // add is_a relationships for this cvterm
    if (isset($term['is_a'])) {
      foreach ($term['is_a'] as $is_a) {
        if (!tripal_cv_obo_add_relationship($cvterm, $defaultcv, $obo, 'is_a', $is_a, $is_relationship)) {
          tripal_cv_obo_quiterror("Cannot add relationship is_a: $is_a");
        }
      }
    }

    // add the remaining relationships; each value is "<type> <object>"
    if (isset($term['relationship'])) {
      foreach ($term['relationship'] as $value) {
        $rel = preg_replace('/^(.+?)\s.+?$/', '\1', $value);
        $object = preg_replace('/^.+?\s(.+?)$/', '\1', $value);
        if (!tripal_cv_obo_add_relationship($cvterm, $defaultcv, $obo, $rel, $object, $is_relationship)) {
          tripal_cv_obo_quiterror("Cannot add relationship $rel: $object");
        }
      }
    }

    return 1;
  }
  /**
   * Finds a db record by name, creating it when it does not exist yet.
   *
   * @param $dbname
   *   Name of the database record.
   *
   * @return
   *   The db record as fetched after the lookup or the insert.
   *
   * @ingroup tripal_obo_loader
   */
  function tripal_cv_obo_add_db($dbname) {
    $lookup = "SELECT * FROM {db} WHERE name ='%s'";

    // an existing record is returned untouched
    $record = db_fetch_object(db_query($lookup, $dbname));
    if ($record) {
      return $record;
    }

    // otherwise create the record and read it back
    $created = db_query("INSERT INTO {db} (name) VALUES ('%s')", $dbname);
    if (!$created) {
      tripal_cv_obo_quiterror("Cannot create '$dbname' db in Chado.");
    }
    return db_fetch_object(db_query($lookup, $dbname));
  }
  /**
   * Inserts a controlled vocabulary, or updates its definition if it exists.
   *
   * Note that an existing record always has its definition overwritten with
   * $comment, including when $comment is an empty string.
   *
   * @param $name
   *   Name of the vocabulary (for an OBO file, its namespace).
   * @param $comment
   *   Text written to the definition column.
   *
   * @return
   *   The cv record as fetched after the insert or update.
   *
   * @ingroup tripal_obo_loader
   */
  function tripal_cv_obo_add_cv($name, $comment) {
    $lookup = "SELECT * FROM {cv} WHERE name = '%s'";
    $existing = db_fetch_object(db_query($lookup, $name));

    if ($existing) {
      // the vocabulary is already there: refresh its definition
      $succeeded = db_query("UPDATE {cv} SET definition = '%s' WHERE name ='%s'", $comment, $name);
      $failure = "Failed to update the CV record";
    }
    else {
      // first time this vocabulary is seen
      $succeeded = db_query("INSERT INTO {cv} (name,definition) VALUES ('%s','%s')", $name, $comment);
      $failure = "Failed to create the CV record";
    }

    if (!$succeeded) {
      tripal_cv_obo_quiterror($failure);
    }

    // read the record back so the caller sees the stored values
    return db_fetch_object(db_query($lookup, $name));
  }
  /**
   * Adds a cvterm relationship (subject, type, object).
   *
   * The relationship type and the object term are added as cvterms first if
   * needed; the cvterm_relationship row is only inserted when it does not
   * exist yet.
   *
   * @param $cvterm
   *   The subject cvterm record.
   * @param $defaultcv
   *   The cv record passed to tripal_cv_obo_add_cvterm() for the type and
   *   the object term.
   * @param $obo
   *   The complete parsed OBO structure, searched for the object term.
   * @param $rel
   *   Name of the relationship type, for example 'is_a'.
   * @param $objname
   *   Id of the object term as written in the OBO file.
   * @param $object_is_relationship
   *   Non-zero when the object term is itself a relationship type.
   *
   * @return
   *   TRUE on success. Failures end the run through
   *   tripal_cv_obo_quiterror().
   *
   * @ingroup tripal_obo_loader
   */
  function tripal_cv_obo_add_relationship($cvterm, $defaultcv, $obo, $rel, $objname, $object_is_relationship = 0) {
    // make sure the relationship cvterm exists; the definition is stored
    // under 'def', the key tripal_cv_obo_add_cvterm() reads
    $term = array(
      'name' => array($rel),
      'id' => array($rel),
      'def' => array(''),
      'is_obsolete' => array(0),
    );
    $relcvterm = tripal_cv_obo_add_cvterm($term, $defaultcv, 1, 0);
    if (!$relcvterm) {
      tripal_cv_obo_quiterror("Cannot find or insert the relationship term: $rel\n");
    }

    // get the object term
    $objterm = tripal_cv_obo_get_term($obo, $objname);
    if (!$objterm) {
      tripal_cv_obo_quiterror("Could not find object term $objname\n");
    }
    $objcvterm = tripal_cv_obo_add_cvterm($objterm, $defaultcv, $object_is_relationship, 1);
    if (!$objcvterm) {
      tripal_cv_obo_quiterror("Cannot add/find cvterm");
    }

    // check to see if the cvterm_relationship already exists, if not add it
    $cvrsql = "SELECT * FROM {cvterm_relationship} WHERE type_id = %d and subject_id = %d and object_id = %d";
    if (!db_fetch_object(db_query($cvrsql, $relcvterm->cvterm_id, $cvterm->cvterm_id, $objcvterm->cvterm_id))) {
      $sql = "INSERT INTO {cvterm_relationship} ".
        "(type_id,subject_id,object_id) VALUES (%d,%d,%d)";
      if (!db_query($sql, $relcvterm->cvterm_id, $cvterm->cvterm_id, $objcvterm->cvterm_id)) {
        tripal_cv_obo_quiterror("Cannot add term relationship: '$cvterm->name' $rel '$objcvterm->name'");
      }
    }
    return TRUE;
  }
  /**
   * Looks up a stanza by id in a parsed OBO structure.
   *
   * Every stanza type is searched in order and the first stanza whose first
   * 'id' value equals $id is returned.
   *
   * @param $obo
   *   Parsed OBO structure: an array of stanza types, each holding stanzas.
   * @param $id
   *   The id to search for.
   *
   * @return
   *   The matching stanza array, or FALSE when no stanza has that id.
   *
   * @ingroup tripal_obo_loader
   */
  function tripal_cv_obo_get_term($obo, $id) {
    foreach ($obo as $stanzas) {
      foreach ($stanzas as $candidate) {
        // skip everything whose id differs
        if (strcmp($candidate['id'][0], $id) != 0) {
          continue;
        }
        return $candidate;
      }
    }

    // nothing matched in any stanza type
    return FALSE;
  }
  /**
   * Adds the synonyms of a term to the cvtermsynonym table.
   *
   * The 'synonym_type' vocabulary and the cvterm for each synonym type are
   * created when they are missing. A synonym that is already stored for the
   * cvterm is left alone.
   *
   * NOTE: the dbxrefs listed in the trailing [...] of a synonym are not
   * loaded.
   *
   * @param $term
   *   One stanza; only its 'synonym' values are read.
   * @param $cvterm
   *   The cvterm record the synonyms belong to.
   *
   * @return
   *   TRUE on success. Failures end the run through
   *   tripal_cv_obo_quiterror().
   *
   * @ingroup tripal_obo_loader
   */
  function tripal_cv_obo_add_synonyms($term, $cvterm) {
    // make sure we have a 'synonym_type' vocabulary
    $syncv_sql = "SELECT * FROM {cv} WHERE name='synonym_type'";
    $syncv = db_fetch_object(db_query($syncv_sql));
    if (!$syncv) {
      $sql = "INSERT INTO {cv} (name,definition) VALUES ('synonym_type','')";
      if (!db_query($sql)) {
        tripal_cv_obo_quiterror("Failed to add the synonyms type vocabulary");
      }
      // read the new record back with the SELECT (not the INSERT)
      $syncv = db_fetch_object(db_query($syncv_sql));
    }

    if (!isset($term['synonym'])) {
      return TRUE;
    }

    // finds a synonym type by name within the 'synonym_type' vocabulary
    $cvtsql = "
      SELECT *
      FROM {cvterm} CVT
        INNER JOIN {cv} CV ON CVT.cv_id = CV.cv_id
      WHERE CVT.name = '%s' and CV.name = '%s'
    ";
    // finds an already stored synonym of the cvterm
    $synsql = "
      SELECT *
      FROM {cvtermsynonym}
      WHERE cvterm_id = %d and synonym = '%s'
    ";

    // now add the synonyms
    foreach ($term['synonym'] as $synonym) {
      // separate out the synonym definition and the synonym type
      $def = preg_replace('/^\s*"(.*)"\s*.*$/', '\1', $synonym);
      $type = drupal_strtolower(preg_replace('/^.*"\s+(.*?)\s+.*$/', '\1', $synonym));

      // make sure the synonym type exists in the 'synonym_type' vocabulary
      $syntype = db_fetch_object(db_query($cvtsql, $type, 'synonym_type'));
      if (!$syntype) {
        // build a 'term' array so we can add the missing type; a separate
        // variable keeps the $term parameter intact
        $type_term = array(
          'name' => array($type),
          'id' => array("internal:$type"),
          'def' => array(''),
          'is_obsolete' => array(0),
        );
        $syntype = tripal_cv_obo_add_cvterm($type_term, $syncv, 0, 1);
        if (!$syntype) {
          tripal_cv_obo_quiterror("Cannot add synonym type: internal:$type");
        }
      }

      // make sure the synonym doesn't already exist
      $syn = db_fetch_object(db_query($synsql, $cvterm->cvterm_id, $def));
      if (!$syn) {
        $sql = "INSERT INTO {cvtermsynonym} (cvterm_id,synonym,type_id)
          VALUES(%d,'%s',%d)";
        if (!db_query($sql, $cvterm->cvterm_id, $def, $syntype->cvterm_id)) {
          tripal_cv_obo_quiterror("Failed to insert the synonym for term: $cvterm->name ($def)");
        }
      }
    }
    return TRUE;
  }
  /**
   * Parses an OBO file into header tags and stanzas.
   *
   * Every "tag: value" line before the first stanza goes into $header, every
   * later one into the stanza being read. Characters outside the printable
   * ASCII range are removed, trailing "!" comments are stripped (an escaped
   * "\!" is kept) and escaped colons ("\:") are not treated as the tag
   * separator.
   *
   * If the file cannot be opened an error is printed and both arrays are
   * left as they were passed in.
   *
   * @param $obo_file
   *   Path of the OBO file to read.
   * @param $obo
   *   Array passed by reference; receives $obo[<stanza type>][<id>] = array
   *   mapping each tag to the list of its values.
   * @param $header
   *   Array passed by reference; receives each header tag mapped to the list
   *   of its values.
   *
   * @ingroup tripal_obo_loader
   */
  function tripal_cv_obo_parse($obo_file, &$obo, &$header) {
    $in_header = 1;
    $stanza = array();
    $type = '';

    $fh = fopen($obo_file, 'r');
    if (!$fh) {
      print "ERROR: could not open the OBO file: '$obo_file'\n";
      return;
    }

    // iterate through the lines in the OBO file and parse the stanzas; the
    // strict comparison keeps a line holding just "0" from ending the loop
    while (($line = fgets($fh)) !== FALSE) {
      // remove newlines
      $line = rtrim($line);

      // remove any special characters that may be hiding
      $line = preg_replace('/[^(\x20-\x7F)]*/', '', $line);

      // skip empty lines
      if (strcmp($line, '') == 0) {
        continue;
      }

      // remove comments from end of lines, leaving an escaped \! alone
      $line = preg_replace('/^(.*?)(?<!\\\\)!.*$/', '\1', $line);

      // a line that was nothing but a comment carries no tag
      if (trim($line) === '') {
        continue;
      }

      // at the first stanza we're out of header
      if (preg_match('/^\s*\[/', $line)) {
        $in_header = 0;

        // load the stanza we just finished reading
        _tripal_cv_obo_parse_store_stanza($obo, $type, $stanza);

        // get the stanza type: Term, Typedef or Instance
        $type = preg_replace('/^\s*\[\s*(.+?)\s*\]\s*$/', '\1', $line);

        // start fresh with a new array
        $stanza = array();
        continue;
      }

      // break apart the line into the tag and value but ignore any escaped
      // colons: hide them behind a placeholder while splitting
      $line = str_replace('\\:', '|-|-|', $line);
      $pair = explode(':', $line, 2);
      $tag = str_replace('|-|-|', '\\:', $pair[0]);
      // a line without a colon has no value part
      $value = isset($pair[1]) ? str_replace('|-|-|', '\\:', trim($pair[1])) : '';

      if ($in_header) {
        if (!isset($header[$tag])) {
          $header[$tag] = array();
        }
        $header[$tag][] = $value;
      }
      else {
        if (!isset($stanza[$tag])) {
          $stanza[$tag] = array();
        }
        $stanza[$tag][] = $value;
      }
    }
    fclose($fh);

    // now add the last term in the file
    _tripal_cv_obo_parse_store_stanza($obo, $type, $stanza);
  }

  /**
   * Stores a finished stanza in the parsed OBO structure.
   *
   * An empty stanza is ignored. When a stanza with the same type and id was
   * stored before, the tags of the new stanza are merged over it.
   *
   * @param $obo
   *   Parsed OBO structure, passed by reference.
   * @param $type
   *   Stanza type the stanza is stored under.
   * @param $stanza
   *   Array mapping each tag to the list of its values.
   */
  function _tripal_cv_obo_parse_store_stanza(&$obo, $type, $stanza) {
    if (sizeof($stanza) == 0) {
      return;
    }

    // a stanza without an id tag is kept under the empty key
    $id = isset($stanza['id'][0]) ? $stanza['id'][0] : '';

    if (!isset($obo[$type])) {
      $obo[$type] = array();
    }
    if (!isset($obo[$type][$id])) {
      $obo[$type][$id] = $stanza;
    }
    else {
      // array_merge() returns the merged array, so keep its result
      $obo[$type][$id] = array_merge($obo[$type][$id], $stanza);
    }
  }
  /**
   * Adds a database reference (dbxref) to a cvterm.
   *
   * The xref is split into a database name (the text before the first
   * colon), an accession and an optional quoted description. An xref whose
   * database name is http or https is stored whole as the accession of a
   * database named 'URL'. The db, dbxref and cvterm_dbxref records are
   * created when missing.
   *
   * @param $cvterm
   *   The cvterm record the reference belongs to.
   * @param $xref
   *   The reference as written in the OBO file.
   *
   * @return
   *   TRUE on success, FALSE (after logging a watchdog warning) when the
   *   xref has no accession. Database failures end the run through
   *   tripal_cv_obo_quiterror().
   *
   * @ingroup tripal_obo_loader
   */
  function tripal_cv_obo_add_cvterm_dbxref($cvterm, $xref) {
    $dbname = preg_replace('/^(.+?):.*$/', '$1', $xref);
    $accession = preg_replace('/^.+?:\s*(.*?)(\{.+$|\[.+$|\s.+$|\".+$|$)/', '$1', $xref);

    // the description is the quoted text, if any; without quotes there is
    // no description (rather than the whole xref)
    $description = '';
    if (preg_match('/^.+?\"(.+?)\".*?$/', $xref, $matches)) {
      $description = $matches[1];
    }

    // report the problem and let the caller decide what to do
    if ($accession === NULL or $accession === '') {
      watchdog('tripal_cv', "Cannot add a dbxref without an accession: '%xref'", array('%xref' => $xref), WATCHDOG_WARNING);
      return FALSE;
    }

    // if the xref is a database link, handle that specially
    if (in_array($dbname, array('http', 'https'))) {
      $accession = $xref;
      $dbname = 'URL';
    }

    // check to see if the database exists
    $db = tripal_cv_obo_add_db($dbname);
    if (!$db) {
      tripal_cv_obo_quiterror("Cannot find database '$dbname' in Chado.");
    }

    // now add the dbxref
    $dbxref = tripal_cv_obo_add_dbxref($db->db_id, $accession, '', $description);
    if (!$dbxref) {
      tripal_cv_obo_quiterror("Cannot find or add the database reference (dbxref)");
    }

    // finally add the cvterm_dbxref but first check to make sure it exists
    $sql = "SELECT * from {cvterm_dbxref} WHERE cvterm_id = %d and dbxref_id = %d";
    if (!db_fetch_object(db_query($sql, $cvterm->cvterm_id, $dbxref->dbxref_id))) {
      $sql = "INSERT INTO {cvterm_dbxref} (cvterm_id,dbxref_id) ".
        "VALUES (%d,%d)";
      if (!db_query($sql, $cvterm->cvterm_id, $dbxref->dbxref_id)) {
        tripal_cv_obo_quiterror("Cannot add cvterm_dbxref: $xref");
      }
    }
    return TRUE;
  }
  /**
   * Adds a property to a cvterm.
   *
   * The 'cvterm_property_type' vocabulary and the cvterm for the property
   * type are created when they are missing. When $rank is 0, every existing
   * cvtermprop row of the cvterm (whatever its type) is deleted before the
   * new value is inserted.
   *
   * @param $cvterm
   *   The cvterm record the property belongs to.
   * @param $property
   *   Name of the property type, for example 'comment'.
   * @param $value
   *   The property value.
   * @param $rank
   *   Rank of the value; rank 0 resets the properties of the cvterm first.
   *
   * @return
   *   TRUE on success. Failures end the run through
   *   tripal_cv_obo_quiterror().
   *
   * @ingroup tripal_obo_loader
   */
  function tripal_cv_obo_add_cvterm_prop($cvterm, $property, $value, $rank) {
    // make sure the 'cvterm_property_type' CV exists
    $cv = tripal_cv_obo_add_cv('cvterm_property_type', '');
    if (!$cv) {
      tripal_cv_obo_quiterror("Cannot add/find cvterm_property_type cvterm");
    }

    // get the property type cvterm. If it doesn't exist then we want to add it
    $sql = "
      SELECT *
      FROM {cvterm} CVT INNER JOIN {cv} CV on CVT.cv_id = CV.cv_id
      WHERE CVT.name = '%s' and CV.name = '%s'
    ";
    $cvproptype = db_fetch_object(db_query($sql, $property, 'cvterm_property_type'));
    if (!$cvproptype) {
      // the definition is stored under 'def', the key
      // tripal_cv_obo_add_cvterm() reads
      $term = array(
        'name' => array($property),
        'id' => array("internal:$property"),
        'def' => array(''),
        'is_obsolete' => array(0),
      );
      $cvproptype = tripal_cv_obo_add_cvterm($term, $cv, 0, 0);
      if (!$cvproptype) {
        tripal_cv_obo_quiterror("Cannot add cvterm property: internal:$property");
      }
    }

    // remove any properties that currently exist for this term. We'll reset them
    if ($rank == 0) {
      $sql = "DELETE FROM {cvtermprop} WHERE cvterm_id = %d";
      db_query($sql, $cvterm->cvterm_id);
    }

    // now add the property
    $sql = "INSERT INTO {cvtermprop} (cvterm_id,type_id,value,rank) ".
      "VALUES (%d, %d, '%s',%d)";
    if (!db_query($sql, $cvterm->cvterm_id, $cvproptype->cvterm_id, $value, $rank)) {
      tripal_cv_obo_quiterror("Could not add property $property for term\n");
    }
    return TRUE;
  }
  /**
   * Finds, inserts or updates the cvterm for an OBO stanza.
   *
   * The term id is split at its first colon into a database name and an
   * accession. A relationship term whose id has no database part is looked
   * up by name in the 'OBO_REL' database first and then in '_global', where
   * it is also created when it is not found. A non-relationship term without
   * a database part ends the run.
   *
   * @param $term
   *   One stanza: an array mapping tags to lists of values. The tags id,
   *   name, namespace, def and is_obsolete are read.
   * @param $defaultcv
   *   The cv record whose name is used when the stanza has no namespace.
   * @param $is_relationship
   *   Non-zero when the term is a relationship type.
   * @param $update
   *   Non-zero to update name, definition and flags of a cvterm that already
   *   exists.
   *
   * @return
   *   The cvterm record with the fields name, cvterm_id, cv_id, dbname and
   *   db_id. Failures end the run through tripal_cv_obo_quiterror().
   *
   * @ingroup tripal_obo_loader
   */
  function tripal_cv_obo_add_cvterm($term, $defaultcv, $is_relationship = 0, $update = 1) {
    // get the term properties; every tag is optional in the array, so each
    // one is tested before it is read
    $id = isset($term['id'][0]) ? $term['id'][0] : '';
    $name = isset($term['name'][0]) ? $term['name'][0] : '';
    $cvname = isset($term['namespace'][0]) ? $term['namespace'][0] : '';
    $definition = isset($term['def'][0]) ? preg_replace('/^\"(.*)\"/', '\1', $term['def'][0]) : '';
    $is_obsolete = 0;
    if (isset($term['is_obsolete'][0]) and strcmp($term['is_obsolete'][0], 'true') == 0) {
      $is_obsolete = 1;
    }
    if (!$cvname) {
      $cvname = $defaultcv->name;
    }

    // used in the progress messages below
    $label = $is_relationship ? 'relationship CV term' : 'CV term';

    // make sure the CV name exists
    $cv = tripal_cv_obo_add_cv($cvname, '');
    if (!$cv) {
      tripal_cv_obo_quiterror("Cannot find namespace '$cvname' when adding/updating $id");
    }

    // this SQL statement will be used a lot to find a cvterm so just set it
    // here for easy reference below. cv_id is selected because callers read
    // it from the returned record.
    $cvtermsql = "SELECT CVT.name, CVT.cvterm_id, CVT.cv_id, DB.name as dbname, DB.db_id
                  FROM {cvterm} CVT
                    INNER JOIN {dbxref} DBX on CVT.dbxref_id = DBX.dbxref_id
                    INNER JOIN {db} DB on DBX.db_id = DB.db_id
                    INNER JOIN {cv} CV on CV.cv_id = CVT.cv_id
                  WHERE CVT.name = '%s' and DB.name = '%s'";

    // get the accession and the database from the cvterm; both stay empty
    // when the id has no "db:accession" form
    $accession = '';
    $dbname = '';
    if (preg_match('/^.+?:.*$/', $id)) {
      $accession = preg_replace('/^.+?:(.*)$/', '\1', $id);
      $dbname = preg_replace('/^(.+?):.*$/', '\1', $id);
    }

    if ($is_relationship and !$dbname) {
      $accession = $id;
      // because this is a relationship cvterm first check to see if it
      // exists in the relationship ontology. If it does then return the cvterm.
      $cvterm = db_fetch_object(db_query($cvtermsql, $name, 'OBO_REL'));
      if ($cvterm) {
        return $cvterm;
      }
      // next check if this term is in the _global ontology. If it is then
      // return it no matter what the original CV. If not, it is added to
      // _global below.
      $dbname = '_global';
      $cvterm = db_fetch_object(db_query($cvtermsql, $name, $dbname));
      if ($cvterm) {
        return $cvterm;
      }
    }
    if (!$is_relationship and !$dbname) {
      tripal_cv_obo_quiterror("A database identifier is missing from the term: $id");
    }

    // check to see if the database exists.
    $db = tripal_cv_obo_add_db($dbname);
    if (!$db) {
      tripal_cv_obo_quiterror("Cannot find database '$dbname' in Chado.");
    }

    // if the cvterm doesn't exist then add it otherwise just update it
    $cvterm = db_fetch_object(db_query($cvtermsql, $name, $dbname));
    if (!$cvterm) {
      // check to see if the dbxref exists if not, add it
      $dbxref = tripal_cv_obo_add_dbxref($db->db_id, $accession);
      if (!$dbxref) {
        tripal_cv_obo_quiterror("Failed to find or insert the dbxref record for cvterm, $name (id: $accession), for database $dbname");
      }

      // check to see if the dbxref already has an entry in the cvterm table
      $sql = "SELECT * FROM {cvterm} WHERE dbxref_id = %d";
      $check = db_fetch_object(db_query($sql, $dbxref->dbxref_id));
      if (!$check) {
        // now add the cvterm
        $sql = "
          INSERT INTO {cvterm} (cv_id, name, definition, dbxref_id,
            is_obsolete, is_relationshiptype)
          VALUES (%d,'%s','%s',%d,%d,%d)
        ";
        if (!db_query($sql, $cv->cv_id, $name, $definition,
            $dbxref->dbxref_id, $is_obsolete, $is_relationship)) {
          if (!$is_relationship) {
            tripal_cv_obo_quiterror("Failed to insert the term: $name ($dbname)");
          }
          else {
            tripal_cv_obo_quiterror("Failed to insert the relationship term: $name (cv: " . $cvname . " db: $dbname)");
          }
        }
        print "Added $label: $name ($dbname)\n";
      }
      elseif (strcmp($check->name, $name) != 0) {
        // this dbxref_id already exists in the database but the name is
        // different. We will trust that the OBO is correct and that there
        // has been a name change for this dbxref, so we'll update
        $sql = "
          UPDATE {cvterm} SET name = '%s', definition = '%s',
            is_obsolete = %d, is_relationshiptype = %d
          WHERE dbxref_id = %d
        ";
        if (!db_query($sql, $name, $definition, $is_obsolete, $is_relationship, $dbxref->dbxref_id)) {
          if (!$is_relationship) {
            tripal_cv_obo_quiterror("Failed to update the term: $name ($dbname)");
          }
          else {
            tripal_cv_obo_quiterror("Failed to update the relationship term: $name (cv: " . $cvname . " db: $dbname)");
          }
        }
        print "Updated $label: $name ($dbname)\n";
      }
      // otherwise the entry exists under the same name: nothing to change

      $cvterm = db_fetch_object(db_query($cvtermsql, $name, $dbname));
    }
    elseif ($update) {
      // update the cvterm
      $sql = "
        UPDATE {cvterm} SET name='%s', definition='%s',
          is_obsolete = %d, is_relationshiptype = %d
        WHERE cvterm_id = %d
      ";
      if (!db_query($sql, $name, $definition,
          $is_obsolete, $is_relationship, $cvterm->cvterm_id)) {
        tripal_cv_obo_quiterror("Failed to update the term: $name");
      }
      $cvterm = db_fetch_object(db_query($cvtermsql, $name, $dbname));
      print "Updated $label: $name ($dbname)\n";
    }

    // return the cvterm
    return $cvterm;
  }
  /**
   * Finds a dbxref by database and accession, creating it when missing.
   *
   * @param $db_id
   *   Id of the db record the reference belongs to.
   * @param $accession
   *   Accession of the reference.
   * @param $version
   *   Version stored when the record is created.
   * @param $description
   *   Description stored when the record is created.
   *
   * @return
   *   A record holding the dbxref_id, as fetched after the lookup or insert.
   *
   * @ingroup tripal_obo_loader
   */
  function tripal_cv_obo_add_dbxref($db_id, $accession, $version='', $description='') {
    $lookup = "SELECT dbxref_id FROM {dbxref} WHERE db_id = %d and accession = '%s'";

    // an existing reference is returned untouched
    $found = db_fetch_object(db_query($lookup, $db_id, $accession));
    if ($found) {
      return $found;
    }

    // otherwise create the reference and read it back
    $insert = "
      INSERT INTO {dbxref} (db_id, accession, version, description)
      VALUES (%d,'%s','%s','%s')
    ";
    $created = db_query($insert, $db_id, $accession, $version, $description);
    if (!$created) {
      tripal_cv_obo_quiterror("Failed to insert the dbxref record $accession");
    }
    echo "Added Dbxref accession: $accession\n";

    return db_fetch_object(db_query($lookup, $db_id, $accession));
  }