<?php
/**
 * @file
 * Tripal Ontology Loader
 *
 * @defgroup tripal_obo_loader Tripal Ontology Loader
 * @ingroup tripal_cv
 */
  /**
   * Loads the ontology that is registered under the given id.
   *
   * The reference is read from the {tripal_cv_obo} table. A path that starts
   * with http://, https:// or ftp:// is handed to the URL loader. Any other
   * path is treated as a file: it is looked up relative to the Drupal
   * installation first and used as given when it is not found there.
   *
   * @param $obo_id
   *   The obo_id of the record in the {tripal_cv_obo} table.
   * @param $jobid
   *   Optional. Passed on unchanged to the loader that gets selected.
   *
   * @ingroup tripal_obo_loader
   */
  function tripal_cv_load_obo_v1_2_id($obo_id, $jobid = NULL) {
    // get the OBO reference
    $sql = "SELECT * FROM {tripal_cv_obo} WHERE obo_id = %d";
    $obo = db_fetch_object(db_query($sql, $obo_id));

    // without a record there is neither a name nor a path to work with
    if (!$obo) {
      print "ERROR: could not find an OBO reference with id: '$obo_id'\n";
      return;
    }

    // if the reference is for a remote URL then run the URL processing function
    if (preg_match('/^(https?|ftp):\/\//', $obo->path)) {
      tripal_cv_load_obo_v1_2_url($obo->name, $obo->path, $jobid, 0);
      return;
    }

    // otherwise the reference is a file: check to see if it is located local
    // to Drupal
    $dfile = $_SERVER['DOCUMENT_ROOT'] . base_path() . $obo->path;
    if (file_exists($dfile)) {
      tripal_cv_load_obo_v1_2_file($obo->name, $dfile, $jobid, 0);
      return;
    }

    // if not local to Drupal, the file must be someplace else, just use
    // the full path provided
    if (file_exists($obo->path)) {
      tripal_cv_load_obo_v1_2_file($obo->name, $obo->path, $jobid, 0);
      return;
    }

    print "ERROR: counld not find OBO file: '$obo->path'\n";
  }
  /**
   * Loads an ontology from an OBO file on the file system.
   *
   * Parses and loads the file, optionally records the file in the
   * {tripal_cv_obo} table and finally refreshes the cvtermpath table for the
   * vocabularies the loader reported.
   *
   * @param $obo_name
   *   Name under which the ontology is registered when $is_new is set.
   * @param $file
   *   Path of the OBO file to load.
   * @param $jobid
   *   Optional. Passed on to the loader and to the cvtermpath update.
   * @param $is_new
   *   When TRUE (the default) a reference to the file is stored.
   *
   * @ingroup tripal_obo_loader
   */
  function tripal_cv_load_obo_v1_2_file($obo_name, $file, $jobid = NULL, $is_new = TRUE) {
    // filled in by the loader, passed by reference
    $loaded_cvs = array();
    tripal_cv_load_obo_v1_2($file, $jobid, $loaded_cvs);

    // remember where a newly added ontology came from
    if ($is_new) {
      tripal_cv_load_obo_add_ref($obo_name, $file);
    }

    // update the cvtermpath table
    tripal_cv_load_update_cvtermpath($loaded_cvs, $jobid);

    print "Ontology Sucessfully loaded!\n";
  }
  /**
   * Downloads an OBO file from a URL and loads it.
   *
   * The remote file is copied into a temporary file, loaded from there and the
   * temporary file is removed again. When the download cannot be started or
   * completed the temporary file is removed before the loader quits.
   *
   * @param $obo_name
   *   Name under which the ontology is registered when $is_new is set.
   * @param $url
   *   The URL of the OBO file.
   * @param $jobid
   *   Optional. Passed on to the loader and to the cvtermpath update.
   * @param $is_new
   *   When TRUE (the default) a reference to the URL is stored.
   *
   * @ingroup tripal_obo_loader
   */
  function tripal_cv_load_obo_v1_2_url($obo_name, $url, $jobid = NULL, $is_new = TRUE) {
    $newcvs = array();

    // first download the OBO
    $temp = tempnam(sys_get_temp_dir(), 'obo_');
    if (!$temp) {
      tripal_cv_obo_quiterror("Unable to create a temporary file for downloading $url");
    }
    print "Downloading URL $url, saving to $temp\n";

    // open the remote side first so that a failed download leaves neither an
    // open handle nor a stray temp file behind
    $url_fh = fopen($url, "r");
    if (!$url_fh) {
      unlink($temp);
      tripal_cv_obo_quiterror("Unable to download the remote OBO file at $url. Could a firewall be blocking outgoing connections? ".
        " if you are unable to download the file you may manually downlod the OBO file and use the web interface to ".
        " specify the location of the file on your server.");
    }
    $obo_fh = fopen($temp, "w");
    if (!$obo_fh) {
      fclose($url_fh);
      unlink($temp);
      tripal_cv_obo_quiterror("Unable to open the temporary file $temp for writing");
    }

    // copy the remote file chunk by chunk; only write what was actually read
    $read_ok = TRUE;
    while (!feof($url_fh)) {
      $chunk = fread($url_fh, 8192);
      if ($chunk === FALSE) {
        $read_ok = FALSE;
        break;
      }
      fwrite($obo_fh, $chunk);
    }
    fclose($url_fh);
    fclose($obo_fh);

    // do not load a file that was only partially transferred
    if (!$read_ok) {
      unlink($temp);
      tripal_cv_obo_quiterror("The download of the remote OBO file at $url was interrupted");
    }

    // second, parse the OBO
    tripal_cv_load_obo_v1_2($temp, $jobid, $newcvs);

    // now remove the temp file
    unlink($temp);

    if ($is_new) {
      tripal_cv_load_obo_add_ref($obo_name, $url);
    }

    // update the cvtermpath table
    tripal_cv_load_update_cvtermpath($newcvs, $jobid);
    print "Ontology Sucessfully loaded!\n";
  }
  /**
   * Refreshes the cvtermpath table for a set of vocabularies.
   *
   * @param $newcvs
   *   Array whose values are cv ids; the keys are not used here.
   * @param $jobid
   *   Passed on unchanged to tripal_cv_update_cvtermpath().
   *
   * @ingroup tripal_obo_loader
   */
  function tripal_cv_load_update_cvtermpath($newcvs, $jobid) {
    print "\nUpdating cvtermpath table. This may take a while...\n";

    // one update per vocabulary
    foreach ($newcvs as $cv_id) {
      tripal_cv_update_cvtermpath($cv_id, $jobid);
    }
  }
  /**
   * Stores a reference to an OBO in the {tripal_cv_obo} table of the Drupal
   * database.
   *
   * @param $name
   *   The name of the ontology.
   * @param $path
   *   The file path or URL the ontology was loaded from.
   */
  function tripal_cv_load_obo_add_ref($name, $path) {
    db_query("INSERT INTO {tripal_cv_obo} (name,path) VALUES ('%s','%s')", $name, $path);
  }
  /**
   * Parses an OBO file and loads its typedefs and terms.
   *
   * The vocabulary named by the 'default-namespace' header tag is added first,
   * then every [Typedef] stanza and finally every [Term] stanza is processed.
   * Files without typedefs or without terms are accepted. Any failure ends the
   * run through tripal_cv_obo_quiterror().
   *
   * @param $file
   *   Path of the OBO file to load.
   * @param $jobid
   *   Optional. Passed on to the term processing for progress reporting.
   * @param $newcvs
   *   Passed by reference. Receives namespace => cv_id entries for the
   *   vocabularies that were touched.
   *
   * @ingroup tripal_obo_loader
   */
  function tripal_cv_load_obo_v1_2($file, $jobid = NULL, &$newcvs) {
    // we need to get a persistent connection. If one exists this function
    // will not recreate it, but if not it will create one and store it in
    // a Drupal variable for reuse later.
    tripal_db_persistent_chado();

    $header = array();
    $obo = array();
    print "Opening File $file\n";

    // make sure we have an 'internal' and a '_global' database
    if (!tripal_db_add_db('internal')) {
      tripal_cv_obo_quiterror("Cannot add 'internal' database");
    }
    if (!tripal_db_add_db('_global')) {
      tripal_cv_obo_quiterror("Cannot add '_global' database");
    }

    // parse the obo file
    $default_db = tripal_cv_obo_parse($file, $obo, $header);

    // the header has to name the vocabulary the terms are loaded into
    if (!isset($header['default-namespace'][0])) {
      tripal_cv_obo_quiterror("Cannot add namespace: the OBO file $file has no 'default-namespace' header tag");
    }
    $namespace = $header['default-namespace'][0];

    // add the CV for this ontology to the database
    $defaultcv = tripal_cv_add_cv($namespace, '');
    if (!$defaultcv) {
      tripal_cv_obo_quiterror('Cannot add namespace ' . $namespace);
    }
    $newcvs[$namespace] = $defaultcv->cv_id;

    // add any typedefs to the vocabulary first; a file may have none
    $typedefs = isset($obo['Typedef']) ? $obo['Typedef'] : array();
    foreach ($typedefs as $typedef) {
      tripal_cv_obo_process_term($typedef, $defaultcv->name, $obo, 1, $newcvs, $default_db);
    }

    // next add terms to the vocabulary; a file may consist of typedefs only
    $terms = isset($obo['Term']) ? $obo['Term'] : array();
    if (!tripal_cv_obo_process_terms($terms, $defaultcv->name, $obo, $jobid, $newcvs, $default_db)) {
      tripal_cv_obo_quiterror('Cannot add terms from this ontology');
    }
    return;
  }
  /**
   * Logs an error for the OBO loader and ends the script.
   *
   * @param $message
   *   The text that is written to the watchdog log with the type
   *   'T_obo_loader' and the severity WATCHDOG_ERROR.
   *
   * @ingroup tripal_obo_loader
   */
  function tripal_cv_obo_quiterror($message) {
    watchdog("T_obo_loader", $message, array(), WATCHDOG_ERROR);

    // nothing after this call runs
    exit();
  }
  /**
   * Processes every term of an ontology and reports the progress.
   *
   * @param $terms
   *   The term stanzas to process.
   * @param $defaultcv
   *   Passed on to tripal_cv_obo_process_term() for every term.
   * @param $obo
   *   The parsed OBO file, passed on for every term.
   * @param $jobid
   *   Optional. When set, the progress of the job is updated and printed.
   * @param $newcvs
   *   Passed by reference and handed on for every term.
   * @param $default_db
   *   Passed on to tripal_cv_obo_process_term() for every term.
   *
   * @return
   *   Always 1. A term that cannot be processed ends the run.
   *
   * @ingroup tripal_obo_loader
   */
  function tripal_cv_obo_process_terms($terms, $defaultcv, $obo, $jobid = NULL, &$newcvs, $default_db) {
    $total = count($terms);

    // an OBO file may hold typedef entries only, in that case there is
    // nothing to do
    if ($total == 0) {
      return 1;
    }

    // progress is reported after every 0.01% of the terms, which means after
    // every single term for small files
    $step = max(1, intval($total * 0.0001));

    print "Loading terms...\n";
    $done = 0;
    foreach ($terms as $term) {
      // update the job status
      if ($jobid and $done % $step == 0) {
        $percent = ($done / $total) * 100;
        tripal_job_set_progress($jobid, intval($percent));
        printf("%d of %d records. (%0.2f%%) memory: %s bytes\r", $done, $total, $percent, number_format(memory_get_usage()));
      }

      // add/update this term
      if (!tripal_cv_obo_process_term($term, $defaultcv, $obo, 0, $newcvs, $default_db)) {
        tripal_cv_obo_quiterror("Failed to process terms from the ontology");
      }
      $done++;
    }

    // set the final status
    if ($jobid) {
      $percent = ($done / $total) * 100;
      tripal_job_set_progress($jobid, intval($percent));
      printf("%d of %d records. (%0.2f%%) memory: %d\r", $done, $total, $percent, memory_get_usage());
    }
    return 1;
  }
  /**
   * Adds a single term (or typedef) stanza and its properties.
   *
   * Handles the tags id, name, def, subset, namespace, is_obsolete, alt_id,
   * synonym (including the deprecated exact_synonym, narrow_synonym and
   * broad_synonym), comment, xref, xref_analog, xref_unk, is_a and
   * relationship. The tags is_anonymous, intersection_of, union_of,
   * disjoint_from, replaced_by, consider, use_term and builtin are not loaded.
   *
   * @param $term
   *   The stanza: an array of tag => list of values.
   * @param $defaultcv
   *   Passed to tripal_cv_add_cvterm() and to the relationship handling.
   * @param $obo
   *   The parsed OBO file, used to resolve the objects of relationships.
   * @param $is_relationship
   *   Passed to tripal_cv_add_cvterm(); the loader sets it for typedefs.
   * @param $newcvs
   *   Passed by reference. When the stanza has a namespace tag the cv_id of
   *   the added term is stored under that namespace.
   * @param $default_db
   *   Passed to tripal_cv_add_cvterm() and to the relationship handling.
   *
   * @return
   *   Always 1. Every failure ends the run through tripal_cv_obo_quiterror().
   *
   * @ingroup tripal_obo_loader
   */
  function tripal_cv_obo_process_term($term, $defaultcv, $obo, $is_relationship = 0, &$newcvs, $default_db) {
    // construct the term array for sending to the tripal_cv_add_cvterm
    // function. Tags that are missing from the stanza are passed as NULL
    // instead of being read from an undefined index.
    $id = isset($term['id'][0]) ? $term['id'][0] : NULL;
    $t = array();
    $t['id'] = $id;
    $t['name'] = isset($term['name'][0]) ? $term['name'][0] : NULL;
    $t['def'] = isset($term['def'][0]) ? $term['def'][0] : NULL;
    foreach (array('subset', 'namespace', 'is_obsolete') as $tag) {
      if (isset($term[$tag])) {
        $t[$tag] = $term[$tag][0];
      }
    }

    // add the cvterm
    $cvterm = tripal_cv_add_cvterm($t, $defaultcv, $is_relationship, 1, $default_db);
    if (!$cvterm) {
      // report the id itself, $term['id'] is the list of values of the tag
      tripal_cv_obo_quiterror("Cannot add the term " . $id);
    }
    if (isset($term['namespace'])) {
      $newcvs[$term['namespace'][0]] = $cvterm->cv_id;
    }

    // alternate ids are stored as database references of the term
    if (isset($term['alt_id'])) {
      foreach ($term['alt_id'] as $alt_id) {
        if (!tripal_cv_obo_add_cvterm_dbxref($cvterm, $alt_id)) {
          tripal_cv_obo_quiterror("Cannot add alternate id $alt_id");
        }
      }
    }

    // reformat the deprecated 'exact_synonym, narrow_synonym, and
    // broad_synonym' types to be of the v1.2 standard by inserting the scope
    // behind the quoted text
    $deprecated = array(
      'exact_synonym' => 'EXACT',
      'narrow_synonym' => 'NARROW',
      'broad_synonym' => 'BROAD',
    );
    foreach ($deprecated as $tag => $scope) {
      if (isset($term[$tag])) {
        foreach ($term[$tag] as $synonym) {
          $term['synonym'][] = preg_replace('/^\s*(\".+?\")(.*?)$/', '$1 ' . $scope . ' $2', $synonym);
        }
      }
    }

    // add the synonyms for this cvterm, regular and converted ones in one go
    if (isset($term['synonym'])) {
      if (!tripal_cv_obo_add_synonyms($term, $cvterm)) {
        tripal_cv_obo_quiterror("Cannot add/update synonyms");
      }
    }

    // add the comments to the cvtermprop table, the position is the rank
    if (isset($term['comment'])) {
      foreach (array_values($term['comment']) as $rank => $comment) {
        if (!tripal_cv_obo_add_cvterm_prop($cvterm, 'comment', $comment, $rank)) {
          tripal_cv_obo_quiterror("Cannot add/update cvterm property");
        }
      }
    }

    // add any other external dbxrefs
    foreach (array('xref', 'xref_analog', 'xref_unk') as $tag) {
      if (isset($term[$tag])) {
        foreach ($term[$tag] as $xref) {
          if (!tripal_cv_obo_add_cvterm_dbxref($cvterm, $xref)) {
            tripal_cv_obo_quiterror("Cannot add/update cvterm database reference (dbxref).");
          }
        }
      }
    }

    // add is_a relationships for this cvterm
    if (isset($term['is_a'])) {
      foreach ($term['is_a'] as $is_a) {
        if (!tripal_cv_obo_add_relationship($cvterm, $defaultcv, $obo, 'is_a', $is_a, $is_relationship, $default_db)) {
          tripal_cv_obo_quiterror("Cannot add relationship is_a: $is_a");
        }
      }
    }

    // add all other relationships: the text before the first whitespace is
    // the relationship type, the rest is the object
    if (isset($term['relationship'])) {
      foreach ($term['relationship'] as $value) {
        $rel = preg_replace('/^(.+?)\s.+?$/', '\1', $value);
        $object = preg_replace('/^.+?\s(.+?)$/', '\1', $value);
        if (!tripal_cv_obo_add_relationship($cvterm, $defaultcv, $obo, $rel, $object, $is_relationship, $default_db)) {
          tripal_cv_obo_quiterror("Cannot add relationship $rel: $object");
        }
      }
    }

    return 1;
  }
  /**
   * Add a cvterm relationship
   *
   * Makes sure that the relationship type and the object term exist and adds
   * the cvterm_relationship record unless it is already there.
   *
   * @param $cvterm
   *   The subject of the relationship; its cvterm_id and name are used.
   * @param $defaultcv
   *   Passed to tripal_cv_add_cvterm() for the type and for the object.
   * @param $obo
   *   The parsed OBO file in which the object term is looked up.
   * @param $rel
   *   The name of the relationship type.
   * @param $objname
   *   The id under which the object term is looked up in $obo.
   * @param $object_is_relationship
   *   Passed to tripal_cv_add_cvterm() when the object term is added.
   * @param $default_db
   *   The database that is tried first for the relationship type.
   *
   * @return
   *   Always TRUE. Every failure ends the run through
   *   tripal_cv_obo_quiterror().
   *
   * @ingroup tripal_obo_loader
   */
  function tripal_cv_obo_add_relationship($cvterm, $defaultcv, $obo, $rel,
    $objname, $object_is_relationship = 0, $default_db = 'OBO_REL') {

    // make sure the relationship cvterm exists: the default_db provided is
    // tried first, the relationship ontology is the fallback
    $relcvterm = NULL;
    foreach (array($default_db, 'OBO_REL') as $dbname) {
      $reltype = array(
        'name' => $rel,
        'id' => "$dbname:$rel",
        'definition' => '',
        'is_obsolete' => 0,
      );
      $relcvterm = tripal_cv_add_cvterm($reltype, $defaultcv, 1, 0, $dbname);
      if ($relcvterm) {
        break;
      }
    }
    if (!$relcvterm) {
      tripal_cv_obo_quiterror("Cannot find the relationship term in the current ontology or in the relationship ontology: $rel\n");
    }

    // get the object term
    $oterm = tripal_cv_obo_get_term($obo, $objname);
    if (!$oterm) {
      tripal_cv_obo_quiterror("Could not find object term $objname\n");
    }

    // build the term array for the object, optional tags are copied only
    // when the stanza has them
    $objterm = array(
      'id' => $oterm['id'][0],
      'name' => $oterm['name'][0],
      'def' => $oterm['def'][0],
    );
    foreach (array('subset', 'namespace', 'is_obsolete') as $tag) {
      if (isset($oterm[$tag])) {
        $objterm[$tag] = $oterm[$tag][0];
      }
    }
    $objcvterm = tripal_cv_add_cvterm($objterm, $defaultcv, $object_is_relationship, 1, $default_db);
    if (!$objcvterm) {
      tripal_cv_obo_quiterror("Cannot add cvterm " . $oterm['name'][0]);
    }

    // check to see if the cvterm_relationship already exists
    $triple = array(
      'type_id' => $relcvterm->cvterm_id,
      'subject_id' => $cvterm->cvterm_id,
      'object_id' => $objcvterm->cvterm_id
    );
    $existing = tripal_core_chado_select('cvterm_relationship', array('*'), $triple,
      array('statement_name' => 'sel_cvtermrelationship_tysuob'));
    if (count($existing) != 0) {
      return TRUE;
    }

    // it does not, so add it
    $ins_options = array(
      'statement_name' => 'ins_cvtermrelationship_tysuob',
      'return_record' => FALSE
    );
    if (!tripal_core_chado_insert('cvterm_relationship', $triple, $ins_options)) {
      tripal_cv_obo_quiterror("Cannot add term relationship: '$cvterm->name' $rel '$objcvterm->name'");
    }
    return TRUE;
  }
  /**
   * Looks up a stanza by its id in a parsed OBO file.
   *
   * @param $obo
   *   The parsed OBO file: an array of stanza types, each of which is an
   *   array of stanzas keyed by id.
   * @param $id
   *   The id of the stanza to look for.
   *
   * @return
   *   The stanza from the first type that has the id as a key, or FALSE when
   *   no type has it.
   *
   * @ingroup tripal_obo_loader
   */
  function tripal_cv_obo_get_term($obo, $id) {
    // the id is a key within its type, so one key check per type is enough
    foreach ($obo as $stanzas) {
      if (!array_key_exists($id, $stanzas)) {
        continue;
      }
      return $stanzas[$id];
    }
    return FALSE;
  }
  /**
   * Adds the synonyms of a term to the cvtermsynonym table.
   *
   * Every value of the 'synonym' tag is split into the quoted synonym text and
   * the scope that follows it. The scope becomes a term of the 'synonym_type'
   * vocabulary (added when missing) and is used as the type of the synonym. A
   * synonym that is already stored for the term is left alone. Database
   * references attached to a synonym are not loaded.
   *
   * @param $term
   *   The stanza; only its 'synonym' values are read.
   * @param $cvterm
   *   The term the synonyms belong to; its cvterm_id and name are used.
   *
   * @return
   *   Always TRUE. Every failure ends the run through
   *   tripal_cv_obo_quiterror().
   *
   * @ingroup tripal_obo_loader
   */
  function tripal_cv_obo_add_synonyms($term, $cvterm) {
    // make sure we have a 'synonym_type' vocabulary
    $syncv = tripal_cv_add_cv('synonym_type', 'A vocabulary added by the Tripal CV module OBO loader for storing synonym types.');
    if (!$syncv) {
      tripal_cv_obo_quiterror("Cannot add/find the synonym_type vocabulary");
    }

    if (!isset($term['synonym'])) {
      return TRUE;
    }

    // now add the synonyms
    foreach ($term['synonym'] as $synonym) {
      // separate out the synonym definition and the synonym type. The
      // definition is the first quoted string (escaped quotes inside of it
      // are allowed), the scope is the first word behind it. A value that
      // does not start with a quoted string is used as it is.
      $def = $synonym;
      $scope = 'exact'; // if no scope then default to 'exact'
      $parts = array();
      if (preg_match('/^\s*"((?:[^"\\\\]|\\\\.)*)"\s*(.*)$/', $synonym, $parts)) {
        $def = $parts[1];
        // the scope will be 'EXACT', etc... A trailing part that starts with
        // a bracket or brace holds no scope.
        $found = array();
        if (preg_match('/^([^\s\[\]{}]+)/', $parts[2], $found)) {
          $scope = drupal_strtolower($found[1]);
        }
      }

      // make sure the synonym type exists in the 'synonym_type' vocabulary
      $values = array(
        'name' => $scope,
        'cv_id' => array(
          'name' => 'synonym_type',
        ),
      );
      // NOTE(review): 'is_updlicate' looks like a misspelled 'is_duplicate'.
      // It is kept as it is because the effect of the corrected option on the
      // select cannot be seen from here -- confirm against
      // tripal_core_chado_select().
      $options = array('statement_name' => 'sel_cvterm_nacv', 'is_updlicate' => 1);
      $results = tripal_core_chado_select('cvterm', array('*'), $values, $options);

      // if it doesn't exist then add it
      if (!$results) {
        // build a 'term' array so we can add the missing term; a separate
        // variable keeps the $term parameter intact
        $type_term = array(
          'name' => $scope,
          'id' => "internal:$scope",
          'definition' => '',
          'is_obsolete' => 0,
        );
        $syntype = tripal_cv_add_cvterm($type_term, $syncv->name, 0, 1);
        if (!$syntype) {
          tripal_cv_obo_quiterror("Cannot add synonym type: internal:$scope");
        }
      }
      else {
        $syntype = $results[0];
      }

      // make sure the synonym doesn't already exists
      $values = array(
        'cvterm_id' => $cvterm->cvterm_id,
        'synonym' => $def
      );
      $options = array('statement_name' => 'sel_cvtermsynonym_cvsy');
      $results = tripal_core_chado_select('cvtermsynonym', array('*'), $values, $options);
      if (count($results) == 0) {
        $values = array(
          'cvterm_id' => $cvterm->cvterm_id,
          'synonym' => $def,
          'type_id' => $syntype->cvterm_id
        );
        $options = array(
          'statement_name' => 'ins_cvtermsynonym_cvsy',
          'return_record' => FALSE
        );
        $success = tripal_core_chado_insert('cvtermsynonym', $values, $options);
        if (!$success) {
          tripal_cv_obo_quiterror("Failed to insert the synonym for term: $cvterm->name ($def)");
        }
      }
    }
    return TRUE;
  }
  /**
   * Actually parse the OBO file
   *
   * Reads the file line by line. Characters outside of the range \x20-\x7F
   * are removed, comments (an unescaped exclamation mark up to the end of the
   * line) are stripped and empty lines are skipped. Tag-value pairs in front
   * of the first stanza are stored in $header, all others are stored in
   * stanzas that end up in $obo[<stanza type>][<stanza id>]. A stanza whose id
   * was seen before within its type is merged into the stored one.
   *
   * @param $obo_file
   *   Path of the OBO file.
   * @param $obo
   *   Passed by reference. Receives the stanzas.
   * @param $header
   *   Passed by reference. Receives the header tags as tag => list of values.
   *
   * @return
   *   The text in front of the first colon of the last 'id' value that
   *   contains a colon, or '_global' when the file has no such id.
   *
   * @ingroup tripal_obo_loader
   */
  function tripal_cv_obo_parse($obo_file, &$obo, &$header) {
    $in_header = 1;
    $stanza = array();
    $type = '';
    $default_db = '_global';

    $fh = fopen($obo_file, 'r');
    if (!$fh) {
      tripal_cv_obo_quiterror("Cannot open the OBO file: $obo_file");
    }

    // iterate through the lines in the OBO file and parse the stanzas. The
    // strict comparison keeps a line that only holds '0' from ending the loop.
    while (($line = fgets($fh)) !== FALSE) {
      // remove newlines
      $line = rtrim($line);

      // remove any special characters that may be hiding
      $line = preg_replace('/[^\x20-\x7F]+/', '', $line);

      // remove comments from end of lines, an escaped exclamation mark (\!)
      // does not start a comment
      $line = preg_replace('/^(.*?)(?<!\\\\)!.*$/', '\1', $line);

      // skip empty lines and lines that held nothing but a comment
      if (strcmp($line, '') == 0) {
        continue;
      }

      if (preg_match('/^\s*\[/', $line)) { // at the first stanza we're out of header
        $in_header = 0;

        // store the stanza we just finished reading
        _tripal_cv_obo_parse_store_stanza($obo, $type, $stanza);

        // get the stanza type: Term, Typedef or Instance
        $type = preg_replace('/^\s*\[\s*(.+?)\s*\]\s*$/', '\1', $line);

        // start fresh with a new array
        $stanza = array();
        continue;
      }

      // break apart the line into the tag and value but ignore any escaped
      // colons: they are swapped for a placeholder while the line is split
      $line = preg_replace('/\\\\:/', '|-|-|', $line);
      $pair = explode(":", $line, 2);
      $tag = $pair[0];
      // remove surrounding spaces, a line without a colon has no value
      $value = isset($pair[1]) ? trim($pair[1]) : '';

      // if this is the ID then look for the default DB
      $matches = array();
      if ($tag == 'id' and preg_match('/^(.+?):.*$/', $value, $matches)) {
        $default_db = $matches[1];
      }

      // return the escaped colon
      $tag = str_replace('|-|-|', '\:', $tag);
      $value = str_replace('|-|-|', '\:', $value);

      if ($in_header) {
        if (!isset($header[$tag])) {
          $header[$tag] = array();
        }
        $header[$tag][] = $value;
      }
      else {
        if (!isset($stanza[$tag])) {
          $stanza[$tag] = array();
        }
        $stanza[$tag][] = $value;
      }
    }
    fclose($fh);

    // now add the last term in the file
    _tripal_cv_obo_parse_store_stanza($obo, $type, $stanza);

    return $default_db;
  }

  /**
   * Stores a finished stanza in the parsed OBO array; helper of
   * tripal_cv_obo_parse().
   *
   * @param $obo
   *   Passed by reference. The array the stanza is stored in.
   * @param $type
   *   The stanza type, used as the first level key.
   * @param $stanza
   *   The stanza as tag => list of values. An empty stanza is ignored.
   */
  function _tripal_cv_obo_parse_store_stanza(&$obo, $type, $stanza) {
    if (sizeof($stanza) == 0) {
      return;
    }

    // a stanza without an id is keyed by the empty string
    $id = isset($stanza['id'][0]) ? $stanza['id'][0] : '';

    if (!isset($obo[$type])) {
      $obo[$type] = array();
    }
    if (!isset($obo[$type][$id])) {
      $obo[$type][$id] = $stanza;
    }
    else {
      // the id was seen before: tags of the new stanza replace the tags of
      // the same name, all other tags of the stored stanza are kept
      $obo[$type][$id] = array_merge($obo[$type][$id], $stanza);
    }
  }
  /**
   * Add database reference to cvterm
   *
   * Splits the reference into the database name (the text in front of the
   * first colon), the accession and an optional quoted description, adds the
   * database and the dbxref and links the dbxref to the term unless the link
   * already exists. A reference whose database name is http or https is
   * stored as a whole as an accession of the database 'URL'.
   *
   * @param $cvterm
   *   The term the reference belongs to; its cvterm_id is used.
   * @param $xref
   *   The reference as it appears in the OBO file.
   *
   * @return
   *   TRUE when the reference is linked to the term, FALSE when the reference
   *   has no accession. Every other failure ends the run through
   *   tripal_cv_obo_quiterror().
   *
   * @ingroup tripal_obo_loader
   */
  function tripal_cv_obo_add_cvterm_dbxref($cvterm, $xref) {
    $dbname = preg_replace('/^(.+?):.*$/', '$1', $xref);
    $accession = preg_replace('/^.+?:\s*(.*?)(\{.+$|\[.+$|\s.+$|\".+$|$)/', '$1', $xref);

    // the description is optional; without a quoted text there is none
    $description = '';
    $matches = array();
    if (preg_match('/^.+?\"(.+?)\".*?$/', $xref, $matches)) {
      $description = $matches[1];
    }

    // warn and let the caller decide; an accession of '0' is a valid one
    if (strcmp($accession, '') == 0) {
      watchdog("T_obo_loader", "Cannot add a dbxref without an accession: '$xref'", NULL, WATCHDOG_WARNING);
      return FALSE;
    }

    // if the xref is a database link, handle that specially
    if (in_array($dbname, array('http', 'https'), TRUE)) {
      $accession = $xref;
      $dbname = 'URL';
    }

    // add the database
    $db = tripal_db_add_db($dbname);
    if (!$db) {
      tripal_cv_obo_quiterror("Cannot find database '$dbname' in Chado.");
    }

    // now add the dbxref
    $dbxref = tripal_cv_obo_add_dbxref($db->db_id, $accession, '', $description);
    if (!$dbxref) {
      tripal_cv_obo_quiterror("Cannot find or add the database reference (dbxref)");
    }

    // finally add the cvterm_dbxref but first check to make sure it exists
    $values = array(
      'cvterm_id' => $cvterm->cvterm_id,
      'dbxref_id' => $dbxref->dbxref_id,
    );
    $options = array('statement_name' => 'sel_cvtermdbxref_cvdb');
    $result = tripal_core_chado_select('cvterm_dbxref', array('*'), $values, $options);
    if (count($result) == 0) {
      $ins_options = array(
        'statement_name' => 'ins_cvtermdbxref_cvdb',
        'return_record' => FALSE
      );
      $result = tripal_core_chado_insert('cvterm_dbxref', $values, $ins_options);
      if (!$result) {
        tripal_cv_obo_quiterror("Cannot add cvterm_dbxref: $xref");
        return FALSE;
      }
    }
    return TRUE;
  }
  /**
   * Add property to CVterm
   *
   * The property type is a term of the 'cvterm_property_type' vocabulary and
   * is added when it is missing. A call with the rank 0 first deletes the
   * cvtermprop records of the term; the delete is keyed on the cvterm_id only.
   *
   * @param $cvterm
   *   The term that receives the property; its cvterm_id is used.
   * @param $property
   *   The name of the property type.
   * @param $value
   *   The value of the property.
   * @param $rank
   *   The rank of the property.
   *
   * @return
   *   TRUE when the property was added. Failures end the run through
   *   tripal_cv_obo_quiterror().
   *
   * @ingroup tripal_obo_loader
   */
  function tripal_cv_obo_add_cvterm_prop($cvterm, $property, $value, $rank) {
    // make sure the 'cvterm_property_type' CV exists
    $cv = tripal_cv_add_cv('cvterm_property_type', '');
    if (!$cv) {
      tripal_cv_obo_quiterror("Cannot add/find cvterm_property_type cvterm");
    }

    // get the property type cvterm. If it doesn't exist then we want to add it
    $lookup = array(
      'name' => $property,
      'cv_id' => $cv->cv_id,
    );
    $found = tripal_core_chado_select('cvterm', array('*'), $lookup,
      array('statement_name' => 'sel_cvterm_nacv_na'));
    if (count($found) != 0) {
      $cvproptype = $found[0];
    }
    else {
      $type_term = array(
        'name' => $property,
        'id' => "internal:$property",
        'definition' => '',
        'is_obsolete' => 0,
      );
      $cvproptype = tripal_cv_add_cvterm($type_term, $cv->name, 0, 0);
      if (!$cvproptype) {
        tripal_cv_obo_quiterror("Cannot add cvterm property: internal:$property");
        return FALSE;
      }
    }

    // remove any properties that currently exist for this term. We'll reset
    // them, starting with the property of rank 0
    if ($rank == 0) {
      $removed = tripal_core_chado_delete('cvtermprop',
        array('cvterm_id' => $cvterm->cvterm_id),
        array('statement_name' => 'del_cvtermprop_cv'));
      if (!$removed) {
        tripal_cv_obo_quiterror("Could not remove existing properties to update property $property for term\n");
        return FALSE;
      }
    }

    // now add the property
    $record = array(
      'cvterm_id' => $cvterm->cvterm_id,
      'type_id' => $cvproptype->cvterm_id,
      'value' => $value,
      'rank' => $rank,
    );
    $ins_options = array(
      'statement_name' => 'ins_cvtermprop_cvtyvara',
      'return_record' => FALSE,
    );
    if (!tripal_core_chado_insert('cvtermprop', $record, $ins_options)) {
      tripal_cv_obo_quiterror("Could not add property $property for term\n");
      return FALSE;
    }
    return TRUE;
  }
  /**
   * Add Database Reference
   *
   * Returns the dbxref with the given database and accession and adds it
   * first when it does not exist yet.
   *
   * @param $db_id
   *   The id of the database the reference belongs to.
   * @param $accession
   *   The accession of the reference.
   * @param $version
   *   Optional. Stored when the reference is added.
   * @param $description
   *   Optional. Stored when the reference is added.
   *
   * @return
   *   The first record the select on the dbxref table returns; only the
   *   dbxref_id column is selected.
   *
   * @ingroup tripal_obo_loader
   */
  function tripal_cv_obo_add_dbxref($db_id, $accession, $version='', $description='') {
    // check to see if the dbxref exists
    $lookup = array(
      'db_id' => $db_id,
      'accession' => $accession,
    );
    $sel_options = array('statement_name' => 'sel_dbxref_idac');
    $found = tripal_core_chado_select('dbxref', array('dbxref_id'), $lookup, $sel_options);
    if (count($found) != 0) {
      return $found[0];
    }

    // if not, add it
    $record = $lookup + array(
      'version' => $version,
      'description' => $description,
    );
    $ins_options = array(
      'statement_name' => 'ins_dbxref_idacvede',
      'return_record' => FALSE
    );
    if (!tripal_core_chado_insert('dbxref', $record, $ins_options)) {
      tripal_cv_obo_quiterror("Failed to insert the dbxref record $accession");
      return FALSE;
    }

    // the insert returns no record, so read back what was just added
    $found = tripal_core_chado_select('dbxref', array('dbxref_id'), $lookup, $sel_options);
    return $found[0];
  }