obo_loader.inc 27 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890
  1. <?php
  2. /**
  3. * @file
  4. * Tripal Ontology Loader
  5. *
  6. * @defgroup tripal_obo_loader Tripal Ontology Loader
  7. * @ingroup tripal_cv
  8. */
  /**
   * Loads the OBO that is registered in the tripal_cv_obo table.
   *
   * The stored path decides which loader runs: http(s) and ftp URLs are
   * handed to the URL loader, anything else is treated as a file. Files are
   * looked up relative to the Drupal installation first and, failing that,
   * using the stored path as given.
   *
   * @param $obo_id
   *   The obo_id of the record in the tripal_cv_obo table.
   * @param $jobid
   *   Optional job ID, passed through to the loaders.
   *
   * @ingroup tripal_obo_loader
   */
  function tripal_cv_load_obo_v1_2_id($obo_id, $jobid = NULL) {
    // get the OBO reference
    $sql = "SELECT * FROM {tripal_cv_obo} WHERE obo_id = %d";
    $obo = db_fetch_object(db_query($sql, $obo_id));

    // without a record there is no path to inspect
    if (!$obo) {
      print "ERROR: could not find an OBO reference with id: '$obo_id'\n";
      return;
    }

    // if the reference is for a remote URL then run the URL processing function
    if (preg_match('/^(https?|ftp):\/\//', $obo->path)) {
      tripal_cv_load_obo_v1_2_url($obo->name, $obo->path, $jobid, 0);
      return;
    }

    // otherwise it is a file: check to see if it is located local to Drupal
    $dfile = $_SERVER['DOCUMENT_ROOT'] . base_path() . $obo->path;
    if (file_exists($dfile)) {
      tripal_cv_load_obo_v1_2_file($obo->name, $dfile, $jobid, 0);
    }
    // if not local to Drupal, the file must be someplace else, just use
    // the full path provided
    elseif (file_exists($obo->path)) {
      tripal_cv_load_obo_v1_2_file($obo->name, $obo->path, $jobid, 0);
    }
    else {
      print "ERROR: could not find OBO file: '$obo->path'\n";
    }
  }
  /**
   * Loads an OBO file from the local file system.
   *
   * @param $obo_name
   *   Name to register the OBO under when $is_new is set.
   * @param $file
   *   Path of the OBO file to load.
   * @param $jobid
   *   Optional job ID, passed through to the loader and the cvtermpath update.
   * @param $is_new
   *   When TRUE the name/path pair is recorded via tripal_cv_load_obo_add_ref().
   *
   * @ingroup tripal_obo_loader
   */
  function tripal_cv_load_obo_v1_2_file($obo_name, $file, $jobid = NULL, $is_new = TRUE) {
    // filled in by the loader (passed by reference) with the CVs it touched
    $loaded_cvs = array();
    tripal_cv_load_obo_v1_2($file, $jobid, $loaded_cvs);

    // remember where this ontology came from
    if ($is_new) {
      tripal_cv_load_obo_add_ref($obo_name, $file);
    }

    // bring the cvtermpath table up to date for every CV that was loaded
    tripal_cv_load_update_cvtermpath($loaded_cvs, $jobid);

    print "Ontology Sucessfully loaded!\n";
  }
  /**
   * Downloads an OBO file from a remote URL and loads it.
   *
   * The remote file is copied to a temporary file, parsed from there and the
   * temporary file is removed afterwards.
   *
   * @param $obo_name
   *   Name to register the OBO under when $is_new is set.
   * @param $url
   *   The URL of the OBO file.
   * @param $jobid
   *   Optional job ID, passed through to the loader and the cvtermpath update.
   * @param $is_new
   *   When TRUE the name/URL pair is recorded via tripal_cv_load_obo_add_ref().
   *
   * @ingroup tripal_obo_loader
   */
  function tripal_cv_load_obo_v1_2_url($obo_name, $url, $jobid = NULL, $is_new = TRUE) {
    $newcvs = array();

    // open the remote file before creating anything locally so that an
    // unreachable URL leaves no temporary file behind
    $url_fh = fopen($url, "r");
    if (!$url_fh) {
      tripal_cv_obo_quiterror("Unable to download the remote OBO file at $url. Could a firewall be blocking outgoing connections? " .
        "If you are unable to download the file you may manually download the OBO file and use the web interface to " .
        "specify the location of the file on your server.");
    }

    // create the temporary file that receives the download
    $temp = tempnam(sys_get_temp_dir(), 'obo_');
    $obo_fh = $temp ? fopen($temp, "w") : FALSE;
    if (!$obo_fh) {
      fclose($url_fh);
      if ($temp) {
        unlink($temp);
      }
      tripal_cv_obo_quiterror("Unable to create a temporary file for downloading the OBO file at $url.");
    }

    print "Downloading URL $url, saving to $temp\n";

    // copy the remote file; stop on the first failed read or write rather than
    // looping forever on a broken stream
    $failed = FALSE;
    while (!feof($url_fh)) {
      $chunk = fread($url_fh, 8192);
      if ($chunk === FALSE or fwrite($obo_fh, $chunk) === FALSE) {
        $failed = TRUE;
        break;
      }
    }
    fclose($url_fh);
    fclose($obo_fh);
    if ($failed) {
      unlink($temp);
      tripal_cv_obo_quiterror("The download of the remote OBO file at $url did not complete.");
    }

    // second, parse the OBO
    // NOTE(review): the loader can terminate the script through
    // tripal_cv_obo_quiterror(), in which case the temp file is not removed.
    tripal_cv_load_obo_v1_2($temp, $jobid, $newcvs);

    // now remove the temp file
    unlink($temp);

    if ($is_new) {
      tripal_cv_load_obo_add_ref($obo_name, $url);
    }

    // update the cvtermpath table
    tripal_cv_load_update_cvtermpath($newcvs, $jobid);

    print "Ontology Sucessfully loaded!\n";
  }
  /**
   * Updates the cvtermpath table for each of the given vocabularies.
   *
   * @param $newcvs
   *   Array whose values are cv_ids; the keys are not used here.
   * @param $jobid
   *   Job ID passed through to tripal_cv_update_cvtermpath().
   *
   * @ingroup tripal_obo_loader
   */
  function tripal_cv_load_update_cvtermpath($newcvs, $jobid) {
    print "\nUpdating cvtermpath table. This may take a while...\n";

    foreach ($newcvs as $cv_id) {
      tripal_cv_update_cvtermpath($cv_id, $jobid);
    }
  }
  /**
   * Records an OBO in the tripal_cv_obo table of the Drupal database.
   *
   * @param $name
   *   The name of the OBO.
   * @param $path
   *   The file path or URL the OBO was loaded from.
   */
  function tripal_cv_load_obo_add_ref($name, $path) {
    db_query("INSERT INTO {tripal_cv_obo} (name,path) VALUES ('%s','%s')", $name, $path);
  }
  /**
   * Loads the contents of an OBO v1.2 file into the database.
   *
   * The file is first parsed into the tripal_obo_temp staging table, then the
   * Typedef stanzas and finally the Term stanzas are processed. The work is
   * wrapped in tripal_db_start_transaction()/tripal_db_commit_transaction().
   *
   * @param $file
   *   Path of the OBO file to load.
   * @param $jobid
   *   Optional job ID, passed through to the parser and the term processor.
   * @param $newcvs
   *   Array, passed by reference, that receives namespace => cv_id entries
   *   for the vocabularies touched by the load.
   *
   * @ingroup tripal_obo_loader
   */
  function tripal_cv_load_obo_v1_2($file, $jobid = NULL, &$newcvs) {
    $header = array();

    // the stanzas are staged in a custom table; create it when it is missing
    if (!db_table_exists('tripal_obo_temp')) {
      $ret = array();
      $schema = tripal_cv_get_custom_tables('tripal_obo_temp');
      $created = tripal_core_create_custom_table($ret, 'tripal_obo_temp', $schema['tripal_obo_temp']);
      if (!$created) {
        watchdog('T_obo_loader', "Cannot create temporary loading table", array(), WATCHDOG_ERROR);
        return;
      }
    }

    // start from an empty staging table
    chado_query("DELETE FROM tripal_obo_temp");

    // try for a persistent connection and tell the user when there is none
    $connection = tripal_db_persistent_chado();
    if (!$connection) {
      print "A persistant connection was not obtained. Loading will be slow\n";
    }

    tripal_db_start_transaction();
    if ($connection) {
      print "\nNOTE: Loading of this OBO file is performed using a database transaction. \n" .
        "If the load fails or is terminated prematurely then the entire set of \n" .
        "insertions/updates is rolled back and will not be found in the database\n\n";
    }

    print "Step 1: Preloading File $file\n";

    // both the 'internal' and the '_global' database must be present
    foreach (array('internal', '_global') as $required_db) {
      if (!tripal_db_add_db($required_db)) {
        tripal_cv_obo_quiterror("Cannot add '$required_db' database");
      }
    }

    // stage the stanzas; the parser also fills $header by reference
    $default_db = tripal_cv_obo_parse($file, $header, $jobid);

    // the vocabulary for this ontology is named after its default namespace
    $namespace = $header['default-namespace'][0];
    $defaultcv = tripal_cv_add_cv($namespace, '');
    if (!$defaultcv) {
      tripal_cv_obo_quiterror('Cannot add namespace ' . $namespace);
    }
    $newcvs[$namespace] = $defaultcv->cv_id;

    // typedefs go in first, flagged as relationship terms
    $sql = "
      SELECT * FROM tripal_obo_temp
      WHERE type = 'Typedef'
    ";
    $typedefs = chado_query($sql);
    while ($typedef = db_fetch_object($typedefs)) {
      $stanza = unserialize(base64_decode($typedef->stanza));
      tripal_cv_obo_process_term($stanza, $defaultcv->name, 1, $newcvs, $default_db);
    }

    // then the regular terms
    print "\nStep 2: Loading terms...\n";
    if (!tripal_cv_obo_process_terms($defaultcv->name, $jobid, $newcvs, $default_db)) {
      tripal_cv_obo_quiterror('Cannot add terms from this ontology');
    }

    // everything went in, make it permanent
    tripal_db_commit_transaction();
  }
  /**
   * Logs an error for the OBO loader and terminates the script.
   *
   * @param $message
   *   The message to record with watchdog() under the 'T_obo_loader' type.
   *
   * @ingroup tripal_obo_loader
   */
  function tripal_cv_obo_quiterror($message) {
    watchdog("T_obo_loader", $message, array(), WATCHDOG_ERROR);
    // nothing after this call runs in the caller
    exit();
  }
  /**
   * Adds every 'Term' stanza of the staging table to the database.
   *
   * Progress for this step is reported as the second half (50-100%) of the
   * job; tripal_cv_obo_parse() reports the first half.
   *
   * @param $defaultcv
   *   Name of the vocabulary that terms are added to by default.
   * @param $jobid
   *   Optional job ID used for progress reporting.
   * @param $newcvs
   *   Array, passed by reference through to tripal_cv_obo_process_term().
   * @param $default_db
   *   Default database name, passed through to tripal_cv_obo_process_term().
   *
   * @return
   *   1 when all terms were processed. Failures terminate the script through
   *   tripal_cv_obo_quiterror().
   *
   * @ingroup tripal_obo_loader
   */
  function tripal_cv_obo_process_terms($defaultcv, $jobid = NULL, &$newcvs, $default_db) {
    $i = 0;

    // iterate through each term from the OBO file and add it
    $sql = "
      SELECT * FROM tripal_obo_temp
      WHERE type = 'Term'
      ORDER BY id
    ";
    $terms = chado_query($sql);
    $count = pg_num_rows($terms);

    // calculate the interval for updates
    $interval = max(1, intval($count * 0.0001));

    while ($t = db_fetch_object($terms)) {
      $term = unserialize(base64_decode($t->stanza));

      // update the job status every interval
      if ($jobid and $i % $interval == 0) {
        _tripal_cv_obo_report_term_progress($jobid, $i, $count);
      }

      // add/update this term
      if (!tripal_cv_obo_process_term($term, $defaultcv, 0, $newcvs, $default_db)) {
        tripal_cv_obo_quiterror("Failed to process terms from the ontology");
      }
      $i++;
    }

    // set the final status
    if ($jobid) {
      _tripal_cv_obo_report_term_progress($jobid, $i, $count);
    }
    return 1;
  }

  /**
   * Reports the progress of the term loading step for a job.
   *
   * The step covers the 50-100% range of the job, so the share of processed
   * terms is scaled to 0-50 and added to 50. The job ID itself is never
   * altered. With no terms at all the step counts as complete.
   */
  function _tripal_cv_obo_report_term_progress($jobid, $done, $total) {
    $complete = $total > 0 ? ($done / $total) * 50 : 50;
    tripal_job_set_progress($jobid, 50 + intval($complete));
    printf("%d of %d records. (%0.2f%%) Memory: %s bytes\r", $done, $total, $complete * 2, number_format(memory_get_usage()));
  }
  /**
   * Adds a single term stanza and its related records to the database.
   *
   * Besides the cvterm itself this handles alternate ids, synonyms (including
   * the deprecated exact/narrow/broad_synonym tags), comments, xrefs, is_a
   * links and relationships. The tags is_anonymous, subset, intersection_of,
   * union_of, disjoint_from, replaced_by, consider, use_term and builtin are
   * not handled beyond what is passed to tripal_cv_add_cvterm().
   *
   * @param $term
   *   The stanza: an array of tag => list of values.
   * @param $defaultcv
   *   Name of the vocabulary to add the term to by default.
   * @param $is_relationship
   *   Set to 1 when the term is a relationship type (a Typedef stanza).
   * @param $newcvs
   *   Array, passed by reference; receives namespace => cv_id when the
   *   stanza carries a namespace tag.
   * @param $default_db
   *   Default database name passed to tripal_cv_add_cvterm().
   *
   * @return
   *   1 on success. Failures terminate the script through
   *   tripal_cv_obo_quiterror().
   *
   * @ingroup tripal_obo_loader
   */
  function tripal_cv_obo_process_term($term, $defaultcv, $is_relationship = 0, &$newcvs, $default_db) {
    // construct the term array for sending to the tripal_cv_add_cvterm function
    // for adding a new cvterm
    $t = array();
    $t['id'] = $term['id'][0];
    $t['name'] = $term['name'][0];
    foreach (array('def', 'subset', 'namespace', 'is_obsolete') as $tag) {
      if (array_key_exists($tag, $term)) {
        $t[$tag] = $term[$tag][0];
      }
    }

    // add the cvterm
    $cvterm = tripal_cv_add_cvterm($t, $defaultcv, $is_relationship, 1, $default_db);
    if (!$cvterm) {
      // $term['id'] is a list of values; report the id itself
      tripal_cv_obo_quiterror("Cannot add the term " . $term['id'][0]);
    }
    if (array_key_exists('namespace', $term)) {
      $newcvs[$term['namespace'][0]] = $cvterm->cv_id;
    }

    // alternate ids are stored as dbxrefs of the term
    if (array_key_exists('alt_id', $term)) {
      foreach ($term['alt_id'] as $alt_id) {
        if (!tripal_cv_obo_add_cvterm_dbxref($cvterm, $alt_id)) {
          tripal_cv_obo_quiterror("Cannot add alternate id $alt_id");
        }
      }
    }

    // reformat the deprecated 'exact_synonym, narrow_synonym, and broad_synonym'
    // types to be of the v1.2 standard and append them to the regular synonyms
    $deprecated_scopes = array(
      'exact_synonym' => 'EXACT',
      'narrow_synonym' => 'NARROW',
      'broad_synonym' => 'BROAD',
    );
    foreach ($deprecated_scopes as $tag => $scope) {
      if (array_key_exists($tag, $term)) {
        foreach ($term[$tag] as $synonym) {
          $term['synonym'][] = preg_replace('/^\s*(\".+?\")(.*?)$/', '$1 ' . $scope . ' $2', $synonym);
        }
      }
    }

    // add all synonyms for this cvterm in a single pass
    if (array_key_exists('synonym', $term)) {
      if (!tripal_cv_obo_add_synonyms($term, $cvterm)) {
        tripal_cv_obo_quiterror("Cannot add/update synonyms");
      }
    }

    // add the comment to the cvtermprop table, one rank per comment
    if (array_key_exists('comment', $term)) {
      $rank = 0;
      foreach ($term['comment'] as $comment) {
        if (!tripal_cv_obo_add_cvterm_prop($cvterm, 'comment', $comment, $rank)) {
          tripal_cv_obo_quiterror("Cannot add/update cvterm property");
        }
        $rank++;
      }
    }

    // add any other external dbxrefs
    foreach (array('xref', 'xref_analog', 'xref_unk') as $tag) {
      if (array_key_exists($tag, $term)) {
        foreach ($term[$tag] as $xref) {
          if (!tripal_cv_obo_add_cvterm_dbxref($cvterm, $xref)) {
            tripal_cv_obo_quiterror("Cannot add/update cvterm database reference (dbxref).");
          }
        }
      }
    }

    // add is_a relationships for this cvterm
    if (array_key_exists('is_a', $term)) {
      foreach ($term['is_a'] as $is_a) {
        if (!tripal_cv_obo_add_relationship($cvterm, $defaultcv, 'is_a', $is_a, $is_relationship, $default_db)) {
          tripal_cv_obo_quiterror("Cannot add relationship is_a: $is_a");
        }
      }
    }

    // add the other relationships; the value is "<relationship> <object>"
    if (array_key_exists('relationship', $term)) {
      foreach ($term['relationship'] as $value) {
        $rel = preg_replace('/^(.+?)\s.+?$/', '\1', $value);
        $object = preg_replace('/^.+?\s(.+?)$/', '\1', $value);
        if (!tripal_cv_obo_add_relationship($cvterm, $defaultcv, $rel, $object, $is_relationship, $default_db)) {
          tripal_cv_obo_quiterror("Cannot add relationship $rel: $object");
        }
      }
    }

    return 1;
  }
  /**
   * Add a cvterm relationship
   *
   * Makes sure the relationship type and the object term exist as cvterms and
   * then links subject and object in the cvterm_relationship table unless the
   * link is already there.
   *
   * @param $cvterm
   *   The subject cvterm object; its cvterm_id and name are used.
   * @param $defaultcv
   *   Name of the vocabulary passed to tripal_cv_add_cvterm().
   * @param $rel
   *   Name of the relationship, e.g. 'is_a'.
   * @param $objname
   *   ID of the object term; its stanza is read from the staging table.
   * @param $object_is_relationship
   *   Passed to tripal_cv_add_cvterm() when adding the object term.
   * @param $default_db
   *   Database tried first for the relationship term; 'OBO_REL' is the
   *   fallback.
   *
   * @return
   *   TRUE on success. Failures terminate the script through
   *   tripal_cv_obo_quiterror().
   *
   * @ingroup tripal_obo_loader
   */
  function tripal_cv_obo_add_relationship($cvterm, $defaultcv, $rel,
    $objname, $object_is_relationship = 0, $default_db = 'OBO_REL') {

    // make sure the relationship cvterm exists: look in the default_db
    // provided first and then in the relationship ontology
    $relcvterm = FALSE;
    foreach (array($default_db, 'OBO_REL') as $rel_db) {
      $rel_term = array(
        'name' => $rel,
        'id' => "$rel_db:$rel",
        'definition' => '',
        'is_obsolete' => 0,
      );
      $relcvterm = tripal_cv_add_cvterm($rel_term, $defaultcv, 1, 0, $rel_db);
      if ($relcvterm) {
        break;
      }
    }
    if (!$relcvterm) {
      tripal_cv_obo_quiterror("Cannot find the relationship term in the current ontology or in the relationship ontology: $rel\n");
    }

    // fetch the stanza of the object term
    $oterm = tripal_cv_obo_get_term($objname);
    if (!$oterm) {
      tripal_cv_obo_quiterror("Could not find object term $objname\n");
    }

    // build the term array for the object; the optional tags are copied only
    // when the stanza has them
    $objterm = array(
      'id' => $oterm['id'][0],
      'name' => $oterm['name'][0],
    );
    foreach (array('def', 'subset', 'namespace', 'is_obsolete') as $tag) {
      if (array_key_exists($tag, $oterm)) {
        $objterm[$tag] = $oterm[$tag][0];
      }
    }
    $objcvterm = tripal_cv_add_cvterm($objterm, $defaultcv, $object_is_relationship, 1, $default_db);
    if (!$objcvterm) {
      tripal_cv_obo_quiterror("Cannot add cvterm " . $oterm['name'][0]);
    }

    // nothing more to do when the cvterm_relationship is already recorded
    $link = array(
      'type_id' => $relcvterm->cvterm_id,
      'subject_id' => $cvterm->cvterm_id,
      'object_id' => $objcvterm->cvterm_id
    );
    $sel_options = array('statement_name' => 'sel_cvtermrelationship_tysuob');
    $existing = tripal_core_chado_select('cvterm_relationship', array('*'), $link, $sel_options);
    if (count($existing) != 0) {
      return TRUE;
    }

    // otherwise add it
    $ins_options = array(
      'statement_name' => 'ins_cvtermrelationship_tysuob',
      'return_record' => FALSE
    );
    $inserted = tripal_core_chado_insert('cvterm_relationship', $link, $ins_options);
    if (!$inserted) {
      tripal_cv_obo_quiterror("Cannot add term relationship: '$cvterm->name' $rel '$objcvterm->name'");
    }
    return TRUE;
  }
  /**
   * Retrieves the stanza of a term from the tripal_obo_temp staging table.
   *
   * @param $id
   *   The ID of the term.
   *
   * @return
   *   The unserialized stanza, or FALSE when no record has that ID.
   *
   * @ingroup tripal_obo_loader
   */
  function tripal_cv_obo_get_term($id) {
    $match = array('id' => $id);
    $sel_options = array('statement_name' => 'sel_tripalobotemp_id');
    $rows = tripal_core_chado_select('tripal_obo_temp', array('stanza'), $match, $sel_options);

    // stanzas are stored serialized and base64 encoded
    return count($rows) == 0 ? FALSE : unserialize(base64_decode($rows[0]->stanza));
  }
  /**
   * Adds the synonyms of a term to the cvtermsynonym table.
   *
   * Each synonym value has the form '"text" SCOPE [dbxrefs]'. The scope
   * (lower-cased) becomes a term of the 'synonym_type' vocabulary and is
   * used as the type of the synonym; 'exact' is used when no scope is given.
   * The dbxrefs of a synonym are not loaded.
   *
   * @param $term
   *   The stanza; only its 'synonym' values are read.
   * @param $cvterm
   *   The cvterm object the synonyms belong to; its cvterm_id and name are
   *   used.
   *
   * @return
   *   TRUE. Failures terminate the script through tripal_cv_obo_quiterror().
   *
   * @ingroup tripal_obo_loader
   */
  function tripal_cv_obo_add_synonyms($term, $cvterm) {
    // make sure we have a 'synonym_type' vocabulary
    $syncv = tripal_cv_add_cv('synonym_type', 'A vocabulary added by the Tripal CV module OBO loader for storing synonym types.');
    if (!$syncv) {
      tripal_cv_obo_quiterror("Cannot add/find the synonym_type vocabulary");
    }

    if (!array_key_exists('synonym', $term)) {
      return TRUE;
    }

    // now add the synonyms
    foreach ($term['synonym'] as $synonym) {
      // separate out the synonym text and the scope ('EXACT', etc...). The
      // text is the leading quoted string (escaped quotes allowed); the scope
      // is the word that follows it, if any, so that quotes further along in
      // the line (dbxref descriptions) do not end up in the text.
      $scope = 'exact'; // if no scope then default to 'exact'
      $parts = array();
      if (preg_match('/^\s*"((?:[^"\\\\]|\\\\.)*)"\s*(.*)$/', $synonym, $parts)) {
        $def = $parts[1];
        $scope_match = array();
        if (preg_match('/^([^\s\[\{]+)/', $parts[2], $scope_match)) {
          $scope = drupal_strtolower($scope_match[1]);
        }
      }
      else {
        // not in the quoted form: keep what the line provides as the text
        $def = preg_replace('/^\s*"(.*)"\s*.*$/', '\1', $synonym);
      }

      // make sure the synonym type exists in the 'synonym_type' vocabulary
      $values = array(
        'name' => $scope,
        'cv_id' => array(
          'name' => 'synonym_type',
        ),
      );
      // NOTE(review): a misspelled 'is_updlicate' option used to be passed
      // here. It is dropped rather than corrected because the records
      // themselves are needed below - confirm that a correctly spelled
      // option would not return records.
      $options = array('statement_name' => 'sel_cvterm_nacv');
      $results = tripal_core_chado_select('cvterm', array('*'), $values, $options);

      // if it doesn't exist then add it
      if (!$results) {
        // build a 'term' array so we can add the missing term
        $type_term = array(
          'name' => $scope,
          'id' => "internal:$scope",
          'definition' => '',
          'is_obsolete' => 0,
        );
        $syntype = tripal_cv_add_cvterm($type_term, $syncv->name, 0, 1);
        if (!$syntype) {
          tripal_cv_obo_quiterror("Cannot add synonym type: internal:$scope");
        }
      }
      else {
        $syntype = $results[0];
      }

      // make sure the synonym doesn't already exist
      $values = array(
        'cvterm_id' => $cvterm->cvterm_id,
        'synonym' => $def
      );
      $options = array('statement_name' => 'sel_cvtermsynonym_cvsy');
      $results = tripal_core_chado_select('cvtermsynonym', array('*'), $values, $options);
      if (count($results) == 0) {
        $values = array(
          'cvterm_id' => $cvterm->cvterm_id,
          'synonym' => $def,
          'type_id' => $syntype->cvterm_id
        );
        $options = array(
          'statement_name' => 'ins_cvtermsynonym_cvsy',
          'return_record' => FALSE
        );
        $success = tripal_core_chado_insert('cvtermsynonym', $values, $options);
        if (!$success) {
          tripal_cv_obo_quiterror("Failed to insert the synonym for term: $cvterm->name ($def)");
        }
      }

      // TODO: the dbxrefs listed in square brackets at the end of a synonym
      // are not loaded. Splitting them on ', ' needs care because a comma may
      // also appear inside a quoted description.
    }
    return TRUE;
  }
  /**
   * Actually parse the OBO file
   *
   * Header tags are collected in $header. Every stanza is serialized and
   * stored in the tripal_obo_temp staging table together with its id and its
   * type (the text between the square brackets, e.g. Term or Typedef).
   * Progress is reported as the first half (0-50%) of the job.
   *
   * Known limitation: an unescaped '!' starts a comment even when it appears
   * inside a quoted string.
   *
   * @param $obo_file
   *   Path of the OBO file.
   * @param $header
   *   Array, passed by reference, that receives tag => list of values for the
   *   header of the file.
   * @param $jobid
   *   Job ID used for progress reporting; no progress is reported when empty.
   *
   * @return
   *   The database prefix of the last 'id' tag read, or '_global' when there
   *   was none.
   *
   * @ingroup tripal_obo_loader
   */
  function tripal_cv_obo_parse($obo_file, &$header, $jobid) {
    $in_header = 1;
    $stanza = array();
    $type = '';
    $default_db = '_global';
    $line_num = 0;
    $num_read = 0;
    $intv_read = 0;

    $filesize = filesize($obo_file);
    $interval = max(1, intval($filesize * 0.01));

    $fh = fopen($obo_file, 'r');
    if (!$fh) {
      tripal_cv_obo_quiterror("Cannot open the OBO file for reading: $obo_file");
      return $default_db;
    }

    // iterate through the lines in the OBO file and parse the stanzas.
    // fgets() is compared against FALSE so a line holding just "0" is read.
    while (($line = fgets($fh)) !== FALSE) {
      $line_num++;
      // progress is measured in bytes, the unit filesize() reports
      $size = strlen($line);
      $num_read += $size;
      $intv_read += $size;

      // update the job status every 1% of the file
      if ($jobid and $filesize > 0 and $intv_read >= $interval) {
        $percent = sprintf("%.2f", ($num_read / $filesize) * 100);
        print "Parsing Line $line_num (" . $percent . "%). Memory: " . number_format(memory_get_usage()) . " bytes.\r";
        tripal_job_set_progress($jobid, intval(($num_read / $filesize) * 50));
        $intv_read = 0;
      }

      // remove any special characters that may be hiding, then the
      // surrounding white space and newlines
      $line = trim(preg_replace('/[^\x20-\x7F]+/', '', $line));

      // remove comments from end of lines; an escaped exclamation mark (\!)
      // does not start a comment
      $line = trim(preg_replace('/^(.*?)(?<!\\\\)!.*$/', '\1', $line));

      // skip empty lines
      if ($line === '') {
        continue;
      }

      // at the first stanza we're out of header
      if (preg_match('/^\[/', $line)) {
        $in_header = 0;
        // store the stanza we just finished reading
        if (sizeof($stanza) > 0) {
          _tripal_cv_obo_store_stanza($stanza, $type);
        }
        // get the stanza type: Term, Typedef or Instance
        $type = preg_replace('/^\s*\[\s*(.+?)\s*\]\s*$/', '\1', $line);
        // start fresh with a new array
        $stanza = array();
        continue;
      }

      // break apart the line into the tag and value at the first colon that
      // is not escaped with a backslash
      $pair = preg_split('/(?<!\\\\):/', $line, 2);
      $tag = trim($pair[0]);
      $value = isset($pair[1]) ? trim($pair[1]) : '';

      // if this is the ID then look for the default DB
      $matches = array();
      if ($tag == 'id' and preg_match('/^(.+?):.*$/', $value, $matches)) {
        $default_db = $matches[1];
      }

      if ($in_header) {
        $header[$tag][] = $value;
      }
      else {
        $stanza[$tag][] = $value;
      }
    }
    fclose($fh);

    // now add the last term in the file
    if (sizeof($stanza) > 0) {
      _tripal_cv_obo_store_stanza($stanza, $type);
    }
    return $default_db;
  }

  /**
   * Stores one parsed stanza in the tripal_obo_temp staging table.
   *
   * The stanza is kept serialized and base64 encoded next to its id and
   * type. A failed insert terminates the script.
   */
  function _tripal_cv_obo_store_stanza($stanza, $type) {
    $values = array(
      'id' => $stanza['id'][0],
      'stanza' => base64_encode(serialize($stanza)),
      'type' => $type,
    );
    $options = array('statement_name' => 'ins_tripalobotemp_all');
    if (!tripal_core_chado_insert('tripal_obo_temp', $values, $options)) {
      tripal_cv_obo_quiterror("ERROR: Cannot insert stanza into temporary table.");
    }
  }
  /**
   * Add database reference to cvterm
   *
   * The xref has the form 'DB:accession "optional description"'. The database
   * and the dbxref are added when they are missing and the dbxref is then
   * linked to the term in the cvterm_dbxref table. Web links (http/https) are
   * stored in full as the accession of the 'URL' database.
   *
   * @param $cvterm
   *   The cvterm object; its cvterm_id is used.
   * @param $xref
   *   The xref text as read from the OBO file.
   *
   * @return
   *   TRUE on success, FALSE when the xref has no accession. Other failures
   *   terminate the script through tripal_cv_obo_quiterror().
   *
   * @ingroup tripal_obo_loader
   */
  function tripal_cv_obo_add_cvterm_dbxref($cvterm, $xref) {
    $dbname = preg_replace('/^(.+?):.*$/', '$1', $xref);
    $accession = preg_replace('/^.+?:\s*(.*?)(\{.+$|\[.+$|\s.+$|\".+$|$)/', '$1', $xref);

    // the description is the quoted text, if there is any. Without quotes
    // the description stays empty instead of repeating the whole xref.
    $description = '';
    $matches = array();
    if (preg_match('/^.+?\"(.+?)\"/', $xref, $matches)) {
      $description = $matches[1];
    }

    // log the problem and let the caller decide how to proceed
    if ($accession === NULL or $accession === '') {
      watchdog("T_obo_loader", "Cannot add a dbxref without an accession: '$xref'", NULL, WATCHDOG_WARNING);
      return FALSE;
    }

    // if the xref is a database link, handle that specially
    if (in_array($dbname, array('http', 'https'), TRUE)) {
      $accession = $xref;
      $dbname = 'URL';
    }

    // add the database
    $db = tripal_db_add_db($dbname);
    if (!$db) {
      tripal_cv_obo_quiterror("Cannot find database '$dbname' in Chado.");
    }

    // now add the dbxref
    $dbxref = tripal_cv_obo_add_dbxref($db->db_id, $accession, '', $description);
    if (!$dbxref) {
      tripal_cv_obo_quiterror("Cannot find or add the database reference (dbxref)");
    }

    // finally add the cvterm_dbxref but first check to make sure it exists
    $values = array(
      'cvterm_id' => $cvterm->cvterm_id,
      'dbxref_id' => $dbxref->dbxref_id,
    );
    $options = array('statement_name' => 'sel_cvtermdbxref_cvdb');
    $result = tripal_core_chado_select('cvterm_dbxref', array('*'), $values, $options);
    if (count($result) == 0) {
      $ins_options = array(
        'statement_name' => 'ins_cvtermdbxref_cvdb',
        'return_record' => FALSE
      );
      $result = tripal_core_chado_insert('cvterm_dbxref', $values, $ins_options);
      if (!$result) {
        tripal_cv_obo_quiterror("Cannot add cvterm_dbxref: $xref");
        return FALSE;
      }
    }
    return TRUE;
  }
  /**
   * Add property to CVterm
   *
   * The property type is a term of the 'cvterm_property_type' vocabulary and
   * is created when missing. When $rank is 0 the records of the term in the
   * cvtermprop table are deleted (selected by cvterm_id only) before the new
   * value is inserted.
   *
   * @param $cvterm
   *   The cvterm object; its cvterm_id is used.
   * @param $property
   *   Name of the property type, e.g. 'comment'.
   * @param $value
   *   The value of the property.
   * @param $rank
   *   The rank of the value.
   *
   * @return
   *   TRUE on success. Failures go through tripal_cv_obo_quiterror().
   *
   * @ingroup tripal_obo_loader
   */
  function tripal_cv_obo_add_cvterm_prop($cvterm, $property, $value, $rank) {
    // the vocabulary that holds the property types must exist
    $cv = tripal_cv_add_cv('cvterm_property_type', '');
    if (!$cv) {
      tripal_cv_obo_quiterror("Cannot add/find cvterm_property_type cvterm");
    }

    // look up the cvterm for the property type
    $type_match = array(
      'name' => $property,
      'cv_id' => $cv->cv_id,
    );
    $sel_options = array('statement_name' => 'sel_cvterm_nacv_na');
    $found = tripal_core_chado_select('cvterm', array('*'), $type_match, $sel_options);
    if (count($found) != 0) {
      $cvproptype = $found[0];
    }
    else {
      // the type is not there yet, so add it
      $new_type = array(
        'name' => $property,
        'id' => "internal:$property",
        'definition' => '',
        'is_obsolete' => 0,
      );
      $cvproptype = tripal_cv_add_cvterm($new_type, $cv->name, 0, 0);
      if (!$cvproptype) {
        tripal_cv_obo_quiterror("Cannot add cvterm property: internal:$property");
        return FALSE;
      }
    }

    // rank 0 marks the first value: clear what is stored for the term so
    // the values can be set again
    if ($rank == 0) {
      $del_match = array('cvterm_id' => $cvterm->cvterm_id);
      $del_options = array('statement_name' => 'del_cvtermprop_cv');
      $cleared = tripal_core_chado_delete('cvtermprop', $del_match, $del_options);
      if (!$cleared) {
        tripal_cv_obo_quiterror("Could not remove existing properties to update property $property for term\n");
        return FALSE;
      }
    }

    // store the property value
    $record = array(
      'cvterm_id' => $cvterm->cvterm_id,
      'type_id' => $cvproptype->cvterm_id,
      'value' => $value,
      'rank' => $rank,
    );
    $ins_options = array(
      'statement_name' => 'ins_cvtermprop_cvtyvara',
      'return_record' => FALSE,
    );
    $inserted = tripal_core_chado_insert('cvtermprop', $record, $ins_options);
    if (!$inserted) {
      tripal_cv_obo_quiterror("Could not add property $property for term\n");
      return FALSE;
    }
    return TRUE;
  }
  /**
   * Add Database Reference
   *
   * Returns the dbxref record for a database/accession pair, inserting it
   * first when it does not exist yet.
   *
   * @param $db_id
   *   The db_id of the database the accession belongs to.
   * @param $accession
   *   The accession.
   * @param $version
   *   Version stored with a newly inserted record.
   * @param $description
   *   Description stored with a newly inserted record.
   *
   * @return
   *   The first record of the dbxref lookup (only dbxref_id is selected).
   *   A failed insert goes through tripal_cv_obo_quiterror().
   *
   * @ingroup tripal_obo_loader
   */
  function tripal_cv_obo_add_dbxref($db_id, $accession, $version='', $description='') {
    $lookup = array(
      'db_id' => $db_id,
      'accession' => $accession,
    );
    $sel_options = array('statement_name' => 'sel_dbxref_idac');

    // an existing record is returned as it is
    $rows = tripal_core_chado_select('dbxref', array('dbxref_id'), $lookup, $sel_options);
    if (count($rows) != 0) {
      return $rows[0];
    }

    // otherwise insert the dbxref ...
    $new_record = array(
      'db_id' => $db_id,
      'accession' => $accession,
      'version' => $version,
      'description' => $description,
    );
    $ins_options = array(
      'statement_name' => 'ins_dbxref_idacvede',
      'return_record' => FALSE
    );
    $inserted = tripal_core_chado_insert('dbxref', $new_record, $ins_options);
    if (!$inserted) {
      tripal_cv_obo_quiterror("Failed to insert the dbxref record $accession");
      return FALSE;
    }

    // ... and read it back to get at its dbxref_id
    $rows = tripal_core_chado_select('dbxref', array('dbxref_id'), $lookup, $sel_options);
    return $rows[0];
  }