obo_loader.inc 35 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122
  1. <?php
  2. /**
  3. * @file
  4. * Tripal Ontology Loader
  5. *
  6. * @defgroup tripal_obo_loader Ontology Loader
  7. * @ingroup tripal_cv
  8. */
  /**
   * Form builder: choose an ontology (OBO file) to load into Chado.
   *
   * Offers two fieldsets: one to pick an OBO reference already stored in the
   * {tripal_cv_obo} table, and one to register a new reference by name plus
   * either a remote URL or a local file path.
   *
   * @param $form_state
   *   The Drupal form state. It is not read by this builder.
   *
   * @return
   *   The form array.
   *
   * @ingroup tripal_obo_loader
   */
  function tripal_cv_obo_form(&$form_state = NULL) {
    // Build the select options from the saved OBO references. The leading
    // empty entry (key 0) represents "nothing selected".
    $sql = "SELECT * FROM {tripal_cv_obo} ORDER BY name";
    $results = db_query($sql);
    $obos = array();
    $obos[] = '';
    while ($obo = db_fetch_object($results)) {
      $obos[$obo->obo_id] = $obo->name;
    }

    // Default value for the URL and file text fields. This variable was
    // previously read without ever being assigned, which raised an
    // "undefined variable" notice and handed NULL to the form elements.
    $default_desc = '';

    $form['obo_existing'] = array(
      '#type' => 'fieldset',
      '#title' => t('Use a Saved Ontology OBO Reference')
    );
    $form['obo_new'] = array(
      '#type' => 'fieldset',
      '#title' => t('Use a New Ontology OBO Reference')
    );

    $form['obo_existing']['existing_instructions']= array(
      '#value' => t('The Ontology OBO files listed in the drop down below have been automatically added upon
           installation of the Tripal CV module or were added from a previous upload. Select
           an OBO, then click the submit button to load the vocabulary into the database. If the
           vocabularies already exist then the ontology will be updated.'),
      '#weight' => -1
    );
    $form['obo_existing']['obo_id'] = array(
      '#title' => t('Ontology OBO File Reference'),
      '#type' => 'select',
      '#options' => $obos,
      '#weight' => 0
    );

    $form['obo_new']['path_instructions']= array(
      '#value' => t('Provide the name and path for the OBO file. If the vocabulary OBO file
           is stored local to the server provide a file name. If the vocabulry is stored remotely,
           provide a URL. Only provide a URL or a local file, not both.'),
      '#weight' => 0
    );
    $form['obo_new']['obo_name']= array(
      '#type' => 'textfield',
      '#title' => t('New Vocabulary Name'),
      '#description' => t('Please provide a name for this vocabulary. After upload, this name will appear in the drop down
           list above for use again later.'),
      '#weight' => 1
    );
    $form['obo_new']['obo_url']= array(
      '#type' => 'textfield',
      '#title' => t('Remote URL'),
      '#description' => t('Please enter a URL for the online OBO file. The file will be downloaded and parsed.
           (e.g. http://www.obofoundry.org/ro/ro.obo'),
      '#default_value' => $default_desc,
      '#weight' => 2
    );
    $form['obo_new']['obo_file']= array(
      '#type' => 'textfield',
      '#title' => t('Local File'),
      '#description' => t('Please enter the full system path for an OBO definition file, or a path within the Drupal
           installation (e.g. /sites/default/files/xyz.obo). The path must be accessible to the
           server on which this Drupal instance is running.'),
      '#default_value' => $default_desc,
      '#weight' => 3
    );

    $form['submit'] = array(
      '#type' => 'submit',
      '#value' => t('Submit'),
      '#weight' => 5,
      '#executes_submit_callback' => TRUE,
    );

    // After submission return to the loader page.
    $form['#redirect'] = 'admin/tripal/tripal_cv/obo_loader';
    return $form;
  }
  /**
   * Submit handler for the load-ontology form.
   *
   * Forwards the selected OBO id and the new-reference fields (name, URL,
   * local file) to tripal_cv_submit_obo_job().
   *
   * @param $form
   *   The submitted form array (unused).
   * @param $form_state
   *   The form state; only its 'values' entry is read.
   *
   * @ingroup tripal_obo_loader
   */
  function tripal_cv_obo_form_submit($form, &$form_state) {
    $submitted = $form_state['values'];
    tripal_cv_submit_obo_job(
      $submitted['obo_id'],
      $submitted['obo_name'],
      $submitted['obo_url'],
      $submitted['obo_file']
    );
  }
  /**
   * Form builder for re-populating the Chado cvtermpath table.
   *
   * Presents a select list of every controlled vocabulary except the one
   * named 'tripal', a short explanatory item and a submit button.
   *
   * @return
   *   The form array.
   *
   * @ingroup tripal_cv
   */
  function tripal_cv_cvtermpath_form() {
    // Option list: an empty first entry followed by cv_id => name pairs.
    $cv_options = array('');
    $query = "SELECT * FROM {cv} WHERE NOT name = 'tripal' ORDER BY name ";
    $records = chado_query($query);
    while ($record = db_fetch_object($records)) {
      $cv_options[$record->cv_id] = $record->name;
    }

    $form = array(
      'cvid' => array(
        '#title' => t('Controlled Vocabulary/Ontology Name'),
        '#type' => 'select',
        '#options' => $cv_options,
        '#description' => t('The Chado cvtermpath is a database table that provides lineage for ontology terms
           and is useful for quickly finding any ancestor parent of a term. This table must be populated for each
           ontology. Select a controlled vocabulary for which you would like to upate the cvtermpath.'),
      ),
      'description' => array(
        '#type' => 'item',
        '#value' => t("Submit a job to update chado cvtermpath table."),
        '#weight' => 1,
      ),
      'button' => array(
        '#type' => 'submit',
        '#value' => t('Update cvtermpath'),
        '#weight' => 2,
      ),
    );

    return $form;
  }
  /**
   * Loads the ontology behind a saved OBO reference.
   *
   * Looks up the {tripal_cv_obo} record and dispatches to the URL loader when
   * the stored path is an http, https or ftp URL, otherwise to the file
   * loader. Local paths are tried first relative to the Drupal installation
   * and then exactly as stored. The reference already exists, so both loaders
   * are called with $is_new = 0.
   *
   * @param $obo_id
   *   The obo_id of the record in the {tripal_cv_obo} table.
   * @param $jobid
   *   Optional job id, passed through to the loaders.
   *
   * @ingroup tripal_obo_loader
   */
  function tripal_cv_load_obo_v1_2_id($obo_id, $jobid = NULL) {
    // get the OBO reference
    $sql = "SELECT * FROM {tripal_cv_obo} WHERE obo_id = %d";
    $obo = db_fetch_object(db_query($sql, $obo_id));

    // Without a record there is nothing to load. Previously the code went on
    // to read properties of a non-object.
    if (!$obo) {
      print "ERROR: could not find an OBO reference with the id: '$obo_id'\n";
      return;
    }

    // Remote reference. https is accepted as well as http and ftp; before,
    // an https URL fell through to the local-file branch and was reported as
    // a missing file.
    if (preg_match('/^(https?|ftp):\/\//', $obo->path)) {
      tripal_cv_load_obo_v1_2_url($obo->name, $obo->path, $jobid, 0);
      return;
    }

    // Local reference: first check for a file inside the Drupal installation.
    $dfile = $_SERVER['DOCUMENT_ROOT'] . base_path() . $obo->path;
    if (file_exists($dfile)) {
      tripal_cv_load_obo_v1_2_file($obo->name, $dfile , $jobid, 0);
      return;
    }

    // Not inside Drupal, so treat the stored value as a full system path.
    if (file_exists($obo->path)) {
      tripal_cv_load_obo_v1_2_file($obo->name, $obo->path, $jobid, 0);
      return;
    }

    print "ERROR: counld not find OBO file: '$obo->path'\n";
  }
  /**
   * Loads an OBO file that is available on the local file system.
   *
   * Parses and loads the file, optionally records it as a saved OBO
   * reference, and then refreshes cvtermpath for every vocabulary that the
   * load reported.
   *
   * @param $obo_name
   *   Name under which the reference is saved when $is_new is set.
   * @param $file
   *   Path of the OBO file.
   * @param $jobid
   *   Optional job id, passed through to the loader and cvtermpath update.
   * @param $is_new
   *   When TRUE the name/path pair is stored via tripal_cv_add_obo_ref().
   *
   * @ingroup tripal_obo_loader
   */
  function tripal_cv_load_obo_v1_2_file($obo_name, $file, $jobid = NULL, $is_new = TRUE) {
    // Filled in (by reference) with namespace => cv_id for each loaded cv.
    $loaded_cvs = array();

    // TODO: need better error detection
    tripal_cv_load_obo_v1_2($file, $jobid, $loaded_cvs);

    // Remember this file so it can be picked from the saved list next time.
    if ($is_new) {
      tripal_cv_add_obo_ref($obo_name, $file);
    }
    print "\nDone\n";

    // Finally rebuild the cvtermpath table for the loaded vocabularies.
    tripal_cv_load_update_cvtermpath($loaded_cvs, $jobid);
  }
  /**
   * Downloads a remote OBO file to a temporary file and loads it.
   *
   * @param $obo_name
   *   Name under which the reference is saved when $is_new is set.
   * @param $url
   *   URL of the remote OBO file.
   * @param $jobid
   *   Optional job id, passed through to the loader and cvtermpath update.
   * @param $is_new
   *   When TRUE the name/URL pair is stored via tripal_cv_add_obo_ref().
   *
   * @ingroup tripal_obo_loader
   */
  function tripal_cv_load_obo_v1_2_url($obo_name, $url, $jobid = NULL, $is_new = TRUE) {
    $newcvs = array();

    // first download the OBO
    $temp = tempnam(sys_get_temp_dir(), 'obo_');
    print "Downloading URL $url, saving to $temp\n";

    $url_fh = fopen($url, "r");
    if (!$url_fh) {
      // tempnam() has already created the file; do not leave it behind.
      if ($temp and file_exists($temp)) {
        unlink($temp);
      }
      tripal_cv_obo_quiterror("Unable to download the remote OBO file at $url. Could a firewall be blocking outgoing connections? ".
        " if you are unable to download the file you may manually downlod the OBO file and use the web interface to ".
        " specify the location of the file on your server.");
    }

    // The output handle used to be opened before the check above and was
    // never checked itself.
    $obo_fh = fopen($temp, "w");
    if (!$obo_fh) {
      fclose($url_fh);
      tripal_cv_obo_quiterror("Unable to open the temporary file $temp for writing the downloaded OBO file.");
    }

    // Copy the whole remote stream into the temporary file.
    stream_copy_to_stream($url_fh, $obo_fh);
    fclose($url_fh);
    fclose($obo_fh);

    // second, parse the OBO
    tripal_cv_load_obo_v1_2($temp, $jobid, $newcvs);

    // now remove the temp file
    unlink($temp);

    if ($is_new) {
      tripal_cv_add_obo_ref($obo_name, $url);
    }

    // update the cvtermpath table
    tripal_cv_load_update_cvtermpath($newcvs, $jobid);
    print "Done\n";
  }
  /**
   * Rebuilds the cvtermpath table for a set of vocabularies.
   *
   * @param $newcvs
   *   Array of cv ids keyed by namespace; only the values are used.
   * @param $jobid
   *   Job id passed through to tripal_cv_update_cvtermpath().
   *
   * @ingroup tripal_obo_loader
   */
  function tripal_cv_load_update_cvtermpath($newcvs, $jobid) {
    print "\nUpdating cvtermpath table. This may take a while...\n";

    foreach ($newcvs as $cv_id) {
      tripal_cv_update_cvtermpath($cv_id, $jobid);
    }
  }
  /**
   * Loads an OBO v1.2 file into Chado inside a database transaction.
   *
   * Steps: make sure the tripal_obo_temp staging table exists and is empty,
   * parse the file, determine the default vocabulary, load the typedefs and
   * finally load the terms.
   *
   * @param $file
   *   Path of the OBO file to load.
   * @param $jobid
   *   Optional job id used for progress reporting.
   * @param $newcvs
   *   Array (by reference) that receives namespace => cv_id entries for the
   *   vocabularies touched by this load.
   *
   * @ingroup tripal_obo_loader
   */
  function tripal_cv_load_obo_v1_2($file, $jobid = NULL, &$newcvs) {
    $header = array();

    // make sure our temporary table exists
    $ret = array();
    if (!db_table_exists('tripal_obo_temp')) {
      $schema = tripal_cv_get_custom_tables('tripal_obo_temp');
      $success = tripal_core_create_custom_table($ret, 'tripal_obo_temp', $schema['tripal_obo_temp']);
      if (!$success) {
        watchdog('T_obo_loader', "Cannot create temporary loading table", array(), WATCHDOG_ERROR);
        return;
      }
    }

    // empty the temp table
    $sql = "DELETE FROM {tripal_obo_temp}";
    chado_query($sql);

    // get a persistent connection; if we cannot get one then let the user
    // know the loading will be slow
    $connection = tripal_db_persistent_chado();
    if (!$connection) {
      print "A persistant connection was not obtained. Loading will be slow\n";
    }

    tripal_db_start_transaction();
    if ($connection) {
      print "\nNOTE: Loading of this OBO file is performed using a database transaction. \n" .
            "If the load fails or is terminated prematurely then the entire set of \n" .
            "insertions/updates is rolled back and will not be found in the database\n\n";
    }

    print "Step 1: Preloading File $file\n";

    // make sure we have an 'internal' and a '_global' database
    if (!tripal_db_add_db('internal')) {
      tripal_cv_obo_quiterror("Cannot add 'internal' database");
    }
    if (!tripal_db_add_db('_global')) {
      tripal_cv_obo_quiterror("Cannot add '_global' database");
    }

    // parse the obo file
    $default_db = tripal_cv_obo_parse($file, $header, $jobid);

    // add the CV for this ontology to the database. The v1.2 definition
    // specifies a 'default-namespace' to be used if a 'namespace' is not
    // present for each stanza. Some ontologies have adopted the v1.4 method
    // in their v1.2 files and not including it.
    if (array_key_exists('default-namespace', $header)) {
      $cv_name = $header['default-namespace'][0];
      $defaultcv = tripal_cv_add_cv($cv_name, '');
      if (!$defaultcv) {
        tripal_cv_obo_quiterror('Cannot add namespace ' . $cv_name);
      }
      $newcvs[$cv_name] = $defaultcv->cv_id;
    }
    // if the 'default-namespace' is missing
    else {
      // look to see if an 'ontology' key is present. It is part of the v1.4
      // specification so it shouldn't be in the file, but just in case.
      // Otherwise fall back to the '_global' vocabulary.
      if (array_key_exists('ontology', $header)) {
        $cv_name = strtoupper($header['ontology'][0]);
      }
      else {
        $cv_name = '_global';
      }
      $defaultcv = tripal_cv_add_cv($cv_name, '');
      // The '_global' fallback used to skip this check and dereferenced the
      // result unconditionally.
      if (!$defaultcv) {
        tripal_cv_obo_quiterror('Cannot add namespace ' . $cv_name);
      }
      $newcvs[$cv_name] = $defaultcv->cv_id;

      watchdog('t_obo_loader', "This OBO is missing the 'default-namespace' header. It is not possible to determine which vocabulary terms without a 'namespace' key should go. Instead, those terms will be placed in the '%vocab' vocabulary.",
        array('%vocab' => $defaultcv->name), WATCHDOG_WARNING);
    }

    // add any typedefs to the vocabulary first
    print "\nStep 2: Loading type defs...\n";
    tripal_cv_obo_load_typedefs($defaultcv, $newcvs, $default_db, $jobid);

    // next add terms to the vocabulary
    print "\nStep 3: Loading terms...\n";
    if (!tripal_cv_obo_process_terms($defaultcv, $jobid, $newcvs, $default_db)) {
      tripal_cv_obo_quiterror('Cannot add terms from this ontology');
    }

    // transaction is complete
    tripal_db_commit_transaction();
    return;
  }
  /**
   * Logs an error to the watchdog and terminates the script.
   *
   * @param $message
   *   The message to record under the 'T_obo_loader' watchdog type.
   *
   * @ingroup tripal_obo_loader
   */
  function tripal_cv_obo_quiterror($message) {
    $no_variables = array();
    watchdog("T_obo_loader", $message, $no_variables, WATCHDOG_ERROR);
    exit();
  }
  /**
   * Loads every 'Typedef' stanza from the staging table as a relationship
   * term.
   *
   * Job progress is reported in the 33%-66% band.
   *
   * @param $defaultcv
   *   The default vocabulary object; its name is handed to the term
   *   processor.
   * @param $newcvs
   *   Array of namespace => cv_id. It is passed by value here, so entries
   *   added while processing typedefs are not visible to the caller.
   * @param $default_db
   *   Default database name passed to the term processor.
   * @param $jobid
   *   Job id used for progress reporting (may be NULL).
   *
   * @return
   *   Always 1.
   *
   * @ingroup tripal_obo_loader
   */
  function tripal_cv_obo_load_typedefs($defaultcv, $newcvs, $default_db, $jobid){
    // The table name is wrapped in {} here, as it is in every other query
    // on tripal_obo_temp in this file.
    $sql = "
      SELECT *
      FROM {tripal_obo_temp}
      WHERE type = 'Typedef'
    ";
    $typedefs = chado_query($sql);

    $sql = "
      SELECT count(*) as num_terms
      FROM {tripal_obo_temp}
      WHERE type = 'Typedef'
    ";
    $result = db_fetch_object(chado_query($sql));
    $count = $result->num_terms;

    // calculate the interval for updates
    $interval = intval($count * 0.0001);
    if ($interval < 1) {
      $interval = 1;
    }

    $i = 0;
    while ($typedef = db_fetch_object($typedefs)) {
      // Stanzas are stored as base64-encoded serialized arrays.
      $term = unserialize(base64_decode($typedef->stanza));

      // update the job status every interval
      if ($jobid and $i % $interval == 0) {
        $complete = ($i / $count) * 33.33333333;
        tripal_job_set_progress($jobid, intval($complete + 33.33333333));
        printf("%d of %d records. (%0.2f%%) Memory: %s bytes\r", $i, $count, $complete * 3, number_format(memory_get_usage()));
      }

      // The third argument (1) marks the term as a relationship type.
      tripal_cv_obo_process_term($term, $defaultcv->name, 1, $newcvs, $default_db);
      $i++;
    }

    // set the final status
    if ($jobid) {
      if ($count > 0) {
        $complete = ($i / $count) * 33.33333333;
      }
      else {
        $complete = 33.33333333;
      }
      tripal_job_set_progress($jobid, intval($complete + 33.33333333));
      printf("%d of %d records. (%0.2f%%) Memory: %s bytes\r", $i, $count, $complete * 3, number_format(memory_get_usage()));
    }
    return 1;
  }
  /**
   * Loads every 'Term' stanza from the staging table, ordered by id.
   *
   * Job progress is reported in the 66%-100% band.
   *
   * @param $defaultcv
   *   The default vocabulary object; its name is handed to the term
   *   processor.
   * @param $jobid
   *   Optional job id used for progress reporting.
   * @param $newcvs
   *   Array (by reference) of namespace => cv_id, extended by the term
   *   processor.
   * @param $default_db
   *   Default database name passed to the term processor.
   *
   * @return
   *   Always 1.
   *
   * @ingroup tripal_obo_loader
   */
  function tripal_cv_obo_process_terms($defaultcv, $jobid = NULL, &$newcvs, $default_db) {
    // Fetch the staged term stanzas.
    $select_sql = "
      SELECT * FROM {tripal_obo_temp}
      WHERE type = 'Term'
      ORDER BY id
    ";
    $stanzas = chado_query($select_sql);

    // Count them so that progress can be expressed as a fraction.
    $count_sql = "
      SELECT count(*) as num_terms
      FROM {tripal_obo_temp}
      WHERE type = 'Term'
    ";
    $total = db_fetch_object(chado_query($count_sql))->num_terms;

    // Number of terms between progress updates; never less than one.
    $step = max(1, intval($total * 0.0001));

    $done = 0;
    while ($row = db_fetch_object($stanzas)) {
      $stanza = unserialize(base64_decode($row->stanza));

      // Report progress once every $step terms.
      if ($jobid and $done % $step == 0) {
        $portion = ($done / $total) * 33.33333333;
        tripal_job_set_progress($jobid, intval($portion + 66.666666));
        printf("%d of %d records. (%0.2f%%) Memory: %s bytes\r", $done, $total, $portion * 3, number_format(memory_get_usage()));
      }

      // Add or update the term; abort the whole load on failure.
      $ok = tripal_cv_obo_process_term($stanza, $defaultcv->name, 0, $newcvs, $default_db);
      if (!$ok) {
        tripal_cv_obo_quiterror("Failed to process terms from the ontology");
      }
      $done++;
    }

    // Report the final status.
    if ($jobid) {
      $portion = ($total > 0) ? ($done / $total) * 33.33333333 : 33.33333333;
      tripal_job_set_progress($jobid, intval($portion + 66.666666));
      printf("%d of %d records. (%0.2f%%) Memory: %s bytes\r", $done, $total, $portion * 3, number_format(memory_get_usage()));
    }
    return 1;
  }
  /**
   * Adds or updates a single OBO stanza (term or typedef) in Chado.
   *
   * Handles the cvterm itself plus its alt_ids, synonyms (including the
   * deprecated exact_/narrow_/broad_synonym tags), comments, xrefs, is_a
   * links and other relationships. Any failure ends the script through
   * tripal_cv_obo_quiterror().
   *
   * The following tags are recognised by the OBO format but are not loaded:
   * is_anonymous, intersection_of, union_of, disjoint_from, replaced_by,
   * consider, use_term and builtin. 'subset' is only passed along with the
   * term definition.
   *
   * @param $term
   *   The parsed stanza: an array of tag => list of values.
   * @param $defaultcv
   *   Name of the vocabulary to use when the stanza has no 'namespace' tag.
   * @param $is_relationship
   *   Non-zero when the stanza is a relationship type (typedef).
   * @param $newcvs
   *   Array (by reference) of namespace => cv_id; receives an entry for the
   *   stanza's own namespace when it has one.
   * @param $default_db
   *   Default database name passed to the cvterm and relationship helpers.
   *
   * @return
   *   Always 1.
   *
   * @ingroup tripal_obo_loader
   */
  function tripal_cv_obo_process_term($term, $defaultcv, $is_relationship = 0, &$newcvs, $default_db) {
    $has_namespace = array_key_exists('namespace', $term);

    // make sure we have a namespace for this term. The previous test,
    // !($defaultcv or $defaultcv == ''), could not fire for an empty default
    // vocabulary, so a term with no usable namespace was never rejected here.
    if (!$has_namespace and (string) $defaultcv === '') {
      tripal_cv_obo_quiterror("Cannot add the term: no namespace defined. " . $term['id'][0]);
    }

    // construct the term array for sending to the tripal_cv_add_cvterm
    // function for adding a new cvterm
    $t = array();
    $t['id'] = $term['id'][0];
    $t['name'] = $term['name'][0];
    foreach (array('def', 'subset', 'namespace', 'is_obsolete') as $tag) {
      if (array_key_exists($tag, $term)) {
        $t[$tag] = $term[$tag][0];
      }
    }

    // add the cvterm
    $cvterm = tripal_cv_add_cvterm($t, $defaultcv, $is_relationship, 1, $default_db);
    if (!$cvterm) {
      tripal_cv_obo_quiterror("Cannot add the term " . $term['id'][0]);
    }
    if ($has_namespace) {
      $newcvs[$term['namespace'][0]] = $cvterm->cv_id;
    }

    // alternate ids are stored as dbxrefs of the term
    if (array_key_exists('alt_id', $term)) {
      foreach ($term['alt_id'] as $alt_id) {
        if (!tripal_cv_obo_add_cvterm_dbxref($cvterm, $alt_id)) {
          tripal_cv_obo_quiterror("Cannot add alternate id $alt_id");
        }
      }
    }

    // reformat the deprecated 'exact_synonym, narrow_synonym, and
    // broad_synonym' types to be of the v1.2 standard and append them to the
    // regular synonym list, so that all synonyms are stored in a single pass
    // (before, the regular synonyms were processed twice whenever a
    // deprecated tag was present).
    $deprecated_scopes = array(
      'exact_synonym' => 'EXACT',
      'narrow_synonym' => 'NARROW',
      'broad_synonym' => 'BROAD',
    );
    foreach ($deprecated_scopes as $tag => $scope) {
      if (array_key_exists($tag, $term)) {
        foreach ($term[$tag] as $synonym) {
          $term['synonym'][] = preg_replace('/^\s*(\".+?\")(.*?)$/', '$1 ' . $scope . ' $2', $synonym);
        }
      }
    }

    // add synonyms for this cvterm
    if (array_key_exists('synonym', $term)) {
      if (!tripal_cv_obo_add_synonyms($term, $cvterm)) {
        tripal_cv_obo_quiterror("Cannot add/update synonyms");
      }
    }

    // add the comment to the cvtermprop table; the counter is passed as the
    // fourth argument so that each comment gets its own value
    if (array_key_exists('comment', $term)) {
      $j = 0;
      foreach ($term['comment'] as $comment) {
        if (!tripal_cv_obo_add_cvterm_prop($cvterm, 'comment', $comment, $j)) {
          tripal_cv_obo_quiterror("Cannot add/update cvterm property");
        }
        $j++;
      }
    }

    // add any other external dbxrefs
    foreach (array('xref', 'xref_analog', 'xref_unk') as $tag) {
      if (array_key_exists($tag, $term)) {
        foreach ($term[$tag] as $xref) {
          if (!tripal_cv_obo_add_cvterm_dbxref($cvterm, $xref)) {
            tripal_cv_obo_quiterror("Cannot add/update cvterm database reference (dbxref).");
          }
        }
      }
    }

    // add is_a relationships for this cvterm
    if (array_key_exists('is_a', $term)) {
      foreach ($term['is_a'] as $is_a) {
        if (!tripal_cv_obo_add_relationship($cvterm, $defaultcv, 'is_a', $is_a, $is_relationship, $default_db)) {
          tripal_cv_obo_quiterror("Cannot add relationship is_a: $is_a");
        }
      }
    }

    // add the remaining relationships. The relationship name is the text
    // before the first whitespace character and the object is the text
    // after it.
    if (array_key_exists('relationship', $term)) {
      foreach ($term['relationship'] as $value) {
        $rel = preg_replace('/^(.+?)\s.+?$/', '\1', $value);
        $object = preg_replace('/^.+?\s(.+?)$/', '\1', $value);
        if (!tripal_cv_obo_add_relationship($cvterm, $defaultcv, $rel, $object, $is_relationship, $default_db)) {
          tripal_cv_obo_quiterror("Cannot add relationship $rel: $object");
        }
      }
    }

    return 1;
  }
  /**
   * Adds a cvterm_relationship record linking a term to an object term.
   *
   * The relationship type is looked up in the supplied default database
   * first and then in 'OBO_REL'. The object term is read from the staging
   * table and added as a cvterm before the relationship is stored.
   *
   * @param $cvterm
   *   The subject cvterm object.
   * @param $defaultcv
   *   Default vocabulary name passed to tripal_cv_add_cvterm().
   * @param $rel
   *   Name of the relationship type (e.g. 'is_a').
   * @param $objname
   *   Stanza id of the object term.
   * @param $object_is_relationship
   *   Non-zero when the object term is itself a relationship type.
   * @param $default_db
   *   Database name tried first for the relationship type.
   *
   * @return
   *   TRUE; every failure ends the script via tripal_cv_obo_quiterror().
   *
   * @ingroup tripal_obo_loader
   */
  function tripal_cv_obo_add_relationship($cvterm, $defaultcv, $rel,
    $objname, $object_is_relationship = 0, $default_db = 'OBO_REL') {

    // Find the relationship type: first in the default db, and if that
    // fails do one more check in the relationship ontology.
    $relcvterm = FALSE;
    foreach (array($default_db, 'OBO_REL') as $rel_db) {
      $rel_term = array(
        'name' => $rel,
        'id' => "$rel_db:$rel",
        'definition' => '',
        'is_obsolete' => 0,
      );
      $relcvterm = tripal_cv_add_cvterm($rel_term, $defaultcv, 1, 0, $rel_db);
      if ($relcvterm) {
        break;
      }
    }
    if (!$relcvterm) {
      tripal_cv_obo_quiterror("Cannot find the relationship term in the current ontology or in the relationship ontology: $rel\n");
    }

    // Read the object stanza from the staging table.
    $stanza = tripal_cv_obo_get_term($objname);
    if (!$stanza) {
      tripal_cv_obo_quiterror("Could not find object term $objname\n");
    }

    // Copy the first value of each relevant tag into the object definition.
    $object_def = array(
      'id' => $stanza['id'][0],
      'name' => $stanza['name'][0],
    );
    foreach (array('def', 'subset', 'namespace', 'is_obsolete') as $tag) {
      if (array_key_exists($tag, $stanza)) {
        $object_def[$tag] = $stanza[$tag][0];
      }
    }
    $objcvterm = tripal_cv_add_cvterm($object_def, $defaultcv, $object_is_relationship, 1, $default_db);
    if (!$objcvterm) {
      tripal_cv_obo_quiterror("Cannot add cvterm " . $stanza['name'][0]);
    }

    // Insert the relationship unless an identical one is already stored.
    $link = array(
      'type_id' => $relcvterm->cvterm_id,
      'subject_id' => $cvterm->cvterm_id,
      'object_id' => $objcvterm->cvterm_id
    );
    $existing = tripal_core_chado_select(
      'cvterm_relationship',
      array('*'),
      $link,
      array('statement_name' => 'sel_cvtermrelationship_tysuob')
    );
    if (count($existing) == 0) {
      $insert_options = array(
        'statement_name' => 'ins_cvtermrelationship_tysuob',
        'return_record' => FALSE
      );
      if (!tripal_core_chado_insert('cvterm_relationship', $link, $insert_options)) {
        tripal_cv_obo_quiterror("Cannot add term relationship: '$cvterm->name' $rel '$objcvterm->name'");
      }
    }

    return TRUE;
  }
  /**
   * Retrieves a parsed stanza from the tripal_obo_temp staging table.
   *
   * @param $id
   *   The stanza id to look up.
   *
   * @return
   *   The unserialized stanza of the first matching row, or FALSE when no
   *   row matches.
   *
   * @ingroup tripal_obo_loader
   */
  function tripal_cv_obo_get_term($id) {
    $rows = tripal_core_chado_select(
      'tripal_obo_temp',
      array('stanza'),
      array('id' => $id),
      array('statement_name' => 'sel_tripalobotemp_id')
    );

    // Stanzas are stored base64-encoded and serialized.
    return (count($rows) == 0)
      ? FALSE
      : unserialize(base64_decode($rows[0]->stanza));
  }
  /**
   * Stores the synonyms of a stanza in the cvtermsynonym table.
   *
   * Each synonym's scope is stored as a term of the 'synonym_type'
   * vocabulary, which is created on demand. Synonyms that already exist for
   * the cvterm are left untouched. The dbxref list that may follow a synonym
   * is not loaded.
   *
   * @param $term
   *   The parsed stanza; only its 'synonym' entry is read.
   * @param $cvterm
   *   The cvterm object the synonyms belong to.
   *
   * @return
   *   TRUE; every failure ends the script via tripal_cv_obo_quiterror().
   *
   * @ingroup tripal_obo_loader
   */
  function tripal_cv_obo_add_synonyms($term, $cvterm) {
    // make sure we have a 'synonym_type' vocabulary
    $syncv = tripal_cv_add_cv('synonym_type', 'A vocabulary added by the Tripal CV module OBO loader for storing synonym types.');

    if (!array_key_exists('synonym', $term)) {
      return TRUE;
    }

    // The vocabulary name is needed below; it used to be read from $syncv
    // without checking that the vocabulary could be added.
    if (!$syncv) {
      tripal_cv_obo_quiterror("Cannot add the 'synonym_type' vocabulary");
    }

    // now add the synonyms
    foreach ($term['synonym'] as $synonym) {

      // separate out the synonym definition and the synonym type
      $def = preg_replace('/^\s*"(.*)"\s*.*$/', '\1', $synonym);
      // the scope will be 'EXACT', etc...
      // NOTE(review): when the pattern does not match, preg_replace returns
      // the whole synonym string, so the 'exact' default below only applies
      // to an empty capture -- confirm this is intended.
      $scope = drupal_strtolower(preg_replace('/^.*"\s+(.*?)\s+.*$/', '\1', $synonym));
      if (!$scope) {  // if no scope then default to 'exact'
        $scope = 'exact';
      }

      // make sure the synonym type exists in the 'synonym_type' vocabulary
      $values = array(
        'name' => $scope,
        'cv_id' => array(
          'name' => 'synonym_type',
        ),
      );
      // NOTE(review): 'is_updlicate' looks like a misspelling of
      // 'is_duplicate'. It is left unchanged because correcting it could
      // alter what the select returns -- confirm against
      // tripal_core_chado_select().
      $options = array('statement_name' => 'sel_cvterm_nacv', 'is_updlicate' => 1);
      $results = tripal_core_chado_select('cvterm', array('*'), $values, $options);

      // if it doesn't exist then add it
      if (!$results) {
        // build a 'term' array so we can add the missing term. A separate
        // variable is used so the $term parameter is no longer overwritten.
        $type_term = array(
          'name' => $scope,
          'id' => "internal:$scope",
          'definition' => '',
          'is_obsolete' => 0,
        );
        $syntype = tripal_cv_add_cvterm($type_term, $syncv->name, 0, 1);
        if (!$syntype) {
          tripal_cv_obo_quiterror("Cannot add synonym type: internal:$scope");
        }
      }
      else {
        $syntype = $results[0];
      }

      // make sure the synonym doesn't already exists
      $values = array(
        'cvterm_id' => $cvterm->cvterm_id,
        'synonym' => $def
      );
      $options = array('statement_name' => 'sel_cvtermsynonym_cvsy');
      $results = tripal_core_chado_select('cvtermsynonym', array('*'), $values, $options);
      if (count($results) == 0) {
        $values = array(
          'cvterm_id' => $cvterm->cvterm_id,
          'synonym' => $def,
          'type_id' => $syntype->cvterm_id
        );
        $options = array(
          'statement_name' => 'ins_cvtermsynonym_cvsy',
          'return_record' => FALSE
        );
        $success = tripal_core_chado_insert('cvtermsynonym', $values, $options);
        if (!$success) {
          // The message used to interpolate an undefined $name.
          tripal_cv_obo_quiterror("Failed to insert the synonym for term: $cvterm->name ($def)");
        }
      }

      // TODO: the dbxrefs listed in the trailing [...] of a synonym are not
      // loaded. A comma inside the quoted description would have to be
      // protected before splitting that list on ", ".
    }

    return TRUE;
  }
  /**
   * Parse an OBO file and stage each stanza in the tripal_obo_temp table.
   *
   * The file is read line by line. Tag/value pairs found before the first
   * stanza header (a line starting with '[') are appended to $header. Pairs
   * found after it are collected per stanza; every completed stanza is
   * serialized, base64 encoded and inserted into tripal_obo_temp together with
   * its id and its stanza type (the text between the square brackets).
   *
   * @param $obo_file
   *   Path of the OBO file to read.
   * @param $header
   *   Array, passed by reference, that receives the header tags. Each tag maps
   *   to the list of values found for it.
   * @param $jobid
   *   Job identifier handed to tripal_job_set_progress(). When it is empty no
   *   progress is reported while the lines are being read.
   *
   * @return
   *   The text before the first colon of the last 'id' value that contained a
   *   colon, or '_global' when no such id was seen.
   *
   * @ingroup tripal_obo_loader
   */
  function tripal_cv_obo_parse($obo_file, &$header, $jobid) {
    $in_header = 1;
    $stanza = array();
    $type = '';
    $default_db = '_global';
    $line_num = 0;
    $num_read = 0;
    $intv_read = 0;

    $filesize = filesize($obo_file);
    $interval = intval($filesize * 0.01);
    if ($interval < 1) {
      $interval = 1;
    }

    // iterate through the lines in the OBO file and parse the stanzas
    $fh = fopen($obo_file, 'r');
    if (!$fh) {
      watchdog('T_obo_loader', "ERROR: Cannot open the OBO file for reading.", array(), WATCHDOG_ERROR);
      return $default_db;
    }

    // compare against FALSE so a final line holding just "0" is not mistaken
    // for the end of the file
    while (($line = fgets($fh)) !== FALSE) {
      $line_num++;
      // count bytes rather than characters because $filesize is in bytes
      $size = strlen($line);
      $num_read += $size;
      $intv_read += $size;

      // update the job status every 1% of the file
      if ($jobid and $intv_read >= $interval) {
        $percent = sprintf("%.2f", ($num_read / $filesize) * 100);
        print "Parsing Line $line_num (" . $percent . "%). Memory: " . number_format(memory_get_usage()) . " bytes.\r";
        tripal_job_set_progress($jobid, intval(($num_read / $filesize) * 33.33333333));
        $intv_read = 0;
      }

      // remove newlines and surrounding whitespace
      $line = trim($line);
      // remove any special characters that may be hiding
      $line = preg_replace('/[^(\x20-\x7F)]*/', '', $line);
      // skip empty lines
      if (strcmp($line, '') == 0) {
        continue;
      }

      // remove comments from end of lines
      $line = preg_replace('/^(.*?)\!.*$/', '\1', $line); // TODO: if the explamation is escaped
      // a line that held nothing but a comment has no tag to record
      if (strcmp($line, '') == 0) {
        continue;
      }

      // at the first stanza we're out of header
      if (preg_match('/^\s*\[/', $line)) {
        $in_header = 0;
        // store the stanza we just finished reading
        _tripal_cv_obo_parse_store_stanza($stanza, $type);
        // get the stanza type: Term, Typedef or Instance
        $type = preg_replace('/^\s*\[\s*(.+?)\s*\]\s*$/', '\1', $line);
        // start fresh with a new array
        $stanza = array();
        continue;
      }

      // break apart the line into the tag and value but ignore any escaped
      // colons: they are swapped for a placeholder while the line is split
      $line = preg_replace('/\\\\:/', '|-|-|', $line);
      $pair = explode(":", $line, 2);
      $tag = $pair[0];
      // a line without a colon has a tag but no value
      $value = isset($pair[1]) ? trim($pair[1]) : '';

      // if this is the ID then look for the default DB
      $matches = array();
      if ($tag == 'id' and preg_match('/^(.+?):.*$/', $value, $matches)) {
        $default_db = $matches[1];
      }

      // return the escaped colons
      $tag = str_replace('|-|-|', '\\:', $tag);
      $value = str_replace('|-|-|', '\\:', $value);

      if ($in_header) {
        $header[$tag][] = $value;
      }
      else {
        $stanza[$tag][] = $value;
      }
    }
    fclose($fh);

    // now add the last term in the file
    if (sizeof($stanza) > 0) {
      _tripal_cv_obo_parse_store_stanza($stanza, $type);
      $percent = sprintf("%.2f", ($num_read / $filesize) * 100);
      print "Parsing Line $line_num (" . $percent . "%). Memory: " . number_format(memory_get_usage()) . " bytes.\r";
      tripal_job_set_progress($jobid, intval(($num_read / $filesize) * 33.33333333));
    }
    return $default_db;
  }

  /**
   * Insert one parsed stanza into tripal_obo_temp; empty stanzas are ignored.
   *
   * Logs an error and stops the script when the insert fails.
   *
   * @param $stanza
   *   Array of tag => list of values collected for the stanza.
   * @param $type
   *   The stanza type taken from its '[...]' header line.
   */
  function _tripal_cv_obo_parse_store_stanza($stanza, $type) {
    if (sizeof($stanza) == 0) {
      return;
    }
    $values = array(
      'id' => isset($stanza['id'][0]) ? $stanza['id'][0] : NULL,
      'stanza' => base64_encode(serialize($stanza)),
      'type' => $type,
    );
    $options = array('statement_name' => 'ins_tripalobotemp_all');
    $success = tripal_core_chado_insert('tripal_obo_temp', $values, $options);
    if (!$success) {
      watchdog('T_obo_loader', "ERROR: Cannot insert stanza into temporary table.", array(), WATCHDOG_ERROR);
      exit;
    }
  }
  /**
   * Add database reference to cvterm
   *
   * The xref is split into a database name (the text before the first colon),
   * an accession (the text after it, up to the first space, quote, '{' or '[')
   * and an optional description enclosed in double quotes. An xref whose
   * database name is 'http' or 'https' is stored whole as the accession of a
   * database named 'URL'. The dbxref is then linked to the cvterm unless that
   * link already exists.
   *
   * @param $cvterm
   *   The cvterm record; its cvterm_id is used for the link.
   * @param $xref
   *   The xref text, e.g. DB:accession "description".
   *
   * @return
   *   TRUE when the link exists or was added, FALSE when the xref has no
   *   accession or one of the steps fails.
   *
   * @ingroup tripal_obo_loader
   */
  function tripal_cv_obo_add_cvterm_dbxref($cvterm, $xref) {
    $dbname = preg_replace('/^(.+?):.*$/', '$1', $xref);
    $accession = preg_replace('/^.+?:\s*(.*?)(\{.+$|\[.+$|\s.+$|\".+$|$)/', '$1', $xref);

    // the description is optional; match instead of replace so that an xref
    // without a quoted description yields '' rather than the whole xref
    $description = '';
    $matches = array();
    if (preg_match('/^.+?\"(.+?)\".*?$/', $xref, $matches)) {
      $description = $matches[1];
    }

    // compare against the empty string so that an accession of "0" is accepted
    if ($accession === NULL or $accession === '') {
      tripal_cv_obo_quiterror("Cannot add a dbxref without an accession: '$xref'");
      return FALSE;
    }

    // if the xref is a database link, handle that specially
    if (strcmp($dbname, 'http') == 0 or strcmp($dbname, 'https') == 0) {
      $accession = $xref;
      $dbname = 'URL';
    }

    // add the database
    $db = tripal_db_add_db($dbname);
    if (!$db) {
      tripal_cv_obo_quiterror("Cannot find database '$dbname' in Chado.");
      return FALSE;
    }

    // now add the dbxref
    $dbxref = tripal_cv_obo_add_dbxref($db->db_id, $accession, '', $description);
    if (!$dbxref) {
      tripal_cv_obo_quiterror("Cannot find or add the database reference (dbxref)");
      return FALSE;
    }

    // finally add the cvterm_dbxref but first check to make sure it exists
    $values = array(
      'cvterm_id' => $cvterm->cvterm_id,
      'dbxref_id' => $dbxref->dbxref_id,
    );
    $options = array('statement_name' => 'sel_cvtermdbxref_cvdb');
    $result = tripal_core_chado_select('cvterm_dbxref', array('*'), $values, $options);
    if (count($result) == 0) {
      $ins_options = array(
        'statement_name' => 'ins_cvtermdbxref_cvdb',
        'return_record' => FALSE
      );
      $result = tripal_core_chado_insert('cvterm_dbxref', $values, $ins_options);
      if (!$result) {
        tripal_cv_obo_quiterror("Cannot add cvterm_dbxref: $xref");
        return FALSE;
      }
    }
    return TRUE;
  }
  /**
   * Attach a property value to a cvterm.
   *
   * The property type is a cvterm named $property in the 'cvterm_property_type'
   * CV; it is created with the id "internal:$property" when it is not there
   * yet. When $rank is 0 every cvtermprop row of the term is deleted before the
   * new value is inserted.
   *
   * @param $cvterm
   *   The cvterm record that receives the property (its cvterm_id is used).
   * @param $property
   *   Name of the property type.
   * @param $value
   *   The property value.
   * @param $rank
   *   Rank stored with the value.
   *
   * @return
   *   TRUE when the property was inserted, FALSE when a step failed.
   *
   * @ingroup tripal_obo_loader
   */
  function tripal_cv_obo_add_cvterm_prop($cvterm, $property, $value, $rank) {
    // the property types live in their own CV
    $prop_cv = tripal_cv_add_cv('cvterm_property_type', '');
    if (!$prop_cv) {
      tripal_cv_obo_quiterror("Cannot add/find cvterm_property_type cvterm");
    }

    // look the property type up and create it when the lookup comes back empty
    $matching_types = tripal_core_chado_select(
      'cvterm',
      array('*'),
      array(
        'name' => $property,
        'cv_id' => $prop_cv->cv_id,
      ),
      array('statement_name' => 'sel_cvterm_nacv_na')
    );
    if (count($matching_types) != 0) {
      $prop_type = $matching_types[0];
    }
    else {
      $new_type = array(
        'name' => $property,
        'id' => "internal:$property",
        'definition' => '',
        'is_obsolete' => 0,
      );
      $prop_type = tripal_cv_add_cvterm($new_type, $prop_cv->name, 0, 0);
      if (!$prop_type) {
        tripal_cv_obo_quiterror("Cannot add cvterm property: internal:$property");
        return FALSE;
      }
    }

    // rank 0 starts over: clear what is stored for the term first
    if ($rank == 0) {
      $cleared = tripal_core_chado_delete(
        'cvtermprop',
        array('cvterm_id' => $cvterm->cvterm_id),
        array('statement_name' => 'del_cvtermprop_cv')
      );
      if (!$cleared) {
        tripal_cv_obo_quiterror("Could not remove existing properties to update property $property for term\n");
        return FALSE;
      }
    }

    // store the new value
    $stored = tripal_core_chado_insert(
      'cvtermprop',
      array(
        'cvterm_id' => $cvterm->cvterm_id,
        'type_id' => $prop_type->cvterm_id,
        'value' => $value,
        'rank' => $rank,
      ),
      array(
        'statement_name' => 'ins_cvtermprop_cvtyvara',
        'return_record' => FALSE,
      )
    );
    if (!$stored) {
      tripal_cv_obo_quiterror("Could not add property $property for term\n");
      return FALSE;
    }
    return TRUE;
  }
  /**
   * Find a dbxref by database and accession, inserting it when it is missing.
   *
   * @param $db_id
   *   Id of the database the reference belongs to.
   * @param $accession
   *   Accession of the reference.
   * @param $version
   *   Version stored with a newly inserted reference.
   * @param $description
   *   Description stored with a newly inserted reference.
   *
   * @return
   *   The first record returned by the dbxref lookup (only dbxref_id is
   *   selected), or FALSE when the insert fails.
   *
   * @ingroup tripal_obo_loader
   */
  function tripal_cv_obo_add_dbxref($db_id, $accession, $version='', $description='') {
    $lookup = array(
      'db_id' => $db_id,
      'accession' => $accession,
    );
    $lookup_options = array('statement_name' => 'sel_dbxref_idac');

    // an existing reference is returned as it is
    $found = tripal_core_chado_select('dbxref', array('dbxref_id'), $lookup, $lookup_options);
    if (count($found) != 0) {
      return $found[0];
    }

    // nothing there yet, so insert the reference
    $inserted = tripal_core_chado_insert(
      'dbxref',
      array(
        'db_id' => $db_id,
        'accession' => $accession,
        'version' => $version,
        'description' => $description,
      ),
      array(
        'statement_name' => 'ins_dbxref_idacvede',
        'return_record' => FALSE
      )
    );
    if (!$inserted) {
      tripal_cv_obo_quiterror("Failed to insert the dbxref record $accession");
      return FALSE;
    }

    // the insert does not hand the record back, so look it up once more
    $found = tripal_core_chado_select('dbxref', array('dbxref_id'), $lookup, $lookup_options);
    return $found[0];
  }