tripal_chado.obo_loader.inc 48 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459
  1. <?php
  2. /**
  3. * @file
  4. * Functions to aid in loading ontologies into the chado cv module
  5. */
  6. /**
  7. * @defgroup tripal_obo_loader Ontology Loader
  8. * @ingroup tripal_chado
  9. * @{
  10. * Functions to aid in loading ontologies into the chado cv module
  11. * @}
  12. */
  13. /**
  14. * Provides the form to load an already existing controlled
  15. * Vocabulary into chado
  16. *
  17. * @param $form
  18. * The form array
  19. * @param $form_state
  20. * The form state array
  21. *
  22. * @return
  23. * The form array with new additions
  24. *
  25. * @ingroup tripal_obo_loader
  26. */
  27. function tripal_cv_obo_form($form, &$form_state) {
  28. // get a list of db from chado for user to choose
  29. $sql = "SELECT * FROM {tripal_cv_obo} ORDER BY name";
  30. $results = db_query($sql);
  31. $obos = array();
  32. $obos[] = 'Select a Vocabulary';
  33. foreach ($results as $obo) {
  34. $obos[$obo->obo_id] = $obo->name;
  35. }
  36. $obo_id = '';
  37. if (array_key_exists('values', $form_state)) {
  38. $obo_id = array_key_exists('obo_id', $form_state['values']) ? $form_state['values']['obo_id'] : '';
  39. }
  40. $form['instructions']['info'] = array(
  41. '#type' => 'item',
  42. '#markup' => t('This page allows you to load vocabularies and ontologies
  43. that are in OBO format. Once loaded, the terms from these
  44. vocabularies can be used to create content.
  45. You may use the form below to either reload a vocabulary that is already
  46. loaded (as when new updates to that vocabulary are available) or load a new
  47. vocabulary.'),
  48. );
  49. $form['obo_existing'] = array(
  50. '#type' => 'fieldset',
  51. '#title' => t('Use a Saved Ontology OBO Reference'),
  52. '#prefix' => '<span id="obo-existing-fieldset">',
  53. '#suffix' => '</span>'
  54. );
  55. $form['obo_existing']['existing_instructions']= array(
  56. '#type' => 'item',
  57. '#markup' => t('The vocabularies listed in the select box below have bene pre-populated
  58. upon installation of Tripal or have been previously loaded. Select one to edit
  59. its settings or submit for loading. You may reload any vocabulary that has
  60. already been loaded to retrieve any new updates.'),
  61. );
  62. $form['obo_existing']['obo_id'] = array(
  63. '#title' => t('Ontology OBO File Reference'),
  64. '#type' => 'select',
  65. '#options' => $obos,
  66. '#ajax' => array(
  67. 'callback' => 'tripal_cv_obo_form_ajax_callback',
  68. 'wrapper' => 'obo-existing-fieldset',
  69. ),
  70. '#description' => t('Select a vocabulary to import.')
  71. );
  72. // If the user has selected an OBO ID then get the form elements for
  73. // updating.
  74. if ($obo_id) {
  75. $uobo_name = '';
  76. $uobo_url = '';
  77. $uobo_file = '';
  78. $vocab = db_select('tripal_cv_obo', 't')
  79. ->fields('t', array('name', 'path'))
  80. ->condition('obo_id', $obo_id)
  81. ->execute()
  82. ->fetchObject();
  83. $uobo_name = $vocab->name;
  84. if (preg_match('/^http/', $vocab->path)) {
  85. $uobo_url = $vocab->path;
  86. }
  87. else {
  88. $uobo_file = trim($vocab->path);
  89. $matches = array();
  90. if (preg_match('/\{(.*?)\}/', $uobo_file, $matches)) {
  91. $modpath = drupal_get_path('module', $matches[1]);
  92. $uobo_file = preg_replace('/\{.*?\}/', $modpath, $uobo_file);
  93. }
  94. }
  95. // We don't want the previous value to remain. We want the new default to
  96. // show up, so remove the input values
  97. unset($form_state['input']['uobo_name']);
  98. unset($form_state['input']['uobo_url']);
  99. unset($form_state['input']['uobo_file']);
  100. $form['obo_existing']['uobo_name']= array(
  101. '#type' => 'textfield',
  102. '#title' => t('Vocabulary Name'),
  103. '#description' => t('Please provide a name for this vocabulary. After upload, this name will appear in the drop down
  104. list above for use again later.'),
  105. '#default_value' => $uobo_name,
  106. );
  107. $form['obo_existing']['uobo_url']= array(
  108. '#type' => 'textfield',
  109. '#title' => t('Remote URL'),
  110. '#description' => t('Please enter a URL for the online OBO file. The file will be downloaded and parsed.
  111. (e.g. http://www.obofoundry.org/ro/ro.obo'),
  112. '#default_value' => $uobo_url,
  113. );
  114. $form['obo_existing']['uobo_file']= array(
  115. '#type' => 'textfield',
  116. '#title' => t('Local File'),
  117. '#description' => t('Please enter the file system path for an OBO
  118. definition file. If entering a path relative to
  119. the Drupal installation you may use a relative path that excludes the
  120. Drupal installation directory (e.g. sites/default/files/xyz.obo). Note
  121. that Drupal relative paths have no preceeding slash.
  122. Otherwise, please provide the full path on the filesystem. The path
  123. must be accessible to the web server on which this Drupal instance is running.'),
  124. '#default_value' => $uobo_file,
  125. );
  126. $form['obo_existing']['update_obo_details'] = array(
  127. '#type' => 'submit',
  128. '#value' => 'Update Ontology Details',
  129. '#name' => 'update_obo_details'
  130. );
  131. $form['obo_existing']['update_load_obo'] = array(
  132. '#type' => 'submit',
  133. '#value' => 'Import Vocabulary',
  134. '#name' => 'update_load_obo'
  135. );
  136. }
  137. $form['obo_new'] = array(
  138. '#type' => 'fieldset',
  139. '#title' => t('Add a New Ontology OBO Reference'),
  140. '#collapsible' => TRUE,
  141. '#collapsed' => TRUE,
  142. );
  143. $form['obo_new']['path_instructions']= array(
  144. '#value' => t('Provide the name and path for the OBO file. If the vocabulary OBO file
  145. is stored local to the server provide a file name. If the vocabulry is stored remotely,
  146. provide a URL. Only provide a URL or a local file, not both.'),
  147. );
  148. $form['obo_new']['obo_name']= array(
  149. '#type' => 'textfield',
  150. '#title' => t('New Vocabulary Name'),
  151. '#description' => t('Please provide a name for this vocabulary. After upload, this name will appear in the drop down
  152. list above for use again later.'),
  153. );
  154. $form['obo_new']['obo_url']= array(
  155. '#type' => 'textfield',
  156. '#title' => t('Remote URL'),
  157. '#description' => t('Please enter a URL for the online OBO file. The file will be downloaded and parsed.
  158. (e.g. http://www.obofoundry.org/ro/ro.obo'),
  159. );
  160. $form['obo_new']['obo_file']= array(
  161. '#type' => 'textfield',
  162. '#title' => t('Local File'),
  163. '#description' => t('Please enter the file system path for an OBO
  164. definition file. If entering a path relative to
  165. the Drupal installation you may use a relative path that excludes the
  166. Drupal installation directory (e.g. sites/default/files/xyz.obo). Note
  167. that Drupal relative paths have no preceeding slash.
  168. Otherwise, please provide the full path on the filesystem. The path
  169. must be accessible to the web server on which this Drupal instance is running.'),
  170. );
  171. $form['obo_new']['add_new_obo'] = array(
  172. '#type' => 'submit',
  173. '#value' => t('Add this vocabulary'),
  174. '#name' => 'add_new_obo',
  175. );
  176. $form['#redirect'] = 'admin/tripal/tripal_chado/obo_loader';
  177. return $form;
  178. }
  179. /**
  180. *
  181. * @param $form
  182. * @param $form_state
  183. */
  184. function tripal_cv_obo_form_validate($form, &$form_state) {
  185. $obo_id = $form_state['values']['obo_id'];
  186. $obo_name = trim($form_state['values']['obo_name']);
  187. $obo_url = trim($form_state['values']['obo_url']);
  188. $obo_file = trim($form_state['values']['obo_file']);
  189. $uobo_name = array_key_exists('uobo_name', $form_state['values']) ? trim($form_state['values']['uobo_name']) : '';
  190. $uobo_url = array_key_exists('uobo_url', $form_state['values']) ? trim($form_state['values']['uobo_url']) : '';
  191. $uobo_file = array_key_exists('uobo_file', $form_state['values']) ? trim($form_state['values']['uobo_file']) : '';
  192. // Make sure if the name is changed it doesn't conflict with another OBO.
  193. if ($form_state['clicked_button']['#name'] == 'update_obo_details' or
  194. $form_state['clicked_button']['#name'] == 'update_load_obo') {
  195. // Get the current record
  196. $vocab = db_select('tripal_cv_obo', 't')
  197. ->fields('t', array('obo_id', 'name', 'path'))
  198. ->condition('name', $uobo_name)
  199. ->execute()
  200. ->fetchObject();
  201. if ($vocab and $vocab->obo_id != $obo_id) {
  202. form_set_error('uobo_name', 'The vocabulary name must be different from existing vocabularies');
  203. }
  204. // Make sure the file exists. First check if it is a relative path
  205. $dfile = $_SERVER['DOCUMENT_ROOT'] . base_path() . $uobo_file;
  206. if (!file_exists($dfile)) {
  207. if (!file_exists($uobo_file)) {
  208. form_set_error('uobo_file', 'The specified path does not exist or cannot be read.');
  209. }
  210. }
  211. if (!$uobo_url and !$uobo_file) {
  212. form_set_error('uobo_url', 'Please provide a URL or a path for the vocabulary.');
  213. }
  214. if ($uobo_url and $uobo_file) {
  215. form_set_error('uobo_url', 'Please provide only a URL or a path for the vocabulary, but not both.');
  216. }
  217. }
  218. if ($form_state['clicked_button']['#name'] == 'add_new_obo') {
  219. // Get the current record
  220. $vocab = db_select('tripal_cv_obo', 't')
  221. ->fields('t', array('obo_id', 'name', 'path'))
  222. ->condition('name', $obo_name)
  223. ->execute()
  224. ->fetchObject();
  225. if ($vocab) {
  226. form_set_error('obo_name', 'The vocabulary name must be different from existing vocabularies');
  227. }
  228. // Make sure the file exists. First check if it is a relative path
  229. $dfile = $_SERVER['DOCUMENT_ROOT'] . base_path() . $obo_file;
  230. if (!file_exists($dfile)) {
  231. if (!file_exists($obo_file)) {
  232. form_set_error('obo_file', 'The specified path does not exist or cannot be read.');
  233. }
  234. }
  235. if (!$obo_url and !$obo_file) {
  236. form_set_error('obo_url', 'Please provide a URL or a path for the vocabulary.');
  237. }
  238. if ($obo_url and $obo_file) {
  239. form_set_error('obo_url', 'Please provide only a URL or a path for the vocabulary, but not both.');
  240. }
  241. }
  242. }
  243. /**
  244. * The submit function for the load ontology form. It registers a
  245. * tripal job to import the user specified ontology file
  246. *
  247. * @param $form
  248. * The form array
  249. * @param $form_state
  250. * The form state array
  251. *
  252. *
  253. * @ingroup tripal_obo_loader
  254. */
  255. function tripal_cv_obo_form_submit($form, &$form_state) {
  256. $obo_id = $form_state['values']['obo_id'];
  257. $obo_name = trim($form_state['values']['obo_name']);
  258. $obo_url = trim($form_state['values']['obo_url']);
  259. $obo_file = trim($form_state['values']['obo_file']);
  260. $uobo_name = array_key_exists('uobo_name', $form_state['values']) ? trim($form_state['values']['uobo_name']) : '';
  261. $uobo_url = array_key_exists('uobo_url', $form_state['values']) ? trim($form_state['values']['uobo_url']) : '';
  262. $uobo_file = array_key_exists('uobo_file', $form_state['values']) ? trim($form_state['values']['uobo_file']) : '';
  263. // If the user requested to alter the details then do that.
  264. if ($form_state['clicked_button']['#name'] == 'update_obo_details' or
  265. $form_state['clicked_button']['#name'] == 'update_load_obo') {
  266. $success = db_update('tripal_cv_obo')
  267. ->fields(array(
  268. 'name' => $uobo_name,
  269. 'path' => $uobo_url ? $uobo_url : $uobo_file,
  270. ))
  271. ->condition('obo_id', $obo_id)
  272. ->execute();
  273. if ($success) {
  274. drupal_set_message(t("The vocabulary %vocab has been updated.", array('%vocab' => $uobo_name)));
  275. }
  276. else {
  277. drupal_set_message(t("The vocabulary %vocab could not be updated.", array('%vocab' => $uobo_name)), 'error');
  278. }
  279. }
  280. // If the user requested to update and load then we've already handled the
  281. // update now we just need to load.
  282. if ($form_state['clicked_button']['#name'] == 'update_load_obo') {
  283. tripal_submit_obo_job(array('obo_id' => $obo_id));
  284. }
  285. if ($form_state['clicked_button']['#name'] == 'add_new_obo') {
  286. $success = db_insert('tripal_cv_obo')
  287. ->fields(array(
  288. 'name' => $obo_name,
  289. 'path' => $obo_url ? $obo_url : $obo_file,
  290. ))
  291. ->execute();
  292. if ($success) {
  293. drupal_set_message(t("The vocabulary %vocab has been added.", array('%vocab' => $obo_name)));
  294. }
  295. else {
  296. drupal_set_message(t("The vocabulary %vocab could not be added.", array('%vocab' => $obo_name)), 'error');
  297. }
  298. }
  299. }
  300. /**
  301. * A wrapper function for importing the user specified OBO file into Chado by
  302. * specifying the obo_id of the OBO. It requires that the file be in OBO v1.2
  303. * compatible format. This function is typically executed via the Tripal jobs
  304. * management after a user submits a job via the Load Onotloies form.
  305. *
  306. * @param $obo_id
  307. * An obo_id from the tripal_cv_obo file that specifies which OBO file to import
  308. * @param $job_id
  309. * The job_id of the job from the Tripal jobs management system.
  310. *
  311. * @ingroup tripal_obo_loader
  312. */
  313. function tripal_chado_load_obo_v1_2_id($obo_id, $jobid = NULL) {
  314. // Get the OBO reference.
  315. $sql = "SELECT * FROM {tripal_cv_obo} WHERE obo_id = :obo_id";
  316. $obo = db_query($sql, array(':obo_id' => $obo_id))->fetchObject();
  317. // Convert the module name to the real path if present
  318. if (preg_match("/\{(.*?)\}/", $obo->path, $matches)) {
  319. $module = $matches[1];
  320. $path = drupal_realpath(drupal_get_path('module', $module));
  321. $obo->path = preg_replace("/\{.*?\}/", $path, $obo->path);
  322. }
  323. // if the reference is for a remote URL then run the URL processing function
  324. if (preg_match("/^https:\/\//", $obo->path) or
  325. preg_match("/^http:\/\//", $obo->path) or
  326. preg_match("/^ftp:\/\//", $obo->path)) {
  327. tripal_chado_load_obo_v1_2_url($obo->name, $obo->path, $jobid, 0);
  328. }
  329. // if the reference is for a local file then run the file processing function
  330. else {
  331. // check to see if the file is located local to Drupal
  332. $dfile = $_SERVER['DOCUMENT_ROOT'] . base_path() . $obo->path;
  333. if (file_exists($dfile)) {
  334. tripal_chado_load_obo_v1_2_file($obo->name, $dfile , $jobid, 0);
  335. }
  336. // if not local to Drupal, the file must be someplace else, just use
  337. // the full path provided
  338. else {
  339. if (file_exists($obo->path)) {
  340. tripal_chado_load_obo_v1_2_file($obo->name, $obo->path, $jobid, 0);
  341. }
  342. else {
  343. print "ERROR: could not find OBO file: '$obo->path'\n";
  344. }
  345. }
  346. }
  347. }
  348. /**
  349. * A wrapper function for importing the user specified OBO file into Chado by
  350. * specifying the filename and path of the OBO. It requires that the file be in OBO v1.2
  351. * compatible format. This function is typically executed via the Tripal jobs
  352. * management after a user submits a job via the Load Onotloies form.
  353. *
  354. * @param $obo_name
  355. * The name of the OBO (typially the ontology or controlled vocabulary name)
  356. * @param $file
  357. * The path on the file system where the ontology can be found
  358. * @param $job_id
  359. * The job_id of the job from the Tripal jobs management system.
  360. * @param $is_new
  361. * Set to TRUE if this is a new ontology that does not yet exist in the
  362. * tripal_cv_obo table. If TRUE the OBO will be added to the table.
  363. *
  364. * @ingroup tripal_obo_loader
  365. */
  366. function tripal_chado_load_obo_v1_2_file($obo_name, $file, $jobid = NULL, $is_new = TRUE) {
  367. $newcvs = array();
  368. if ($is_new) {
  369. tripal_insert_obo($obo_name, $file);
  370. }
  371. $success = tripal_chado_load_obo_v1_2($file, $jobid, $newcvs);
  372. if ($success) {
  373. // update the cvtermpath table
  374. tripal_chado_load_update_cvtermpath($newcvs, $jobid);
  375. print "\nDone\n";
  376. }
  377. }
  378. /**
  379. * A wrapper function for importing the user specified OBO file into Chado by
  380. * specifying the remote URL of the OBO. It requires that the file be in OBO v1.2
  381. * compatible format. This function is typically executed via the Tripal jobs
  382. * management after a user submits a job via the Load Onotloies form.
  383. *
  384. * @param $obo_name
  385. * The name of the OBO (typially the ontology or controlled vocabulary name)
  386. * @param $url
  387. * The remote URL of the OBO file.
  388. * @param $job_id
  389. * The job_id of the job from the Tripal jobs management system.
  390. * @param $is_new
  391. * Set to TRUE if this is a new ontology that does not yet exist in the
  392. * tripal_cv_obo table. If TRUE the OBO will be added to the table.
  393. *
  394. * @ingroup tripal_obo_loader
  395. */
  396. function tripal_chado_load_obo_v1_2_url($obo_name, $url, $jobid = NULL, $is_new = TRUE) {
  397. $newcvs = array();
  398. // first download the OBO
  399. $temp = tempnam(sys_get_temp_dir(), 'obo_');
  400. print "Downloading URL $url, saving to $temp\n";
  401. $url_fh = fopen($url, "r");
  402. $obo_fh = fopen($temp, "w");
  403. if (!$url_fh) {
  404. tripal_cv_obo_quiterror("Unable to download the remote OBO file at $url. Could a firewall be blocking outgoing connections? " .
  405. " if you are unable to download the file you may manually downlod the OBO file and use the web interface to " .
  406. " specify the location of the file on your server.");
  407. }
  408. while (!feof($url_fh)) {
  409. fwrite($obo_fh, fread($url_fh, 255), 255);
  410. }
  411. fclose($url_fh);
  412. fclose($obo_fh);
  413. if ($is_new) {
  414. tripal_insert_obo($obo_name, $url);
  415. }
  416. // second, parse the OBO
  417. $success = tripal_chado_load_obo_v1_2($temp, $jobid, $newcvs);
  418. if ($success) {
  419. // update the cvtermpath table
  420. tripal_chado_load_update_cvtermpath($newcvs, $jobid);
  421. print "Done\n";
  422. }
  423. // now remove the temp file
  424. unlink($temp);
  425. }
  426. /**
  427. * A function for executing the cvtermpath function of Chado. This function
  428. * populates the cvtermpath table of Chado for quick lookup of term
  429. * relationships
  430. *
  431. * @param $newcvs
  432. * An associative array of controlled vocabularies to update. The key must be
  433. * the name of the vocabulary and the value the cv_id from the cv table of chado.
  434. * @param $jobid
  435. * The job_id of the job from the Tripal jobs management system.
  436. *
  437. * @ingroup tripal_obo_loader
  438. */
  439. function tripal_chado_load_update_cvtermpath($newcvs, $jobid) {
  440. print "\nUpdating cvtermpath table. This may take a while...\n";
  441. foreach ($newcvs as $namespace => $cvid) {
  442. tripal_update_cvtermpath($cvid, $jobid);
  443. }
  444. }
  445. /**
  446. * Imports a given OBO file into Chado. This function is usually called by
  447. * one of three wrapper functions: tripal_chado_load_obo_v1_2_id,
  448. * tripal_chado_load_obo_v1_2_file or tirpal_cv_load_obo_v1_2_url. But, it can
  449. * be called directly if the full path to an OBO file is available on the
  450. * file system.
  451. *
  452. * @param $flie
  453. * The full path to the OBO file on the file system
  454. * @param $jobid
  455. * The job_id of the job from the Tripal jobs management system.
  456. * @param $newcvs
  457. * An empty array passed by reference that upon return will contain the list
  458. * of newly added vocabularies. The key will contain the CV name and the
  459. * value the new cv_id
  460. *
  461. *
  462. * @ingroup tripal_obo_loader
  463. */
  464. function tripal_chado_load_obo_v1_2($file, $jobid = NULL, &$newcvs) {
  465. //$transaction = db_transaction();
  466. print "\nNOTE: Loading of this OBO file is performed using a database transaction. \n" .
  467. "If the load fails or is terminated prematurely then the entire set of \n" .
  468. "insertions/updates is rolled back and will not be found in the database\n\n";
  469. try {
  470. $header = array();
  471. // make sure our temporary table exists
  472. $ret = array();
  473. // empty the temp table
  474. $sql = "DELETE FROM {tripal_obo_temp}";
  475. chado_query($sql);
  476. print "Step 1: Preloading File $file\n";
  477. // parse the obo file
  478. $default_db = tripal_cv_obo_parse($file, $header, $jobid);
  479. // add the CV for this ontology to the database. The v1.2 definition
  480. // specifies a 'default-namespace' to be used if a 'namespace' is not
  481. // present for each stanza. Some ontologies have adopted the v1.4 method
  482. // in their v1.2 files and not including it.
  483. if (array_key_exists('default-namespace', $header)) {
  484. $defaultcv = tripal_insert_cv($header['default-namespace'][0], '');
  485. if (!$defaultcv) {
  486. tripal_cv_obo_quiterror('Cannot add namespace ' . $header['default-namespace'][0]);
  487. }
  488. $newcvs[$header['default-namespace'][0]] = $defaultcv->cv_id;
  489. }
  490. // if the 'default-namespace' is missing
  491. else {
  492. // look to see if an 'ontology' key is present. It is part of the v1.4
  493. // specification so it shouldn't be in the file, but just in case
  494. if (array_key_exists('ontology', $header)) {
  495. $defaultcv = tripal_insert_cv(strtoupper($header['ontology'][0]), '');
  496. if (!$defaultcv) {
  497. tripal_cv_obo_quiterror('Cannot add namespace ' . strtoupper($header['ontology'][0]));
  498. }
  499. $newcvs[strtoupper(strtoupper($header['ontology'][0]))] = $defaultcv->cv_id;
  500. }
  501. else {
  502. tripal_cv_obo_quiterror("Could not find a namespace for this OBO file.");
  503. }
  504. watchdog('t_obo_loader', "This OBO is missing the 'default-namespace' header. It is not possible to determine which vocabulary terms without a 'namespace' key should go. Instead, those terms will be placed in the '%vocab' vocabulary.",
  505. array('%vocab' => $defaultcv->name), WATCHDOG_WARNING);
  506. }
  507. // add any typedefs to the vocabulary first
  508. print "\nStep 2: Loading type defs...\n";
  509. tripal_cv_obo_load_typedefs($defaultcv, $newcvs, $default_db, $jobid);
  510. // next add terms to the vocabulary
  511. print "\nStep 3: Loading terms...\n";
  512. if (!tripal_cv_obo_process_terms($defaultcv, $jobid, $newcvs, $default_db)) {
  513. tripal_cv_obo_quiterror('Cannot add terms from this ontology');
  514. }
  515. }
  516. catch (Exception $e) {
  517. //$transaction->rollback();
  518. print "\n"; // make sure we start errors on new line
  519. print "FAILED. Rolling back database changes...\n";
  520. watchdog_exception('T_obo_loader', $e);
  521. return FALSE;
  522. }
  523. return TRUE;
  524. }
  525. /**
  526. * Immediately terminates loading of the OBO file.
  527. *
  528. * @param $message
  529. * The error message to present to the user
  530. *
  531. * @ingroup tripal_obo_loader
  532. */
  533. function tripal_cv_obo_quiterror($message) {
  534. tripal_report_error("T_obo_loader", TRIPAL_ERROR, $message, array());
  535. exit;
  536. }
  537. /**
  538. * OBO files are divided into a typedefs terms section and vocabulary terms section.
  539. * This function loads the typedef terms from the OBO.
  540. *
  541. * @param $defaultcv
  542. * A database object containing a record from the cv table for the
  543. * default controlled vocabulary
  544. * @param $newcvs
  545. * An associative array of controlled vocabularies for this OBO. The key must be
  546. * the name of the vocabulary and the value the cv_id from the cv table of chado.
  547. * @param $default_db
  548. * The name of the default database.
  549. * @param $jobid
  550. * The job_id of the job from the Tripal jobs management system.
  551. *
  552. * @ingroup tripal_obo_loader
  553. */
  554. function tripal_cv_obo_load_typedefs($defaultcv, $newcvs, $default_db, $jobid) {
  555. $sql = "SELECT * FROM {tripal_obo_temp} WHERE type = 'Typedef' ";
  556. $typedefs = chado_query($sql);
  557. $sql = "
  558. SELECT count(*) as num_terms
  559. FROM {tripal_obo_temp}
  560. WHERE type = 'Typedef'
  561. ";
  562. $result = chado_query($sql)->fetchObject();
  563. $count = $result->num_terms;
  564. // calculate the interval for updates
  565. $interval = intval($count * 0.0001);
  566. if ($interval < 1) {
  567. $interval = 1;
  568. }
  569. $i = 0;
  570. foreach ($typedefs as $typedef) {
  571. $term = unserialize(base64_decode($typedef->stanza));
  572. // update the job status every interval
  573. if ($i % $interval == 0) {
  574. $complete = ($i / $count) * 33.33333333;
  575. if ($jobid) {
  576. tripal_set_job_progress($jobid, intval($complete + 33.33333333));
  577. }
  578. printf("%d of %d records. (%0.2f%%) Memory: %s bytes\r", $i, $count, $complete * 3, number_format(memory_get_usage()));
  579. }
  580. tripal_cv_obo_process_term($term, $defaultcv->name, 1, $newcvs, $default_db);
  581. $i++;
  582. }
  583. // Set the final status.
  584. if ($count > 0) {
  585. $complete = ($i / $count) * 33.33333333;
  586. }
  587. else {
  588. $complete = 33.33333333;
  589. }
  590. if ($jobid) {
  591. tripal_set_job_progress($jobid, intval($complete + 33.33333333));
  592. }
  593. printf("%d of %d records. (%0.2f%%) Memory: %s bytes\r", $i, $count, $complete * 3, number_format(memory_get_usage()));
  594. return 1;
  595. }
  /**
   * Loads every 'Term' stanza held in the tripal_obo_temp table.
   *
   * Each stored stanza is decoded and handed to tripal_cv_obo_process_term()
   * as a non-relationship term. Progress is printed to the console and, when
   * a job id is supplied, reported to the jobs system in the range that
   * starts at roughly 66%.
   *
   * @param $defaultcv
   *   An object for the default controlled vocabulary. Its 'name' property is
   *   passed on as the default vocabulary name.
   * @param $jobid
   *   The job_id of the job from the Tripal jobs management system, or NULL
   *   to skip job progress updates.
   * @param $newcvs
   *   An array, passed by reference, that is handed on to
   *   tripal_cv_obo_process_term().
   * @param $default_db
   *   The name of the default database.
   *
   * @return
   *   Always 1.
   *
   * @ingroup tripal_obo_loader
   */
  function tripal_cv_obo_process_terms($defaultcv, $jobid = NULL, &$newcvs, $default_db) {
    $processed = 0;

    // Fetch the staged term stanzas in id order.
    $term_sql = "
      SELECT * FROM {tripal_obo_temp}
      WHERE type = 'Term'
      ORDER BY id
    ";
    $stanzas = chado_query($term_sql);

    // Count them so that progress can be expressed as a fraction.
    $count_sql = "
      SELECT count(*) as num_terms
      FROM {tripal_obo_temp}
      WHERE type = 'Term'
    ";
    $count = chado_query($count_sql)->fetchObject()->num_terms;

    // Report progress every 0.01% of the terms, but at least every term.
    $interval = max(1, intval($count * 0.0001));

    foreach ($stanzas as $record) {
      $term = unserialize(base64_decode($record->stanza));

      if ($processed % $interval == 0) {
        // This phase accounts for one third of the whole job.
        $complete = ($processed / $count) * 33.33333333;
        if ($jobid) {
          tripal_set_job_progress($jobid, intval($complete + 66.666666));
        }
        printf("%d of %d records. (%0.2f%%) Memory: %s bytes\r", $processed, $count, $complete * 3, number_format(memory_get_usage()));
      }

      // Add or update this term.
      $added = tripal_cv_obo_process_term($term, $defaultcv->name, 0, $newcvs, $default_db);
      if (!$added) {
        tripal_cv_obo_quiterror("Failed to process terms from the ontology");
      }
      $processed++;
    }

    // Report the final status; with no terms at all the phase counts as done.
    $complete = ($count > 0) ? ($processed / $count) * 33.33333333 : 33.33333333;
    if ($jobid) {
      tripal_set_job_progress($jobid, intval($complete + 66.666666));
    }
    printf("%d of %d records. (%0.2f%%) Memory: %s bytes\r", $processed, $count, $complete * 3, number_format(memory_get_usage()));

    return 1;
  }
  /**
   * Adds or updates a single term in Chado from its parsed OBO stanza.
   *
   * The cvterm is inserted (or updated) first; its alternate ids, synonyms,
   * comments, external database references, is_a links and other
   * relationships are then attached through the loader's helper functions.
   *
   * The following OBO tags are recognised but not loaded: is_anonymous,
   * intersection_of, union_of, disjoint_from, replaced_by, consider, use_term
   * and builtin.
   *
   * @param $term
   *   The stanza array. Each key is an OBO tag and each value is the list of
   *   values read for that tag (for example $term['id'][0]).
   * @param $defaultcv
   *   The name of the default controlled vocabulary.
   * @param $is_relationship
   *   Set to 1 if this term is a relationship term.
   * @param $newcvs
   *   An array passed by reference. When the stanza declares a namespace, the
   *   entry keyed by that namespace is set to the cv_id of the stored cvterm.
   * @param $default_db
   *   The name of the default database.
   *
   * @return
   *   Always 1. Failures are reported through tripal_cv_obo_quiterror().
   *
   * @ingroup tripal_obo_loader
   */
  function tripal_cv_obo_process_term($term, $defaultcv, $is_relationship = 0, &$newcvs, $default_db) {
    $has_namespace = array_key_exists('namespace', $term);

    // A term needs a vocabulary: either its own namespace or a non-empty
    // default. The check must only fail when both are missing.
    $no_default_cv = ($defaultcv === NULL or $defaultcv === FALSE or $defaultcv === '');
    if (!$has_namespace and $no_default_cv) {
      tripal_cv_obo_quiterror("Cannot add the term: no namespace defined. " . $term['id'][0]);
    }

    // Build the array expected by tripal_insert_cvterm(). Optional tags are
    // only copied when the stanza provides them.
    $t = array();
    $t['id'] = $term['id'][0];
    $t['name'] = $term['name'][0];
    $optional_tags = array(
      'def' => 'definition',
      'subset' => 'subset',
      'namespace' => 'namespace',
      'is_obsolete' => 'is_obsolete',
    );
    foreach ($optional_tags as $tag => $key) {
      if (array_key_exists($tag, $term)) {
        $t[$key] = $term[$tag][0];
      }
    }
    $t['cv_name'] = $defaultcv;
    $t['is_relationship'] = $is_relationship;
    $t['db_name'] = $default_db;

    // Add (or update) the cvterm.
    $cvterm = tripal_insert_cvterm($t, array('update_existing' => TRUE));
    if (!$cvterm) {
      tripal_cv_obo_quiterror("Cannot add the term " . $term['id'][0]);
    }
    if ($has_namespace) {
      $newcvs[$term['namespace'][0]] = $cvterm->cv_id;
    }

    // Alternate ids are stored as database references of the term.
    if (array_key_exists('alt_id', $term)) {
      foreach ($term['alt_id'] as $alt_id) {
        if (!tripal_cv_obo_add_cvterm_dbxref($cvterm, $alt_id)) {
          tripal_cv_obo_quiterror("Cannot add alternate id $alt_id");
        }
      }
    }

    // Rewrite the deprecated exact_synonym, narrow_synonym and broad_synonym
    // tags as v1.2 'synonym' values carrying an explicit scope, so that all
    // synonyms can be loaded in a single pass.
    $deprecated_scopes = array(
      'exact_synonym' => 'EXACT',
      'narrow_synonym' => 'NARROW',
      'broad_synonym' => 'BROAD',
    );
    foreach ($deprecated_scopes as $tag => $scope) {
      if (array_key_exists($tag, $term)) {
        foreach ($term[$tag] as $synonym) {
          $term['synonym'][] = preg_replace('/^\s*(\".+?\")(.*?)$/', '$1 ' . $scope . ' $2', $synonym);
        }
      }
    }
    if (array_key_exists('synonym', $term)) {
      if (!tripal_cv_obo_add_synonyms($term, $cvterm)) {
        tripal_cv_obo_quiterror("Cannot add/update synonyms");
      }
    }

    // Comments become ranked properties of the term.
    if (array_key_exists('comment', $term)) {
      $rank = 0;
      foreach ($term['comment'] as $comment) {
        if (!tripal_cv_obo_add_cvterm_prop($cvterm, 'comment', $comment, $rank)) {
          tripal_cv_obo_quiterror("Cannot add/update cvterm property");
        }
        $rank++;
      }
    }

    // External database references, including the xref_analog and xref_unk
    // variants, are all loaded the same way.
    foreach (array('xref', 'xref_analog', 'xref_unk') as $xref_tag) {
      if (!array_key_exists($xref_tag, $term)) {
        continue;
      }
      foreach ($term[$xref_tag] as $xref) {
        if (!tripal_cv_obo_add_cvterm_dbxref($cvterm, $xref)) {
          tripal_cv_obo_quiterror("Cannot add/update cvterm database reference (dbxref).");
        }
      }
    }

    // Add the is_a relationships for this cvterm.
    if (array_key_exists('is_a', $term)) {
      foreach ($term['is_a'] as $is_a) {
        if (!tripal_cv_obo_add_relationship($cvterm, $defaultcv, 'is_a', $is_a, $is_relationship, $default_db)) {
          tripal_cv_obo_quiterror("Cannot add relationship is_a: $is_a");
        }
      }
    }

    // Add the remaining relationships. Each value has the form
    // "<relationship> <object>". Note that 'has_part' relationships are
    // loaded for every vocabulary, the Gene Ontology included, even though GO
    // states that has_part should not be used for grouping annotations.
    if (array_key_exists('relationship', $term)) {
      foreach ($term['relationship'] as $value) {
        $rel = preg_replace('/^(.+?)\s.+?$/', '\1', $value);
        $object = preg_replace('/^.+?\s(.+?)$/', '\1', $value);
        if (!tripal_cv_obo_add_relationship($cvterm, $defaultcv, $rel, $object, $is_relationship, $default_db)) {
          tripal_cv_obo_quiterror("Cannot add relationship $rel: $object");
        }
      }
    }

    return 1;
  }
  /**
   * Adds a relationship between a cvterm and another term of the ontology.
   *
   * The relationship type term is looked up (or created) first in the default
   * database and, failing that, in the OBO_REL relationship ontology. The
   * object term is read from the temporary loading table, inserted or updated
   * as a cvterm, and a cvterm_relationship record linking subject, type and
   * object is added if one does not exist yet.
   *
   * @param $cvterm
   *   A database object for the subject cvterm.
   * @param $defaultcv
   *   The name of the default controlled vocabulary. It is used as the
   *   'cv_name' of the relationship term and of the object term.
   * @param $rel
   *   The relationship name.
   * @param $objname
   *   The id of the object term, as stored in the temporary loading table.
   * @param $object_is_relationship
   *   Set to 1 if the object term is a relationship term.
   * @param $default_db
   *   The name of the default database.
   *
   * @return
   *   TRUE. Failures are reported through tripal_cv_obo_quiterror().
   *
   * @ingroup tripal_obo_loader
   */
  function tripal_cv_obo_add_relationship($cvterm, $defaultcv, $rel,
    $objname, $object_is_relationship = 0, $default_db = 'OBO_REL') {

    // Make sure the relationship cvterm exists in the default database.
    $term = array(
      'name' => $rel,
      'id' => "$default_db:$rel",
      'definition' => '',
      'is_obsolete' => 0,
      'cv_name' => $defaultcv,
      'is_relationship' => TRUE,
      'db_name' => $default_db,
    );
    $relcvterm = tripal_insert_cvterm($term, array('update_existing' => FALSE));
    if (!$relcvterm) {
      // If the relationship term couldn't be found in the default_db provided
      // then do one more check to find it in the relationship ontology.
      $term = array(
        'name' => $rel,
        'id' => "OBO_REL:$rel",
        'definition' => '',
        'is_obsolete' => 0,
        'cv_name' => $defaultcv,
        'is_relationship' => TRUE,
        'db_name' => 'OBO_REL',
      );
      $relcvterm = tripal_insert_cvterm($term, array('update_existing' => FALSE));
      if (!$relcvterm) {
        tripal_cv_obo_quiterror("Cannot find the relationship term in the current ontology or in the relationship ontology: $rel\n");
      }
    }

    // Get the stanza of the object term from the temporary loading table.
    $oterm = tripal_cv_obo_get_term($objname);
    if (!$oterm) {
      tripal_cv_obo_quiterror("Could not find object term $objname\n");
    }

    // Build the object term array; optional tags are copied only if present.
    $objterm = array();
    $objterm['id'] = $oterm['id'][0];
    $objterm['name'] = $oterm['name'][0];
    $optional_tags = array(
      'def' => 'definition',
      'subset' => 'subset',
      'namespace' => 'namespace',
      'is_obsolete' => 'is_obsolete',
    );
    foreach ($optional_tags as $tag => $key) {
      if (array_key_exists($tag, $oterm)) {
        $objterm[$key] = $oterm[$tag][0];
      }
    }
    $objterm['cv_name'] = $defaultcv;
    $objterm['is_relationship'] = $object_is_relationship;
    $objterm['db_name'] = $default_db;

    $objcvterm = tripal_insert_cvterm($objterm, array('update_existing' => TRUE));
    if (!$objcvterm) {
      tripal_cv_obo_quiterror("Cannot add cvterm " . $oterm['name'][0]);
    }

    // Check to see if the cvterm_relationship already exists, if not add it.
    $values = array(
      'type_id' => $relcvterm->cvterm_id,
      'subject_id' => $cvterm->cvterm_id,
      'object_id' => $objcvterm->cvterm_id,
    );
    $result = chado_select_record('cvterm_relationship', array('*'), $values);
    if (count($result) == 0) {
      $options = array('return_record' => FALSE);
      $success = chado_insert_record('cvterm_relationship', $values, $options);
      if (!$success) {
        tripal_cv_obo_quiterror("Cannot add term relationship: '$cvterm->name' $rel '$objcvterm->name'");
      }
    }
    return TRUE;
  }
  /**
   * Retrieves the term array from the temp loading table for a given term id.
   *
   * @param $id
   *   The id of the term to retrieve.
   *
   * @return
   *   The unserialized stanza array of the first matching record, or FALSE
   *   when no record could be retrieved for the id.
   *
   * @ingroup tripal_obo_loader
   */
  function tripal_cv_obo_get_term($id) {
    $values = array('id' => $id);
    $result = chado_select_record('tripal_obo_temp', array('stanza'), $values);

    // Treat anything other than a non-empty list of rows as "not found", so
    // that a failed lookup is never counted or indexed as if it were a row.
    if (!is_array($result) or count($result) == 0) {
      return FALSE;
    }

    // The stanza was stored by this loader as base64-encoded serialized data.
    return unserialize(base64_decode($result[0]->stanza));
  }
  /**
   * Adds the synonyms of a term to the cvtermsynonym table.
   *
   * Each synonym value is split into its quoted text and its scope (EXACT,
   * BROAD, ...). The scope is stored as a term of the local 'synonym_type'
   * vocabulary, which is created on demand. A synonym that is already
   * recorded for the cvterm is left untouched. Database references attached
   * to a synonym are not loaded.
   *
   * @param $term
   *   An array representing the cvterm. Only its 'synonym' key, a list of
   *   synonym strings, is read; without that key nothing is added.
   * @param $cvterm
   *   The database object of the cvterm to which the synonyms will be added.
   *
   * @return
   *   TRUE. Failures are reported through tripal_cv_obo_quiterror().
   *
   * @ingroup tripal_obo_loader
   */
  function tripal_cv_obo_add_synonyms($term, $cvterm) {
    // Make sure we have a 'synonym_type' vocabulary.
    $syncv = tripal_insert_cv(
      'synonym_type',
      'A local vocabulary added for synonym types.'
    );
    if (!$syncv) {
      tripal_cv_obo_quiterror("Cannot add/find the synonym_type vocabulary");
    }

    if (!array_key_exists('synonym', $term)) {
      return TRUE;
    }

    foreach ($term['synonym'] as $synonym) {
      // Separate out the synonym text (the quoted part).
      $def = preg_replace('/^\s*"(.*)"\s*.*$/', '\1', $synonym);

      // The scope is the word that follows the quoted text ('EXACT', etc.).
      // When no scope word is present the synonym is treated as 'exact'
      // instead of falling back to the unparsed synonym string.
      $scope = 'exact';
      $matches = array();
      if (preg_match('/^.*"\s+([A-Za-z_]+)(?:\s.*)?$/', $synonym, $matches)) {
        $scope = drupal_strtolower($matches[1]);
      }

      // Make sure the synonym type exists in the 'synonym_type' vocabulary.
      $values = array(
        'name' => $scope,
        'cv_id' => array(
          'name' => 'synonym_type',
        ),
      );
      $syntype = tripal_get_cvterm($values);

      // If it doesn't exist then add it.
      if (!$syntype) {
        $type_term = array(
          'name' => $scope,
          'id' => "synonym_type:$scope",
          'definition' => '',
          'is_obsolete' => 0,
          'cv_name' => $syncv->name,
          'is_relationship' => FALSE,
        );
        $syntype = tripal_insert_cvterm($type_term, array('update_existing' => TRUE));
        if (!$syntype) {
          tripal_cv_obo_quiterror("Cannot add synonym type: synonym_type:$scope");
        }
      }

      // Only insert the synonym when the cvterm doesn't have it already.
      $values = array(
        'cvterm_id' => $cvterm->cvterm_id,
        'synonym' => $def,
      );
      $results = chado_select_record('cvtermsynonym', array('*'), $values);
      if (count($results) == 0) {
        $values['type_id'] = $syntype->cvterm_id;
        $options = array('return_record' => FALSE);
        $success = chado_insert_record('cvtermsynonym', $values, $options);
        if (!$success) {
          tripal_cv_obo_quiterror("Failed to insert the synonym for term: $cvterm->name ($def)");
        }
      }
    }
    return TRUE;
  }
  /**
   * Parses the OBO file and populates the temporary loading table.
   *
   * Header tags (everything before the first stanza) are collected into
   * $header. Every stanza is stored in the tripal_obo_temp table as a
   * base64-encoded serialized array, together with its id and its type
   * (the text between the square brackets of the stanza header).
   *
   * @param $obo_file
   *   The path on the file system where the ontology can be found.
   * @param $header
   *   An array passed by reference that will be populated with the header
   *   information from the OBO file. Each tag maps to the list of its values.
   * @param $jobid
   *   The job_id of the job from the Tripal jobs management system.
   *
   * @return
   *   The database prefix of the last 'id' value read from the file (the part
   *   before the first colon), or an empty string if none was found.
   *
   * @ingroup tripal_obo_loader
   */
  function tripal_cv_obo_parse($obo_file, &$header, $jobid) {
    $in_header = 1;
    $stanza = array();
    $type = '';
    $default_db = '';
    $line_num = 0;
    $num_read = 0;
    $intv_read = 0;

    // Progress is reported roughly every 1% of the file size.
    $filesize = filesize($obo_file);
    $interval = intval($filesize * 0.01);
    if ($interval < 1) {
      $interval = 1;
    }

    $fh = fopen($obo_file, 'r');
    if (!$fh) {
      tripal_report_error('T_obo_loader', TRIPAL_ERROR, "ERROR: Cannot open the OBO file for reading.", array());
      return $default_db;
    }

    // Iterate through the lines in the OBO file and parse the stanzas. The
    // strict comparison keeps a final line containing only "0" from ending
    // the loop early.
    while (($line = fgets($fh)) !== FALSE) {
      $line_num++;

      // Count bytes, because the total is compared with the file size.
      $size = strlen($line);
      $num_read += $size;
      $intv_read += $size;

      if ($intv_read >= $interval) {
        $percent = sprintf("%.2f", ($num_read / $filesize) * 100);
        print "Parsing Line $line_num (" . $percent . "%). Memory: " . number_format(memory_get_usage()) . " bytes.\r";
        if ($jobid) {
          // Parsing accounts for the first third of the whole job.
          tripal_set_job_progress($jobid, intval(($num_read / $filesize) * 33.33333333));
        }
        $intv_read = 0;
      }

      // Remove surrounding whitespace, including the newline.
      $line = trim($line);

      // Remove any special characters that may be hiding.
      $line = preg_replace('/[^(\x20-\x7F)]*/', '', $line);

      // Skip empty lines.
      if (strcmp($line, '') == 0) {
        continue;
      }

      // Remove comments from end of lines.
      // TODO: an escaped exclamation mark is treated as a comment start too.
      $line = preg_replace('/^(.*?)\!.*$/', '\1', $line);

      // A line that held nothing but a comment carries no tag.
      if (trim($line) === '') {
        continue;
      }

      // At the first stanza we're out of the header.
      if (preg_match('/^\s*\[/', $line)) {
        $in_header = 0;

        // Store the stanza we just finished reading.
        if (sizeof($stanza) > 0) {
          _tripal_cv_obo_parse_store_stanza($stanza, $type);
        }

        // Get the stanza type: Term, Typedef or Instance.
        $type = preg_replace('/^\s*\[\s*(.+?)\s*\]\s*$/', '\1', $line);

        // Start fresh with a new array.
        $stanza = array();
        continue;
      }

      // Break apart the line into the tag and value but ignore any escaped
      // colons: they are swapped for a placeholder while splitting.
      $line = str_replace('\:', '|-|-|', $line);
      $pair = explode(":", $line, 2);
      $tag = $pair[0];
      // A line without a colon has a tag but no value.
      $value = isset($pair[1]) ? trim($pair[1]) : '';

      // If this is the ID then look for the default DB.
      $matches = array();
      if ($tag == 'id' and preg_match('/^(.+?):.*$/', $value, $matches)) {
        $default_db = $matches[1];
      }

      // Put the escaped colons back.
      $tag = str_replace('|-|-|', '\:', $tag);
      $value = str_replace('|-|-|', '\:', $value);

      if ($in_header) {
        if (!array_key_exists($tag, $header)) {
          $header[$tag] = array();
        }
        $header[$tag][] = $value;
      }
      else {
        if (!array_key_exists($tag, $stanza)) {
          $stanza[$tag] = array();
        }
        $stanza[$tag][] = $value;
      }
    }
    fclose($fh);

    // Now add the last term in the file.
    if (sizeof($stanza) > 0) {
      _tripal_cv_obo_parse_store_stanza($stanza, $type);

      $percent = sprintf("%.2f", ($num_read / $filesize) * 100);
      print "Parsing Line $line_num (" . $percent . "%). Memory: " . number_format(memory_get_usage()) . " bytes.\r";
      if ($jobid) {
        tripal_set_job_progress($jobid, intval(($num_read / $filesize) * 33.33333333));
      }
    }
    return $default_db;
  }

  /**
   * Stores one parsed stanza in the temporary loading table.
   *
   * Private helper of tripal_cv_obo_parse(). Reports an error and stops the
   * script when the record cannot be inserted.
   *
   * @param $stanza
   *   The stanza array; its first 'id' value becomes the record id.
   * @param $type
   *   The stanza type read from the stanza header.
   */
  function _tripal_cv_obo_parse_store_stanza($stanza, $type) {
    $values = array(
      'id' => $stanza['id'][0],
      'stanza' => base64_encode(serialize($stanza)),
      'type' => $type,
    );
    $success = chado_insert_record('tripal_obo_temp', $values);
    if (!$success) {
      tripal_report_error('T_obo_loader', TRIPAL_ERROR, "ERROR: Cannot insert stanza into temporary table.", array());
      exit;
    }
  }
  /**
   * Adds a database reference to a cvterm.
   *
   * The cross reference is split into a database name (the text before the
   * first colon), an accession and an optional quoted description. The
   * database and the dbxref are added when missing, and the dbxref is then
   * linked to the cvterm unless that link already exists.
   *
   * @param $cvterm
   *   The database object of the cvterm to which the reference will be added.
   * @param $xref
   *   The cross reference. It should be of the form from the OBO
   *   specification.
   *
   * @return
   *   TRUE on success, FALSE when the reference has no accession or the
   *   cvterm_dbxref record could not be added.
   *
   * @ingroup tripal_obo_loader
   */
  function tripal_cv_obo_add_cvterm_dbxref($cvterm, $xref) {
    $dbname = preg_replace('/^(.+?):.*$/', '$1', $xref);
    $accession = preg_replace('/^.+?:\s*(.*?)(\{.+$|\[.+$|\s.+$|\".+$|$)/', '$1', $xref);

    // The description is optional: without a quoted part it stays empty
    // rather than defaulting to the whole cross reference.
    $description = '';
    $matches = array();
    if (preg_match('/^.+?\"(.+?)\".*?$/', $xref, $matches)) {
      $description = $matches[1];
    }

    if (!$accession) {
      tripal_cv_obo_quiterror("Cannot add a dbxref without an accession: '$xref'");
      return FALSE;
    }

    // If the xref is a web link, keep it whole and file it under 'URL'.
    if (strcmp($dbname, 'http') == 0 or strcmp($dbname, 'https') == 0) {
      $accession = $xref;
      $dbname = 'URL';
    }

    // Add the database.
    $db = tripal_insert_db(array('name' => $dbname));
    if (!$db) {
      tripal_cv_obo_quiterror("Cannot find database '$dbname' in Chado.");
    }

    // Now add the dbxref.
    $dbxref = tripal_cv_obo_add_dbxref($db->db_id, $accession, '', $description);
    if (!$dbxref) {
      tripal_cv_obo_quiterror("Cannot find or add the database reference (dbxref)");
    }

    // Finally add the cvterm_dbxref, unless the link is already there.
    $values = array(
      'cvterm_id' => $cvterm->cvterm_id,
      'dbxref_id' => $dbxref->dbxref_id,
    );
    $result = chado_select_record('cvterm_dbxref', array('*'), $values);
    if (count($result) == 0) {
      $ins_options = array('return_record' => FALSE);
      $result = chado_insert_record('cvterm_dbxref', $values, $ins_options);
      if (!$result) {
        tripal_cv_obo_quiterror("Cannot add cvterm_dbxref: $xref");
        return FALSE;
      }
    }
    return TRUE;
  }
  /**
   * Adds a property to a cvterm.
   *
   * The property type is a term of the 'cvterm_property_type' vocabulary and
   * is created when it does not exist yet. When the rank is 0, every
   * cvtermprop record of the cvterm is deleted before the new one is added.
   *
   * @param $cvterm
   *   A database object for the cvterm to which properties will be added.
   * @param $property
   *   The name of the property to add.
   * @param $value
   *   The value of the property.
   * @param $rank
   *   The rank of the property.
   *
   * @return
   *   TRUE when the property was added, FALSE otherwise.
   *
   * @ingroup tripal_obo_loader
   */
  function tripal_cv_obo_add_cvterm_prop($cvterm, $property, $value, $rank) {
    // The property types live in the 'cvterm_property_type' vocabulary.
    $prop_cv = tripal_insert_cv('cvterm_property_type', '');
    if (!$prop_cv) {
      tripal_cv_obo_quiterror("Cannot add/find cvterm_property_type cvterm");
    }

    // Look up the cvterm for the property type and create it when missing.
    $type_match = array(
      'name' => $property,
      'cv_id' => $prop_cv->cv_id,
    );
    $existing = chado_select_record('cvterm', array('*'), $type_match);
    if (count($existing) != 0) {
      $cvproptype = $existing[0];
    }
    else {
      $new_type = array(
        'name' => $property,
        'id' => "internal:$property",
        'definition' => '',
        'is_obsolete' => 0,
        'cv_name' => $prop_cv->name,
        'is_relationship' => FALSE,
      );
      $cvproptype = tripal_insert_cvterm($new_type, array('update_existing' => FALSE));
      if (!$cvproptype) {
        tripal_cv_obo_quiterror("Cannot add cvterm property: internal:$property");
        return FALSE;
      }
    }

    // The first property of a term (rank 0) resets the term's properties.
    // NOTE(review): the delete matches on cvterm_id only, so properties of
    // other types are removed too - confirm that this is intended.
    if ($rank == 0) {
      $deleted = chado_delete_record('cvtermprop', array('cvterm_id' => $cvterm->cvterm_id));
      if (!$deleted) {
        tripal_cv_obo_quiterror("Could not remove existing properties to update property $property for term\n");
        return FALSE;
      }
    }

    // Now add the property itself.
    $prop_record = array(
      'cvterm_id' => $cvterm->cvterm_id,
      'type_id' => $cvproptype->cvterm_id,
      'value' => $value,
      'rank' => $rank,
    );
    $inserted = chado_insert_record('cvtermprop', $prop_record, array('return_record' => FALSE));
    if (!$inserted) {
      tripal_cv_obo_quiterror("Could not add property $property for term\n");
      return FALSE;
    }
    return TRUE;
  }
  /**
   * Finds a database cross reference in the dbxref table, adding it if needed.
   *
   * @param $db_id
   *   The database ID of the cross reference.
   * @param $accession
   *   The cross reference's accession.
   * @param $version
   *   The version of the dbxref. Only used when the record is inserted.
   * @param $description
   *   The description of the cross reference. Only used when the record is
   *   inserted.
   *
   * @return
   *   The first dbxref record (selected with its dbxref_id column) matching
   *   the database and accession, or FALSE when the insert failed.
   *
   * @ingroup tripal_obo_loader
   */
  function tripal_cv_obo_add_dbxref($db_id, $accession, $version='', $description='') {
    $lookup = array(
      'db_id' => $db_id,
      'accession' => $accession,
    );

    // Return the existing record when there is one.
    $found = chado_select_record('dbxref', array('dbxref_id'), $lookup);
    if (count($found) != 0) {
      return $found[0];
    }

    // Otherwise insert the dbxref ...
    $new_record = array(
      'db_id' => $db_id,
      'accession' => $accession,
      'version' => $version,
      'description' => $description,
    );
    $inserted = chado_insert_record('dbxref', $new_record, array('return_record' => FALSE));
    if (!$inserted) {
      tripal_cv_obo_quiterror("Failed to insert the dbxref record $accession");
      return FALSE;
    }

    // ... and read it back to obtain its dbxref_id.
    $found = chado_select_record('dbxref', array('dbxref_id'), $lookup);
    return $found[0];
  }
  /**
   * AJAX callback for the OBO form.
   *
   * @param $form
   *   The form array.
   * @param $form_state
   *   The form state array (not used).
   *
   * @return
   *   The 'obo_existing' element of the form.
   */
  function tripal_cv_obo_form_ajax_callback($form, $form_state) {
    $obo_existing = $form['obo_existing'];
    return $obo_existing;
  }