OBOImporter.inc 59 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580158115821583158415851586158715881589159015911592159315941595159615971598159916001601160216031604160516061607160816091610161116121613161416151616161716181619162016211622162316241625162616271628162916301631163216331634163516361637163816391640164116421643164416451646164716481649165016511652165316541655165616571658165916601661166216631664166516661667166816691670167116721673167416751676167716781679168016811682
  1. <?php
  2. class OBOImporter extends TripalImporter {
  3. // --------------------------------------------------------------------------
  4. // EDITABLE STATIC CONSTANTS
  5. //
  6. // The following constants SHOULD be set for each descendent class. They are
  7. // used by the static functions to provide information to Drupal about
  8. // the field and its default widget and formatter.
  9. // --------------------------------------------------------------------------
  10. /**
  11. * The name of this loader. This name will be presented to the site
  12. * user.
  13. */
  14. public static $name = 'OBO Vocabulary Loader';
  15. /**
  16. * The machine name for this loader. This name will be used to construct
  17. * the URL for the loader.
  18. */
  19. public static $machine_name = 'chado_obo_loader';
  20. /**
  21. * A brief description for this loader. This description will be
  22. * presented to the site user.
  23. */
  24. public static $description = 'Import vocabularies and terms in OBO format.';
  25. /**
  26. * An array containing the extensions of allowed file types.
  27. */
  28. public static $file_types = array('obo');
  29. /**
  30. * Provides information to the user about the file upload. Typically this
  31. * may include a description of the file types allowed.
  32. */
  33. public static $upload_description = 'Please provide the details for importing a new OBO file. The file must have a .obo extension.';
  34. /**
  35. * The title that should appear above the upload button.
  36. */
  37. public static $upload_title = 'New OBO File';
  38. /**
  39. * If the loader should require an analysis record. To maintain provenance
  40. * we should always indicate where the data we are uploading comes from.
  41. * Tripal attempts to do this by associating uploaded files
  42. * with an analysis record. The analysis record provides the details for
  43. * how the file was created or obtained. Set this to FALSE if the loader
  44. * should not require an analysis when loading. If $use_analysis is set to
  45. * TRUE then the form values will have an 'analysis_id' key in the $form_state
  46. * array on submitted forms.
  47. */
  48. public static $use_analysis = FALSE;
  49. /**
  50. * If the $use_analysis value is set above then this value indicates if the
  51. * analysis should be required.
  52. */
  53. public static $require_analysis = TRUE;
  54. /**
  55. * Text that should appear on the button at the bottom of the importer
  56. * form.
  57. */
  58. public static $button_text = 'Import OBO File';
  59. /**
  60. * Indicates the methods that the file uploader will support.
  61. */
  62. public static $methods = array(
  63. // Allow the user to upload a file to the server.
  64. 'file_upload' => FALSE,
  65. // Allow the user to provide the path on the Tripal server for the file.
  66. 'file_local' => FALSE,
  67. // Allow the user to provide a remote URL for the file.
  68. 'file_remote' => FALSE,
  69. );
  70. /**
  71. * By default, all loaders are automatically added to the Admin >
  72. * Tripal > Data Loaders menu. However, if this loader should be
  73. * made available via a different menu path, then set it here. If the
  74. * value is empty then the path will be the default.
  75. */
  76. public static $menu_path = 'admin/tripal/loaders/chado_vocabs/obo_loader';
  77. public static $file_required = FALSE;
  78. /**
  79. * Keep track of vocabularies that have been added.
  80. *
  81. * @var array
  82. */
  83. private $newcvs = array();
  /**
   * Builds the OBO loader form.
   *
   * The form contains two fieldsets: one for selecting (and optionally
   * editing) an OBO reference already stored in the tripal_cv_obo table, and
   * one for registering a new OBO reference by name plus either a remote URL
   * or a local file path.
   *
   * @param array $form
   *   The form array to add the loader elements to.
   * @param array $form_state
   *   The current form state. Any stale 'input' values of the update fields
   *   are removed so the defaults of a newly selected vocabulary show up.
   *
   * @return array
   *   The form array with the loader elements added.
   *
   * @see TripalImporter::form()
   */
  public function form($form, &$form_state) {
    // Build the list of saved OBO references for the select box. Index 0 is
    // the "nothing selected" placeholder.
    $obos = array();
    $obos[] = 'Select a Vocabulary';
    $sql = "SELECT * FROM {tripal_cv_obo} ORDER BY name";
    $results = db_query($sql);
    foreach ($results as $obo) {
      $obos[$obo->obo_id] = $obo->name;
    }

    // Determine which OBO reference, if any, is currently selected.
    $obo_id = '';
    if (array_key_exists('values', $form_state)) {
      $obo_id = array_key_exists('obo_id', $form_state['values']) ? $form_state['values']['obo_id'] : '';
    }

    $form['instructions']['info'] = array(
      '#type' => 'item',
      '#markup' => t('This page allows you to load vocabularies and ontologies
        that are in OBO format. Once loaded, the terms from these
        vocabularies can be used to create content.
        You may use the form below to either reload a vocabulary that is already
        loaded (as when new updates to that vocabulary are available) or load a new
        vocabulary.'),
    );

    // The wrapper span is the target that the AJAX callback of the select box
    // replaces.
    $form['obo_existing'] = array(
      '#type' => 'fieldset',
      '#title' => t('Use a Saved Ontology OBO Reference'),
      '#prefix' => '<span id="obo-existing-fieldset">',
      '#suffix' => '</span>'
    );

    $form['obo_existing']['existing_instructions']= array(
      '#type' => 'item',
      '#markup' => t('The vocabularies listed in the select box below have bene pre-populated
        upon installation of Tripal or have been previously loaded. Select one to edit
        its settings or submit for loading. You may reload any vocabulary that has
        already been loaded to retrieve any new updates.'),
    );

    $form['obo_existing']['obo_id'] = array(
      '#title' => t('Ontology OBO File Reference'),
      '#type' => 'select',
      '#options' => $obos,
      '#ajax' => array(
        'callback' => 'tripal_cv_obo_form_ajax_callback',
        'wrapper' => 'obo-existing-fieldset',
      ),
      '#description' => t('Select a vocabulary to import.')
    );

    // If the user has selected an OBO ID then get the form elements for
    // updating.
    if ($obo_id) {
      $uobo_name = '';
      $uobo_url = '';
      $uobo_file = '';

      $vocab = db_select('tripal_cv_obo', 't')
        ->fields('t', array('name', 'path'))
        ->condition('obo_id', $obo_id)
        ->execute()
        ->fetchObject();

      // fetchObject() yields FALSE when no row matches the ID. In that case
      // keep the defaults empty instead of dereferencing a non-object.
      if ($vocab) {
        $uobo_name = $vocab->name;
        if (preg_match('/^http/', $vocab->path)) {
          $uobo_url = $vocab->path;
        }
        else {
          $uobo_file = trim($vocab->path);
          // A "{module_name}" token in the stored path is replaced by the
          // Drupal-relative path of that module.
          $matches = array();
          if (preg_match('/\{(.*?)\}/', $uobo_file, $matches)) {
            $modpath = drupal_get_path('module', $matches[1]);
            $uobo_file = preg_replace('/\{.*?\}/', $modpath, $uobo_file);
          }
        }
      }

      // We don't want the previous value to remain. We want the new default to
      // show up, so remove the input values.
      unset($form_state['input']['uobo_name']);
      unset($form_state['input']['uobo_url']);
      unset($form_state['input']['uobo_file']);

      $form['obo_existing']['uobo_name']= array(
        '#type' => 'textfield',
        '#title' => t('Vocabulary Name'),
        '#description' => t('Please provide a name for this vocabulary. After upload, this name will appear in the drop down
          list above for use again later.'),
        '#default_value' => $uobo_name,
      );

      $form['obo_existing']['uobo_url']= array(
        '#type' => 'textfield',
        '#title' => t('Remote URL'),
        '#description' => t('Please enter a URL for the online OBO file. The file will be downloaded and parsed.
          (e.g. http://www.obofoundry.org/ro/ro.obo)'),
        '#default_value' => $uobo_url,
      );

      $form['obo_existing']['uobo_file']= array(
        '#type' => 'textfield',
        '#title' => t('Local File'),
        '#description' => t('Please enter the file system path for an OBO
          definition file. If entering a path relative to
          the Drupal installation you may use a relative path that excludes the
          Drupal installation directory (e.g. sites/default/files/xyz.obo). Note
          that Drupal relative paths have no preceeding slash.
          Otherwise, please provide the full path on the filesystem. The path
          must be accessible to the web server on which this Drupal instance is running.'),
        '#default_value' => $uobo_file,
      );

      $form['obo_existing']['update_obo_details'] = array(
        '#type' => 'submit',
        '#value' => 'Update Ontology Details',
        '#name' => 'update_obo_details'
      );
    }

    $form['obo_new'] = array(
      '#type' => 'fieldset',
      '#title' => t('Add a New Ontology OBO Reference'),
      '#collapsible' => TRUE,
      '#collapsed' => TRUE,
    );

    // Rendered as an 'item' with '#markup' like the other instruction texts
    // of this form; a bare '#value' is not output by Drupal 7.
    $form['obo_new']['path_instructions']= array(
      '#type' => 'item',
      '#markup' => t('Provide the name and path for the OBO file. If the vocabulary OBO file
        is stored local to the server provide a file name. If the vocabulry is stored remotely,
        provide a URL. Only provide a URL or a local file, not both.'),
    );

    $form['obo_new']['obo_name']= array(
      '#type' => 'textfield',
      '#title' => t('New Vocabulary Name'),
      '#description' => t('Please provide a name for this vocabulary. After upload, this name will appear in the drop down
        list above for use again later. Additionally, if a default namespace is not provided in the OBO
        header this name will be used as the default_namespace.'),
    );

    $form['obo_new']['obo_url']= array(
      '#type' => 'textfield',
      '#title' => t('Remote URL'),
      '#description' => t('Please enter a URL for the online OBO file. The file will be downloaded and parsed.
        (e.g. http://www.obofoundry.org/ro/ro.obo)'),
    );

    $form['obo_new']['obo_file']= array(
      '#type' => 'textfield',
      '#title' => t('Local File'),
      '#description' => t('Please enter the file system path for an OBO
        definition file. If entering a path relative to
        the Drupal installation you may use a relative path that excludes the
        Drupal installation directory (e.g. sites/default/files/xyz.obo). Note
        that Drupal relative paths have no preceeding slash.
        Otherwise, please provide the full path on the filesystem. The path
        must be accessible to the web server on which this Drupal instance is running.'),
    );

    return $form;
  }
  /**
   * Saves changes to an existing OBO reference or registers a new one.
   *
   * When the 'update_obo_details' button was clicked, the selected
   * tripal_cv_obo record is updated with the submitted name and URL/path and
   * the form is rebuilt. Otherwise, if a new vocabulary name was entered, a
   * new tripal_cv_obo record is inserted and its ID is stored in
   * $form_state['values']['obo_id'].
   *
   * @param array $form
   *   The submitted form.
   * @param array $form_state
   *   The form state holding the submitted values.
   *
   * @see TripalImporter::formSubmit()
   */
  public function formSubmit($form, &$form_state) {
    $obo_id = $form_state['values']['obo_id'];
    $obo_name = trim($form_state['values']['obo_name']);
    $obo_url = trim($form_state['values']['obo_url']);
    $obo_file = trim($form_state['values']['obo_file']);

    // The update fields only exist once a vocabulary has been selected.
    $uobo_name = array_key_exists('uobo_name', $form_state['values']) ? trim($form_state['values']['uobo_name']) : '';
    $uobo_url = array_key_exists('uobo_url', $form_state['values']) ? trim($form_state['values']['uobo_url']) : '';
    $uobo_file = array_key_exists('uobo_file', $form_state['values']) ? trim($form_state['values']['uobo_file']) : '';

    // If the user requested to alter the details then do that.
    if ($form_state['clicked_button']['#name'] == 'update_obo_details') {
      $form_state['rebuild'] = TRUE;
      $success = db_update('tripal_cv_obo')
        ->fields(array(
          'name' => $uobo_name,
          // A URL takes precedence over a local file path.
          'path' => $uobo_url ? $uobo_url : $uobo_file,
        ))
        ->condition('obo_id', $obo_id)
        ->execute();

      // The vocabulary name is user input, so it is inserted with the
      // escaping '@' placeholder rather than the raw '!' placeholder.
      if ($success) {
        drupal_set_message(t("The vocabulary @vocab has been updated.", array('@vocab' => $uobo_name)));
      }
      else {
        drupal_set_message(t("The vocabulary @vocab could not be updated.", array('@vocab' => $uobo_name)), 'error');
      }
    }
    elseif (!empty($obo_name)) {
      $obo_id = db_insert('tripal_cv_obo')
        ->fields(array(
          'name' => $obo_name,
          'path' => $obo_url ? $obo_url : $obo_file,
        ))
        ->execute();

      // Add the obo_id to the form_state values.
      $form_state['values']['obo_id'] = $obo_id;

      if ($obo_id) {
        drupal_set_message(t("The vocabulary @vocab has been added.", array('@vocab' => $obo_name)));
      }
      else {
        $form_state['rebuild'] = TRUE;
        drupal_set_message(t("The vocabulary @vocab could not be added.", array('@vocab' => $obo_name)), 'error');
      }
    }
  }
  /**
   * Validates the submitted OBO reference details.
   *
   * Depending on the name of the clicked button, either the update fields
   * (uobo_*) or the new-vocabulary fields (obo_*) are checked.
   *
   * NOTE(review): the button names 'update_load_obo' and 'add_new_obo' are
   * not defined by form() in this class; presumably they come from elsewhere
   * or are leftovers. Confirm, because otherwise the new-vocabulary checks
   * never run.
   *
   * @param array $form
   *   The submitted form.
   * @param array $form_state
   *   The form state holding the submitted values.
   *
   * @see TripalImporter::formValidate()
   */
  public function formValidate($form, &$form_state) {
    $obo_id = $form_state['values']['obo_id'];
    $obo_name = trim($form_state['values']['obo_name']);
    $obo_url = trim($form_state['values']['obo_url']);
    $obo_file = trim($form_state['values']['obo_file']);

    // The update fields only exist once a vocabulary has been selected.
    $uobo_name = array_key_exists('uobo_name', $form_state['values']) ? trim($form_state['values']['uobo_name']) : '';
    $uobo_url = array_key_exists('uobo_url', $form_state['values']) ? trim($form_state['values']['uobo_url']) : '';
    $uobo_file = array_key_exists('uobo_file', $form_state['values']) ? trim($form_state['values']['uobo_file']) : '';

    $button = $form_state['clicked_button']['#name'];

    // Make sure if the name is changed it doesn't conflict with another OBO.
    if ($button == 'update_obo_details' or $button == 'update_load_obo') {
      $this->validateOboReference('uobo', $uobo_name, $uobo_url, $uobo_file, $obo_id);
    }

    // A new vocabulary must not reuse any existing name.
    if ($button == 'add_new_obo') {
      $this->validateOboReference('obo', $obo_name, $obo_url, $obo_file);
    }
  }

  /**
   * Checks one name/URL/file triple and flags form errors for problems.
   *
   * @param string $prefix
   *   The form element prefix: 'uobo' for the update fields, 'obo' for the
   *   new-vocabulary fields. Errors are set on "{$prefix}_name",
   *   "{$prefix}_url" and "{$prefix}_file".
   * @param string $name
   *   The submitted vocabulary name.
   * @param string $url
   *   The submitted remote URL (may be empty).
   * @param string $file
   *   The submitted local file path (may be empty).
   * @param mixed $obo_id
   *   The ID of the record being updated, or NULL when adding a new record.
   *   A record with this ID may keep its own name.
   */
  private function validateOboReference($prefix, $name, $url, $file, $obo_id = NULL) {
    // Look for a stored OBO reference that already uses this name.
    $vocab = db_select('tripal_cv_obo', 't')
      ->fields('t', array('obo_id', 'name', 'path'))
      ->condition('name', $name)
      ->execute()
      ->fetchObject();
    if ($vocab and ($obo_id === NULL or $vocab->obo_id != $obo_id)) {
      form_set_error($prefix . '_name', t('The vocabulary name must be different from existing vocabularies'));
    }

    // Make sure the file exists. First check if it is a path relative to the
    // Drupal installation, then try it as given. Skipped when no path was
    // entered so that a URL-only submission is not tested against the
    // Drupal root directory.
    if ($file !== '') {
      $dfile = $_SERVER['DOCUMENT_ROOT'] . base_path() . $file;
      if (!file_exists($dfile) and !file_exists($file)) {
        form_set_error($prefix . '_file', t('The specified path does not exist or cannot be read.'));
      }
    }

    // Exactly one of URL and path must be provided.
    if (!$url and !$file) {
      form_set_error($prefix . '_url', t('Please provide a URL or a path for the vocabulary.'));
    }
    if ($url and $file) {
      form_set_error($prefix . '_url', t('Please provide only a URL or a path for the vocabulary, but not both.'));
    }
  }
  /**
   * Imports the OBO identified by the 'obo_id' run argument.
   *
   * The ID is read from $this->arguments['run_args']['obo_id']. The matching
   * tripal_cv_obo record is loaded and handed to loadOBO_v1_2_id().
   *
   * @throws Exception
   *   If no tripal_cv_obo record exists for the provided ID.
   *
   * @see TripalImporter::run()
   */
  public function run() {
    $run_args = $this->arguments['run_args'];
    $obo_id = $run_args['obo_id'];

    // Fetch the stored OBO reference for this ID.
    $query = db_select('tripal_cv_obo', 'tco');
    $query->fields('tco');
    $query->condition('obo_id', $obo_id);
    $record = $query->execute()->fetchObject();

    // Without a matching record there is nothing to import.
    if ($record) {
      $this->loadOBO_v1_2_id($record);
      return;
    }

    throw new Exception("Invalid OBO ID provided: '$obo_id'.");
  }
  /**
   * Refreshes derived data after the import has run.
   *
   * Repopulates the cv_root_mview and db2cv_mview materialized views and then
   * updates the cvtermpath table for every vocabulary recorded in
   * $this->newcvs.
   *
   * @see TripalImporter::postRun()
   */
  public function postRun() {
    // Repopulate both materialized views, in this order.
    foreach (array('cv_root_mview', 'db2cv_mview') as $view_name) {
      $this->logMessage("Updating the $view_name materialized view...");
      $view_id = tripal_get_mview_id($view_name);
      tripal_populate_mview($view_id);
    }

    // Update the cvtermpath table for each newly added CV.
    $this->logMessage("Updating cvtermpath table. This may take a while...");
    foreach ($this->newcvs as $vocab_name => $cv_id) {
      $this->logMessage("- Loading paths for @vocab", array('@vocab' => $vocab_name));
      tripal_update_cvtermpath($cv_id);
    }
  }
  /**
   * Imports the OBO described by a tripal_cv_obo record into Chado.
   *
   * Resolves the record's path and dispatches to loadOBO_v1_2_url() for
   * http/https/ftp references or to loadOBO_v1_2_file() for local files. The
   * file must be in an OBO v1.2 compatible format.
   *
   * @param object $obo
   *   A record from the tripal_cv_obo table; its 'name' and 'path' properties
   *   are used. A "{module_name}" token in the path is replaced in place with
   *   that module's real path.
   *
   * @throws Exception
   *   If the path is local and no file can be found for it.
   *
   * @ingroup tripal_obo_loader
   */
  private function loadOBO_v1_2_id($obo) {
    // Convert the module name to the real path if present.
    $matches = array();
    if (preg_match("/\{(.*?)\}/", $obo->path, $matches)) {
      $module = $matches[1];
      $path = drupal_realpath(drupal_get_path('module', $module));
      $obo->path = preg_replace("/\{.*?\}/", $path, $obo->path);
    }

    // If the reference is for a remote URL then run the URL processing
    // function.
    if (preg_match("/^(https?|ftp):\/\//", $obo->path)) {
      $this->loadOBO_v1_2_url($obo->name, $obo->path, 0);
      return;
    }

    // Otherwise the reference is a local file. First check whether it is
    // located relative to the Drupal installation.
    $dfile = $_SERVER['DOCUMENT_ROOT'] . base_path() . $obo->path;
    if (file_exists($dfile)) {
      $this->loadOBO_v1_2_file($obo->name, $dfile, 0);
    }
    // If not local to Drupal, the file must be someplace else, so just use
    // the full path provided.
    elseif (file_exists($obo->path)) {
      $this->loadOBO_v1_2_file($obo->name, $obo->path, 0);
    }
    // Fail loudly: merely printing the problem would let the import appear
    // to have succeeded although nothing was loaded.
    else {
      throw new Exception("Could not find OBO file: '{$obo->path}'");
    }
  }
  /**
   * Imports an OBO v1.2 compatible file from the local file system.
   *
   * Thin wrapper around loadOBO_v1_2() that can first register the OBO via
   * tripal_insert_obo().
   *
   * @param string $obo_name
   *   The name of the OBO (typically the ontology or controlled vocabulary
   *   name).
   * @param string $file
   *   The path on the file system where the ontology can be found.
   * @param bool $is_new
   *   Set to TRUE if this is a new ontology that does not yet exist in the
   *   tripal_cv_obo table. If TRUE the OBO will be added to the table.
   *
   * @ingroup tripal_obo_loader
   */
  private function loadOBO_v1_2_file($obo_name, $file, $is_new = TRUE) {
    // Register the OBO reference before loading when flagged as new.
    if ($is_new) {
      tripal_insert_obo($obo_name, $file);
    }

    $this->loadOBO_v1_2($file, $obo_name);
  }
  /**
   * Downloads an OBO v1.2 compatible file from a URL and imports it.
   *
   * The remote file is copied to a temporary file, which is parsed with
   * loadOBO_v1_2() and removed afterwards, also when the import fails.
   *
   * @param string $obo_name
   *   The name of the OBO (typically the ontology or controlled vocabulary
   *   name).
   * @param string $url
   *   The remote URL of the OBO file.
   * @param bool $is_new
   *   Set to TRUE if this is a new ontology that does not yet exist in the
   *   tripal_cv_obo table. If TRUE the OBO will be added to the table.
   *
   * @throws Exception
   *   If the temporary file cannot be created or written, if the URL cannot
   *   be opened or copied, or if the import itself fails.
   *
   * @ingroup tripal_obo_loader
   */
  private function loadOBO_v1_2_url($obo_name, $url, $is_new = TRUE) {
    // First download the OBO.
    $temp = tempnam(sys_get_temp_dir(), 'obo_');
    if ($temp === FALSE) {
      throw new Exception("Unable to create a temporary file for downloading $url.");
    }
    print "Downloading URL $url, saving to $temp\n";

    $url_fh = fopen($url, "r");
    if (!$url_fh) {
      // Do not leave the empty temporary file behind.
      unlink($temp);
      throw new Exception("Unable to download the remote OBO file at $url. Could a firewall be blocking outgoing connections? " .
        " if you are unable to download the file you may manually downlod the OBO file and use the web interface to " .
        " specify the location of the file on your server.");
    }

    $obo_fh = fopen($temp, "w");
    if (!$obo_fh) {
      fclose($url_fh);
      unlink($temp);
      throw new Exception("Unable to open the temporary file $temp for writing.");
    }

    // Copy the whole stream in one call. Unlike a manual feof()/fread() loop
    // this cannot spin forever when the remote stream fails mid-transfer.
    $copied = stream_copy_to_stream($url_fh, $obo_fh);
    fclose($url_fh);
    fclose($obo_fh);
    if ($copied === FALSE) {
      unlink($temp);
      throw new Exception("Unable to save the remote OBO file at $url to $temp.");
    }

    try {
      if ($is_new) {
        tripal_insert_obo($obo_name, $url);
      }

      // Second, parse the OBO.
      $this->loadOBO_v1_2($temp, $obo_name);
    }
    catch (Exception $e) {
      // Remove the temp file before passing the failure on to the caller.
      unlink($temp);
      throw $e;
    }

    // Now remove the temp file.
    unlink($temp);
  }
  /**
   * Imports a given OBO file into Chado.
   *
   * This function is usually called by one of the wrapper functions
   * loadOBO_v1_2_file() or loadOBO_v1_2_url(), but it can be called directly
   * if the full path to an OBO file is available on the file system.
   *
   * The tripal_obo_temp table is emptied, the file is parsed with
   * $this->parse(), a default vocabulary is determined, and then the typedefs
   * and terms are loaded. The default vocabulary comes from the
   * 'default-namespace' header when present. Otherwise the prefix of the
   * first stanza ID is looked up through $this->oboEbiLookup().
   *
   * @param string $file
   *   The full path to the OBO file on the file system.
   * @param string $obo_name
   *   The name of the OBO. It is not used by this function.
   *
   * @throws Exception
   *   If the default vocabulary cannot be determined or added, if the file
   *   cannot be opened, or if the terms cannot be processed.
   *
   * @ingroup tripal_obo_loader
   */
  private function loadOBO_v1_2($file, $obo_name) {
    $header = array();

    // Empty the temp table.
    $sql = "DELETE FROM {tripal_obo_temp}";
    chado_query($sql);

    $this->logMessage("Step 1: Preloading File $file...");

    // Parse the OBO file. NOTE(review): parse() is defined outside this view;
    // presumably it fills $header by reference and returns the default
    // database name. Confirm.
    $default_db = $this->parse($file, $header);

    // Add the CV for this ontology to the database. The v1.2 definition
    // specifies a 'default-namespace' to be used if a 'namespace' is not
    // present for each stanza. Some ontologies have adopted the v1.4 method
    // in their v1.2 files and not including it.
    if (array_key_exists('default-namespace', $header)) {
      $defaultcv = tripal_insert_cv($header['default-namespace'][0], '');
      if (!$defaultcv) {
        throw new Exception('Cannot add namespace ' . $header['default-namespace'][0]);
      }
      $this->newcvs[$header['default-namespace'][0]] = $defaultcv->cv_id;
    }
    // If the 'default-namespace' is missing.
    else {
      $defaultcv = NULL;
      $short_name = '';
      $stanza = FALSE;

      // Grab the first term accession from the file and get the short name
      // for the cv.
      $fh = fopen($file, 'r');
      if (!$fh) {
        throw new Exception("Could not open the OBO file: $file");
      }
      while (($line = fgets($fh)) !== FALSE) {
        // A line starting with '[' opens a stanza; only IDs after the first
        // stanza header are considered.
        if (preg_match('/^\s*\[/', $line)) {
          $stanza = TRUE;
          continue;
        }
        // Grab the first item's id info to break apart: the text between the
        // first and second colon, lower-cased and without whitespace.
        if ($stanza === TRUE && (substr($line, 0, 3) === "id:")) {
          $parts = explode(':', $line);
          $short_name = strtolower($parts[1]);
          $short_name = preg_replace('/\s+/', '', $short_name);
          break;
        }
      }
      fclose($fh);

      // Check if the EBI ontology search has this ontology. Without a short
      // name there is nothing to look up.
      if ($short_name !== '') {
        try {
          $results = $this->oboEbiLookup($short_name, 'ontology');

          // Tolerate responses that lack the 'config' or 'annotations' keys.
          $config = array();
          if (is_array($results) && isset($results['config']) && is_array($results['config'])) {
            $config = $results['config'];
          }
          $annotations = array();
          if (isset($config['annotations']) && is_array($config['annotations'])) {
            $annotations = $config['annotations'];
          }

          // Preference order: annotated default namespace, configured
          // namespace, and finally the short name itself.
          if (array_key_exists('default-namespace', $annotations)) {
            $namespace = $annotations['default-namespace'];
            if (is_array($namespace)) {
              $namespace = $namespace[0];
            }
          }
          elseif (array_key_exists('namespace', $config)) {
            $namespace = $config['namespace'];
          }
          else {
            $namespace = $short_name;
          }

          // Only record the vocabulary when the insert produced a record.
          $cv = tripal_insert_cv(strtoupper($namespace), '');
          if ($cv) {
            $defaultcv = $cv;
            $this->newcvs[$defaultcv->name] = $defaultcv->cv_id;
          }
        }
        catch (Exception $e) {
          watchdog_exception('OBOImporter no such accession found in EBI Ontology Search', $e);
        }
      }

      if (empty($defaultcv)) {
        throw new Exception("Could not find a namespace for this OBO file: $file");
      }
      $this->logMessage("This OBO is missing the 'default-namespace' header. It " .
        "is not possible to determine which vocabulary terms without a 'namespace' key " .
        "should go. Instead, those terms will be placed in the '!vocab' vocabulary.",
        array('!vocab' => $defaultcv->name), TRIPAL_WARNING);
    }

    // Add any typedefs to the vocabulary first.
    $this->logMessage("Step 2: Loading type defs...");
    $this->loadTypeDefs($defaultcv, $default_db);

    // Next add terms to the vocabulary.
    $this->logMessage("Step 3: Loading terms...");
    if (!$this->processTerms($defaultcv, $default_db)) {
      throw new Exception('Cannot add terms from this ontology');
    }
  }
  /**
   * Loads the typedef terms of the OBO.
   *
   * Reads every row of type 'Typedef' from the tripal_obo_temp table, decodes
   * its stanza and passes it to processTerm(), updating the progress counters
   * along the way.
   *
   * @param object $defaultcv
   *   A database object containing a record from the cv table for the
   *   default controlled vocabulary.
   * @param string $default_db
   *   The name of the default database.
   *
   * @return int
   *   Always 1.
   *
   * @ingroup tripal_obo_loader
   */
  private function loadTypeDefs($defaultcv, $default_db) {
    // Fetch the staged typedef stanzas.
    $select_sql = "SELECT * FROM {tripal_obo_temp} WHERE type = 'Typedef' ";
    $rows = chado_query($select_sql);

    // Count them so that progress can be reported.
    $count_sql = "
      SELECT count(*) as num_terms
      FROM {tripal_obo_temp}
      WHERE type = 'Typedef'
    ";
    $total = chado_query($count_sql)->fetchObject()->num_terms;
    $this->setTotalItems($total);
    $this->setItemsHandled(0);

    $handled = 0;
    foreach ($rows as $row) {
      $this->setItemsHandled($handled);
      // Stanzas are stored base64-encoded and serialized.
      $term = unserialize(base64_decode($row->stanza));
      $this->processTerm($term, $defaultcv->name, 1, $default_db);
      $handled++;
    }
    $this->setItemsHandled($handled);

    return 1;
  }
  /**
   * Loads the vocabulary terms of the OBO.
   *
   * Reads every row of type 'Term' from the tripal_obo_temp table (ordered by
   * id), decodes its stanza and passes it to processTerm(), updating the
   * progress counters along the way.
   *
   * @param object $defaultcv
   *   A database object containing a record from the cv table for the
   *   default controlled vocabulary.
   * @param string $default_db
   *   The name of the default database.
   *
   * @return int
   *   Always 1.
   *
   * @throws Exception
   *   If processTerm() reports a failure for any term.
   */
  private function processTerms($defaultcv, $default_db) {
    $i = 0;
    // Tracks whether the remote-lookup warning has been logged already.
    $external = FALSE;

    // Iterate through each term from the OBO file and add it.
    $sql = "
      SELECT * FROM {tripal_obo_temp}
      WHERE type = 'Term'
      ORDER BY id
    ";
    $terms = chado_query($sql);

    // Count the terms so that progress can be reported.
    $sql = "
      SELECT count(*) as num_terms
      FROM {tripal_obo_temp}
      WHERE type = 'Term'
    ";
    $result = chado_query($sql)->fetchObject();
    $count = $result->num_terms;
    $this->setTotalItems($count);
    $this->setItemsHandled(0);

    // Iterate through the terms.
    foreach ($terms as $t) {
      // Stanzas are stored base64-encoded and serialized.
      $term = unserialize(base64_decode($t->stanza));
      $this->setItemsHandled($i);

      // Add/update this term.
      $status = $this->processTerm($term, $defaultcv->name, 0, $default_db);
      if (!$status) {
        throw new Exception("Failed to process terms from the ontology");
      }

      // NOTE(review): processTerm() is defined outside this view; a status of
      // 2 apparently signals a term that needed a remote lookup. Confirm.
      // The warning is logged once per run.
      if ($status === 2 && !$external) {
        $this->logMessage(
          "A term that belongs to another ontology is used within this " .
          "vocabulary. Therefore a lookup was performed with the EBI Ontology " .
          "Lookup Service to retrieve the information for this term. " .
          "Please note, that vocabularies with many non-local terms " .
          "require remote lookups and these lookups can dramatically " .
          "increase loading time. ",
          array(),
          TRIPAL_WARNING
        );
        $external = TRUE;
      }
      $i++;
    }
    $this->setItemsHandled($i);

    return 1;
  }
  /**
   * Uses the provided term array to add/update information to Chado about the
   * term including the term, dbxref, synonyms, properties, and relationships.
   *
   * If the term id has the form PREFIX:number and PREFIX differs from the
   * default database, the term is treated as belonging to another ontology
   * and its name, vocabulary and database are looked up through
   * oboEbiLookup(). If the resulting vocabulary is not yet in the cv table,
   * an attempt is made to create it (and its db record) from the remote
   * ontology description.
   *
   * @param $term
   *   An array representing the cvterm (a parsed OBO stanza: each tag maps to
   *   an array of values).
   * @param $defaultcv
   *   The name of the default controlled vocabulary.
   * @param $is_relationship
   *   Set to 1 if this term is a relationship term.
   * @param $default_db
   *   The name of the default database.
   *
   * @return
   *   2 if the term information was retrieved by a remote lookup, 1 otherwise.
   *
   * @throws Exception
   *   If the term has neither a namespace nor a default vocabulary, or if the
   *   term or any of its alternate ids, synonyms, properties, dbxrefs or
   *   relationships cannot be added.
   *
   * @ingroup tripal_obo_loader
   */
  private function processTerm($term, $defaultcv, $is_relationship = 0, $default_db) {

    // Make sure the term can be placed in a vocabulary: it needs either its
    // own namespace or a non-empty default vocabulary.
    if (!array_key_exists('namespace', $term) and empty($defaultcv)) {
      throw new Exception("Cannot add the term: no namespace defined. " . $term['id'][0]);
    }

    // Construct the term array for sending to the tripal_insert_cvterm
    // function for adding a new cvterm.
    $t = array();
    $t['id'] = $term['id'][0];
    // A missing name is tolerated here; it is looked up remotely further down.
    $t['name'] = isset($term['name'][0]) ? $term['name'][0] : '';
    if (array_key_exists('def', $term)) {
      $t['definition'] = $term['def'][0];
    }
    if (array_key_exists('subset', $term)) {
      $t['subset'] = $term['subset'][0];
    }
    if (array_key_exists('namespace', $term)) {
      $t['namespace'] = $term['namespace'][0];
    }
    if (array_key_exists('is_obsolete', $term)) {
      $t['is_obsolete'] = $term['is_obsolete'][0];
    }

    // Set to TRUE once the term has been resolved through a remote lookup.
    $external = FALSE;

    // Check the id isn't a reference to another term.
    // TODO: Check chado for the accession, so we can avoid lookups where possible.
    if (strpos($t['id'], ':')) {
      $pair = explode(":", $t['id']);
      $ontology_id = $pair[0];
      $accession_num = $pair[1];
      if (is_numeric($accession_num) && ($ontology_id != $default_db)) {
        // The record (term result or search doc) describing the term.
        $found = NULL;

        // First try a direct term lookup.
        $results = $this->oboEbiLookup($t['id'], 'term');
        if (isset($results['label'])) {
          $found = $results;
        }

        // Next try a search. The matching documents are nested under the
        // 'response' key of the result.
        if (!$found) {
          $results = $this->oboEbiLookup($t['id'], 'query');
          $docs = !empty($results['response']['docs']) ? $results['response']['docs'] : array();
          if (count($docs) > 1) {
            foreach ($docs as $doc) {
              if (isset($doc['obo_id']) && $doc['obo_id'] == $t['id']) {
                $found = $doc;
              }
            }
          }
          elseif (count($docs) == 1) {
            $found = reset($docs);
          }
        }

        // If the search returned nothing at all, try a broader one.
        if (!$found && empty($results['response']['numFound'])) {
          $results = $this->oboEbiLookup($t['id'], 'query-non-local');
          if (!empty($results['response']['docs'])) {
            $accession = $t['id'];
            $accession_underscore = str_replace(":", "_", $accession);
            foreach ($results['response']['docs'] as $item) {
              if (!isset($item['label'])) {
                continue;
              }
              if ($item['label'] != $accession && $item['label'] != $accession_underscore) {
                // Found the first place a label other than the accession is
                // used, so take that info and then end the loop.
                $found = $item;
                break;
              }
            }
          }
        }

        if ($found) {
          $t['name'] = $found['label'];
          $defaultcv = $found['ontology_name'];
          $default_db = $found['ontology_prefix'];
          $external = TRUE;
        }
      }
    }

    // Check that the default_cv is in the cv table. The name may come from a
    // remote service so it is passed as a query argument, never interpolated.
    $sql = "
      SELECT CV.name
      FROM {cv} CV
      WHERE CV.name = :name
    ";
    $results = chado_query($sql, array(':name' => $defaultcv))->fetchObject();
    if (!$results) {
      // The controlled vocabulary is not in the cv table and needs to be added.
      $ontology_info = $this->oboEbiLookup($defaultcv, 'ontology');
      if (!empty($ontology_info['config'])) {
        $config = $ontology_info['config'];

        // CV Name.
        $cv_info = NULL;
        if (array_key_exists('namespace', $config)) {
          $cv_info = $config['namespace'];
        }
        elseif (isset($config['annotations']) && is_array($config['annotations'])
            && array_key_exists('default-namespace', $config['annotations'])) {
          $cv_info = $config['annotations']['default-namespace'];
        }

        // CV Description.
        $description = array_key_exists('description', $config) ? $config['description'] : '';

        // Without a name there is nothing to insert.
        $cv_returned = $cv_info ? chado_insert_cv($cv_info, $description) : FALSE;
        if ($cv_returned) {
          $defaultcv = $cv_returned->name;

          // Now add the db entry.
          if (!empty($config['preferredPrefix'])) {
            $values = array(
              'name' => $config['preferredPrefix'],
              'description' => $description,
              'url' => isset($config['versionIri']) ? $config['versionIri'] : '',
            );
            $db_returned = chado_insert_db($values);
            if ($db_returned) {
              $default_db = $db_returned->name;
            }
          }
        }
      }
    }

    $t['cv_name'] = $defaultcv;
    $t['is_relationship'] = $is_relationship;
    $t['db_name'] = $default_db;

    // The name being empty regularly causes problems, so let's check it's there.
    if (empty($t['name'])) {
      $results = $this->oboEbiLookup($t['id'], 'term');
      if (isset($results['label'])) {
        $t['name'] = $results['label'];
      }
    }

    $cvterm = tripal_insert_cvterm($t, array('update_existing' => TRUE));
    if (!$cvterm) {
      throw new Exception("Cannot add the term " . $term['id'][0]);
    }

    // Remove any relationships that this term already has (in case it was
    // updated) and we'll re-add them.
    $sql = "
      DELETE FROM {cvterm_relationship} CVTR
      WHERE CVTR.subject_id = :cvterm_id
    ";
    chado_query($sql, array(':cvterm_id' => $cvterm->cvterm_id));

    // Remember which cv this namespace was stored in.
    if (array_key_exists('namespace', $term)) {
      $this->newcvs[$term['namespace'][0]] = $cvterm->cv_id;
    }

    // Now handle the other properties. The following tags are currently
    // recognised but not handled: is_anonymous, subset, intersection_of,
    // union_of, disjoint_from, replaced_by, consider, use_term, builtin.

    if (array_key_exists('alt_id', $term)) {
      foreach ($term['alt_id'] as $alt_id) {
        if (!$this->addCvtermDbxref($cvterm, $alt_id)) {
          throw new Exception("Cannot add alternate id $alt_id");
        }
      }
    }

    // Add synonyms for this cvterm.
    if (array_key_exists('synonym', $term)) {
      if (!$this->addSynonym($term, $cvterm)) {
        throw new Exception("Cannot add synonyms");
      }
    }

    // Reformat the deprecated 'exact_synonym, narrow_synonym, and broad_synonym'
    // types to be of the v1.2 standard.
    if (array_key_exists('exact_synonym', $term) or array_key_exists('narrow_synonym', $term) or array_key_exists('broad_synonym', $term)) {
      if (array_key_exists('exact_synonym', $term)) {
        foreach ($term['exact_synonym'] as $synonym) {
          $new = preg_replace('/^\s*(\".+?\")(.*?)$/', '$1 EXACT $2', $synonym);
          $term['synonym'][] = $new;
        }
      }
      if (array_key_exists('narrow_synonym', $term)) {
        foreach ($term['narrow_synonym'] as $synonym) {
          $new = preg_replace('/^\s*(\".+?\")(.*?)$/', '$1 NARROW $2', $synonym);
          $term['synonym'][] = $new;
        }
      }
      if (array_key_exists('broad_synonym', $term)) {
        foreach ($term['broad_synonym'] as $synonym) {
          $new = preg_replace('/^\s*(\".+?\")(.*?)$/', '$1 BROAD $2', $synonym);
          $term['synonym'][] = $new;
        }
      }

      if (!$this->addSynonym($term, $cvterm)) {
        throw new Exception("Cannot add/update synonyms");
      }
    }

    // Add the comment to the cvtermprop table.
    if (array_key_exists('comment', $term)) {
      $comments = $term['comment'];
      $j = 0;
      foreach ($comments as $comment) {
        if (!$this->addCvtermProp($cvterm, 'comment', $comment, $j)) {
          throw new Exception("Cannot add/update cvterm property");
        }
        $j++;
      }
    }

    // Add any other external dbxrefs.
    foreach (array('xref', 'xref_analog', 'xref_unk') as $xref_tag) {
      if (array_key_exists($xref_tag, $term)) {
        foreach ($term[$xref_tag] as $xref) {
          if (!$this->addCvtermDbxref($cvterm, $xref)) {
            throw new Exception("Cannot add/update cvterm database reference (dbxref).");
          }
        }
      }
    }

    // Add is_a relationships for this cvterm.
    if (array_key_exists('is_a', $term)) {
      foreach ($term['is_a'] as $is_a) {
        if (!$this->addRelationship($cvterm, $defaultcv, 'is_a', $is_a, $is_relationship, $default_db)) {
          throw new Exception("Cannot add relationship is_a: $is_a");
        }
      }
    }

    // Add the other relationships. Each value is "<relationship> <object>".
    if (array_key_exists('relationship', $term)) {
      foreach ($term['relationship'] as $value) {
        $rel = preg_replace('/^(.+?)\s.+?$/', '\1', $value);
        $object = preg_replace('/^.+?\s(.+?)$/', '\1', $value);
        if (!$this->addRelationship($cvterm, $defaultcv, $rel, $object, $is_relationship, $default_db)) {
          throw new Exception("Cannot add relationship $rel: $object");
        }
      }
    }

    return $external ? 2 : 1;
  }
  /**
   * Adds a cvterm relationship.
   *
   * Makes sure the relationship type term and the object term exist as
   * cvterms and then links $cvterm (the subject) to the object term in the
   * cvterm_relationship table, unless that link already exists.
   *
   * @param $cvterm
   *   A database object for the cvterm that is the subject of the
   *   relationship.
   * @param $defaultcv
   *   The name of the default controlled vocabulary. It is used as the
   *   'cv_name' of the relationship and object terms.
   * @param $rel
   *   The relationship name.
   * @param $objname
   *   The id of the object term of the relationship.
   * @param $object_is_relationship
   *   Set to 1 if the object term is a relationship term.
   * @param $default_db
   *   The name of the default database.
   *
   * @return
   *   TRUE on success.
   *
   * @throws Exception
   *   If the relationship term, the object term or the relationship record
   *   cannot be found or added.
   *
   * @ingroup tripal_obo_loader
   */
  private function addRelationship($cvterm, $defaultcv, $rel,
    $objname, $object_is_relationship = 0, $default_db = 'OBO_REL') {

    $reference_term = FALSE;
    // Holds the remotely retrieved record describing the object term, if any.
    $oterm = NULL;
    // The term id that is looked up remotely, if any.
    $term_id = NULL;
    $in_obo = $this->getTerm($objname);

    // If an accession was passed we need to see if we can find the actual
    // label. The parentheses make the original operator precedence explicit.
    // NOTE(review): when $rel contains a colon the record retrieved for $rel
    // is used below as the object term - confirm that this is intended.
    if (strpos($rel, ':') || (strpos($objname, ':') && empty($in_obo['name']))) {
      $term_id = strpos($rel, ':') ? $rel : $objname;
      $reference_term = TRUE;
      $pair = explode(":", $term_id);
      $accession_num = $pair[1];
      $rel_name = '';
      if (is_numeric($accession_num)) {
        // First try a direct term lookup.
        $results = $this->oboEbiLookup($term_id, 'term');
        if (isset($results['label'])) {
          $rel_name = $results['label'];
          $oterm = $results;
        }

        // Next try a search.
        if (empty($rel_name)) {
          $results = $this->oboEbiLookup($term_id, 'query');
          $docs = !empty($results['response']['docs']) ? $results['response']['docs'] : array();
          if (count($docs) > 1) {
            foreach ($docs as $doc) {
              if (isset($doc['obo_id']) && $doc['obo_id'] == $term_id) {
                $rel_name = $doc['label'];
                $oterm = $doc;
              }
            }
          }
          elseif (count($docs) == 1) {
            $oterm = reset($docs);
            $rel_name = $oterm['label'];
          }
        }

        // The first search doesn't work, so let's try a broader one.
        if (empty($rel_name)) {
          $results = $this->oboEbiLookup($term_id, 'query-non-local');
          if (!empty($results['response']['docs'])) {
            foreach ($results['response']['docs'] as $item) {
              if (isset($item['obo_id']) && $item['obo_id'] == $term_id) {
                // Found the first document for this accession, so take that
                // info and then end the loop.
                $rel_name = $item['label'];
                $oterm = $item;
                break;
              }
            }
          }
        }
      }
    }

    // Make sure the relationship cvterm exists.
    $term = array(
      'name' => $rel,
      'id' => "$default_db:$rel",
      'definition' => '',
      'is_obsolete' => 0,
      'cv_name' => $defaultcv,
      'is_relationship' => TRUE,
      'db_name' => $default_db
    );
    $relcvterm = tripal_insert_cvterm($term, array('update_existing' => FALSE));
    if (!$relcvterm) {
      // If the relationship term couldn't be found in the default_db provided
      // then do one more check to find it in the relationship ontology.
      $term = array(
        'name' => $rel,
        'id' => "OBO_REL:$rel",
        'definition' => '',
        'is_obsolete' => 0,
        'cv_name' => $defaultcv,
        'is_relationship' => TRUE,
        'db_name' => 'OBO_REL'
      );
      $relcvterm = tripal_insert_cvterm($term, array('update_existing' => FALSE));
      if (!$relcvterm) {
        throw new Exception("Cannot find the relationship term in the current ontology or in the relationship ontology: $rel\n");
      }
    }

    // Get the object term.
    $objterm = array();
    if ($reference_term === TRUE && !empty($oterm)) {
      // The term was resolved remotely: the accession is the id and the
      // label is the name. Fall back to the accession that was looked up if
      // the record carries no 'obo_id'.
      $objterm['id'] = isset($oterm['obo_id']) ? $oterm['obo_id'] : $term_id;
      $objterm['name'] = $oterm['label'];
      if (array_key_exists('def', $oterm)) {
        $objterm['definition'] = $oterm['def'];
      }
      if (array_key_exists('subset', $oterm)) {
        $objterm['subset'] = $oterm['subset'];
      }
      if (array_key_exists('ontology_name', $oterm)) {
        $objterm['namespace'] = $oterm['ontology_name'];
      }
      if (array_key_exists('is_obsolete', $oterm)) {
        $objterm['is_obsolete'] = $oterm['is_obsolete'];
      }
    }
    else {
      // The term must be one of the stanzas of the OBO file being loaded.
      $oterm = $in_obo;
      if (!$oterm) {
        throw new Exception("Could not find object term $objname\n");
      }
      $objterm['id'] = $oterm['id'][0];
      $objterm['name'] = $oterm['name'][0];
      if (array_key_exists('def', $oterm)) {
        $objterm['definition'] = $oterm['def'][0];
      }
      if (array_key_exists('subset', $oterm)) {
        $objterm['subset'] = $oterm['subset'][0];
      }
      if (array_key_exists('namespace', $oterm)) {
        $objterm['namespace'] = $oterm['namespace'][0];
      }
      if (array_key_exists('is_obsolete', $oterm)) {
        $objterm['is_obsolete'] = $oterm['is_obsolete'][0];
      }
    }
    $objterm['cv_name'] = $defaultcv;
    $objterm['is_relationship'] = $object_is_relationship;
    $objterm['db_name'] = $default_db;

    $objcvterm = tripal_insert_cvterm($objterm, array('update_existing' => TRUE));
    if (!$objcvterm) {
      throw new Exception("Cannot add cvterm " . $objterm['name']);
    }

    // Check to see if the cvterm_relationship already exists, if not add it.
    $values = array(
      'type_id' => $relcvterm->cvterm_id,
      'subject_id' => $cvterm->cvterm_id,
      'object_id' => $objcvterm->cvterm_id
    );
    $result = chado_select_record('cvterm_relationship', array('*'), $values);
    if (count($result) == 0) {
      $options = array('return_record' => FALSE);
      $success = chado_insert_record('cvterm_relationship', $values, $options);
      if (!$success) {
        throw new Exception("Cannot add term relationship: '$cvterm->name' $rel '$objcvterm->name'");
      }
    }

    return TRUE;
  }
  /**
   * Retrieves the term array from the temp loading table for a given term id.
   *
   * @param $id
   *   The id of the term to retrieve.
   *
   * @return
   *   The unserialized stanza of the first matching record, or FALSE if the
   *   temp table has no record with this id.
   *
   * @ingroup tripal_obo_loader
   */
  private function getTerm($id) {
    $matches = chado_select_record('tripal_obo_temp', array('stanza'), array('id' => $id));
    if (count($matches) != 0) {
      // Stanzas are stored base64-encoded and serialized.
      $encoded = $matches[0]->stanza;
      return unserialize(base64_decode($encoded));
    }
    return FALSE;
  }
  /**
   * Adds the synonyms to a term.
   *
   * Each synonym value has the form '"text" SCOPE ...'. The quoted text is
   * stored in the cvtermsynonym table (if not already present for the term)
   * and the lower-cased scope is used as the synonym type, which is added to
   * the 'synonym_type' vocabulary when missing. When no scope can be found
   * after the quoted text, 'exact' is used.
   *
   * @param $term
   *   An array representing the cvterm. It must have a 'synonym' key/value pair.
   * @param $cvterm
   *   The database object of the cvterm to which the synonym will be added.
   *
   * @return
   *   TRUE on success.
   *
   * @throws Exception
   *   If the synonym vocabulary, a synonym type or a synonym cannot be added.
   *
   * @ingroup tripal_obo_loader
   */
  private function addSynonym($term, $cvterm) {

    // Make sure we have a 'synonym_type' vocabulary.
    $syncv = tripal_insert_cv(
      'synonym_type',
      'A local vocabulary added for synonym types.'
    );
    if (!$syncv) {
      throw new Exception("Cannot add/find the synonym_type vocabulary");
    }

    // Now add the synonyms.
    if (array_key_exists('synonym', $term)) {
      foreach ($term['synonym'] as $synonym) {

        // Separate out the synonym definition and the synonym type.
        $def = preg_replace('/^\s*"(.*)"\s*.*$/', '\1', $synonym);

        // The scope will be 'EXACT', etc... preg_match() is used rather than
        // preg_replace() because the latter returns the whole synonym when
        // the pattern does not match, which would become a bogus scope.
        $scope = '';
        $matches = array();
        if (preg_match('/^.*"\s+([A-Za-z_]+)(?:\s|$)/', $synonym, $matches)) {
          $scope = drupal_strtolower($matches[1]);
        }
        if (!$scope) {
          // If no scope then default to 'exact'.
          $scope = 'exact';
        }

        // Make sure the synonym type exists in the 'synonym_type' vocabulary.
        $values = array(
          'name' => $scope,
          'cv_id' => array(
            'name' => 'synonym_type',
          ),
        );
        $syntype = tripal_get_cvterm($values);

        // If it doesn't exist then add it.
        if (!$syntype) {
          // Build a 'term' array so we can add the missing term. A separate
          // variable is used so the $term argument is not overwritten.
          $type_term = array(
            'name' => $scope,
            'id' => "synonym_type:$scope",
            'definition' => '',
            'is_obsolete' => 0,
            'cv_name' => $syncv->name,
            'is_relationship' => FALSE
          );
          $syntype = tripal_insert_cvterm($type_term, array('update_existing' => TRUE));
          if (!$syntype) {
            throw new Exception("Cannot add synonym type: synonym_type:$scope");
          }
        }

        // Make sure the synonym doesn't already exist.
        $values = array(
          'cvterm_id' => $cvterm->cvterm_id,
          'synonym' => $def
        );
        $results = chado_select_record('cvtermsynonym', array('*'), $values);
        if (count($results) == 0) {
          $values = array(
            'cvterm_id' => $cvterm->cvterm_id,
            'synonym' => $def,
            'type_id' => $syntype->cvterm_id
          );
          $options = array('return_record' => FALSE);
          $success = chado_insert_record('cvtermsynonym', $values, $options);
          if (!$success) {
            throw new Exception("Failed to insert the synonym for term: $cvterm->name ($def)");
          }
        }

        // TODO: the dbxrefs listed in square brackets at the end of the
        // synonym are not loaded.
      }
    }

    return TRUE;
  }
  /**
   * Parse the OBO file and populate the temp loading table.
   *
   * The file is read line by line. Tag/value pairs found before the first
   * stanza are collected in $header. Every stanza is stored in the
   * tripal_obo_temp table (base64-encoded and serialized) together with its
   * id and its type.
   *
   * @param $obo_file
   *   The path on the file system where the ontology can be found.
   * @param $header
   *   An array passed by reference that will be populated with the header
   *   information from the OBO file.
   *
   * @return
   *   The database prefix (the part before the first colon) of the most
   *   recently parsed 'id' tag, or an empty string if there was none.
   *
   * @throws Exception
   *   If the file cannot be opened or a stanza cannot be inserted into the
   *   temp table.
   *
   * @ingroup tripal_obo_loader
   */
  private function parse($obo_file, &$header) {
    // Placeholder that temporarily stands in for an escaped colon ("\:").
    $colon_token = '|-|-|';

    $in_header = 1;
    $stanza = array();
    $default_db = '';
    $num_read = 0;
    $type = '';

    $filesize = filesize($obo_file);
    $this->setTotalItems($filesize);
    $this->setItemsHandled(0);

    // Iterate through the lines in the OBO file and parse the stanzas.
    $fh = fopen($obo_file, 'r');
    if (!$fh) {
      throw new Exception("Cannot open the OBO file: $obo_file");
    }

    try {
      // Compare with FALSE explicitly: a line such as "0" is falsy but valid.
      while (($line = fgets($fh)) !== FALSE) {
        // Progress is measured in bytes because the total is the file size.
        $num_read += strlen($line);
        $this->setItemsHandled($num_read);

        // Remove newlines and surrounding white space.
        $line = trim($line);

        // Remove any special characters that may be hiding.
        $line = preg_replace('/[^(\x20-\x7F)]*/', '', $line);

        // Skip empty lines.
        if (strcmp($line, '') == 0) {
          continue;
        }

        // Remove comments from end of lines.
        // TODO: if the exclamation mark is escaped.
        $line = preg_replace('/^(.*?)\!.*$/', '\1', $line);

        // Remove annotations surrounded by brackets. These are found
        // in the Trait Ontology (e.g. TO:1000023 {is_inferred="true"})
        // That construct has useful info, but it is not in the OBO 1.4 format
        // specifications.
        $line = preg_replace('/\{.*?\}/', '', $line);

        // Skip lines that contained nothing but a comment or an annotation.
        if (trim($line) === '') {
          continue;
        }

        // At the first stanza we're out of the header.
        if (preg_match('/^\s*\[/', $line)) {
          $in_header = 0;

          // Store the stanza we just finished reading.
          if (sizeof($stanza) > 0) {
            // Add the term to the temp table.
            $values = array(
              'id' => $stanza['id'][0],
              'stanza' => base64_encode(serialize($stanza)),
              'type' => $type,
            );
            $success = chado_insert_record('tripal_obo_temp', $values);
            if (!$success) {
              throw new Exception("Cannot insert stanza into temporary table.");
            }
          }

          // Get the stanza type: Term, Typedef or Instance.
          $type = preg_replace('/^\s*\[\s*(.+?)\s*\]\s*$/', '\1', $line);

          // Start fresh with a new array.
          $stanza = array();
          continue;
        }

        // Break apart the line into the tag and value but ignore any escaped
        // colons: they are temporarily replaced by a placeholder.
        $line = str_replace('\\:', $colon_token, $line);
        $pair = explode(":", $line, 2);
        $tag = $pair[0];
        // A line without a colon has a tag but no value.
        $value = isset($pair[1]) ? trim($pair[1]) : '';

        // If this is the ID then look for the default DB.
        $matches = array();
        if ($tag == 'id' and preg_match('/^(.+?):.*$/', $value, $matches)) {
          $default_db = $matches[1];
        }

        // Return the escaped colons.
        $tag = str_replace($colon_token, '\\:', $tag);
        $value = str_replace($colon_token, '\\:', $value);

        if ($in_header) {
          if (!array_key_exists($tag, $header)) {
            $header[$tag] = array();
          }
          $header[$tag][] = $value;
        }
        else {
          if (!array_key_exists($tag, $stanza)) {
            $stanza[$tag] = array();
          }
          $stanza[$tag][] = $value;
        }
      }

      // Now add the last term in the file.
      if (sizeof($stanza) > 0) {
        $values = array(
          'id' => $stanza['id'][0],
          'stanza' => base64_encode(serialize($stanza)),
          'type' => $type,
        );
        $success = chado_insert_record('tripal_obo_temp', $values);
        if (!$success) {
          throw new Exception("Cannot insert stanza into temporary table.");
        }
        $this->setItemsHandled($num_read);
      }
    }
    catch (Exception $e) {
      // Do not leak the file handle when loading fails.
      fclose($fh);
      throw $e;
    }
    fclose($fh);

    return $default_db;
  }
  /**
   * Adds a database reference to a cvterm.
   *
   * The cross reference is split into a database name (the text before the
   * first colon), an accession and an optional quoted description. The
   * database and the dbxref are added if needed and the dbxref is then
   * linked to the cvterm in the cvterm_dbxref table, unless that link
   * already exists.
   *
   * @param $cvterm
   *   The database object of the cvterm to which the dbxref will be added.
   * @param $xref
   *   The cross reference. It should be of the form from the OBO specification.
   *
   * @return
   *   TRUE on success.
   *
   * @throws Exception
   *   If the cross reference has no accession or if the database, the dbxref
   *   or the cvterm_dbxref record cannot be added.
   *
   * @ingroup tripal_obo_loader
   */
  private function addCvtermDbxref($cvterm, $xref) {

    $dbname = preg_replace('/^(.+?):.*$/', '$1', $xref);
    $accession = preg_replace('/^.+?:\s*(.*?)(\{.+$|\[.+$|\s.+$|\".+$|$)/', '$1', $xref);

    // The description is optional. preg_match() is used rather than
    // preg_replace() because the latter returns the whole cross reference
    // when there is no quoted description.
    $description = '';
    $matches = array();
    if (preg_match('/^.+?\"(.+?)\".*?$/', $xref, $matches)) {
      $description = $matches[1];
    }

    if (!$accession) {
      throw new Exception("Cannot add a dbxref without an accession: '$xref'");
    }

    // If the xref is a database link, handle that specially: the whole link
    // becomes the accession of the 'URL' database.
    if (strcmp($dbname, 'http') == 0 || strcmp($dbname, 'https') == 0) {
      $accession = $xref;
      $dbname = 'URL';
    }

    // Add the database.
    $db = tripal_insert_db(array('name' => $dbname));
    if (!$db) {
      throw new Exception("Cannot find database '$dbname' in Chado.");
    }

    // Now add the dbxref.
    $dbxref = $this->addDbxref($db->db_id, $accession, '', $description);
    if (!$dbxref) {
      throw new Exception("Cannot find or add the database reference (dbxref)");
    }

    // Finally add the cvterm_dbxref but first check to make sure it exists.
    $values = array(
      'cvterm_id' => $cvterm->cvterm_id,
      'dbxref_id' => $dbxref->dbxref_id,
    );
    $result = chado_select_record('cvterm_dbxref', array('*'), $values);
    if (count($result) == 0) {
      $ins_options = array('return_record' => FALSE);
      $result = chado_insert_record('cvterm_dbxref', $values, $ins_options);
      if (!$result) {
        throw new Exception("Cannot add cvterm_dbxref: $xref");
      }
    }

    return TRUE;
  }
  /**
   * Adds a property to a cvterm.
   *
   * The property type is a cvterm of the 'cvterm_property_type' vocabulary
   * and is created when missing. When the rank is 0 all cvtermprop records
   * of the cvterm are deleted before the new property is inserted.
   *
   * @param $cvterm
   *   A database object for the cvterm to which properties will be added.
   * @param $property
   *   The name of the property to add.
   * @param $value
   *   The value of the property.
   * @param $rank
   *   The rank of the property.
   *
   * @return
   *   TRUE on success.
   *
   * @throws Exception
   *   If the vocabulary, the property type or the property cannot be added,
   *   or if the existing properties cannot be removed.
   *
   * @ingroup tripal_obo_loader
   */
  private function addCvtermProp($cvterm, $property, $value, $rank) {

    // The property types live in the 'cvterm_property_type' vocabulary.
    $prop_cv = tripal_insert_cv('cvterm_property_type', '');
    if (!$prop_cv) {
      throw new Exception("Cannot add/find cvterm_property_type cvterm");
    }

    // Look up the property type cvterm and create it if it is missing.
    $type_match = array(
      'name' => $property,
      'cv_id' => $prop_cv->cv_id,
    );
    $existing = chado_select_record('cvterm', array('*'), $type_match);
    if (count($existing) != 0) {
      $prop_type = $existing[0];
    }
    else {
      $new_type = array(
        'name' => $property,
        'id' => "internal:$property",
        'definition' => '',
        'is_obsolete' => 0,
        'cv_name' => $prop_cv->name,
        'is_relationship' => FALSE,
      );
      $prop_type = tripal_insert_cvterm($new_type, array('update_existing' => FALSE));
      if (!$prop_type) {
        throw new Exception("Cannot add cvterm property: internal:$property");
      }
    }

    // The first property (rank 0) resets the properties of this term.
    if ($rank == 0) {
      $removed = chado_delete_record('cvtermprop', array('cvterm_id' => $cvterm->cvterm_id));
      if (!$removed) {
        throw new Exception("Could not remove existing properties to update property $property for term\n");
      }
    }

    // Store the property itself.
    $prop_record = array(
      'cvterm_id' => $cvterm->cvterm_id,
      'type_id' => $prop_type->cvterm_id,
      'value' => $value,
      'rank' => $rank,
    );
    $inserted = chado_insert_record('cvtermprop', $prop_record, array('return_record' => FALSE));
    if (!$inserted) {
      throw new Exception("Could not add property $property for term\n");
    }

    return TRUE;
  }
  /**
   * Finds a database cross reference (dbxref) record, adding it if needed.
   *
   * The record is looked up by database and accession. The version and the
   * description are only used when a new record is inserted.
   *
   * @param $db_id
   *   The database ID of the cross reference.
   * @param $accession
   *   The cross reference's accession.
   * @param $version
   *   The version of the dbxref.
   * @param $description
   *   The description of the cross reference.
   *
   * @return
   *   The first dbxref record (holding the dbxref_id) that matches the
   *   database and accession.
   *
   * @throws Exception
   *   If a new dbxref record cannot be inserted.
   *
   * @ingroup tripal_obo_loader
   */
  private function addDbxref($db_id, $accession, $version='', $description='') {

    $lookup = array(
      'db_id' => $db_id,
      'accession' => $accession,
    );

    // Nothing to do when the dbxref is already there.
    $found = chado_select_record('dbxref', array('dbxref_id'), $lookup);
    if (count($found) != 0) {
      return $found[0];
    }

    // Otherwise insert it and select it again to get its dbxref_id.
    $new_record = $lookup + array(
      'version' => $version,
      'description' => $description,
    );
    $inserted = chado_insert_record('dbxref', $new_record, array('return_record' => FALSE));
    if (!$inserted) {
      throw new Exception("Failed to insert the dbxref record $accession");
    }
    $found = chado_select_record('dbxref', array('dbxref_id'), $lookup);

    return $found[0];
  }
  /**
   * API call to the Ontology Lookup Service provided by
   * https://www.ebi.ac.uk/ols/docs/api#resources-terms
   *
   * The search type only decides which URL is requested; the request itself
   * and the decoding of the response are the same for every type.
   *
   * @param $accession
   *   Accession term for the query. The part before the first colon,
   *   lower-cased and stripped of whitespace, is used as the ontology name.
   * @param $type_of_search
   *   Either ontology, term, query, or query-non-local.
   *
   * @return
   *   The response body decoded with drupal_json_decode(). NULL is returned
   *   when the search type is not one of the four listed above or when the
   *   response carries no body. The HTTP status is not inspected, so an error
   *   response that has a body is decoded like any other.
   *
   * @ingroup tripal_obo_loader
   */
  private function oboEbiLookup($accession, $type_of_search) {
    // Grab just the ontology from the $accession.
    $parts = explode(':', $accession);
    $ontology = preg_replace('/\s+/', '', strtolower($parts[0]));

    $api_url = 'http://www.ebi.ac.uk/ols/api/';

    switch ($type_of_search) {
      case 'ontology':
        $full_url = $api_url . 'ontologies/' . $ontology;
        break;

      case 'term':
        // The IRI of the term; this value must be double URL encoded.
        $iri = urlencode(urlencode("http://purl.obolibrary.org/obo/" . str_replace(':', '_', $accession)));
        $full_url = $api_url . 'ontologies/' . $ontology . '/' . 'terms/' . $iri;
        break;

      case 'query':
        // The accession is placed in the query string, so it is URL encoded
        // to keep reserved characters from breaking the URL.
        $full_url = $api_url . 'search?q=' . urlencode($accession) . '&queryFields=obo_id&local=true';
        break;

      case 'query-non-local':
        $full_url = $api_url . 'search?q=' . urlencode($accession) . '&queryFields=obo_id';
        break;

      default:
        // Not a supported search type, so there is nothing to request.
        return NULL;
    }

    $response = drupal_http_request($full_url, array());

    // Without a body there is nothing to decode (presumably the request
    // itself failed -- TODO confirm against drupal_http_request()).
    if (empty($response) || !isset($response->data)) {
      return NULL;
    }

    return drupal_json_decode($response->data);
  }
  1527. }
  /**
   * Ajax callback used by the form built in OBOImporter::form().
   *
   * @param $form
   *   The form array.
   * @param $form_state
   *   The form state. It is not used by this callback.
   *
   * @return
   *   The 'obo_existing' element of $form.
   */
  function tripal_cv_obo_form_ajax_callback($form, $form_state) {
    $existing_element = $form['obo_existing'];
    return $existing_element;
  }