OBOImporter.inc 78 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
722782279228022812282228322842285228622872288228922902291229222932294229522962297229822992300230123022303230423052306230723082309231023112312231323142315231623172318231923202321232223232324232523262327232823292330233123322333233423352336233723382339234023412342
  1. <?php
  2. class OBOImporter extends TripalImporter {
  3. // --------------------------------------------------------------------------
  4. // EDITABLE STATIC CONSTANTS
  5. //
  6. // The following constants SHOULD be set for each descendent class. They are
  7. // used by the static functions to provide information to Drupal about
  8. // the field and its default widget and formatter.
  9. // --------------------------------------------------------------------------
  10. /**
  11. * The name of this loader. This name will be presented to the site
  12. * user.
  13. */
  14. public static $name = 'OBO Vocabulary Loader';
  15. /**
  16. * The machine name for this loader. This name will be used to construct
  17. * the URL for the loader.
  18. */
  19. public static $machine_name = 'chado_obo_loader';
  20. /**
  21. * A brief description for this loader. This description will be
  22. * presented to the site user.
  23. */
  24. public static $description = 'Import vocabularies and terms in OBO format.';
  25. /**
  26. * An array containing the extensions of allowed file types.
  27. */
  28. public static $file_types = array('obo');
  29. /**
  30. * Provides information to the user about the file upload. Typically this
  31. * may include a description of the file types allowed.
  32. */
  33. public static $upload_description = 'Please provide the details for importing a new OBO file. The file must have a .obo extension.';
  34. /**
  35. * The title that should appear above the upload button.
  36. */
  37. public static $upload_title = 'New OBO File';
  38. /**
  39. * If the loader should require an analysis record. To maintain provenance
  40. * we should always indicate where the data we are uploading comes from.
  41. * The method that Tripal attempts to use for this by associating upload files
  42. * with an analysis record. The analysis record provides the details for
  43. * how the file was created or obtained. Set this to FALSE if the loader
  44. * should not require an analysis when loading. if $use_analysis is set to
  45. * true then the form values will have an 'analysis_id' key in the $form_state
  46. * array on submitted forms.
  47. */
  48. public static $use_analysis = FALSE;
  49. /**
  50. * If the $use_analysis value is set above then this value indicates if the
  51. * analysis should be required.
  52. */
  53. public static $require_analysis = TRUE;
  54. /**
  55. * Text that should appear on the button at the bottom of the importer
  56. * form.
  57. */
  58. public static $button_text = 'Import OBO File';
  59. /**
  60. * Indicates the methods that the file uploader will support.
  61. */
  62. public static $methods = array(
  63. // Allow the user to upload a file to the server.
  64. 'file_upload' => FALSE,
  65. // Allow the user to provide the path on the Tripal server for the file.
  66. 'file_local' => FALSE,
  67. // Allow the user to provide a remote URL for the file.
  68. 'file_remote' => FALSE,
  69. );
  70. /**
  71. * By default, all loaders are automatically added to the Admin >
  72. * Tripal > Data Loaders menu. However, if this loader should be
  73. * made available via a different menu path, then set it here. If the
  74. * value is empty then the path will be the default.
  75. */
  76. public static $menu_path = 'admin/tripal/loaders/chado_vocabs/obo_loader';
  77. public static $file_required = FALSE;
  78. /**
  79. * Keep track of vocabularies that have been added.
  80. *
  81. * @var array
  82. */
  83. private $obo_namespaces = array();
  84. /**
  85. * Holds the list of all CVs on this site. By storing them here it saves
  86. * us query time later.
  87. */
  88. private $all_cvs = [];
  89. /**
  90. * Holds the list of all DBs on this site. By storing them here it saves
  91. * us query time later.
  92. *
  93. * @var array
  94. */
  95. private $all_dbs = [];
  96. /**
  97. * When adding synonyms we need to know the cvterm_ids of the synonym types.
  98. * This array holds those.
  99. *
  100. * @var array
  101. */
  102. private $syn_types = [
  103. 'exact' => NULL,
  104. 'broad' => NULL,
  105. 'narrow' => NULL,
  106. 'related' => NULL,
  107. ];
  108. // An alternative cache to the temp_obo table.
  109. private $termStanzaCache = [
  110. 'ids' => [],
  111. 'count' => [
  112. 'Typedef' => 0,
  113. 'Term' => 0,
  114. 'Instance' => 0,
  115. ],
  116. 'types' => [
  117. 'Typedef' => [],
  118. 'Term' => [],
  119. 'Instance' => [],
  120. ],
  121. ];
  122. /**
  123. * Indicates how terms are cached. Values can be 'memory' or 'table'. If
  124. * 'memory' then the $termStanzaCache variable is used. If 'table', then the
  125. * tripal_obo_temp table is used.
  126. * @var string
  127. */
  128. private $cache_type = 'memory';
  129. /**
  130. * The default namespace for all terms that don't have a 'namespace' in their
  131. * term stanza.
  132. *
  133. * @var string
  134. */
  135. private $default_namespace = '';
  136. /**
  137. * The default database prefix for this ontology.
  138. *
  139. * @var string
  140. */
  141. private $default_db = '';
  142. /**
  143. * An array of used cvterm objects so that we don't have to lookup them
  144. * up repeatedly.
  145. */
  146. private $used_terms = [];
  147. /**
  148. * An array of base IRIs returned from the EBI OLS lookup service. We
  149. * don't want to continually query OLS for the same ontology base IRIs.
  150. */
  151. private $baseIRIs = [];
  152. /**
  153. * A flag to keep track if the user was warned about slowness when doing
  154. * EBI Lookups.
  155. *
  156. * @var bool
  157. */
  158. private $ebi_warned = FALSE;
  159. /**
  160. * A flag that indicates if this ontology is just a subset of a much larger
  161. * one. Examples include the GO slims.
  162. *
  163. * @var bool
  164. */
  165. private $is_subset = FALSE;
  /**
   * Builds the OBO importer form.
   *
   * The form offers two fieldsets: one for selecting (and optionally editing)
   * an OBO reference already stored in the tripal_cv_obo table, and one for
   * registering a new OBO reference by name plus a remote URL or local path.
   *
   * @param array $form
   *   The form array to extend.
   * @param array $form_state
   *   The current form state. The 'values' entry is read to find the selected
   *   obo_id, and stale 'input' entries for the update fields are removed.
   *
   * @return array
   *   The form array with the OBO elements added.
   *
   * @see TripalImporter::form()
   */
  public function form($form, &$form_state) {

    // Get the list of saved OBO references for the user to choose from.
    $sql = "SELECT * FROM {tripal_cv_obo} ORDER BY name";
    $results = db_query($sql);

    $obos = array();
    $obos[] = 'Select a Vocabulary';
    foreach ($results as $obo) {
      $obos[$obo->obo_id] = $obo->name;
    }

    $obo_id = '';
    if (array_key_exists('values', $form_state)) {
      $obo_id = array_key_exists('obo_id', $form_state['values']) ? $form_state['values']['obo_id'] : '';
    }

    $form['instructions']['info'] = array(
      '#type' => 'item',
      '#markup' => t('This page allows you to load vocabularies and ontologies
        that are in OBO format. Once loaded, the terms from these
        vocabularies can be used to create content.
        You may use the form below to either reload a vocabulary that is already
        loaded (as when new updates to that vocabulary are available) or load a new
        vocabulary.'),
    );

    $form['obo_existing'] = array(
      '#type' => 'fieldset',
      '#title' => t('Use a Saved Ontology OBO Reference'),
      '#prefix' => '<span id="obo-existing-fieldset">',
      '#suffix' => '</span>'
    );

    $form['obo_existing']['existing_instructions'] = array(
      '#type' => 'item',
      '#markup' => t('The vocabularies listed in the select box below have been pre-populated
        upon installation of Tripal or have been previously loaded. Select one to edit
        its settings or submit for loading. You may reload any vocabulary that has
        already been loaded to retrieve any new updates.'),
    );

    $form['obo_existing']['obo_id'] = array(
      '#title' => t('Ontology OBO File Reference'),
      '#type' => 'select',
      '#options' => $obos,
      '#ajax' => array(
        'callback' => 'tripal_cv_obo_form_ajax_callback',
        'wrapper' => 'obo-existing-fieldset',
      ),
      '#description' => t('Select a vocabulary to import.')
    );

    // If the user has selected an OBO ID then look up its stored record. The
    // record may be missing (e.g. it was deleted in the meantime), in which
    // case the update elements are simply not added.
    $vocab = FALSE;
    if ($obo_id) {
      $vocab = db_select('tripal_cv_obo', 't')
        ->fields('t', array('name', 'path'))
        ->condition('obo_id', $obo_id)
        ->execute()
        ->fetchObject();
    }

    if ($vocab) {
      $uobo_name = $vocab->name;
      $uobo_url = '';
      $uobo_file = '';

      // A stored path is either a remote URL or a local file path.
      if (preg_match('/^http/', $vocab->path)) {
        $uobo_url = $vocab->path;
      }
      else {
        $uobo_file = trim($vocab->path);
        // A "{module_name}" token in the path stands for that module's
        // directory; expand it for display.
        $matches = array();
        if (preg_match('/\{(.*?)\}/', $uobo_file, $matches)) {
          $modpath = drupal_get_path('module', $matches[1]);
          $uobo_file = preg_replace('/\{.*?\}/', $modpath, $uobo_file);
        }
      }

      // We don't want the previous value to remain. We want the new default to
      // show up, so remove the input values.
      unset($form_state['input']['uobo_name']);
      unset($form_state['input']['uobo_url']);
      unset($form_state['input']['uobo_file']);

      $form['obo_existing']['uobo_name'] = array(
        '#type' => 'textfield',
        '#title' => t('Vocabulary Name'),
        '#description' => t('Please provide a name for this vocabulary. After upload, this name will appear in the drop down
          list above for use again later.'),
        '#default_value' => $uobo_name,
      );

      $form['obo_existing']['uobo_url'] = array(
        '#type' => 'textfield',
        '#title' => t('Remote URL'),
        '#description' => t('Please enter a URL for the online OBO file. The file will be downloaded and parsed.
          (e.g. http://www.obofoundry.org/ro/ro.obo)'),
        '#default_value' => $uobo_url,
      );

      $form['obo_existing']['uobo_file'] = array(
        '#type' => 'textfield',
        '#title' => t('Local File'),
        '#description' => t('Please enter the file system path for an OBO
          definition file. If entering a path relative to
          the Drupal installation you may use a relative path that excludes the
          Drupal installation directory (e.g. sites/default/files/xyz.obo). Note
          that Drupal relative paths have no preceding slash.
          Otherwise, please provide the full path on the filesystem. The path
          must be accessible to the web server on which this Drupal instance is running.'),
        '#default_value' => $uobo_file,
      );

      $form['obo_existing']['update_obo_details'] = array(
        '#type' => 'submit',
        '#value' => 'Update Ontology Details',
        '#name' => 'update_obo_details'
      );
    }

    $form['obo_new'] = array(
      '#type' => 'fieldset',
      '#title' => t('Add a New Ontology OBO Reference'),
      '#collapsible' => TRUE,
      '#collapsed' => TRUE,
    );

    // Drupal 7 renders text through '#markup'; a bare '#value' element with
    // no '#type' is not rendered, so use an 'item' like the other
    // instruction elements on this form.
    $form['obo_new']['path_instructions'] = array(
      '#type' => 'item',
      '#markup' => t('Provide the name and path for the OBO file. If the vocabulary OBO file
        is stored local to the server provide a file name. If the vocabulary is stored remotely,
        provide a URL. Only provide a URL or a local file, not both.'),
    );

    $form['obo_new']['obo_name'] = array(
      '#type' => 'textfield',
      '#title' => t('New Vocabulary Name'),
      '#description' => t('Please provide a name for this vocabulary. After upload, this name will appear in the drop down
        list above for use again later. Additionally, if a default namespace is not provided in the OBO
        header this name will be used as the default_namespace.'),
    );

    $form['obo_new']['obo_url'] = array(
      '#type' => 'textfield',
      '#title' => t('Remote URL'),
      '#description' => t('Please enter a URL for the online OBO file. The file will be downloaded and parsed.
        (e.g. http://www.obofoundry.org/ro/ro.obo)'),
    );

    $form['obo_new']['obo_file'] = array(
      '#type' => 'textfield',
      '#title' => t('Local File'),
      '#description' => t('Please enter the file system path for an OBO
        definition file. If entering a path relative to
        the Drupal installation you may use a relative path that excludes the
        Drupal installation directory (e.g. sites/default/files/xyz.obo). Note
        that Drupal relative paths have no preceding slash.
        Otherwise, please provide the full path on the filesystem. The path
        must be accessible to the web server on which this Drupal instance is running.'),
    );

    return $form;
  }
  /**
   * Handles submission of the OBO importer form.
   *
   * When the "Update Ontology Details" button was clicked, the selected
   * tripal_cv_obo record is updated and the form is rebuilt. Otherwise, when a
   * new vocabulary name was supplied, a new tripal_cv_obo record is inserted
   * and its obo_id is stored in $form_state['values']['obo_id'].
   *
   * @param array $form
   *   The form array.
   * @param array $form_state
   *   The form state; 'values', 'clicked_button' and 'rebuild' are used.
   *
   * @see TripalImporter::formSubmit()
   */
  public function formSubmit($form, &$form_state) {

    $obo_id = $form_state['values']['obo_id'];
    $obo_name = trim($form_state['values']['obo_name']);
    $obo_url = trim($form_state['values']['obo_url']);
    $obo_file = trim($form_state['values']['obo_file']);

    // The update fields only exist once an existing vocabulary is selected.
    $uobo_name = array_key_exists('uobo_name', $form_state['values']) ? trim($form_state['values']['uobo_name']) : '';
    $uobo_url = array_key_exists('uobo_url', $form_state['values']) ? trim($form_state['values']['uobo_url']) : '';
    $uobo_file = array_key_exists('uobo_file', $form_state['values']) ? trim($form_state['values']['uobo_file']) : '';

    // If the user requested to alter the details then do that.
    if ($form_state['clicked_button']['#name'] == 'update_obo_details') {
      $form_state['rebuild'] = TRUE;
      $success = db_update('tripal_cv_obo')
        ->fields(array(
          'name' => $uobo_name,
          // A URL takes precedence over a local path.
          'path' => $uobo_url ? $uobo_url : $uobo_file,
        ))
        ->condition('obo_id', $obo_id)
        ->execute();

      // The vocabulary name is user input, so it is inserted with the
      // escaping '@' placeholder rather than the raw '!' placeholder.
      if ($success) {
        drupal_set_message(t("The vocabulary @vocab has been updated.", array('@vocab' => $uobo_name)));
      }
      else {
        drupal_set_message(t("The vocabulary @vocab could not be updated.", array('@vocab' => $uobo_name)), 'error');
      }
    }
    elseif (!empty($obo_name)) {
      $obo_id = db_insert('tripal_cv_obo')
        ->fields(array(
          'name' => $obo_name,
          'path' => $obo_url ? $obo_url : $obo_file,
        ))
        ->execute();

      // Add the obo_id to the form_state values.
      $form_state['values']['obo_id'] = $obo_id;

      if ($obo_id) {
        drupal_set_message(t("The vocabulary @vocab has been added.", array('@vocab' => $obo_name)));
      }
      else {
        $form_state['rebuild'] = TRUE;
        drupal_set_message(t("The vocabulary @vocab could not be added.", array('@vocab' => $obo_name)), 'error');
      }
    }
  }
  /**
   * Validates the OBO importer form.
   *
   * Depending on which button was clicked, checks either the "update" fields
   * (uobo_*) or the "new" fields (obo_*): the vocabulary name must not clash
   * with another tripal_cv_obo record, a supplied local file must exist, and
   * exactly one of URL or local file must be provided.
   *
   * @param array $form
   *   The form array.
   * @param array $form_state
   *   The form state; 'values' and 'clicked_button' are read.
   *
   * @see TripalImporter::formValidate()
   */
  public function formValidate($form, &$form_state) {

    $obo_id = $form_state['values']['obo_id'];
    $obo_name = trim($form_state['values']['obo_name']);
    $obo_url = trim($form_state['values']['obo_url']);
    $obo_file = trim($form_state['values']['obo_file']);

    // The update fields only exist once an existing vocabulary is selected.
    $uobo_name = array_key_exists('uobo_name', $form_state['values']) ? trim($form_state['values']['uobo_name']) : '';
    $uobo_url = array_key_exists('uobo_url', $form_state['values']) ? trim($form_state['values']['uobo_url']) : '';
    $uobo_file = array_key_exists('uobo_file', $form_state['values']) ? trim($form_state['values']['uobo_file']) : '';

    // Make sure if the name is changed it doesn't conflict with another OBO.
    if ($form_state['clicked_button']['#name'] == 'update_obo_details' or
        $form_state['clicked_button']['#name'] == 'update_load_obo') {

      // Get any record that already uses the requested name.
      $vocab = db_select('tripal_cv_obo', 't')
        ->fields('t', array('obo_id', 'name', 'path'))
        ->condition('name', $uobo_name)
        ->execute()
        ->fetchObject();
      if ($vocab and $vocab->obo_id != $obo_id) {
        form_set_error('uobo_name', 'The vocabulary name must be different from existing vocabularies');
      }

      // Make sure the file exists. First check if it is a path relative to
      // the Drupal installation, then if it is a full filesystem path. The
      // check only applies when a local path was actually supplied.
      if ($uobo_file !== '') {
        $dfile = $_SERVER['DOCUMENT_ROOT'] . base_path() . $uobo_file;
        if (!file_exists($dfile) and !file_exists($uobo_file)) {
          // The placeholder values belong to t(), not to form_set_error().
          form_set_error('uobo_file', t('The specified path, @path, does not exist or cannot be read.', ['@path' => $dfile]));
        }
      }

      if (!$uobo_url and !$uobo_file) {
        form_set_error('uobo_url', 'Please provide a URL or a path for the vocabulary.');
      }
      if ($uobo_url and $uobo_file) {
        form_set_error('uobo_url', 'Please provide only a URL or a path for the vocabulary, but not both.');
      }
    }

    if ($form_state['clicked_button']['#name'] == 'add_new_obo') {

      // A new vocabulary may not reuse any existing name.
      $vocab = db_select('tripal_cv_obo', 't')
        ->fields('t', array('obo_id', 'name', 'path'))
        ->condition('name', $obo_name)
        ->execute()
        ->fetchObject();
      if ($vocab) {
        form_set_error('obo_name', 'The vocabulary name must be different from existing vocabularies');
      }

      // Make sure the file exists. First check if it is a path relative to
      // the Drupal installation, then if it is a full filesystem path.
      if ($obo_file !== '') {
        $dfile = $_SERVER['DOCUMENT_ROOT'] . base_path() . $obo_file;
        if (!file_exists($dfile) and !file_exists($obo_file)) {
          form_set_error('obo_file', t('The specified path, @path, does not exist or cannot be read.', ['@path' => $dfile]));
        }
      }

      if (!$obo_url and !$obo_file) {
        form_set_error('obo_url', 'Please provide a URL or a path for the vocabulary.');
      }
      if ($obo_url and $obo_file) {
        form_set_error('obo_url', 'Please provide only a URL or a path for the vocabulary, but not both.');
      }
    }
  }
  /**
   * Performs the OBO import.
   *
   * Reads the 'obo_id' entry from $this->arguments['run_args'], verifies that
   * it refers to a tripal_cv_obo record, primes the CV/DB lookup caches,
   * makes sure the 'synonym_type' vocabulary, database and synonym type terms
   * exist, and then hands the OBO record to the loader.
   *
   * @throws Exception
   *   If no tripal_cv_obo record matches the supplied obo_id.
   *
   * @see TripalImporter::run()
   */
  public function run() {

    $run_args = $this->arguments['run_args'];
    $obo_id = $run_args['obo_id'];

    // The obo_id must match a stored OBO reference.
    $obo = db_select('tripal_cv_obo', 'tco')
      ->fields('tco')
      ->condition('obo_id', $obo_id)
      ->execute()
      ->fetchObject();
    if (!$obo) {
      throw new Exception("Invalid OBO ID provided: '$obo_id'.");
    }

    // Cache every CV, keyed by name, so we can save on lookups later.
    $cv_results = chado_query("SELECT * FROM {cv} CV");
    while ($cv_record = $cv_results->fetchObject()) {
      $this->all_cvs[$cv_record->name] = $cv_record;
    }

    // Cache every DB, keyed by name, so we can save on lookups later.
    $db_results = chado_query("SELECT * FROM {db} DB");
    while ($db_record = $db_results->fetchObject()) {
      $this->all_dbs[$db_record->name] = $db_record;
    }

    // Remember the cvterm IDs of the 'Subgroup' term (used for adding
    // subsets) and the 'Comment' term (used for adding comments).
    foreach (['NCIT:C25693', 'rdfs:comment'] as $term_accession) {
      $found_term = chado_get_cvterm(['id' => $term_accession]);
      $this->used_terms[$term_accession] = $found_term->cvterm_id;
    }

    // Make sure we have a 'synonym_type' vocabulary.
    $synonym_cv = new ChadoRecord('cv');
    $synonym_cv->setValues(['name' => 'synonym_type']);
    $synonym_cv->save();
    $this->all_cvs['synonym_type'] = (object) $synonym_cv->getValues();

    // Make sure we have a 'synonym_type' database.
    $synonym_db = new ChadoRecord('db');
    $synonym_db->setValues(['name' => 'synonym_type']);
    $synonym_db->save();
    $this->all_dbs['synonym_type'] = (object) $synonym_db->getValues();

    // Make sure each synonym type exists in the 'synonym_type' vocabulary.
    foreach ($this->syn_types as $type_name => $ignored) {
      $type_dbxref = new ChadoRecord('dbxref');
      $type_dbxref->setValues([
        'accession' => $type_name,
        'db_id' => $synonym_db->getID(),
      ]);
      $type_dbxref->save();

      // Look the term up by name and vocabulary; insert it only if missing.
      $type_term = new ChadoRecord('cvterm');
      $type_term->setValues([
        'name' => $type_name,
        'cv_id' => $synonym_cv->getID(),
      ]);
      if (!$type_term->find()) {
        $type_term->setValues([
          'name' => $type_name,
          'definition' => '',
          'is_obsolete' => 0,
          'cv_id' => $synonym_cv->getID(),
          'is_relationshiptype' => 0,
          'dbxref_id' => $type_dbxref->getID(),
        ]);
        $type_term->insert();
      }
      $this->syn_types[$type_name] = (object) $type_term->getValues();
    }

    // Run the importer!
    $this->loadOBO_v1_2_id($obo);
  }
  /**
   * Refreshes derived data once the import has finished.
   *
   * Repopulates the cv_root_mview and db2cv_mview materialized views and then
   * updates the cvtermpath table for every entry in $this->obo_namespaces.
   *
   * @see TripalImporter::postRun()
   */
  public function postRun() {

    // Materialized views to repopulate, with the message logged for each.
    $mview_messages = [
      'cv_root_mview' => "Updating the cv_root_mview materialized view...",
      'db2cv_mview' => "Updating the db2cv_mview materialized view...",
    ];
    foreach ($mview_messages as $mview_name => $message) {
      $this->logMessage($message);
      tripal_populate_mview(tripal_get_mview_id($mview_name));
    }

    // Update the cvtermpath table for each newly added CV.
    $this->logMessage("Updating cvtermpath table. This may take a while...");
    foreach ($this->obo_namespaces as $vocab_name => $vocab_cv_id) {
      $this->logMessage("- Loading paths for @vocab", array('@vocab' => $vocab_name));
      tripal_update_cvtermpath($vocab_cv_id);
    }
  }
  /**
   * Imports the OBO described by a tripal_cv_obo record into Chado.
   *
   * The record's path is resolved (a "{module_name}" token is replaced with
   * that module's real path) and the import is dispatched to the URL loader
   * for http/https/ftp paths, or to the file loader for local paths. Local
   * paths are tried first relative to the Drupal installation and then as
   * given. It requires that the file be in OBO v1.2 compatible format.
   *
   * @param object $obo
   *   A record from the tripal_cv_obo table; its 'name' and 'path' properties
   *   are read, and 'path' is overwritten with the resolved path.
   *
   * @throws Exception
   *   If the path is local and no file can be found at either location.
   *
   * @ingroup tripal_obo_loader
   */
  private function loadOBO_v1_2_id($obo) {

    // Convert the module name to the real path if present.
    $matches = [];
    if (preg_match("/\{(.*?)\}/", $obo->path, $matches)) {
      $module = $matches[1];
      $path = drupal_realpath(drupal_get_path('module', $module));
      $obo->path = preg_replace("/\{.*?\}/", $path, $obo->path);
    }

    // If the reference is for a remote URL then run the URL processing
    // function.
    if (preg_match("/^(https?|ftp):\/\//", $obo->path)) {
      $this->loadOBO_v1_2_url($obo->name, $obo->path, FALSE);
      return;
    }

    // Otherwise the reference is a local file. Check first whether it is
    // located relative to the Drupal installation.
    $dfile = $_SERVER['DOCUMENT_ROOT'] . base_path() . $obo->path;
    if (file_exists($dfile)) {
      $this->loadOBO_v1_2_file($obo->name, $dfile, FALSE);
      return;
    }

    // If not local to Drupal, the file must be someplace else, so use the
    // full path provided.
    if (file_exists($obo->path)) {
      $this->loadOBO_v1_2_file($obo->name, $obo->path, FALSE);
      return;
    }

    // Fail loudly so the import is not reported as successful when nothing
    // was loaded.
    throw new Exception("Could not find OBO file: '{$obo->path}'.");
  }
  /**
   * Imports an OBO file into Chado given its name and filesystem path.
   *
   * It requires that the file be in OBO v1.2 compatible format. This function
   * is typically executed via the Tripal jobs management after a user submits
   * a job via the Load Ontologies form.
   *
   * @param $obo_name
   *   The name of the OBO (typically the ontology or controlled vocabulary
   *   name).
   * @param $file
   *   The path on the file system where the ontology can be found.
   * @param $is_new
   *   Set to TRUE if this is a new ontology that does not yet exist in the
   *   tripal_cv_obo table. If TRUE the OBO will be added to the table.
   *
   * @ingroup tripal_obo_loader
   */
  private function loadOBO_v1_2_file($obo_name, $file, $is_new = TRUE) {
    // Register the OBO reference first when it is not yet known.
    if ($is_new) {
      tripal_insert_obo($obo_name, $file);
    }

    $this->loadOBO_v1_2($file, $obo_name);
  }
  /**
   * Imports an OBO file from a remote URL into Chado.
   *
   * The remote file is first downloaded to a temporary file, then parsed, and
   * the temporary file is always removed afterwards (even when the import
   * fails). The file must be in an OBO v1.2 compatible format. This function
   * is typically executed via the Tripal jobs management after a user submits
   * a job via the Load Ontologies form.
   *
   * @param $obo_name
   *   The name of the OBO (typically the ontology or controlled vocabulary
   *   name).
   * @param $url
   *   The remote URL of the OBO file.
   * @param $is_new
   *   Set to TRUE if this is a new ontology that does not yet exist in the
   *   tripal_cv_obo table. If TRUE the OBO will be added to the table.
   *
   * @throws Exception
   *   If the remote file cannot be opened or downloaded, if the temporary
   *   file cannot be created, or if the import itself fails.
   *
   * @ingroup tripal_obo_loader
   */
  private function loadOBO_v1_2_url($obo_name, $url, $is_new = TRUE) {
    // First download the OBO.
    $temp = tempnam(sys_get_temp_dir(), 'obo_');
    print "Downloading URL $url, saving to $temp\n";

    $url_fh = fopen($url, "r");
    if (!$url_fh) {
      // Do not leave the (empty) temporary file behind.
      if ($temp) {
        unlink($temp);
      }
      throw new Exception("Unable to download the remote OBO file at $url. Could a firewall be blocking outgoing connections? " .
        " if you are unable to download the file you may manually downlod the OBO file and use the web interface to " .
        " specify the location of the file on your server.");
    }

    $obo_fh = $temp ? fopen($temp, "w") : FALSE;
    if (!$obo_fh) {
      fclose($url_fh);
      if ($temp) {
        unlink($temp);
      }
      throw new Exception("Unable to create a temporary file to store the remote OBO file at $url.");
    }

    // Copy the whole stream in one call. Unlike a manual feof()/fread() loop
    // this cannot spin forever when the remote stream enters an error state.
    $copied = stream_copy_to_stream($url_fh, $obo_fh);
    fclose($url_fh);
    fclose($obo_fh);
    if ($copied === FALSE) {
      unlink($temp);
      throw new Exception("Unable to download the remote OBO file at $url. The transfer failed before it completed.");
    }

    // Second, register (if new) and parse the OBO. The temporary file is
    // removed whether or not the import succeeds.
    try {
      if ($is_new) {
        tripal_insert_obo($obo_name, $url);
      }
      $this->loadOBO_v1_2($temp, $obo_name);
    }
    catch (Exception $e) {
      unlink($temp);
      throw $e;
    }

    // Now remove the temp file.
    unlink($temp);
  }
  /**
   * Imports a given OBO file into Chado.
   *
   * This function is usually called by one of three wrapper functions:
   * loadOBO_v1_2_id, loadOBO_v1_2_file or loadOBO_v1_2_url. It runs the
   * import as a fixed sequence of steps: clear the term cache, parse the
   * file, cache relationships, load typedefs, load terms, and clear the cache
   * again.
   *
   * @param $file
   *   The full path to the OBO file on the file system.
   * @param $obo_name
   *   The name of the OBO. It is accepted for parity with the wrapper
   *   functions but is not used by this function.
   *
   * @ingroup tripal_obo_loader
   */
  private function loadOBO_v1_2($file, $obo_name) {
    $header = array();

    // Empty the temp table.
    $this->clearTermStanzaCache();

    // Parse the obo file.
    $this->logMessage("Step 1: Preloading File $file...");
    $this->parse($file, $header);

    // Cache the relationships of terms.
    $this->logMessage("Step 2: Examining relationships...");
    $this->cacheRelationships();

    // Add any typedefs to the vocabulary first.
    $this->logMessage("Step 3: Loading type defs...");
    $this->processTypeDefs();

    // Next add terms to the vocabulary.
    $this->logMessage("Step 4: Loading terms...");
    $this->processTerms();

    // Empty the term cache.
    $this->logMessage("Step 5: Cleanup...");
    $this->clearTermStanzaCache();
  }
  /**
   * Loads the typedef terms from the OBO.
   *
   * OBO files are divided into a typedefs terms section and a vocabulary
   * terms section. This function processes every cached [Typedef] stanza as a
   * relationship term and reports progress while doing so.
   *
   * @return
   *   Always 1.
   *
   * @ingroup tripal_obo_loader
   */
  private function processTypeDefs() {
    $typedefs = $this->getCachedTermStanzas('Typedef');
    $count = $this->getCacheSize('Typedef');
    $this->setTotalItems($count);
    $this->setItemsHandled(0);
    $this->setInterval(5);

    // Count from zero and report the number of stanzas completed so far, the
    // same way processTerms() does, so the final value equals the total
    // rather than exceeding it by one.
    $i = 0;
    foreach ($typedefs as $t) {
      // TODO: it would be better if we had a term iterator so that we
      // don't have to distinguish here between the table vs memory cache type.
      if ($this->cache_type == 'table') {
        $stanza = unserialize(base64_decode($t->stanza));
      }
      else {
        $stanza = $this->termStanzaCache['ids'][$t];
      }
      $this->setItemsHandled($i);
      $this->processTerm($stanza, TRUE);
      $i++;
    }
    $this->setItemsHandled($i);
    return 1;
  }
  /**
   * Loads all of the [Term] terms from the OBO.
   *
   * Every cached [Term] stanza is added to or updated in Chado, and progress
   * is reported after each one.
   *
   * @return
   *   Always 1.
   */
  private function processTerms() {
    $terms = $this->getCachedTermStanzas('Term');
    $count = $this->getCacheSize('Term');
    $this->setTotalItems($count);
    $this->setItemsHandled(0);
    $this->setInterval(1);

    // Iterate through the terms.
    $i = 0;
    foreach ($terms as $t) {
      // TODO: it would be better if we had a term iterator so that we
      // don't have to distinguish here between the table vs memory cache type.
      if ($this->cache_type == 'table') {
        $term = unserialize(base64_decode($t->stanza));
      }
      else {
        $term = $this->termStanzaCache['ids'][$t];
      }

      // Report how many terms have been completed before this one.
      $this->setItemsHandled($i);

      // Add/update this term.
      $this->processTerm($term, FALSE);
      $i++;
    }
    $this->setItemsHandled($i);
    return 1;
  }
  /**
   * Works out and stores the default CV and DB for this loader.
   *
   * OBO headers do not always carry both the 'ontology' and the
   * 'default-namespace' tags, so whichever one is missing is derived from the
   * other where possible.
   *
   * @param $header
   *   The parsed OBO header, keyed by tag name with a list of values per tag.
   *
   * @throws ErrorException
   *   If neither the namespace nor the ontology prefix can be determined.
   */
  private function setDefaults($header) {
    // Start from the header values, falling back to empty strings when a tag
    // is absent.
    $short_name = array_key_exists('ontology', $header) ? strtoupper($header['ontology'][0]) : '';
    $namespace = array_key_exists('default-namespace', $header) ? $header['default-namespace'][0] : '';

    // The 'ontology' tag may be nested for subsets, for example
    // go/subsets/goslim_plant. Keep only the top-level item and remember
    // that a subset is being loaded.
    $nested = [];
    if (preg_match('/^(.+?)\/.*/', $short_name, $nested)) {
      $short_name = $nested[1];
      $this->is_subset = TRUE;
    }

    // Prefix known, namespace unknown: ask EBI for the namespace.
    if ($short_name and !$namespace) {
      $namespace = $this->findEBIOntologyNamespace($short_name);
    }

    // Namespace known, prefix unknown: try a couple of local tricks.
    if ($namespace and !$short_name) {
      // Has this ontology been loaded before? If so reuse its database.
      $sql = "SELECT dbname FROM {db2cv_mview} WHERE cvname = :cvname";
      $short_name = chado_query($sql, [':cvname' => $namespace])->fetchField();

      // Otherwise try to pull the prefix out of the 'namespace-id-rule' tag.
      if (!$short_name and array_key_exists('namespace-id-rule', $header)) {
        $rule = [];
        if (preg_match('/^.*\s(.+?):.+$/', $header['namespace-id-rule'][0], $rule)) {
          $short_name = $rule[1];
        }
      }

      // An EBI lookup of the prefix (findEBIOntologyPrefix) is still
      // experimental and is intentionally not attempted here.
    }

    // With neither value there is nothing to work with.
    if (!($namespace or $short_name)) {
      throw new ErrorException('Cannot determine the namespace or ontology prefix from this OBO file. It is missing both the "default-namespace" and "ontology" headers.');
    }

    // Set the defaults and make sure the DB and CV records exist.
    $this->default_db = $short_name;
    $this->default_namespace = $namespace;
    $this->addDB($this->default_db);
    $this->addCV($this->default_namespace);
  }
  /**
   * Searches EBI to find the default namespace for this OBO.
   *
   * The namespace is taken from the 'default-namespace' annotation if EBI
   * provides one, otherwise from the ontology's configured namespace. If EBI
   * provides neither, the ontology name itself is used as the namespace.
   *
   * @param $ontology
   *   The ontology name (DB short name) from the OBO headers.
   *
   * @return
   *   The namespace to use for the ontology.
   *
   * @throws Exception
   *   Any exception raised by the EBI lookup is logged and re-thrown.
   */
  private function findEBIOntologyNamespace($ontology) {
    // Check if the EBI ontology search has this ontology:
    try {
      $results = $this->oboEbiLookup($ontology, 'ontology');

      // Guard against a response that lacks the expected nested sections.
      $config = [];
      if (is_array($results) and isset($results['config']) and is_array($results['config'])) {
        $config = $results['config'];
      }
      $annotations = [];
      if (isset($config['annotations']) and is_array($config['annotations'])) {
        $annotations = $config['annotations'];
      }

      if (array_key_exists('default-namespace', $annotations)) {
        $namespace = $annotations['default-namespace'];
        if (is_array($namespace)) {
          $namespace = $namespace[0];
        }
        return $namespace;
      }

      if (array_key_exists('namespace', $config)) {
        return $config['namespace'];
      }

      // If we can't find the namespace at EBI, then just default to using the
      // same namespace as the DB short name. The short name is the value
      // passed in; $this->default_db cannot be used because setDefaults()
      // only assigns it after this function has returned.
      return $ontology;
    }
    catch (Exception $e) {
      watchdog_exception('Cannot find the namespace for this ontology.', $e);
      throw $e;
    }
  }
  /**
   * Finds the ontology prefix (DB short name) using EBI.
   *
   * NOTE: this code is experimental and is not currently called by
   * setDefaults(). It pages through the EBI OLS ontology listing looking for
   * an ontology whose configured namespace matches the one provided.
   *
   * @param $namespace
   *   The namespace for the ontology.
   *
   * @return
   *   The upper-cased EBI ontology ID of the first ontology whose namespace
   *   matches, or NULL if no match is found or the listing cannot be read.
   */
  private function findEBIOntologyPrefix($namespace) {
    $options = array();
    $size = 25;

    // NOTE(review): paging starts at 1 as it always has in this function;
    // confirm against the EBI OLS API whether its pages are zero-based.
    $page = 1;
    while (TRUE) {
      $full_url = 'https://www.ebi.ac.uk/ols/api/ontologies?page=' . $page . '&size=' . $size;
      $response = drupal_http_request($full_url, $options);

      // drupal_http_request() returns an object even on failure, so the loop
      // has to stop explicitly on an error or on an empty body.
      if (!$response or !empty($response->error) or empty($response->data)) {
        return NULL;
      }

      // A page with no ontologies means the end of the listing was reached.
      $listing = drupal_json_decode($response->data);
      if (!is_array($listing) or empty($listing['_embedded']['ontologies'])) {
        return NULL;
      }

      foreach ($listing['_embedded']['ontologies'] as $ontology) {
        if (isset($ontology['config']['namespace']) and $ontology['config']['namespace'] == $namespace) {
          // NOTE(review): presumably the upper-cased ontology ID is the DB
          // short name (setDefaults() upper-cases the 'ontology' header in
          // the same way); verify before relying on this lookup.
          return isset($ontology['ontologyId']) ? strtoupper($ontology['ontologyId']) : NULL;
        }
      }
      $page++;
    }
  }
  /**
   * A helper function to get details about a foreign term.
   *
   * A foreign term is one that does not belong to the ontology being loaded.
   * The term is looked up with the EBI Ontology Lookup Service (OLS) and an
   * OBO stanza array is built from the response as if the term had been in
   * the OBO file. If EBI reports that the term has been replaced, the
   * replacement is returned instead: first from the in-memory term cache,
   * then from the local data store, and finally via another EBI lookup.
   *
   * @param $id
   *   The full term ID in the form PREFIX:ACCESSION (e.g. GO:0015881).
   *
   * @return
   *   An OBO stanza array with the keys: id, name, def, namespace,
   *   is_obsolete, db_name, comment and (if available) subset. When the
   *   replacement term is found in the local data store the value returned
   *   by lookupTerm() is returned instead.
   *
   * @throws Exception
   *   If the ID is malformed, if EBI does not respond with usable data, or if
   *   EBI reports an error for the ontology or the term.
   */
  private function findEBITerm($id) {

    // Warn the user if we're looking up terms in EBI as this will slow the
    // loader if there are many lookups.
    if ($this->ebi_warned == FALSE) {
      $this->logMessage(
        "A term that belongs to another ontology is used within this " .
        "vocabulary. Therefore a lookup will be performed with the EBI Ontology " .
        "Lookup Service to retrieve the information for this term. " .
        "Please note, that vocabularies with many non-local terms " .
        "require remote lookups and these lookups can dramatically " .
        "increase loading time. ",
        ['!vocab' => $this->default_namespace], TRIPAL_WARNING);
      $this->ebi_warned = TRUE;

      // This ontology may have multiple remote terms and that takes a while
      // to load so lets change the progress interval down to give
      // updates more often.
      $this->setInterval(1);
    }

    $this->logMessage("Performing EBI OLS Lookup for: !id", ['!id' => $id]);

    // Get the short name and accession for the term. Both parts are needed
    // to build the lookup URLs.
    $pair = explode(":", $id, 2);
    if (count($pair) != 2) {
      throw new Exception(t('Cannot perform an EBI OLS lookup for the term: !id. The ID must have the form PREFIX:ACCESSION.',
        ['!id' => $id]));
    }
    $short_name = $pair[0];
    $accession = $pair[1];

    // First get the ontology so we can build an IRI for the term. Ontology
    // details are remembered so each ontology is requested only once.
    $base_iri = '';
    $ontologyID = '';
    if (array_key_exists($short_name, $this->baseIRIs)) {
      list($ontologyID, $base_iri) = $this->baseIRIs[$short_name];
    }
    else {
      $full_url = 'http://www.ebi.ac.uk/ols/api/ontologies/' . $short_name;
      $response = drupal_http_request($full_url, []);
      $ontology_results = ($response and isset($response->data)) ? drupal_json_decode($response->data) : NULL;
      if (!is_array($ontology_results)) {
        throw new Exception(t('Did not get a response from EBI OLS trying to lookup ontology: !ontology',
          ['!ontology' => $short_name]));
      }
      if (!empty($ontology_results['error'])) {
        throw new Exception(t('Cannot find the ontology via an EBI OLS lookup: !short_name. ' .
          'EBI Reported: !message. ' .
          'Consider finding the OBO file for this ontology and manually loading it first.',
          ['!message' => $ontology_results['message'], '!short_name' => $short_name]));
      }
      $base_iri = $ontology_results['config']['baseUris'][0];
      $ontologyID = $ontology_results['ontologyId'];
      $this->baseIRIs[$short_name] = [$ontologyID, $base_iri];
    }

    // Next get the term. The IRI is deliberately URL-encoded twice, as it
    // was before.
    $iri = urlencode(urlencode($base_iri . $accession));
    $full_url = 'http://www.ebi.ac.uk/ols/api/ontologies/' . $ontologyID . '/terms/' . $iri;
    $response = drupal_http_request($full_url, []);
    $results = ($response and isset($response->data)) ? drupal_json_decode($response->data) : NULL;
    if (!is_array($results)) {
      throw new Exception(t('Did not get a response from EBI OLS trying to lookup term: !id',
        ['!id' => $id]));
    }

    // If EBI sent an error message then throw an error.
    if (!empty($results['error'])) {
      throw new Exception(t('Cannot find the term via an EBI OLS lookup: !term. ' .
        'EBI Reported: !message.' .
        'Consider finding the OBO file for this ontology and manually loading it first.',
        ['!message' => $results['message'], '!term' => $id]));
    }

    // TODO: what do we do if the term is not defined by this ontology
    // (i.e. the 'is_defining_ontology' value in the response is not 1)?

    // Make an OBO stanza array as if this term were in the OBO file and
    // return it. Values missing from the response are stored as NULL.
    $this->logMessage("Found !term in EBI OLS.", ['!term' => $id]);
    $stanza = [];
    $stanza['id'][] = $id;
    $response_keys = [
      'name' => 'label',
      'def' => 'def',
      'namespace' => 'ontology_name',
      'is_obsolete' => 'is_obsolete',
    ];
    foreach ($response_keys as $tag => $key) {
      $stanza[$tag][] = isset($results[$key]) ? $results[$key] : NULL;
    }
    $stanza['db_name'][] = $short_name;
    $stanza['comment'][] = 'Term obtained using the EBI Ontology Lookup Service.';
    if (array_key_exists('subset', $results)) {
      $stanza['subset'][] = $results['subset'];
    }

    // If this term has been replaced then get the new term.
    if (isset($results['term_replaced_by'])) {
      $replaced_by = preg_replace('/_/', ':', $results['term_replaced_by']);
      $this->logMessage("The term, !term, is replaced by, !replaced",
        ['!term' => $id, '!replaced' => $replaced_by]);

      // Before we try to look up the replacement term, let's try to find it
      // in our list of cached terms. The replacement is what must be
      // returned, not the term that was originally requested.
      if (isset($this->termStanzaCache['ids'][$replaced_by])) {
        $this->logMessage("Found term, !replaced in the term cache.",
          ['!term' => $id, '!replaced' => $replaced_by]);
        return $this->termStanzaCache['ids'][$replaced_by];
      }

      // Next look in the database.
      $rpair = explode(":", $replaced_by, 2);
      if (count($rpair) == 2) {
        $found = $this->lookupTerm($rpair[0], $rpair[1]);
        if ($found) {
          $this->logMessage("Found term, !replaced in the local data store.",
            ['!term' => $id, '!replaced' => $replaced_by]);
          return $found;
        }
      }

      // Look for this new term.
      $stanza = $this->findEBITerm($replaced_by);
    }
    return $stanza;
  }
  /**
   * Inserts or updates a cvterm using the OBO stanza array provided.
   *
   * The stanza passed to this function should always come from the term
   * cache, not directly from the OBO file, because the cached terms have been
   * updated to include all necessary values (this function reads the
   * 'namespace' and 'db_name' values in addition to the standard OBO tags).
   *
   * Terms are located by their dbxref (DB + accession) rather than by name
   * because names can change between releases of an ontology. Terms that are
   * borrowed from another ontology are never updated. The cvterm ID of every
   * term handled is remembered so repeated calls for the same term return
   * immediately.
   *
   * @param $stanza
   *   An OBO stanza array as returned by getCachedTermStanza().
   * @param $is_relationship
   *   Set to TRUE if this term is a relationship term. It is only used when
   *   a new cvterm record is inserted.
   *
   * @return
   *   The cvterm ID.
   */
  private function saveTerm($stanza, $is_relationship = FALSE) {

    // Get the term ID.
    $id = $stanza['id'][0];

    // First check if we've already used this term.
    if (!empty($this->used_terms[$id])) {
      return $this->used_terms[$id];
    }

    // Get the term properties.
    $name = $stanza['name'][0];
    $cvname = $stanza['namespace'][0];
    $dbname = $stanza['db_name'][0];

    // Does this term ID have both a short name and accession? If so, then
    // separate out these components, otherwise we will use the id as both
    // the id and accession.
    $accession = $id;
    $matches = [];
    if (preg_match('/^(.+?):(.*)$/', $id, $matches)) {
      $accession = $matches[2];
    }

    // Get the definition if there is one. The OBO tag for the definition is
    // 'def'; the surrounding double quotes are removed.
    $definition = '';
    if (isset($stanza['def'][0])) {
      $definition = preg_replace('/^\"(.*)\"/', '\1', $stanza['def'][0]);
    }

    // Set the flag if this term is obsolete. The value may be a boolean or
    // the text 'true'/'false', so it is parsed rather than loosely compared
    // (the string 'false' is truthy in PHP).
    $is_obsolete = 0;
    if (array_key_exists('is_obsolete', $stanza)) {
      $is_obsolete = filter_var($stanza['is_obsolete'][0], FILTER_VALIDATE_BOOLEAN) ? 1 : 0;
    }

    // Is this term borrowed from another ontology?
    $is_borrowed = $this->isTermBorrowed($stanza);

    // The cvterm ChadoRecord object.
    $cvterm = NULL;

    // Get the CV and DB objects.
    $cv = $this->all_cvs[$cvname];
    $db = $this->all_dbs[$dbname];

    // If this is set to TRUE then we should insert the term.
    $do_cvterm_insert = FALSE;

    // We need to locate terms using their dbxref. This is because term names
    // can sometimes change, so we don't want to look up the term by its name.
    // The unique ID which is in the accession will never change.
    $dbxref = new ChadoRecord('dbxref');
    $dbxref->setValues([
      'db_id' => $db->db_id,
      'accession' => $accession,
    ]);
    if ($dbxref->find()) {

      // Now see if there is a cvterm for this dbxref. A dbxref can only be
      // used one time. If so, then update the term, but only if this term
      // belongs to the namespace created by this OBO. We don't want to update
      // terms from other OBOs that may be borrowed by this one.
      $cvterm = new ChadoRecord('cvterm');
      $cvterm->setValues(['dbxref_id' => $dbxref->getID()]);
      if ($cvterm->find()) {
        if (!$is_borrowed) {

          // Case #1: The stored name of the cvterm with this accession
          // matches the name in the stanza. All is well and we can update.
          //
          // Case #2: The stored name is different from the stanza. The
          // comparison must be made before the new name is set on the record.
          if ($cvterm->getValue('name') != $name) {

            // So the names are different. Does a term exist with the name
            // for this term already? We can't update a term to have the same
            // name as another. It will break the cvterm unique constraints.
            $check_cvterm = new ChadoRecord('cvterm');
            $check_cvterm->setValues(['cv_id' => $cv->cv_id, 'name' => $name]);
            if ($check_cvterm->find()) {

              // Get the accession of the conflicting term and see if it
              // exists in the OBO that's being loaded.
              $check_dbxref = new ChadoRecord('dbxref', $check_cvterm->getValue('dbxref_id'));
              $check_db = new ChadoRecord('db', $check_dbxref->getValue('db_id'));
              $check_accession = $check_db->getValue('name') . ':' . $check_dbxref->getValue('accession');
              $check_stanza = $this->getCachedTermStanza($check_accession);

              // Case 2a: The other term that currently has the same name is
              // missing in the OBO file (i.e. no stanza). So, that means
              // that this term probably got relegated to an alt_id. We do
              // not want to delete this term because it may be linked to
              // other records. Instead, let's update its name to let folks
              // know what happened to it and so we can get around the unique
              // constraint. An example of this is the GO:0015881 and
              // GO:1902598 terms where the latter became an alt_id of the
              // first and no longer has its own entry.
              if (!$check_stanza) {
                $check_cvterm->setValue('name', $check_cvterm->getValue('name') . ' (removed from . ' . $check_db->getValue('name') . ')');
              }
              // Case 2b: The other term is in the OBO file (i.e. has a
              // stanza). That means that there has been some name swapping
              // between terms. We need to temporarily rename that term so
              // that we don't have a unique constraint violation when we
              // update this one. An example of this is where GO:000425 and
              // GO:0030242 changed names and one was renamed to the previous
              // name of the other.
              else {
                $check_cvterm->setValue('name', $check_cvterm->getValue('name') . ' (term name needs update)');
              }
              $check_cvterm->update();
            }
            // Case 2c: The name has changed but there is no other term
            // with the same new name. We are good!
          }

          // Now update this cvterm record.
          $cvterm->setValue('name', $name);
          $cvterm->setValue('definition', $definition);
          $cvterm->setValue('is_obsolete', $is_obsolete);
          $cvterm->update();
        }
      }
      else {
        $do_cvterm_insert = TRUE;
      }
    }
    else {
      // The dbxref doesn't exist, so let's add it.
      $dbxref->insert();
      $do_cvterm_insert = TRUE;
    }

    if ($do_cvterm_insert) {
      // Before inserting the term let's check to see if it already exists.
      // The cvterm table has two unique constraints, one on the dbxref and
      // another on the name, cv_id and is_obsolete columns. If for some
      // reason the term exists but is associated with a different dbxref
      // we'll have a problem. Because the is_obsolete may be set this time,
      // but the term may not have it set from a previous load we will check
      // for the presence of the term without the is_obsolete.
      $cvterm = new ChadoRecord('cvterm');
      $cvterm->setValues([
        'cv_id' => $cv->cv_id,
        'name' => $name,
      ]);
      if ($cvterm->find()) {
        // We found the term so that means it's associated with a different
        // dbxref. We need to correct it.
        if (!$is_borrowed) {
          // Look up the old dbxref and its DB so the log message can report
          // the accession the term previously had.
          $old_dbxref = new ChadoRecord('dbxref');
          $old_dbxref->setValues(['dbxref_id' => $cvterm->getValue('dbxref_id')]);
          $old_dbxref->find();
          $old_db = new ChadoRecord('db');
          $old_db->setValues(['db_id' => $old_dbxref->getValue('db_id')]);
          $old_db->find();
          $this->logMessage('Correcting misassigned accession: !id => "!name". Previously was: !old_id.',
            ['!id' => $id, '!name' => $name,
             '!old_id' => $old_db->getValue('name') . ':' . $old_dbxref->getValue('accession')]);
          $cvterm->setValue('dbxref_id', $dbxref->getID());
          $cvterm->setValue('definition', $definition);
          $cvterm->setValue('is_obsolete', $is_obsolete);
          $cvterm->update();
        }
      }
      else {
        // The term doesn't exist, so let's just do our insert.
        $cvterm->setValues([
          'cv_id' => $cv->cv_id,
          'name' => $name,
          'definition' => $definition,
          'dbxref_id' => $dbxref->getID(),
          'is_relationshiptype' => $is_relationship,
          'is_obsolete' => $is_obsolete,
        ]);
        $cvterm->insert();
      }
    }

    // Save the cvterm_id for this term so we don't look it up again.
    $cvterm_id = $cvterm->getID();
    $this->used_terms[$id] = $cvterm_id;

    // Return the cvterm_id.
    return $cvterm_id;
  }
  /**
   * Adds or updates everything Chado stores about a term.
   *
   * The term itself is saved first. Unless the term is borrowed from another
   * ontology, its existing relationships, properties, xrefs and synonyms are
   * then removed and re-added from the stanza.
   *
   * The following OBO tags are currently unsupported: intersection_of,
   * union_of, disjoint_from, replaced_by, consider, use_term, builtin and
   * is_anonymous.
   *
   * @param $stanza
   *   An OBO stanza array representing the term.
   * @param $is_relationship
   *   Set to 1 (or TRUE) if this term is a relationship term.
   *
   * @throws Exception
   *   If no cvterm ID is available after saving the term.
   *
   * @ingroup tripal_obo_loader
   */
  private function processTerm($stanza, $is_relationship = 0) {

    //
    // First things first--save the term.
    //
    // If the term does not exist it is inserted, if it does exist it just
    // retrieves the cvterm_id. The relationship flag is passed through so
    // that typedefs are recorded as relationship types.
    //
    $cvterm_id = $this->saveTerm($stanza, $is_relationship ? TRUE : FALSE);
    $id = $stanza['id'][0];

    // Is this term borrowed from another ontology? If so then we will
    // not update it.
    if ($this->isTermBorrowed($stanza)) {
      return;
    }

    // We should never have the problem where we don't have a cvterm_id. The
    // saveTerm() function should always return one. But if for some unknown
    // reason we don't have one then fail before anything is deleted.
    if (!$cvterm_id) {
      throw new Exception(t('Missing cvterm after saving term: !term',
        ['!term' => print_r($stanza, TRUE)]));
    }

    // This term belongs to this OBO (not borrowed from another OBO) so
    // remove any relationships, properties, xrefs, and synonyms that this
    // term already has so that they can be re-added.
    $sql = "
      DELETE FROM {cvterm_relationship}
      WHERE subject_id = :cvterm_id
    ";
    chado_query($sql, array(':cvterm_id' => $cvterm_id));

    // If this is an obsolete term then clear out the relationships where
    // this term is the object. The stanza stores a list of values per tag,
    // and the value may be a boolean or the text 'true'.
    if (isset($stanza['is_obsolete'][0]) and filter_var($stanza['is_obsolete'][0], FILTER_VALIDATE_BOOLEAN)) {
      $sql = "
        DELETE FROM {cvterm_relationship}
        WHERE object_id = :cvterm_id
      ";
      chado_query($sql, array(':cvterm_id' => $cvterm_id));
    }

    $sql = "
      DELETE FROM {cvtermprop}
      WHERE cvterm_id = :cvterm_id
    ";
    chado_query($sql, array(':cvterm_id' => $cvterm_id));

    $sql = "
      DELETE FROM {cvterm_dbxref}
      WHERE cvterm_id = :cvterm_id
    ";
    chado_query($sql, array(':cvterm_id' => $cvterm_id));

    $sql = "
      DELETE FROM {cvtermsynonym} CVTSYN
      WHERE cvterm_id = :cvterm_id
    ";
    chado_query($sql, array(':cvterm_id' => $cvterm_id));

    //
    // Handle: alt_id
    //
    if (array_key_exists('alt_id', $stanza)) {
      foreach ($stanza['alt_id'] as $alt_id) {
        $this->addAltID($id, $cvterm_id, $alt_id);
      }
    }

    //
    // Handle: synonym
    //
    if (array_key_exists('synonym', $stanza)) {
      foreach ($stanza['synonym'] as $synonym) {
        $this->addSynonym($id, $cvterm_id, $synonym);
      }
    }

    //
    // Handle: exact_synonym, narrow_synonym, broad_synonym
    //
    // These tags carry the scope in the tag name, so the scope keyword is
    // inserted after the quoted text before the synonym is added.
    //
    $scoped_synonyms = [
      'exact_synonym' => 'EXACT',
      'narrow_synonym' => 'NARROW',
      'broad_synonym' => 'BROAD',
    ];
    foreach ($scoped_synonyms as $tag => $scope) {
      if (!array_key_exists($tag, $stanza)) {
        continue;
      }
      foreach ($stanza[$tag] as $synonym) {
        $fixed = preg_replace('/^\s*(\".+?\")(.*?)$/', '$1 ' . $scope . ' $2', $synonym);
        $this->addSynonym($id, $cvterm_id, $fixed);
      }
    }

    //
    // Handle: comment
    //
    if (array_key_exists('comment', $stanza)) {
      foreach ($stanza['comment'] as $rank => $comment) {
        $this->addComment($id, $cvterm_id, $comment, $rank);
      }
    }

    //
    // Handle: xref, xref_analog, xref_unk
    //
    foreach (['xref', 'xref_analog', 'xref_unk'] as $tag) {
      if (!array_key_exists($tag, $stanza)) {
        continue;
      }
      foreach ($stanza[$tag] as $xref) {
        $this->addXref($id, $cvterm_id, $xref);
      }
    }

    //
    // Handle: subset
    //
    if (array_key_exists('subset', $stanza)) {
      foreach ($stanza['subset'] as $subset) {
        $this->addSubset($id, $cvterm_id, $subset);
      }
    }

    //
    // Handle: is_a
    //
    if (array_key_exists('is_a', $stanza)) {
      foreach ($stanza['is_a'] as $is_a) {
        $this->addRelationship($id, $cvterm_id, 'is_a', $is_a);
      }
    }

    //
    // Handle: relationship
    //
    // The value holds the relationship term followed by the object term.
    //
    if (array_key_exists('relationship', $stanza)) {
      foreach ($stanza['relationship'] as $value) {
        $rel = preg_replace('/^(.+?)\s.+?$/', '\1', $value);
        $object = preg_replace('/^.+?\s(.+?)$/', '\1', $value);
        $this->addRelationship($id, $cvterm_id, $rel, $object);
      }
    }
  }
  /**
   * Adds a cvterm relationship.
   *
   * Both the relationship term and the object term must be present in the
   * term cache. Each is saved (if it has not been already) and a
   * cvterm_relationship record is then inserted.
   *
   * @param $id
   *   The term ID of the subject. It is only used in error messages.
   * @param $cvterm_id
   *   A cvterm_id of the term to which the relationship will be added.
   * @param $rel_id
   *   The relationship term ID.
   * @param $obj_id
   *   The relationship object term ID.
   *
   * @throws Exception
   *   If either term is missing from the term cache or the insert fails.
   *
   * @ingroup tripal_obo_loader
   */
  private function addRelationship($id, $cvterm_id, $rel_id, $obj_id) {

    // Get the cached terms for both the relationship and the object. They
    // should be there, but just in case something went wrong, we'll throw
    // an exception if we can't find them.
    $rel_stanza = $this->getCachedTermStanza($rel_id);
    if (!$rel_stanza) {
      throw new Exception(t('Cannot add relationship: "!source !rel !object". ' .
        'The term, !rel, is not in the term cache.',
        ['!source' => $id, '!rel' => $rel_id, '!object' => $obj_id]));
    }
    $rel_cvterm_id = $this->saveTerm($rel_stanza, TRUE);

    // Make sure the object term exists in the cache.
    $obj_stanza = $this->getCachedTermStanza($obj_id);
    if (!$obj_stanza) {
      throw new Exception(t('Cannot add relationship: "!source !rel !object". ' .
        'The term, !object, is not in the term cache.',
        ['!source' => $id, '!rel' => $rel_id, '!object' => $obj_id]));
    }
    $obj_cvterm_id = $this->saveTerm($obj_stanza);

    // Add the cvterm_relationship.
    $cvterm_relationship = new ChadoRecord('cvterm_relationship');
    $cvterm_relationship->setValues([
      'type_id' => $rel_cvterm_id,
      'subject_id' => $cvterm_id,
      'object_id' => $obj_cvterm_id,
    ]);

    // If the insert fails then catch the error and generate a more meaningful
    // message that helps with debugging. The original exception is chained
    // so its stack trace is not lost.
    try {
      $cvterm_relationship->insert();
    }
    catch (Exception $e) {
      throw new Exception(t('Cannot add relationship: "!source !rel !object". ' .
        'ERROR: !error.',
        ['!source' => $id, '!rel' => $rel_id, '!object' => $obj_id, '!error' => $e->getMessage()]),
        0, $e);
    }
  }
  /**
   * Retrieves the term stanza from the term cache for a given term id.
   *
   * When the cache type is 'table' the stanza is read from the
   * tripal_obo_temp table, where it is stored serialized and base64 encoded.
   * Otherwise it is read from the in-memory cache.
   *
   * @param $id
   *   The id of the term to retrieve.
   *
   * @return
   *   The stanza array for the term, or FALSE if the term is not cached.
   *
   * @ingroup tripal_obo_loader
   */
  private function getCachedTermStanza($id) {
    if ($this->cache_type == 'table') {
      $result = chado_select_record('tripal_obo_temp', ['stanza'], ['id' => $id]);
      // chado_select_record() yields a list of row objects, or FALSE on
      // failure. Both an empty list and FALSE mean "not cached".
      if (!$result) {
        return FALSE;
      }
      return unserialize(base64_decode($result[0]->stanza));
    }

    if (array_key_exists($id, $this->termStanzaCache['ids'])) {
      return $this->termStanzaCache['ids'][$id];
    }
    return FALSE;
  }
  /**
   * Tries to find a term in Chado from its database short name and accession.
   *
   * The lookup walks db -> dbxref -> cvterm -> cv. If any of the first three
   * is missing then nothing is returned. Otherwise a stanza is assembled from
   * the stored records.
   *
   * @param $short_name
   *   The term's ontology prefix (database short name).
   * @param $accession
   *   The term's accession.
   *
   * @return array|NULL
   *   A stanza array built from the Chado records, or NULL if the database,
   *   the dbxref or the cvterm cannot be found.
   */
  private function lookupTerm($short_name, $accession) {
    // An unknown database means the term cannot exist yet.
    if (!array_key_exists($short_name, $this->all_dbs)) {
      return NULL;
    }
    $db_record = $this->all_dbs[$short_name];

    // The accession must already be registered for that database.
    $xref_record = new ChadoRecord('dbxref');
    $xref_record->setValues([
      'db_id' => $db_record->db_id,
      'accession' => $accession,
    ]);
    if (!$xref_record->find()) {
      return NULL;
    }

    // The dbxref must be attached to a cvterm.
    $term_record = new ChadoRecord('cvterm');
    $term_record->setValues(['dbxref_id' => $xref_record->getID()]);
    if (!$term_record->find()) {
      return NULL;
    }

    // Load the vocabulary that owns the term.
    $cv_record = new ChadoRecord('cv');
    $cv_record->setValues(['cv_id' => $term_record->getValue('cv_id')]);
    $cv_record->find();

    // Assemble a stanza that mirrors what the OBO parser would have produced.
    return [
      'id' => [$short_name . ':' . $accession],
      'name' => [$term_record->getValue('name')],
      'def' => [$term_record->getValue('definition')],
      'namespace' => [$cv_record->getValue('name')],
      'is_obsolete' => [$term_record->getValue('is_obsolete')],
      'db_name' => [$db_record->name],
      'cv_name' => [$cv_record->getValue('name')],
    ];
  }
  /**
   * Adds a term stanza from the OBO file to the cache for easier lookup.
   *
   * Terms already in the cache are skipped. For terms whose ID prefix differs
   * from the default database, the stanza is replaced by the one found in
   * Chado or, failing that, by an EBI lookup. The stanza is then normalized
   * (modifiers stripped, synonyms simplified, duplicates removed) before it is
   * stored in either the temp table or the in-memory cache.
   *
   * @param $stanza
   *   The stanza from the OBO file for the term.
   * @param $type
   *   The stanza type under which the term is cached: Typedef, Term or
   *   Instance.
   *
   * @throws Exception
   *   If no default CV or DB is set, or the temp table insert fails.
   */
  private function cacheTermStanza($stanza, $type) {
    // Make sure we have defaults.
    if (!$this->default_namespace) {
      throw new Exception('Cannot cache terms without a default CV.' . print_r($stanza, TRUE));
    }
    if (!$this->default_db) {
      throw new Exception('Cannot cache terms without a default DB.' . print_r($stanza, TRUE));
    }

    $id = $stanza['id'][0];

    // First check if this term is already in the cache, if so then skip it.
    if ($this->getCachedTermStanza($id)) {
      return;
    }

    // Does this term have a database short name prefix in the ID (accession)?
    // If not then we'll add the default CV as the namespace. If it does and
    // the short name is not the default for this vocabulary then we'll look
    // it up.
    $matches = [];
    if (preg_match('/^(.+):(.+)$/', $id, $matches)) {
      $short_name = $matches[1];
      $accession = $matches[2];

      // If the term is borrowed then let's try to deal with it.
      if ($short_name != $this->default_db) {
        // First try to lookup the term and replace the stanza with the updated
        // details.
        $found = $this->lookupTerm($short_name, $accession);
        if ($found) {
          $stanza = $found;
        }
        // If we can't find the term in the database then do an EBI lookup.
        else {
          $stanza = $this->findEBITerm($id);
          // Make sure the DBs and CVs exist and are added to our cache.
          $this->addDB($stanza['db_name'][0]);
          $this->addCV($stanza['namespace'][0]);
        }
      }
      // If the term belongs to this OBO then let's set the 'db_name'.
      else {
        if (!array_key_exists('namespace', $stanza)) {
          $stanza['namespace'][] = $this->default_namespace;
        }
        $stanza['db_name'][] = $short_name;
      }

      // Make sure the db for this term is added to Chado. If it already is
      // then this function won't re-add it.
      $this->addDB($short_name);
    }
    // If there is no DB short name prefix for the id.
    else {
      if (!array_key_exists('namespace', $stanza)) {
        $stanza['namespace'][] = $this->default_namespace;
      }
      $stanza['db_name'][] = $this->default_db;
    }

    // Normalize the stanza BEFORE storing it so that both cache types hold
    // the same cleaned-up data.

    // The is_a and relationship fields can have constructs like this:
    // {is_inferred="true"}. We need to remove those if they exist.
    foreach (['is_a', 'relationship'] as $tag) {
      if (array_key_exists($tag, $stanza)) {
        foreach ($stanza[$tag] as $index => $value) {
          $stanza[$tag][$index] = trim(preg_replace('/\{.+?\}/', '', $value));
        }
      }
    }

    // Clean up any synonym definitions. We only handle the synonym in
    // quotes and the type.
    if (array_key_exists('synonym', $stanza)) {
      foreach ($stanza['synonym'] as $index => $synonym) {
        if (preg_match('/\"(.*?)\".*(EXACT|NARROW|BROAD|RELATED)/', $synonym, $matches)) {
          $stanza['synonym'][$index] = '"' . $matches[1] . '" ' . $matches[2];
        }
      }
    }

    // Now before saving, remove any duplicates. Sometimes the OBOs have
    // the same item duplicated in the stanza multiple times. This will
    // result in duplicate constraint violations in the tables. We can either
    // check on every insert if the record exists, increasing loading time, or
    // remove duplicates here.
    foreach ($stanza as $key => $values) {
      $stanza[$key] = array_unique($values);
    }

    if ($this->cache_type == 'table') {
      // Add the term to the temp table.
      $values = [
        'id' => $id,
        'stanza' => base64_encode(serialize($stanza)),
        'type' => $type,
      ];
      $success = chado_insert_record('tripal_obo_temp', $values);
      if (!$success) {
        throw new Exception("Cannot insert stanza into temporary table.");
      }
      return;
    }

    // In-memory cache: store the stanza and keep the per-type bookkeeping.
    $this->termStanzaCache['ids'][$id] = $stanza;
    if (!isset($this->termStanzaCache['count'][$type])) {
      $this->termStanzaCache['count'][$type] = 0;
    }
    $this->termStanzaCache['count'][$type]++;
    $this->termStanzaCache['types'][$type][] = $id;
  }
  /**
   * Returns the number of cached terms of a given type.
   *
   * @param $type
   *   The term type: Typedef, Term, etc.
   *
   * @return
   *   The count kept by the in-memory cache, or the number of matching rows
   *   in the tripal_obo_temp table when the table cache is in use.
   */
  private function getCacheSize($type) {
    // The in-memory cache keeps a running count per type.
    if ($this->cache_type != 'table') {
      return $this->termStanzaCache['count'][$type];
    }

    // The table cache has to be counted with a query.
    $sql = "
      SELECT count(*) as num_terms
      FROM {tripal_obo_temp}
      WHERE type = :type
    ";
    $row = chado_query($sql, [':type' => $type])->fetchObject();
    return $row->num_terms;
  }
  /**
   * Retrieves all cached terms for a given type.
   *
   * @param $type
   *   The term type: Typedef, Term, etc.
   *
   * @return
   *   If the cache is using the tripal_obo_temp table then it returns an
   *   iterable database handle whose rows carry the 'id' and the encoded
   *   'stanza' of each term. Otherwise it returns the array of term IDs kept
   *   by the in-memory cache for that type.
   */
  private function getCachedTermStanzas($type) {
    if ($this->cache_type == 'table') {
      // Filter on the requested type (not a fixed one), and return the stanza
      // alongside the id because cacheRelationships() reads it from the row.
      $sql = "SELECT id, stanza FROM {tripal_obo_temp} WHERE type = :type";
      return chado_query($sql, [':type' => $type]);
    }
    return $this->termStanzaCache['types'][$type];
  }
  /**
   * Clears the term cache.
   *
   * For the table cache every row of tripal_obo_temp is deleted. For the
   * in-memory cache the structure is reset to empty lists and zero counts for
   * the Typedef, Term and Instance types.
   */
  private function clearTermStanzaCache() {
    if ($this->cache_type != 'table') {
      $stanza_types = ['Typedef', 'Term', 'Instance'];
      $this->termStanzaCache = [
        'ids' => [],
        'count' => array_fill_keys($stanza_types, 0),
        'types' => array_fill_keys($stanza_types, []),
      ];
      return;
    }

    chado_query("DELETE FROM {tripal_obo_temp}");
  }
  /**
   * Adds a synonym to a term.
   *
   * @param $id
   *   The identifier of the term. It is only used in error messages.
   * @param $cvterm_id
   *   The cvterm_id of the term to which the synonym will be added.
   * @param $synonym
   *   The value of the 'synonym' line of the term stanza. If it contains a
   *   quoted string followed by EXACT, NARROW, BROAD or RELATED, those are
   *   used as the synonym text and type. Otherwise the whole value is the
   *   synonym text and the type defaults to 'exact'.
   *
   * @throws Exception
   *   If the synonym cannot be inserted.
   *
   * @ingroup tripal_obo_loader
   */
  private function addSynonym($id, $cvterm_id, $synonym) {
    $def = $synonym;
    $syn_type = 'exact';

    // Separate out the synonym definition and type (e.g. EXACT).
    $matches = [];
    if (preg_match('/\"(.*?)\".*(EXACT|NARROW|BROAD|RELATED)/', $synonym, $matches)) {
      $def = $matches[1];
      $syn_type = $matches[2];
    }

    // Get the syn type cvterm. The type in the OBO file is upper case while
    // the default above is lower case, so try the name as given and then its
    // lower-case form rather than dereferencing a missing entry.
    // NOTE(review): the case used for the keys of $this->syn_types is not
    // visible here -- confirm where that property is populated.
    $type_id = NULL;
    foreach ([$syn_type, strtolower($syn_type)] as $type_key) {
      if (isset($this->syn_types[$type_key])) {
        $type_id = $this->syn_types[$type_key]->cvterm_id;
        break;
      }
    }

    // Now save the new synonym.
    $cvtermsynonym = new ChadoRecord('cvtermsynonym');
    $cvtermsynonym->setValues([
      'cvterm_id' => $cvterm_id,
      'synonym' => $def,
      'type_id' => $type_id,
    ]);

    // If the insert fails then catch the error and generate a more meaningful
    // message that helps with debugging.
    try {
      $cvtermsynonym->insert();
    }
    catch (Exception $e) {
      throw new Exception(t('Cannot add synonym, "!synonym" to term: !id. ' .
        'ERROR: !error.',
        ['!synonym' => $def, '!id' => $id, '!error' => $e->getMessage()]), 0, $e);
    }
  }
  /**
   * Parses the OBO file and populates the term cache.
   *
   * Lines before the first stanza are collected into $header. When the first
   * stanza is reached setDefaults() is called with the header. Every stanza
   * is then passed to cacheTermStanza(), and any namespace it declares is
   * recorded in $this->obo_namespaces. Progress is reported in bytes read.
   *
   * @param $obo_file
   *   The path on the file system where the ontology can be found.
   * @param $header
   *   An array passed by reference that will be populated with the header
   *   information from the OBO file. Each tag maps to a list of its values.
   *
   * @throws Exception
   *   If the file cannot be opened, or if caching a stanza fails.
   *
   * @ingroup tripal_obo_loader
   */
  private function parse($obo_file, &$header) {
    // TRUE while we are in the top header lines of the file.
    $in_header = TRUE;
    // Holds the full stanza for the term.
    $stanza = [];
    // Number of bytes read so far (the progress total is the file size).
    $num_read = 0;
    // The type of term: Typedef or Term (inside the [] brackets).
    $type = '';

    $filesize = filesize($obo_file);
    $this->setTotalItems($filesize);
    $this->setItemsHandled(0);
    $this->setInterval(5);

    // Records the namespace of a finished stanza and caches the stanza.
    // A namespace that has no CV yet is recorded with a NULL cv_id. The loop
    // at the end of this function adds the missing CV records.
    $store_stanza = function ($finished, $finished_type) {
      if (array_key_exists('namespace', $finished)) {
        $namespace = $finished['namespace'][0];
        $this->obo_namespaces[$namespace] = isset($this->all_cvs[$namespace]) ? $this->all_cvs[$namespace]->cv_id : NULL;
      }
      $this->cacheTermStanza($finished, $finished_type);
    };

    $fh = fopen($obo_file, 'r');
    if (!$fh) {
      throw new Exception(t('Cannot open the OBO file: !file.', ['!file' => $obo_file]));
    }

    // Iterate through the lines in the OBO file and parse the stanzas. The
    // handle is closed even when caching a stanza throws.
    try {
      // Compare against FALSE explicitly so a line such as "0" does not end
      // the loop early.
      while (($line = fgets($fh)) !== FALSE) {
        // Count bytes (not characters) to stay in step with filesize().
        $num_read += strlen($line);
        $this->setItemsHandled($num_read);

        // Remove newlines and surrounding white space.
        $line = trim($line);

        // Remove any special characters that may be hiding.
        $line = preg_replace('/[^(\x20-\x7F)]*/', '', $line);

        // Skip empty lines.
        if ($line === '') {
          continue;
        }

        // Remove comments from end of lines. A line that held nothing but a
        // comment has no tag to record.
        $line = preg_replace('/^(.*?)\!.*$/', '\1', $line);
        if ($line === '') {
          continue;
        }

        // At the first stanza we're out of header.
        if (preg_match('/^\s*\[/', $line)) {
          // After parsing the header we need to get information about this
          // OBO.
          if ($in_header == TRUE) {
            $this->setDefaults($header);
            $in_header = FALSE;
          }

          // Store the stanza we just finished reading.
          if (sizeof($stanza) > 0) {
            $store_stanza($stanza, $type);
          }

          // Get the stanza type: Term, Typedef or Instance.
          $type = preg_replace('/^\s*\[\s*(.+?)\s*\]\s*$/', '\1', $line);

          // Start fresh with a new array.
          $stanza = [];
          continue;
        }

        // Break apart the line into the tag and value but ignore any escaped
        // colons. They are temporarily replaced by a marker and put back
        // once the line has been split.
        $line = preg_replace('/\\\\:/', '|-|-|', $line);
        $pair = explode(":", $line, 2);
        $tag = str_replace('|-|-|', '\\:', $pair[0]);
        // A line without a colon yields a tag with an empty value.
        $value = isset($pair[1]) ? str_replace('|-|-|', '\\:', trim($pair[1])) : '';

        if ($in_header) {
          if (!array_key_exists($tag, $header)) {
            $header[$tag] = [];
          }
          $header[$tag][] = $value;
        }
        else {
          if (!array_key_exists($tag, $stanza)) {
            $stanza[$tag] = [];
          }
          $stanza[$tag][] = $value;
        }
      }
    }
    catch (Exception $e) {
      fclose($fh);
      throw $e;
    }
    fclose($fh);

    // Now add the last term in the file.
    if (sizeof($stanza) > 0) {
      $store_stanza($stanza, $type);
      $this->setItemsHandled($num_read);
    }

    // Make sure there are CV records for all namespaces.
    $message = t('Found the following namespaces: !namespaces.',
      ['!namespaces' => implode(', ', array_keys($this->obo_namespaces))]);
    foreach ($this->obo_namespaces as $namespace => $cv_id) {
      $this->addCV($namespace);
    }
    $this->logMessage($message);
  }
  /**
   * Iterates through all of the cached terms and caches any terms that are
   * referenced by their relationships.
   *
   * For every cached 'Term' the objects of its is_a lines, and both the
   * relationship type and the object of its relationship lines, are passed
   * to cacheTermStanza(). Finally a built-in 'is_a' Typedef is cached if the
   * vocabulary did not provide one.
   */
  private function cacheRelationships() {
    // Now that we have all of the terms parsed and loaded into the cache,
    // let's run through them one more time and cache any terms in
    // relationships as well.
    $terms = $this->getCachedTermStanzas('Term');
    $count = $this->getCacheSize('Term');
    $this->setTotalItems($count);
    $this->setItemsHandled(0);
    $this->setInterval(25);

    // Iterate through the terms.
    $i = 1;
    foreach ($terms as $t) {
      // TODO: it would be better if we had a term iterator so that we
      // don't have to distinguish here between the table vs memory cache type.
      if ($this->cache_type == 'table') {
        // Use the stanza from the row when the query returned it, otherwise
        // look it up by the row's id.
        if (isset($t->stanza)) {
          $stanza = unserialize(base64_decode($t->stanza));
        }
        else {
          $stanza = $this->getCachedTermStanza($t->id);
        }
      }
      else {
        $stanza = $this->termStanzaCache['ids'][$t];
      }

      if (is_array($stanza)) {
        // Check if this stanza has an is_a relationship that needs lookup.
        if (array_key_exists('is_a', $stanza)) {
          foreach ($stanza['is_a'] as $object_term) {
            $this->cacheTermStanza(['id' => [$object_term]], 'Term');
          }
        }

        // Check if this stanza has any additional relationships for lookup.
        if (array_key_exists('relationship', $stanza)) {
          foreach ($stanza['relationship'] as $value) {
            // Get the relationship term and the object term.
            $rel_term = preg_replace('/^(.+?)\s.+?$/', '\1', $value);
            $object_term = preg_replace('/^.+?\s(.+?)$/', '\1', $value);

            $this->cacheTermStanza(['id' => [$rel_term]], 'Typedef');
            $this->cacheTermStanza(['id' => [$object_term]], 'Term');
          }
        }
      }

      // Report progress once per term handled.
      $this->setItemsHandled($i++);
    }

    // Last of all, we need to add the "is_a" relationship. It's part of the
    // OBO specification as a built-in relationship but not all vocabularies
    // include that term.
    if (!$this->getCachedTermStanza('is_a')) {
      $stanza = [];
      $stanza['id'][] = 'is_a';
      $stanza['name'][] = 'is_a';
      $stanza['namespace'][] = $this->default_namespace;
      $stanza['db_name'][] = $this->default_db;
      $this->cacheTermStanza($stanza, 'Typedef');
    }
  }
  /**
   * Adds a property to the cvterm indicating it belongs to a subset.
   *
   * The property type is the entry stored under 'NCIT:C25693' in
   * $this->used_terms.
   *
   * @param $id
   *   The identifier of the term. It is only used in error messages.
   * @param $cvterm_id
   *   The cvterm_id of the term to which the subset will be added.
   * @param $subset
   *   The name of the subset.
   *
   * @throws Exception
   *   If the property cannot be inserted.
   */
  private function addSubset($id, $cvterm_id, $subset) {
    $property = new ChadoRecord('cvtermprop');
    $property_values = [
      'cvterm_id' => $cvterm_id,
      'type_id' => $this->used_terms['NCIT:C25693'],
      'value' => $subset,
    ];
    $property->setValues($property_values);

    // Wrap a failed insert in a message that names the subset and the term.
    try {
      $property->insert();
    }
    catch (Exception $error) {
      $message = t('Cannot add subset, "!subset" to term: !id. ' .
        'ERROR: !error.',
        ['!subset' => $subset, '!id' => $id, '!error' => $error->getMessage()]);
      throw new Exception($message);
    }
  }
  /**
   * Adds a database to Chado if it doesn't exist.
   *
   * Databases are tracked in $this->all_dbs, keyed by name. A name missing
   * from that list is assumed not to exist in Chado and is inserted.
   *
   * @param $dbname
   *   The name of the database to add.
   *
   * @return
   *   A Chado database object.
   */
  private function addDB($dbname) {
    // Only insert when the database is not already known.
    if (!array_key_exists($dbname, $this->all_dbs)) {
      $record = new ChadoRecord('db');
      $record->setValues(['name' => $dbname]);
      $record->insert();
      // Remember the new record as a plain object, like the other entries.
      $this->all_dbs[$dbname] = (object) $record->getValues();
    }
    return $this->all_dbs[$dbname];
  }
  /**
   * Adds a vocabulary to Chado if it doesn't exist.
   *
   * Vocabularies are tracked in $this->all_cvs, keyed by name. A name missing
   * from that list is assumed not to exist in Chado: it is inserted, added
   * to the list and registered in $this->obo_namespaces.
   *
   * @param $cvname
   *   The name of the vocabulary to add.
   *
   * @return
   *   A Chado cv object.
   */
  private function addCV($cvname) {
    // TODO: we need to get the description and title from EBI for these
    // ontology so that we can put something in the proper fields when
    // adding a new CV or DB.

    // A known vocabulary is returned from the CV list (not the DB list).
    if (array_key_exists($cvname, $this->all_cvs)) {
      return $this->all_cvs[$cvname];
    }

    // If it's not in the cache we can assume it doesn't exist and insert.
    $cv = new ChadoRecord('cv');
    $cv->setValues(['name' => $cvname]);
    $cv->insert();
    $cv = (object) $cv->getValues();
    $this->all_cvs[$cvname] = $cv;
    $this->obo_namespaces[$cvname] = $cv->cv_id;
    return $cv;
  }
  /**
   * Indicates if the term belongs to this OBO or if it was borrowed.
   *
   * @param $stanza
   *   The term stanza. Its first 'namespace' value is checked against the
   *   namespaces recorded in $this->obo_namespaces.
   *
   * @return
   *   FALSE if the namespace is one of this OBO's namespaces, TRUE otherwise.
   */
  private function isTermBorrowed($stanza) {
    $term_namespace = $stanza['namespace'][0];
    return !array_key_exists($term_namespace, $this->obo_namespaces);
  }
  /**
   * Adds an alternative ID to a term.
   *
   * The alternative ID is split at its first colon into a database name and
   * an accession. The database and the dbxref are created if needed, and the
   * dbxref is then linked to the term.
   *
   * @param $id
   *   The identifier of the term. It is not used by this function.
   * @param $cvterm_id
   *   The cvterm_id of the term to which the alternative ID will be added.
   * @param $alt_id
   *   The cross reference. It should be of the form from the OBO
   *   specification.
   *
   * @throws Exception
   *   If no accession can be extracted from the alternative ID.
   *
   * @ingroup tripal_obo_loader
   */
  private function addAltID($id, $cvterm_id, $alt_id) {
    // Split "DB:ACCESSION". Both parts stay empty when there is no colon.
    $parts = [];
    $has_prefix = preg_match('/^(.+?):(.*)$/', $alt_id, $parts);
    $dbname = $has_prefix ? $parts[1] : '';
    $accession = $has_prefix ? $parts[2] : '';
    if (!$accession) {
      throw new Exception("Cannot add an Alt ID without an accession: '$alt_id'");
    }

    // Add the database if it doesn't exist.
    $db_id = $this->addDB($dbname)->db_id;

    // Re-use the dbxref when it is already there, otherwise create it.
    $xref_record = new ChadoRecord('dbxref');
    $xref_record->setValues([
      'db_id' => $db_id,
      'accession' => $accession,
    ]);
    if (!$xref_record->find()) {
      $xref_record->insert();
    }

    // Link the dbxref to the term.
    $link_record = new ChadoRecord('cvterm_dbxref');
    $link_record->setValues([
      'cvterm_id' => $cvterm_id,
      'dbxref_id' => $xref_record->getID(),
    ]);
    $link_record->insert();
  }
  /**
   * Adds a database reference to a cvterm.
   *
   * The database name is the text before the first colon of the xref. The
   * accession is the text after it, up to the first white space, quote,
   * '{' or '['. Xrefs that are web links (http or https) are skipped.
   *
   * @param $id
   *   The identifier of the term. It is not used by this function.
   * @param $cvterm_id
   *   The cvterm_id of the term to which the xref will be added.
   * @param $xref
   *   The cross reference. It should be of the form from the OBO
   *   specification.
   *
   * @throws Exception
   *   If no accession can be extracted from the xref.
   *
   * @ingroup tripal_obo_loader
   */
  private function addXref($id, $cvterm_id, $xref) {
    $dbname = preg_replace('/^(.+?):.*$/', '$1', $xref);
    $accession = preg_replace('/^.+?:\s*(.*?)(\{.+$|\[.+$|\s.+$|\".+$|$)/', '$1', $xref);

    if (!$accession) {
      throw new Exception("Cannot add an xref without an accession: '$xref'");
    }

    // If the xref is a web link then skip those for now. Both schemes are
    // checked so that an https link does not create a database named "https".
    if (in_array($dbname, ['http', 'https'], TRUE)) {
      return;
    }

    // Add the database if it doesn't exist.
    $db = $this->addDB($dbname);
    $db_id = $db->db_id;

    // Now add the dbxref if it doesn't already exist.
    $dbxref = new ChadoRecord('dbxref');
    $dbxref->setValues([
      'db_id' => $db_id,
      'accession' => $accession,
    ]);
    if (!$dbxref->find()) {
      $dbxref->insert();
    }

    // Now add the cvterm_dbxref record.
    $cvterm_dbxref = new ChadoRecord('cvterm_dbxref');
    $cvterm_dbxref->setValues([
      'cvterm_id' => $cvterm_id,
      'dbxref_id' => $dbxref->getID(),
    ]);
    $cvterm_dbxref->insert();
  }
  /**
   * Adds a comment to a cvterm.
   *
   * The comment is stored as a cvtermprop whose type is the entry stored
   * under 'rdfs:comment' in $this->used_terms.
   *
   * @param $id
   *   The identifier of the term. It is only used in error messages.
   * @param $cvterm_id
   *   A cvterm_id of the term to which the comment will be added.
   * @param $comment
   *   The comment to add to the cvterm.
   * @param $rank
   *   The rank of the comment.
   *
   * @throws Exception
   *   If the property cannot be inserted.
   *
   * @ingroup tripal_obo_loader
   */
  private function addComment($id, $cvterm_id, $comment, $rank) {
    // Describe the property: the comment text typed as an rdfs:comment.
    $property_values = [
      'cvterm_id' => $cvterm_id,
      'type_id' => $this->used_terms['rdfs:comment'],
      'value' => $comment,
      'rank' => $rank,
    ];
    $property = new ChadoRecord('cvtermprop');
    $property->setValues($property_values);

    // Wrap a failed insert in a message that names the comment and the term.
    try {
      $property->insert();
    }
    catch (Exception $error) {
      $message = t('Cannot add comment, "!comment" to term: !id. ' .
        'ERROR: !error.',
        ['!comment' => $comment, '!id' => $id, '!error' => $error->getMessage()]);
      throw new Exception($message);
    }
  }
  /**
   * API call to Ontology Lookup Service provided by
   * https://www.ebi.ac.uk/ols/docs/api#resources-terms
   *
   * @param $accession
   *   Accession term for query. The text before the first colon, lower-cased
   *   and stripped of white space, is used as the ontology name.
   * @param $type_of_search
   *   Either ontology, term, query, or query-non-local.
   *
   * @return
   *   The decoded JSON response, or NULL if the search type is not recognized
   *   or the request returned no data.
   *
   * @ingroup tripal_obo_loader
   */
  private function oboEbiLookup($accession, $type_of_search) {
    // Grab just the ontology from the $accession.
    $parts = explode(':', $accession);
    $ontology = preg_replace('/\s+/', '', strtolower($parts[0]));

    $base_url = 'http://www.ebi.ac.uk/ols/api/';
    switch ($type_of_search) {
      case 'ontology':
        $full_url = $base_url . 'ontologies/' . $ontology;
        break;

      case 'term':
        // The IRI of the terms, this value must be double URL encoded.
        $iri = urlencode(urlencode("http://purl.obolibrary.org/obo/" . str_replace(':', '_', $accession)));
        $full_url = $base_url . 'ontologies/' . $ontology . '/' . 'terms/' . $iri;
        break;

      case 'query':
        // The accession goes into a query string, so it must be encoded.
        $full_url = $base_url . 'search?q=' . urlencode($accession) . '&queryFields=obo_id&local=true';
        break;

      case 'query-non-local':
        $full_url = $base_url . 'search?q=' . urlencode($accession) . '&queryFields=obo_id';
        break;

      default:
        // Unknown search type: there is nothing to request.
        return NULL;
    }

    $response = drupal_http_request($full_url, []);
    // A failed request may carry no body at all.
    if (empty($response) || !isset($response->data)) {
      return NULL;
    }
    return drupal_json_decode($response->data);
  }
  2069. }
  /**
   * Ajax callback for the OBOImporter::form() function.
   *
   * @param $form
   *   The form array.
   * @param $form_state
   *   The form state. It is not used.
   *
   * @return
   *   The 'obo_existing' element of the form.
   */
  function tripal_cv_obo_form_ajax_callback($form, $form_state) {
    $existing_element = $form['obo_existing'];
    return $existing_element;
  }