12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
722782279228022812282228322842285228622872288228922902291229222932294229522962297229822992300230123022303230423052306230723082309231023112312231323142315231623172318231923202321232223232324232523262327232823292330233123322333233423352336233723382339234023412342234323442345234623472348234923502351235223532354235523562357235823592360236123622363236423652366236723682369237023712372237323742375237623772378237923802381238223832384238523862387238823892390239123922393239423952396239723982399240024012402240324042405240624072408240924102411241224132414241524162417241824192420242124222423242424252426242724282429243024312432 |
- <?php
- class OBOImporter extends TripalImporter {
// --------------------------------------------------------------------------
// STATIC IMPORTER SETTINGS
//
// The values below describe this importer (labels, accepted files, menu
// location). Each descendant importer class is expected to set them.
// --------------------------------------------------------------------------

/**
 * Human-readable name of this loader, as presented to the site user.
 */
public static $name = 'OBO Vocabulary Loader';

/**
 * Machine name of this loader; it is used to construct the loader's URL.
 */
public static $machine_name = 'chado_obo_loader';

/**
 * Short description of this loader, as presented to the site user.
 */
public static $description = 'Import vocabularies and terms in OBO format.';

/**
 * File extensions that this loader accepts.
 */
public static $file_types = ['obo'];

/**
 * Help text about the file upload, e.g. which file types are allowed.
 */
public static $upload_description = 'Please provide the details for importing a new OBO file. The file must have a .obo extension.';

/**
 * Title shown above the upload button.
 */
public static $upload_title = 'New OBO File';

/**
 * Whether the loader asks for an analysis record.
 *
 * Tripal tracks provenance by associating uploaded files with an analysis
 * record that describes how the file was created or obtained. When this is
 * TRUE the submitted form values carry an 'analysis_id' key in $form_state.
 * It is FALSE here, so this loader does not use an analysis.
 */
public static $use_analysis = FALSE;

/**
 * Whether the analysis is mandatory; only relevant when $use_analysis is set.
 */
public static $require_analysis = TRUE;

/**
 * Label of the button at the bottom of the importer form.
 */
public static $button_text = 'Import OBO File';

/**
 * File-supplying methods supported by the uploader. All are switched off
 * for this loader.
 */
public static $methods = [
  // Uploading a file to the server.
  'file_upload' => FALSE,
  // Giving the path of a file that is already on the Tripal server.
  'file_local' => FALSE,
  // Giving a remote URL for the file.
  'file_remote' => FALSE,
];

/**
 * Menu path for this loader.
 *
 * By default all loaders are added to the Admin > Tripal > Data Loaders
 * menu. A non-empty value here makes the loader available under a different
 * path instead; an empty value keeps the default.
 */
public static $menu_path = 'admin/tripal/loaders/chado_vocabs/obo_loader';

// NOTE(review): presumably tells the base importer that supplying a file is
// optional for this loader -- confirm against TripalImporter.
public static $file_required = FALSE;
/**
 * Vocabularies (namespaces) that have been added during this import.
 *
 * postRun() iterates this as namespace => cv_id pairs.
 *
 * @var array
 */
private $obo_namespaces = [];

/**
 * All CVs on this site, keyed by CV name. Loaded once in run() so that
 * later lookups do not need a query.
 *
 * @var array
 */
private $all_cvs = [];

/**
 * All DBs on this site, keyed by DB name. Loaded once in run() so that
 * later lookups do not need a query.
 *
 * @var array
 */
private $all_dbs = [];

/**
 * The cvterm records of the synonym types, keyed by type name. They are
 * needed when adding synonyms and are filled in by run().
 *
 * @var array
 */
private $syn_types = [
  'exact' => NULL,
  'broad' => NULL,
  'narrow' => NULL,
  'related' => NULL,
];

/**
 * In-memory cache of term stanzas; an alternative to the temp_obo table.
 *
 * @var array
 */
private $termStanzaCache = [
  'ids' => [],
  'count' => [
    'Typedef' => 0,
    'Term' => 0,
    'Instance' => 0,
  ],
  'types' => [
    'Typedef' => [],
    'Term' => [],
    'Instance' => [],
  ],
];

/**
 * How terms are cached: 'memory' uses $termStanzaCache, 'table' uses the
 * tripal_obo_temp table.
 *
 * @var string
 */
private $cache_type = 'memory';

/**
 * Namespace applied to every term whose stanza has no 'namespace' of its
 * own.
 *
 * @var string
 */
private $default_namespace = '';

/**
 * The default database prefix for this ontology.
 *
 * @var string
 */
private $default_db = '';

/**
 * Terms that have already been looked up, kept so that they do not have to
 * be looked up repeatedly.
 *
 * @var array
 */
private $used_terms = [];

/**
 * Base IRIs returned from the EBI OLS lookup service, kept so that OLS is
 * not queried again and again for the same ontology.
 *
 * @var array
 */
private $baseIRIs = [];

/**
 * Whether the user has already been warned about slowness when doing EBI
 * lookups.
 *
 * @var bool
 */
private $ebi_warned = FALSE;

/**
 * Whether this ontology is just a subset of a much larger one (the GO slims
 * are an example).
 *
 * @var bool
 */
private $is_subset = FALSE;

/**
 * Term names seen so far.
 *
 * An OBO can define two terms with the same name but different IDs (e.g.
 * GO:0001404 and GO:0007125). Tracking the names as they are seen makes it
 * easy to find and deal with such cases later.
 *
 * @var array
 */
private $term_names = [];
/**
 * Builds the OBO loader form.
 *
 * The form has two parts: a fieldset for picking (and optionally editing) an
 * OBO reference already stored in the tripal_cv_obo table, and a collapsed
 * fieldset for registering a new OBO reference by name plus URL or local
 * path.
 *
 * @param array $form
 *   The form array to add elements to.
 * @param array $form_state
 *   The current form state. The selected 'obo_id' is read from its 'values'
 *   and stale 'input' entries for the edit fields are removed.
 *
 * @return array
 *   The form array with the loader elements added.
 *
 * @see TripalImporter::form()
 */
public function form($form, &$form_state) {
  // Build the select options from the saved OBO references. Key 0 is the
  // "nothing selected" entry.
  $obos = ['Select a Vocabulary'];
  $results = db_query("SELECT * FROM {tripal_cv_obo} ORDER BY name");
  foreach ($results as $obo) {
    $obos[$obo->obo_id] = $obo->name;
  }

  $obo_id = '';
  if (array_key_exists('values', $form_state) and array_key_exists('obo_id', $form_state['values'])) {
    $obo_id = $form_state['values']['obo_id'];
  }

  $form['instructions']['info'] = [
    '#type' => 'item',
    '#markup' => t('This page allows you to load vocabularies and ontologies
      that are in OBO format. Once loaded, the terms from these
      vocabularies can be used to create content.
      You may use the form below to either reload a vocabulary that is already
      loaded (as when new updates to that vocabulary are available) or load a new
      vocabulary.'),
  ];

  $form['obo_existing'] = [
    '#type' => 'fieldset',
    '#title' => t('Use a Saved Ontology OBO Reference'),
    '#prefix' => '<span id="obo-existing-fieldset">',
    '#suffix' => '</span>',
  ];
  $form['obo_existing']['existing_instructions'] = [
    '#type' => 'item',
    '#markup' => t('The vocabularies listed in the select box below have bene pre-populated
      upon installation of Tripal or have been previously loaded. Select one to edit
      its settings or submit for loading. You may reload any vocabulary that has
      already been loaded to retrieve any new updates.'),
  ];
  $form['obo_existing']['obo_id'] = [
    '#title' => t('Ontology OBO File Reference'),
    '#type' => 'select',
    '#options' => $obos,
    '#ajax' => [
      'callback' => 'tripal_cv_obo_form_ajax_callback',
      'wrapper' => 'obo-existing-fieldset',
    ],
    '#description' => t('Select a vocabulary to import.'),
  ];

  // If the user has selected an OBO ID then add the elements for updating
  // its details.
  if ($obo_id) {
    $uobo_name = '';
    $uobo_url = '';
    $uobo_file = '';

    $vocab = db_select('tripal_cv_obo', 't')
      ->fields('t', ['name', 'path'])
      ->condition('obo_id', $obo_id)
      ->execute()
      ->fetchObject();

    // The record may be gone (e.g. a stale ID); only read it if it was found.
    if ($vocab) {
      $uobo_name = $vocab->name;
      if (preg_match('/^http/', $vocab->path)) {
        $uobo_url = $vocab->path;
      }
      else {
        $uobo_file = trim($vocab->path);
        // A path may start with a {module_name} token that stands for the
        // module's directory.
        $matches = [];
        if (preg_match('/\{(.*?)\}/', $uobo_file, $matches)) {
          $modpath = drupal_get_path('module', $matches[1]);
          $uobo_file = preg_replace('/\{.*?\}/', $modpath, $uobo_file);
        }
      }
    }

    // We don't want the previously submitted values to remain. We want the
    // new defaults to show up, so remove the input values.
    unset($form_state['input']['uobo_name']);
    unset($form_state['input']['uobo_url']);
    unset($form_state['input']['uobo_file']);

    $form['obo_existing']['uobo_name'] = [
      '#type' => 'textfield',
      '#title' => t('Vocabulary Name'),
      '#description' => t('Please provide a name for this vocabulary. After upload, this name will appear in the drop down
        list above for use again later.'),
      '#default_value' => $uobo_name,
    ];
    $form['obo_existing']['uobo_url'] = [
      '#type' => 'textfield',
      '#title' => t('Remote URL'),
      '#description' => t('Please enter a URL for the online OBO file. The file will be downloaded and parsed.
        (e.g. http://www.obofoundry.org/ro/ro.obo)'),
      '#default_value' => $uobo_url,
    ];
    $form['obo_existing']['uobo_file'] = [
      '#type' => 'textfield',
      '#title' => t('Local File'),
      '#description' => t('Please enter the file system path for an OBO
        definition file. If entering a path relative to
        the Drupal installation you may use a relative path that excludes the
        Drupal installation directory (e.g. sites/default/files/xyz.obo). Note
        that Drupal relative paths have no preceeding slash.
        Otherwise, please provide the full path on the filesystem. The path
        must be accessible to the web server on which this Drupal instance is running.'),
      '#default_value' => $uobo_file,
    ];
    $form['obo_existing']['update_obo_details'] = [
      '#type' => 'submit',
      '#value' => 'Update Ontology Details',
      '#name' => 'update_obo_details',
    ];
  }

  $form['obo_new'] = [
    '#type' => 'fieldset',
    '#title' => t('Add a New Ontology OBO Reference'),
    '#collapsible' => TRUE,
    '#collapsed' => TRUE,
  ];
  // Rendered as an 'item' with '#markup', like the other instruction texts
  // in this form; a bare '#value' without a '#type' is not displayed.
  $form['obo_new']['path_instructions'] = [
    '#type' => 'item',
    '#markup' => t('Provide the name and path for the OBO file. If the vocabulary OBO file
      is stored local to the server provide a file name. If the vocabulry is stored remotely,
      provide a URL. Only provide a URL or a local file, not both.'),
  ];
  $form['obo_new']['obo_name'] = [
    '#type' => 'textfield',
    '#title' => t('New Vocabulary Name'),
    '#description' => t('Please provide a name for this vocabulary. After upload, this name will appear in the drop down
      list above for use again later. Additionally, if a default namespace is not provided in the OBO
      header this name will be used as the default_namespace.'),
  ];
  $form['obo_new']['obo_url'] = [
    '#type' => 'textfield',
    '#title' => t('Remote URL'),
    '#description' => t('Please enter a URL for the online OBO file. The file will be downloaded and parsed.
      (e.g. http://www.obofoundry.org/ro/ro.obo)'),
  ];
  $form['obo_new']['obo_file'] = [
    '#type' => 'textfield',
    '#title' => t('Local File'),
    '#description' => t('Please enter the file system path for an OBO
      definition file. If entering a path relative to
      the Drupal installation you may use a relative path that excludes the
      Drupal installation directory (e.g. sites/default/files/xyz.obo). Note
      that Drupal relative paths have no preceeding slash.
      Otherwise, please provide the full path on the filesystem. The path
      must be accessible to the web server on which this Drupal instance is running.'),
  ];

  return $form;
}
/**
 * Handles submission of the OBO loader form.
 *
 * When the 'Update Ontology Details' button was clicked, the selected
 * tripal_cv_obo record is updated and the form is rebuilt. Otherwise, when a
 * new vocabulary name was entered, a new tripal_cv_obo record is inserted
 * and its ID is stored in $form_state['values']['obo_id'].
 *
 * @param array $form
 *   The submitted form.
 * @param array $form_state
 *   The form state; 'rebuild' and 'values'['obo_id'] may be set.
 *
 * @see TripalImporter::formSubmit()
 */
public function formSubmit($form, &$form_state) {
  $values = $form_state['values'];
  $obo_id = $values['obo_id'];
  $obo_name = trim($values['obo_name']);
  $obo_url = trim($values['obo_url']);
  $obo_file = trim($values['obo_file']);
  // The update fields only exist once a saved vocabulary has been selected.
  $uobo_name = array_key_exists('uobo_name', $values) ? trim($values['uobo_name']) : '';
  $uobo_url = array_key_exists('uobo_url', $values) ? trim($values['uobo_url']) : '';
  $uobo_file = array_key_exists('uobo_file', $values) ? trim($values['uobo_file']) : '';

  // If the user requested to alter the details then do that.
  if ($form_state['clicked_button']['#name'] == 'update_obo_details') {
    $form_state['rebuild'] = TRUE;
    $success = db_update('tripal_cv_obo')
      ->fields([
        'name' => $uobo_name,
        // A URL takes precedence over a local path.
        'path' => $uobo_url ? $uobo_url : $uobo_file,
      ])
      ->condition('obo_id', $obo_id)
      ->execute();
    // The name is user input, so it is inserted with the escaping '@'
    // placeholder rather than the raw '!' one.
    if ($success) {
      drupal_set_message(t("The vocabulary @vocab has been updated.", ['@vocab' => $uobo_name]));
    }
    else {
      drupal_set_message(t("The vocabulary @vocab could not be updated.", ['@vocab' => $uobo_name]), 'error');
    }
  }
  elseif (!empty($obo_name)) {
    $obo_id = db_insert('tripal_cv_obo')
      ->fields([
        'name' => $obo_name,
        'path' => $obo_url ? $obo_url : $obo_file,
      ])
      ->execute();
    // Add the obo_id to the form_state values.
    $form_state['values']['obo_id'] = $obo_id;
    if ($obo_id) {
      drupal_set_message(t("The vocabulary @vocab has been added.", ['@vocab' => $obo_name]));
    }
    else {
      $form_state['rebuild'] = TRUE;
      drupal_set_message(t("The vocabulary @vocab could not be added.", ['@vocab' => $obo_name]), 'error');
    }
  }
}
/**
 * Validates the OBO loader form.
 *
 * Depending on the clicked button, either the "update" fields (uobo_*) or
 * the "new vocabulary" fields (obo_*) are checked: the name must not clash
 * with another saved vocabulary, a given local path must exist, and exactly
 * one of URL and local path must be provided.
 *
 * @param array $form
 *   The submitted form.
 * @param array $form_state
 *   The form state holding the submitted values and the clicked button.
 *
 * @see TripalImporter::formValidate()
 */
public function formValidate($form, &$form_state) {
  $values = $form_state['values'];
  $button = $form_state['clicked_button']['#name'];

  // Make sure if the name is changed it doesn't conflict with another OBO.
  if ($button == 'update_obo_details' or $button == 'update_load_obo') {
    // The update fields only exist once a saved vocabulary has been selected.
    $this->validateOboReference(
      'uobo',
      array_key_exists('uobo_name', $values) ? trim($values['uobo_name']) : '',
      array_key_exists('uobo_url', $values) ? trim($values['uobo_url']) : '',
      array_key_exists('uobo_file', $values) ? trim($values['uobo_file']) : '',
      $values['obo_id']
    );
  }

  if ($button == 'add_new_obo') {
    $this->validateOboReference(
      'obo',
      trim($values['obo_name']),
      trim($values['obo_url']),
      trim($values['obo_file'])
    );
  }
}

/**
 * Checks one name/URL/path triple and flags form errors for it.
 *
 * @param string $field_prefix
 *   Prefix of the form element names to flag: 'uobo' or 'obo'.
 * @param string $name
 *   The vocabulary name entered by the user.
 * @param string $url
 *   The remote URL entered by the user (may be empty).
 * @param string $file
 *   The local path entered by the user (may be empty).
 * @param mixed $own_obo_id
 *   The obo_id of the record being edited; a record with this ID may keep
 *   its name. NULL when a new record is being added.
 */
private function validateOboReference($field_prefix, $name, $url, $file, $own_obo_id = NULL) {
  // Look for a saved vocabulary that already uses this name.
  $vocab = db_select('tripal_cv_obo', 't')
    ->fields('t', ['obo_id', 'name', 'path'])
    ->condition('name', $name)
    ->execute()
    ->fetchObject();
  if ($vocab and ($own_obo_id === NULL or $vocab->obo_id != $own_obo_id)) {
    form_set_error($field_prefix . '_name', 'The vocabulary name must be different from existing vocabularies');
  }

  // Make sure the file exists. First check if it is a path relative to the
  // Drupal installation, then try it as given. Nothing to check when no
  // path was entered.
  if ($file !== '') {
    $dfile = $_SERVER['DOCUMENT_ROOT'] . base_path() . $file;
    if (!file_exists($dfile) and !file_exists($file)) {
      // The placeholder values belong to t(), not to form_set_error(); the
      // path is user input, hence the escaping '@' placeholder.
      form_set_error($field_prefix . '_file', t('The specified path, @path, does not exist or cannot be read.', ['@path' => $dfile]));
    }
  }

  if (!$url and !$file) {
    form_set_error($field_prefix . '_url', 'Please provide a URL or a path for the vocabulary.');
  }
  if ($url and $file) {
    form_set_error($field_prefix . '_url', 'Please provide only a URL or a path for the vocabulary, but not both.');
  }
}
/**
 * Runs the import of the selected OBO.
 *
 * The run arguments ($this->arguments['run_args']) support:
 *   - obo_id: (required) The ID of the tripal_cv_obo record to import.
 *
 * Before the import itself, the method caches all CVs and DBs, looks up the
 * terms used for subsets and comments, and makes sure the 'synonym_type'
 * vocabulary, database and synonym type terms exist.
 *
 * @throws Exception
 *   If no tripal_cv_obo record matches the given obo_id.
 *
 * @see TripalImporter::run()
 */
public function run() {

  $arguments = $this->arguments['run_args'];
  $obo_id = isset($arguments['obo_id']) ? $arguments['obo_id'] : NULL;

  // Make sure the $obo_id is valid.
  $obo = FALSE;
  if ($obo_id) {
    $obo = db_select('tripal_cv_obo', 'tco')
      ->fields('tco')
      ->condition('obo_id', $obo_id)
      ->execute()
      ->fetchObject();
  }
  if (!$obo) {
    throw new Exception("Invalid OBO ID provided: '$obo_id'.");
  }

  // Get the list of all CVs so we can save on lookups later.
  $cvs = chado_query("SELECT * FROM {cv} CV");
  while ($cv = $cvs->fetchObject()) {
    $this->all_cvs[$cv->name] = $cv;
  }

  // Get the list of all DBs so we can save on lookups later.
  $dbs = chado_query("SELECT * FROM {db} DB");
  while ($db = $dbs->fetchObject()) {
    $this->all_dbs[$db->name] = $db;
  }

  // Look up the 'Subgroup' term (NCIT:C25693) used for adding subsets and
  // the 'Comment' term (rdfs:comment) used for adding comments.
  foreach (['NCIT:C25693', 'rdfs:comment'] as $term_id) {
    $term = chado_get_cvterm(['id' => $term_id]);
    if (!$term) {
      // Keep going as before (the ID stays NULL), but say so instead of
      // tripping over a property read on a non-object.
      $this->logMessage("The term @term could not be found in Chado.", ['@term' => $term_id]);
    }
    $this->used_terms[$term_id] = $term ? $term->cvterm_id : NULL;
  }

  // Make sure we have a 'synonym_type' vocabulary.
  $syn_cv = new ChadoRecord('cv');
  $syn_cv->setValues(['name' => 'synonym_type']);
  $syn_cv->save();
  $this->all_cvs['synonym_type'] = (object) $syn_cv->getValues();

  // Make sure we have a 'synonym_type' database.
  $syn_db = new ChadoRecord('db');
  $syn_db->setValues(['name' => 'synonym_type']);
  $syn_db->save();
  $this->all_dbs['synonym_type'] = (object) $syn_db->getValues();

  // Make sure the synonym types exist in the 'synonym_type' vocabulary.
  foreach (array_keys($this->syn_types) as $syn_type) {
    $syn_dbxref = new ChadoRecord('dbxref');
    $syn_dbxref->setValues([
      'accession' => $syn_type,
      'db_id' => $syn_db->getID(),
    ]);
    $syn_dbxref->save();

    $syn_term = new ChadoRecord('cvterm');
    $syn_term->setValues([
      'name' => $syn_type,
      'cv_id' => $syn_cv->getID(),
    ]);
    // Only insert the term when it is not there yet.
    if (!$syn_term->find()) {
      $syn_term->setValues([
        'name' => $syn_type,
        'definition' => '',
        'is_obsolete' => 0,
        'cv_id' => $syn_cv->getID(),
        'is_relationshiptype' => 0,
        'dbxref_id' => $syn_dbxref->getID(),
      ]);
      $syn_term->insert();
    }
    $this->syn_types[$syn_type] = (object) $syn_term->getValues();
  }

  // Run the importer!
  $this->loadOBO_v1_2_id($obo);
}
/**
 * Post-import housekeeping: repopulates the cv_root_mview and db2cv_mview
 * materialized views and then updates the cvtermpath table for every
 * vocabulary recorded in $this->obo_namespaces.
 *
 * @see TripalImporter::postRun()
 */
public function postRun() {
  // Refresh the materialized views, in this order.
  foreach (['cv_root_mview', 'db2cv_mview'] as $view_name) {
    $this->logMessage("Updating the {$view_name} materialized view...");
    tripal_populate_mview(tripal_get_mview_id($view_name));
  }

  // Update the cvtermpath table for each newly added CV.
  $this->logMessage("Updating cvtermpath table. This may take a while...");
  foreach ($this->obo_namespaces as $vocab_name => $vocab_cv_id) {
    $this->logMessage("- Loading paths for @vocab", ['@vocab' => $vocab_name]);
    chado_update_cvtermpath($vocab_cv_id, $this->job);
  }
}
/**
 * Imports the OBO described by a tripal_cv_obo record into Chado.
 *
 * The record's path decides how the file is obtained: an http(s)/ftp URL is
 * downloaded, anything else is treated as a local file, first relative to
 * the Drupal installation and then as given. The file must be in OBO v1.2
 * compatible format.
 *
 * @param object $obo
 *   A record from the tripal_cv_obo table; its 'name' and 'path' are used.
 *   A {module_name} token in the path is replaced, on the object itself, by
 *   the real path of that module.
 *
 * @throws Exception
 *   If the path is a local one and no such file exists.
 *
 * @ingroup tripal_obo_loader
 */
private function loadOBO_v1_2_id($obo) {
  // Convert the module name to the real path if present.
  if (preg_match("/\{(.*?)\}/", $obo->path, $matches)) {
    $module = $matches[1];
    $path = drupal_realpath(drupal_get_path('module', $module));
    $obo->path = preg_replace("/\{.*?\}/", $path, $obo->path);
  }

  // If the reference is a remote URL then run the URL processing function.
  if (preg_match("/^(https?|ftp):\/\//", $obo->path)) {
    $this->loadOBO_v1_2_url($obo->name, $obo->path, 0);
    return;
  }

  // Otherwise it is a local file. Check first whether it is located
  // relative to the Drupal installation.
  $dfile = $_SERVER['DOCUMENT_ROOT'] . base_path() . $obo->path;
  if (file_exists($dfile)) {
    $this->loadOBO_v1_2_file($obo->name, $dfile, 0);
    return;
  }

  // If not local to Drupal, the file must be someplace else, so use the
  // path as provided.
  if (file_exists($obo->path)) {
    $this->loadOBO_v1_2_file($obo->name, $obo->path, 0);
    return;
  }

  // Nothing can be imported; fail loudly rather than print and carry on as
  // if the import had worked.
  throw new Exception("Could not find OBO file: '$obo->path'");
}
/**
 * Imports an OBO file from the local file system into Chado.
 *
 * The file must be in OBO v1.2 compatible format.
 *
 * @param $obo_name
 *   The name of the OBO (typically the ontology or controlled vocabulary
 *   name).
 * @param $file
 *   The path on the file system where the ontology can be found.
 * @param $is_new
 *   Set to TRUE if this is a new ontology that does not yet exist in the
 *   tripal_cv_obo table. If TRUE the OBO will be added to the table.
 *
 * @ingroup tripal_obo_loader
 */
private function loadOBO_v1_2_file($obo_name, $file, $is_new = TRUE) {
  if ($is_new) {
    tripal_insert_obo($obo_name, $file);
  }
  $this->loadOBO_v1_2($file, $obo_name);
}
/**
 * Downloads an OBO file from a remote URL and imports it into Chado.
 *
 * The file is downloaded to a temporary file, parsed, and the temporary
 * file is removed again, also when the download or the import fails. The
 * file must be in OBO v1.2 compatible format.
 *
 * @param $obo_name
 *   The name of the OBO (typically the ontology or controlled vocabulary
 *   name).
 * @param $url
 *   The remote URL of the OBO file.
 * @param $is_new
 *   Set to TRUE if this is a new ontology that does not yet exist in the
 *   tripal_cv_obo table. If TRUE the OBO will be added to the table.
 *
 * @throws Exception
 *   If the temporary file cannot be created or written, or the remote file
 *   cannot be downloaded.
 *
 * @ingroup tripal_obo_loader
 */
private function loadOBO_v1_2_url($obo_name, $url, $is_new = TRUE) {
  // First download the OBO.
  $temp = tempnam(sys_get_temp_dir(), 'obo_');
  if ($temp === FALSE) {
    throw new Exception("Unable to create a temporary file for downloading $url.");
  }
  print "Downloading URL $url, saving to $temp\n";

  try {
    $url_fh = fopen($url, "r");
    if (!$url_fh) {
      throw new Exception("Unable to download the remote OBO file at $url. Could a firewall be blocking outgoing connections? " .
        " if you are unable to download the file you may manually downlod the OBO file and use the web interface to " .
        " specify the location of the file on your server.");
    }
    $obo_fh = fopen($temp, "w");
    if (!$obo_fh) {
      fclose($url_fh);
      throw new Exception("Unable to write the downloaded OBO file to $temp.");
    }

    $copied = stream_copy_to_stream($url_fh, $obo_fh);
    fclose($url_fh);
    fclose($obo_fh);
    if ($copied === FALSE) {
      throw new Exception("The download of the remote OBO file at $url did not complete.");
    }

    if ($is_new) {
      tripal_insert_obo($obo_name, $url);
    }

    // Second, parse the OBO.
    $this->loadOBO_v1_2($temp, $obo_name);
  }
  catch (Exception $e) {
    // Do not leave the temp file behind when something went wrong.
    if (file_exists($temp)) {
      unlink($temp);
    }
    throw $e;
  }

  // Now remove the temp file.
  unlink($temp);
}
/**
 * Imports a given OBO file into Chado.
 *
 * This is usually called by one of the wrapper functions loadOBO_v1_2_file
 * or loadOBO_v1_2_url (themselves reached through loadOBO_v1_2_id), but it
 * can be called directly if the full path to an OBO file is available on
 * the file system.
 *
 * The import runs in steps: the file is parsed into the term stanza cache,
 * term relationships are cached, typedefs are loaded, terms are loaded, and
 * finally the cache is emptied again.
 *
 * @param $file
 *   The full path to the OBO file on the file system.
 * @param $obo_name
 *   The name of the OBO. It is not used by this method itself.
 *
 * @ingroup tripal_obo_loader
 */
private function loadOBO_v1_2($file, $obo_name) {
  $header = [];

  // Empty the term cache before starting.
  $this->clearTermStanzaCache();

  // Parse the obo file.
  $this->logMessage("Step 1: Preloading File $file...");
  $this->parse($file, $header);

  // Cache the relationships of terms.
  $this->logMessage("Step 2: Examining relationships...");
  $this->cacheRelationships();

  // Add any typedefs to the vocabulary first.
  $this->logMessage("Step 3: Loading type defs...");
  $this->processTypeDefs();

  // Next add terms to the vocabulary.
  $this->logMessage("Step 4: Loading terms...");
  $this->processTerms();

  // Empty the term cache.
  $this->logMessage("Step 5: Cleanup...");
  $this->clearTermStanzaCache();
}
/**
 * Loads the [Typedef] terms from the OBO.
 *
 * OBO files are divided into a typedefs terms section and a vocabulary terms
 * section. This function walks over the cached Typedef stanzas and hands
 * each one to processTerm().
 *
 * @return
 *   Always 1.
 *
 * @ingroup tripal_obo_loader
 */
private function processTypeDefs() {

  $typedefs = $this->getCachedTermStanzas('Typedef');
  $count = $this->getCacheSize('Typedef');
  $this->setTotalItems($count);
  $this->setItemsHandled(0);
  $this->setInterval(5);

  // Counts the typedefs that have been fully processed. It starts at zero
  // so that the final report below equals the number of typedefs rather
  // than exceeding the total by one.
  $i = 0;
  foreach ($typedefs as $t) {

    // TODO: it would be better if we had a term iterator so that we
    // don't have to distinguish here between the table vs memory cache type.
    if ($this->cache_type == 'table') {
      // Table cache: $t is a row whose stanza column holds the serialized,
      // base64 encoded stanza.
      $stanza = unserialize(base64_decode($t->stanza));
    }
    else {
      // Memory cache: $t is the term ID used as key in the stanza cache.
      $stanza = $this->termStanzaCache['ids'][$t];
    }
    $this->setItemsHandled($i);
    $this->processTerm($stanza, TRUE);
    $i++;
  }
  $this->setItemsHandled($i);
  return 1;
}
/**
 * Loads all of the [Term] terms from the OBO.
 *
 * Walks over the cached Term stanzas and hands each one to processTerm()
 * while keeping the progress counters up to date.
 *
 * @return
 *   Always 1.
 *
 * @ingroup tripal_obo_loader
 */
private function processTerms() {

  $terms = $this->getCachedTermStanzas('Term');
  $count = $this->getCacheSize('Term');
  $this->setTotalItems($count);
  $this->setItemsHandled(0);
  $this->setInterval(1);

  // Counts the terms that have been fully processed.
  $i = 0;
  foreach ($terms as $t) {

    // TODO: it would be better if we had a term iterator so that we
    // don't have to distinguish here between the table vs memory cache type.
    if ($this->cache_type == 'table') {
      // Table cache: $t is a row whose stanza column holds the serialized,
      // base64 encoded stanza.
      $term = unserialize(base64_decode($t->stanza));
    }
    else {
      // Memory cache: $t is the term ID used as key in the stanza cache.
      $term = $this->termStanzaCache['ids'][$t];
    }
    $this->setItemsHandled($i);

    // Add/update this term.
    $this->processTerm($term, FALSE);
    $i++;
  }
  $this->setItemsHandled($i);
  return 1;
}
-
/**
 * Works out and registers the default CV and DB for this loader.
 *
 * Unfortunately, not all OBOs include both the 'ontology' and the
 * 'default-namespace' tags in their headers, so whichever one is missing is
 * derived from the other where possible.
 *
 * @param $header
 *   The OBO header tags keyed by tag name. Each entry is a list of values
 *   of which the first one is used.
 *
 * @throws ErrorException
 *   If neither the namespace nor the DB short name can be determined.
 */
private function setDefaults($header) {

  // Start from whatever the header provides. Either tag may be absent.
  $db_short_name = array_key_exists('ontology', $header) ? strtoupper($header['ontology'][0]) : '';
  $cv_namespace = array_key_exists('default-namespace', $header) ? $header['default-namespace'][0] : '';

  // The OBO specification allows the 'ontology' tag to be nested for subsets
  // (e.g. go/subsets/goslim_plant). Reduce that to the top-level item and
  // remember that a subset is being loaded.
  $subset_parts = [];
  if (preg_match('/^(.+?)\/.*/', $db_short_name, $subset_parts)) {
    $db_short_name = $subset_parts[1];
    $this->is_subset = TRUE;
  }

  // Short name known but namespace missing: ask EBI for the namespace.
  if ($db_short_name and !$cv_namespace) {
    $cv_namespace = $this->findEBIOntologyNamespace($db_short_name);
  }

  // Namespace known but short name missing: try a few tricks to find it.
  if ($cv_namespace and !$db_short_name) {

    // Has this ontology been loaded before? Then reuse its database.
    $query = "SELECT dbname FROM {db2cv_mview} WHERE cvname = :cvname";
    $db_short_name = chado_query($query, [':cvname' => $cv_namespace])->fetchField();

    // Otherwise try to extract the prefix from the 'namespace-id-rule' tag.
    if (!$db_short_name and array_key_exists('namespace-id-rule', $header)) {
      $rule_parts = [];
      if (preg_match('/^.*\s(.+?):.+$/', $header['namespace-id-rule'][0], $rule_parts)) {
        $db_short_name = $rule_parts[1];
      }
    }

    // An EBI lookup through findEBIOntologyPrefix() is still experimental
    // and is therefore not attempted here.
  }

  // Without a namespace and without a short name there is nothing to go on.
  if (!$cv_namespace and !$db_short_name) {
    throw new ErrorException('Cannot determine the namespace or ontology prefix from this OBO file. It is missing both the "default-namespace" and "ontology" headers.');
  }

  // Store the defaults and make sure the DB and CV records exist.
  $this->default_namespace = $cv_namespace;
  $this->default_db = $db_short_name;
  $this->addDB($this->default_db);
  $default_cv = $this->addCV($this->default_namespace);
  $this->obo_namespaces[$cv_namespace] = $default_cv->cv_id;
}
-
/**
 * Searches EBI to find the namespace of the ontology for this OBO.
 *
 * The namespace is taken from the 'default-namespace' annotation of the
 * lookup result, or else from the 'namespace' entry of its config. If EBI
 * provides neither, the DB short name is used as the namespace.
 *
 * @param $ontology
 *   The ontology name from the OBO headers.
 *
 * @return
 *   The namespace to use for the ontology.
 *
 * @throws Exception
 *   Any exception raised during the lookup is logged and re-thrown.
 */
private function findEBIOntologyNamespace($ontology) {

  // Check if the EBI ontology search has this ontology:
  try {
    $results = $this->oboEbiLookup($ontology, 'ontology');

    // The lookup result may lack the 'config' or 'annotations' sections, so
    // normalize both to arrays before inspecting them.
    $config = [];
    if (is_array($results) && array_key_exists('config', $results) && is_array($results['config'])) {
      $config = $results['config'];
    }
    $annotations = [];
    if (array_key_exists('annotations', $config) && is_array($config['annotations'])) {
      $annotations = $config['annotations'];
    }

    if (array_key_exists('default-namespace', $annotations)) {
      $namespace = $annotations['default-namespace'];
      if (is_array($namespace)) {
        $namespace = $namespace[0];
      }
    }
    elseif (array_key_exists('namespace', $config)) {
      $namespace = $config['namespace'];
    }
    // If we can't find the namespace at EBI, then just default to using the
    // same namespace as the DB short name. When the default DB has not been
    // set yet the ontology name that was looked up is the short name.
    else {
      $namespace = $this->default_db ? $this->default_db : $ontology;
    }

    return $namespace;
  }
  catch (Exception $e) {
    watchdog_exception('Cannot find the namespace for this ontology.', $e);
    throw $e;
  }
}
-
/**
 * Finds the ontology prefix (DB short name) using EBI.
 *
 * NOTE: this lookup is experimental and it is not clear that it will work
 * for every ontology. The list of ontologies at the EBI OLS is paged
 * through and the first ontology whose configured namespace equals the
 * given namespace is used.
 *
 * @param $namespace
 *   The namespace for the ontology.
 *
 * @return
 *   The preferred prefix of the matching ontology, or NULL if no ontology
 *   matched or the service could not be queried.
 */
private function findEBIOntologyPrefix($namespace) {

  $options = [];
  $size = 25;

  // NOTE(review): OLS paging is assumed to be zero-based -- confirm against
  // the OLS API documentation.
  $page = 0;

  while (TRUE) {
    $full_url = 'https://www.ebi.ac.uk/ols/api/ontologies?page=' . $page . '&size=' . $size;
    $response = drupal_http_request($full_url, $options);

    // drupal_http_request() returns an object even for failed requests, so
    // the error and data members decide whether paging can continue.
    if (!$response or !empty($response->error) or empty($response->data)) {
      return NULL;
    }

    // A page without ontologies means that all pages have been seen.
    $decoded = drupal_json_decode($response->data);
    if (empty($decoded['_embedded']['ontologies'])) {
      return NULL;
    }

    foreach ($decoded['_embedded']['ontologies'] as $ontology) {
      if (isset($ontology['config']['namespace']) and
          $ontology['config']['namespace'] == $namespace and
          !empty($ontology['config']['preferredPrefix'])) {
        return $ontology['config']['preferredPrefix'];
      }
    }
    $page++;
  }
}
/**
 * Retrieves the details of a foreign term from the EBI OLS.
 *
 * A foreign term is one that does not belong to the ontology being loaded.
 * If EBI reports that the term was replaced by another term then the
 * replacement is returned instead. The replacement is searched for in the
 * term cache first, then in the local database and finally at EBI.
 *
 * @param $id
 *   The term ID consisting of the ontology prefix and the accession
 *   separated by a colon (e.g. GO:0008150).
 *
 * @return
 *   An OBO stanza array for the term or for its replacement. A stanza that
 *   is built from the EBI response has the keys: id, name, def, namespace,
 *   is_obsolete, is_relationshiptype, db_name, comment and, if EBI provides
 *   it, subset.
 *
 * @throws Exception
 *   If the ID has no ontology prefix, if EBI does not respond or if EBI
 *   reports an error for the ontology or the term.
 */
private function findEBITerm($id) {

  // Warn the user if we're looking up terms in EBI as this will slow the
  // loader if there are many lookups.
  if ($this->ebi_warned == FALSE) {
    $this->logMessage(
      "A term that belongs to another ontology is used within this " .
      "vocabulary. Therefore a lookup will be performed with the EBI Ontology " .
      "Lookup Service to retrieve the information for this term. " .
      "Please note, that vocabularies with many non-local terms " .
      "require remote lookups and these lookups can dramatically " .
      "increase loading time. ",
      ['!vocab' => $this->default_namespace], TRIPAL_WARNING);
    $this->ebi_warned = TRUE;

    // This ontology may have multiple remote terms and that takes a while
    // to load so lets change the progress interval down to give
    // updates more often.
    $this->setInterval(1);
  }

  $this->logMessage("Performing EBI OLS Lookup for: !id", ['!id' => $id]);

  // Get the short name and accession for the term. Without a prefix there
  // is no ontology to query.
  $pair = explode(":", $id, 2);
  if (count($pair) < 2) {
    throw new Exception(t('Cannot perform an EBI OLS lookup for the term, !id, because it has no ontology prefix.',
      ['!id' => $id]));
  }
  $short_name = $pair[0];
  $accession = $pair[1];

  // First get the ontology so we can build an IRI for the term. The result
  // is remembered per short name so each ontology is only requested once.
  $base_iri = '';
  $ontologyID = '';
  if (array_key_exists($short_name, $this->baseIRIs)) {
    list($ontologyID, $base_iri) = $this->baseIRIs[$short_name];
  }
  else {
    $full_url = 'http://www.ebi.ac.uk/ols/api/ontologies/' . $short_name;
    $response = drupal_http_request($full_url, []);
    $ontology_results = NULL;
    if ($response and isset($response->data)) {
      $ontology_results = drupal_json_decode($response->data);
    }
    if (!is_array($ontology_results)) {
      throw new Exception(t('Did not get a response from EBI OLS trying to lookup ontology: !ontology',
        ['!ontology' => $short_name]));
    }
    // The 'error' key is only present when EBI reports a problem.
    if (!empty($ontology_results['error'])) {
      throw new Exception(t('Cannot find the ontology via an EBI OLS lookup: !short_name. ' .
        'EBI Reported: !message. ' .
        'Consider finding the OBO file for this ontology and manually loading it first.',
        ['!message' => $ontology_results['message'], '!short_name' => $short_name]));
    }
    $base_iri = $ontology_results['config']['baseUris'][0];
    $ontologyID = $ontology_results['ontologyId'];
    $this->baseIRIs[$short_name] = [$ontologyID, $base_iri];
  }

  // Next get the term. The IRI is URL encoded twice before it is appended
  // to the request path.
  $iri = urlencode(urlencode($base_iri . $accession));
  $full_url = 'http://www.ebi.ac.uk/ols/api/ontologies/' . $ontologyID . '/terms/' . $iri;
  $response = drupal_http_request($full_url, []);
  $results = NULL;
  if ($response and isset($response->data)) {
    $results = drupal_json_decode($response->data);
  }
  if (!is_array($results)) {
    throw new Exception(t('Did not get a response from EBI OLS trying to lookup term: !id',
      ['!id' => $id]));
  }

  // If EBI sent an error message then throw an error.
  if (!empty($results['error'])) {
    throw new Exception(t('Cannot find the term via an EBI OLS lookup: !term. ' .
      'EBI Reported: !message. ' .
      'Consider finding the OBO file for this ontology and manually loading it first.',
      ['!message' => $results['message'], '!term' => $id]));
  }

  // TODO: what do we do if the term is not defined by this ontology (i.e.
  // the 'is_defining_ontology' value of the result is not 1)?

  // Make an OBO stanza array as if this term were in the OBO file and
  // return it.
  $this->logMessage("Found !term in EBI OLS.", ['!term' => $id]);
  $stanza = [];
  $stanza['id'][0] = $id;
  $stanza['name'][0] = $results['label'];
  $stanza['def'][0] = $results['def'];
  $stanza['namespace'][0] = $results['ontology_name'];
  $stanza['is_obsolete'][0] = $results['is_obsolete'] ? 'true' : '';
  $stanza['is_relationshiptype'][0] = '';
  $stanza['db_name'][0] = $short_name;
  $stanza['comment'][0] = 'Term obtained using the EBI Ontology Lookup Service.';
  if (array_key_exists('in_subset', $results)) {
    if (is_array($results['in_subset'])) {
      $stanza['subset'] = $results['in_subset'];
    }
    else {
      $stanza['subset'][0] = $results['in_subset'];
    }
  }

  // If this term has been replaced then get the new term.
  if (isset($results['term_replaced_by'])) {
    // EBI separates prefix and accession with an underscore.
    $replaced_by = preg_replace('/_/', ':', $results['term_replaced_by']);
    $this->logMessage("The term, !term, is replaced by, !replaced",
      ['!term' => $id, '!replaced' => $replaced_by]);

    // Before we try to look up the replacement term, let's try to find it
    // in our list of cached terms. The cached stanza of the replacement
    // (not of the replaced term) is what must be returned.
    $cached = $this->getCachedTermStanza($replaced_by);
    if ($cached) {
      $this->logMessage("Found term, !replaced in the term cache.",
        ['!term' => $id, '!replaced' => $replaced_by]);
      return $cached;
    }

    // Next look in the database.
    $rpair = explode(":", $replaced_by, 2);
    if (count($rpair) == 2) {
      $found = $this->lookupTerm($rpair[0], $rpair[1]);
      if ($found) {
        $this->logMessage("Found term, !replaced in the local data store.",
          ['!term' => $id, '!replaced' => $replaced_by]);
        return $found;
      }
    }

    // Look for this new term.
    $stanza = $this->findEBITerm($replaced_by);
  }
  return $stanza;
}
-
/**
 * Inserts or updates a cvterm using the OBO stanza array provided.
 *
 * The stanza passed to this function should always come from the term cache,
 * not directly from the OBO file because the cached terms have been
 * updated to include all necessary values. The term is located through its
 * dbxref (DB + accession) rather than its name because names can change.
 * The resulting cvterm ID is remembered so that a term is only saved once
 * per run.
 *
 * @param $stanza
 *   An OBO stanza array as returned by getCachedTermStanza().
 * @param $is_relationship
 *   Set to TRUE if this term is a relationship term. This value is
 *   currently not used; the 'is_relationshiptype' entry of the stanza
 *   decides how the term is flagged.
 *
 * @return
 *   The cvterm ID.
 *
 * @throws Exception
 *   If the cvterm cannot be updated or inserted.
 */
private function saveTerm($stanza, $is_relationship = FALSE) {

  // Get the term ID.
  $id = $stanza['id'][0];

  // First check if we've already used this term.
  if (!empty($this->used_terms[$id])) {
    return $this->used_terms[$id];
  }

  // Get the term properties.
  $name = $stanza['name'][0];
  $cvname = $stanza['namespace'][0];
  $dbname = $stanza['db_name'][0];

  // Does this term ID have both a short name and accession? If so, then
  // separate out these components, otherwise we will use the id as both
  // the id and accession.
  $accession = $id;
  $matches = [];
  if (preg_match('/^(.+?):(.*)$/', $id, $matches)) {
    $accession = $matches[2];
  }

  // Get the definition if there is one. The surrounding quotes are removed.
  $definition = '';
  if (array_key_exists('def', $stanza)) {
    $definition = preg_replace('/^\"(.*)\"/', '\1', $stanza['def'][0]);
  }

  // Set the flag if this term is obsolete.
  $is_obsolete = 0;
  if (array_key_exists('is_obsolete', $stanza)) {
    $is_obsolete = $stanza['is_obsolete'][0] == 'true' ? 1 : 0;
  }

  // Set the flag if this is a relationship type.
  $is_relationshiptype = 0;
  if (array_key_exists('is_relationshiptype', $stanza)) {
    $is_relationshiptype = $stanza['is_relationshiptype'][0] == 'true' ? 1 : 0;
  }

  // Is this term borrowed from another ontology?
  $is_borrowed = $this->isTermBorrowed($stanza);

  // Will hold the cvterm ChadoRecord object.
  $cvterm = NULL;

  // Get the CV and DB objects.
  $cv = $this->all_cvs[$cvname];
  $db = $this->all_dbs[$dbname];

  // If this is set to TRUE then we should insert the term.
  $do_cvterm_insert = TRUE;

  // We need to locate terms using their dbxref. This is because term names
  // can sometimes change, so we don't want to look up the term by its name.
  // The unique ID which is in the accession will never change.
  $dbxref = new ChadoRecord('dbxref');
  $dbxref->setValues([
    'db_id' => $db->db_id,
    'accession' => $accession,
  ]);
  if ($dbxref->find()) {

    // Does this accession already have a cvterm it's associated with? Then
    // we need to make sure we update the name. Names change but accessions
    // always refer to the same term.
    $dbx_cvterm = new ChadoRecord('cvterm');
    $dbx_cvterm->setValues(['dbxref_id' => $dbxref->getID()]);
    if ($dbx_cvterm->find()) {
      $do_cvterm_insert = FALSE;
      $cvterm = $dbx_cvterm;

      // We don't want to do any updates for borrowed terms. Just leave them
      // as they are.
      if (!$is_borrowed) {

        // Let's make sure we don't have a conflict in term naming
        // if we change the name of this term.
        $this->fixTermMismatch($stanza, $dbxref, $cv, $name);

        // Now update this cvterm record.
        $cvterm->setValue('name', $name);
        $cvterm->setValue('definition', $definition);
        $cvterm->setValue('is_obsolete', $is_obsolete);
        $cvterm->setValue('is_relationshiptype', $is_relationshiptype);

        try {
          $cvterm->update();
        }
        catch (Exception $e) {
          $this->logMessage('Could not update the term, "!term", with name, "!name" for vocabulary, "!vocab". ERROR: !error.',
            ['!term' => $id,
             '!name' => $name,
             '!vocab' => $cv->name,
             '!error' => $e->getMessage(),
            ],
            TRIPAL_ERROR);
          throw $e;
        }
      }
    }
  }
  // The dbxref doesn't exist, so let's add it.
  else {
    $dbxref->insert();
  }

  // Add the cvterm if we didn't do an update.
  if ($do_cvterm_insert) {

    // Before inserting the term let's check to see if a term with the same
    // name already exists in the vocabulary and make corrections.
    $cvterm = new ChadoRecord('cvterm');
    $cvterm->setValue('cv_id', $cv->cv_id);
    $cvterm->setValue('name', $name);
    if ($cvterm->find()) {
      $this->fixTermMismatch($stanza, $dbxref, $cv, $name);
    }

    // Now set all of the values for the insert.
    $cvterm->setValues([
      'cv_id' => $cv->cv_id,
      'name' => $name,
      'definition' => $definition,
      'dbxref_id' => $dbxref->getValue('dbxref_id'),
      'is_relationshiptype' => $is_relationshiptype,
      'is_obsolete' => $is_obsolete,
    ]);

    // If the insert fails lets catch the error so we can
    // give a more informative message.
    try {
      $cvterm->insert();
    }
    catch (Exception $e) {
      $this->logMessage('Could not insert the term, "!term", with name, "!name" for vocabulary, "!vocab". ERROR: !error.',
        ['!term' => $id,
         '!name' => $name,
         '!vocab' => $cv->name,
         '!error' => $e->getMessage(),
        ],
        TRIPAL_ERROR);
      throw $e;
    }
  }

  // Save the cvterm_id for this term so we don't look it up again.
  $cvterm_id = $cvterm->getID();
  $this->used_terms[$id] = $cvterm_id;

  // Return the cvterm_id.
  return $cvterm_id;
}
/**
 * Fixes mismatches between two terms with the same name.
 *
 * If it has been determined that a term's name has changed then, before we
 * update or insert it, we must check to make sure no other term in the
 * vocabulary has that name. If one does, that other term is renamed so the
 * name becomes available.
 *
 * @param $stanza
 *   The OBO stanza array of the term to be inserted/updated. The name that
 *   is checked for conflicts is taken from this stanza.
 * @param $dbxref
 *   The ChadoRecord object containing the dbxref record for the term
 *   to be inserted/updated.
 * @param $cv
 *   The CV object of the vocabulary. Its cv_id is used for the lookup.
 * @param $name
 *   The name of the term that is a potential conflict. The name in the
 *   stanza takes precedence over this value.
 *
 * @return
 *   Returns TRUE if a conflict was found and corrected.
 */
public function fixTermMismatch($stanza, $dbxref, $cv, $name) {

  $name = $stanza['name'][0];

  // First get the record for any potential conflicting term.
  $sql = "
    SELECT cvterm_id
    FROM {cvterm}
    WHERE name = :name and cv_id = :cv_id and dbxref_id != :dbxref_id
  ";
  $args = [
    ':name' => $name,
    ':cv_id' => $cv->cv_id,
    ':dbxref_id' => $dbxref->getID(),
  ];
  $results = chado_query($sql, $args);

  // Only the first conflicting term is handled.
  $conflict_id = $results->fetchField();
  if (!$conflict_id) {
    // We have no conflict so it's safe to update or insert.
    return FALSE;
  }
  $check_cvterm = new ChadoRecord('cvterm', $conflict_id);

  // If the dbxref of this matched term is the same as the current term
  // then it is the same term and there is no conflict.
  if ($dbxref->getID() == $check_cvterm->getValue('dbxref_id')) {
    return FALSE;
  }

  // At this point, we have a cvterm with the same name and vocabulary
  // but with a different dbxref. First let's get that other accession.
  $check_dbxref = new ChadoRecord('dbxref', $check_cvterm->getValue('dbxref_id'));
  $check_db = new ChadoRecord('db', $check_dbxref->getValue('db_id'));
  $check_accession = $check_db->getValue('name') . ':' . $check_dbxref->getValue('accession');

  // Stanza values are lists, so the obsolete flag lives in the first
  // element of the 'is_obsolete' entry (when that entry exists at all).
  $check_stanza = $this->getCachedTermStanza($check_accession);
  $is_obsolete = isset($stanza['is_obsolete'][0]) && $stanza['is_obsolete'][0] == 'true';
  $check_is_obsolete = $check_stanza &&
    isset($check_stanza['is_obsolete'][0]) &&
    $check_stanza['is_obsolete'][0] == 'true';

  // Case 2: The conflicting term is in the OBO file (ie. has a stanza) and
  // is obsolete and this one is not. Fix it by adding an (obsolete) suffix
  // to the name to avoid the conflict.
  if ($check_is_obsolete && !$is_obsolete) {
    $suffix = ' (obsolete)';
  }
  // Case 1: The other term that currently has the same name is
  // missing in the OBO file (i.e. no stanza). So, that means that this
  // term probably got relegated to an alt_id on another term. We do
  // not want to delete a term because it may be linked to other
  // records. Instead, let's update its name to let folks know
  // what happened to it and so we can get around the unique
  // constraint. An example of this is the GO:0015881 and
  // GO:1902598 terms where the latter became an alt_id of the
  // first and no longer has its own entry.
  //
  // Case 3: The conflicting term is in the OBO file (ie. has a stanza).
  // That means that there has been some name swapping between
  // terms. We need to temporarily rename the term so that
  // we don't have a unique constraint violation when we update
  // the new one. An example of this is where GO:000425 and
  // GO:0030242 changed names and one was renamed to the previous
  // name of the other.
  //
  // Both cases are solved by appending the accession to the name.
  else {
    $suffix = ' (' . $check_accession . ')';
  }

  $check_cvterm->setValue('name', $check_cvterm->getValue('name') . $suffix);
  $check_cvterm->update();
  return TRUE;
}
/**
 * Adds or updates a term and its related records in Chado.
 *
 * The term itself is saved first. Unless the term is borrowed from another
 * ontology, its existing relationships, properties, xrefs and synonyms are
 * then removed and re-added from the stanza.
 *
 * @param $stanza
 *   An OBO stanza array representing the cvterm. Every value in the stanza
 *   is a list.
 * @param $is_relationship
 *   Set to 1 if this term is a relationship term.
 *
 * @throws Exception
 *   If no cvterm ID is available after saving the term.
 *
 * @ingroup tripal_obo_loader
 */
private function processTerm($stanza, $is_relationship = 0) {

  //
  // First things first--save the term.
  //
  // If the term does not exist it is inserted, if it does exist it just
  // retrieves the cvterm_id.
  //
  $cvterm_id = $this->saveTerm($stanza, $is_relationship);
  $id = $stanza['id'][0];

  // Is this term borrowed from another ontology? If so then we will
  // not update it.
  if ($this->isTermBorrowed($stanza)) {
    return;
  }

  // We should never have the problem where we don't have a cvterm_id. The
  // saveTerm() function should always return one. But if for some unknown
  // reason we don't have one then fail before any record is deleted.
  if (!$cvterm_id) {
    throw new Exception(t('Missing cvterm after saving term: !term',
      ['!term' => print_r($stanza, TRUE)]));
  }

  // This term belongs to this OBO (not borrowed from another OBO) so
  // remove any relationships, properties, xrefs, and synonyms that this
  // term already has so that they can be re-added.
  $args = [':cvterm_id' => $cvterm_id];
  $sql = "
    DELETE FROM {cvterm_relationship}
    WHERE subject_id = :cvterm_id
  ";
  chado_query($sql, $args);

  // If this is an obsolete term then clear out the relationships where
  // this term is the object. Stanza values are lists, so the flag is the
  // first element of the 'is_obsolete' entry.
  if (isset($stanza['is_obsolete'][0]) and $stanza['is_obsolete'][0] == 'true') {
    $sql = "
      DELETE FROM {cvterm_relationship}
      WHERE object_id = :cvterm_id
    ";
    chado_query($sql, $args);
  }

  $sql = "
    DELETE FROM {cvtermprop}
    WHERE cvterm_id = :cvterm_id
  ";
  chado_query($sql, $args);

  $sql = "
    DELETE FROM {cvterm_dbxref}
    WHERE cvterm_id = :cvterm_id
  ";
  chado_query($sql, $args);

  $sql = "
    DELETE FROM {cvtermsynonym} CVTSYN
    WHERE cvterm_id = :cvterm_id
  ";
  chado_query($sql, $args);

  //
  // Handle: alt_id
  //
  if (array_key_exists('alt_id', $stanza)) {
    foreach ($stanza['alt_id'] as $alt_id) {
      $this->addAltID($id, $cvterm_id, $alt_id);
    }
  }

  //
  // Handle: synonym
  //
  if (array_key_exists('synonym', $stanza)) {
    foreach ($stanza['synonym'] as $synonym) {
      $this->addSynonym($id, $cvterm_id, $synonym);
    }
  }

  //
  // Handle: exact_synonym, narrow_synonym and broad_synonym
  //
  // These tags carry the scope in their name. The scope keyword is
  // inserted after the quoted text so they can be stored like a regular
  // synonym.
  //
  $scoped_synonyms = [
    'exact_synonym' => 'EXACT',
    'narrow_synonym' => 'NARROW',
    'broad_synonym' => 'BROAD',
  ];
  foreach ($scoped_synonyms as $tag => $scope) {
    if (array_key_exists($tag, $stanza)) {
      foreach ($stanza[$tag] as $synonym) {
        $fixed = preg_replace('/^\s*(\".+?\")(.*?)$/', '$1 ' . $scope . ' $2', $synonym);
        $this->addSynonym($id, $cvterm_id, $fixed);
      }
    }
  }

  //
  // Handle: comment
  //
  if (array_key_exists('comment', $stanza)) {
    foreach ($stanza['comment'] as $rank => $comment) {
      $this->addComment($id, $cvterm_id, $comment, $rank);
    }
  }

  //
  // Handle: xref, xref_analog and xref_unk
  //
  foreach (['xref', 'xref_analog', 'xref_unk'] as $tag) {
    if (array_key_exists($tag, $stanza)) {
      foreach ($stanza[$tag] as $xref) {
        $this->addXref($id, $cvterm_id, $xref);
      }
    }
  }

  //
  // Handle: subset
  //
  if (array_key_exists('subset', $stanza)) {
    foreach ($stanza['subset'] as $subset) {
      $this->addSubset($id, $cvterm_id, $subset);
    }
  }

  //
  // Handle: is_a
  //
  if (array_key_exists('is_a', $stanza)) {
    foreach ($stanza['is_a'] as $is_a) {
      $this->addRelationship($id, $cvterm_id, 'is_a', $is_a);
    }
  }

  //
  // Handle: relationship
  //
  // The value consists of the relationship type followed by the object
  // term, separated by whitespace.
  //
  if (array_key_exists('relationship', $stanza)) {
    foreach ($stanza['relationship'] as $value) {
      $rel = preg_replace('/^(.+?)\s.+?$/', '\1', $value);
      $object = preg_replace('/^.+?\s(.+?)$/', '\1', $value);
      $this->addRelationship($id, $cvterm_id, $rel, $object);
    }
  }

  // The following properties are currently unsupported:
  //
  // - intersection_of
  // - union_of
  // - disjoint_from
  // - replaced_by
  // - consider
  // - use_term
  // - builtin
  // - is_anonymous
}
/**
 * Adds a cvterm relationship.
 *
 * Both the relationship term and the object term are taken from the term
 * cache and saved (if they were not saved before) so that their cvterm IDs
 * can be used in the relationship record.
 *
 * @param $id
 *   The term ID of the subject term. It is only used in error messages.
 * @param $cvterm_id
 *   A cvterm_id of the term to which the relationship will be added.
 * @param $rel_id
 *   The relationship term ID.
 * @param $obj_id
 *   The relationship object term ID.
 *
 * @throws Exception
 *   If the relationship or object term is not in the term cache, or if the
 *   relationship record cannot be inserted.
 *
 * @ingroup tripal_obo_loader
 */
private function addRelationship($id, $cvterm_id, $rel_id, $obj_id) {

  // Get the cached terms for both the relationship and the object. They
  // should be there, but just in case something went wrong, we'll throw
  // an exception if we can't find them.
  $rel_stanza = $this->getCachedTermStanza($rel_id);
  if (!$rel_stanza) {
    throw new Exception(t('Cannot add relationship: "!source !rel !object". ' .
      'The term, !rel, is not in the term cache.',
      ['!source' => $id, '!rel' => $rel_id, '!object' => $obj_id]));
  }
  $rel_cvterm_id = $this->saveTerm($rel_stanza, TRUE);

  // Make sure the object term exists in the cache.
  $obj_stanza = $this->getCachedTermStanza($obj_id);
  if (!$obj_stanza) {
    throw new Exception(t('Cannot add relationship: "!source !rel !object". ' .
      'The term, !object, is not in the term cache.',
      ['!source' => $id, '!rel' => $rel_id, '!object' => $obj_id]));
  }
  $obj_cvterm_id = $this->saveTerm($obj_stanza);

  // Add the cvterm_relationship.
  $cvterm_relationship = new ChadoRecord('cvterm_relationship');
  $cvterm_relationship->setValues([
    'type_id' => $rel_cvterm_id,
    'subject_id' => $cvterm_id,
    'object_id' => $obj_cvterm_id,
  ]);

  // If the insert fails then catch the error and generate a more meaningful
  // message that helps with debugging. The original exception is kept as
  // the previous exception so its trace is not lost.
  try {
    $cvterm_relationship->insert();
  }
  catch (Exception $e) {
    throw new Exception(t('Cannot add relationship: "!source !rel !object". ' .
      'ERROR: !error.',
      ['!source' => $id, '!rel' => $rel_id, '!object' => $obj_id, '!error' => $e->getMessage()]),
      0, $e);
  }
}
/**
 * Retrieves the stanza array for a given term ID from the term cache.
 *
 * Depending on the cache type the stanza is read from the tripal_obo_temp
 * table or from the in-memory cache.
 *
 * @param $id
 *   The ID of the term to retrieve.
 *
 * @return
 *   The stanza array of the term, or FALSE if the term is not cached.
 *
 * @ingroup tripal_obo_loader
 */
private function getCachedTermStanza($id) {

  if ($this->cache_type == 'table') {
    // chado_select_record() provides a list of row objects, or FALSE when
    // the query fails. Both an empty list and FALSE mean there is no
    // stanza to return.
    $result = chado_select_record('tripal_obo_temp', ['stanza'], ['id' => $id]);
    if (!$result) {
      return FALSE;
    }
    // The stanza is stored serialized and base64 encoded.
    return unserialize(base64_decode($result[0]->stanza));
  }

  if (array_key_exists($id, $this->termStanzaCache['ids'])) {
    return $this->termStanzaCache['ids'][$id];
  }
  return FALSE;
}
-
/**
 * Tries to find a term in Chado using its short name and accession.
 *
 * The database must be known to this loader and must have a dbxref with the
 * given accession that is linked to a cvterm. The stanza is then built from
 * the values of that cvterm and its vocabulary.
 *
 * @param $short_name
 *   The term's ontology prefix (database short name).
 * @param $accession
 *   The term's accession.
 *
 * @return array|NULL
 *   An OBO stanza array for the term, or NULL if the database, the dbxref
 *   or the cvterm cannot be found.
 */
private function lookupTerm($short_name, $accession) {

  // Without a known database there is nothing to look for.
  if (!array_key_exists($short_name, $this->all_dbs)) {
    return NULL;
  }
  $database = $this->all_dbs[$short_name];

  // The accession must have a dbxref in that database.
  $xref = new ChadoRecord('dbxref');
  $xref->setValues([
    'db_id' => $database->db_id,
    'accession' => $accession,
  ]);
  if (!$xref->find()) {
    return NULL;
  }

  // The dbxref must be linked to a cvterm.
  $term = new ChadoRecord('cvterm');
  $term->setValues(['dbxref_id' => $xref->getID()]);
  if (!$term->find()) {
    return NULL;
  }

  // Load the vocabulary that the term belongs to.
  $vocabulary = new ChadoRecord('cv');
  $vocabulary->setValues(['cv_id' => $term->getValue('cv_id')]);
  $vocabulary->find();

  // Describe the cvterm as a stanza. Every value is a one element list.
  return [
    'id' => [$short_name . ':' . $accession],
    'name' => [$term->getValue('name')],
    'def' => [$term->getValue('definition')],
    'namespace' => [$vocabulary->getValue('name')],
    'is_obsolete' => [$term->getValue('is_obsolete') == 1 ? 'true' : ''],
    'is_relationshiptype' => [''],
    'db_name' => [$database->name],
    'cv_name' => [$vocabulary->getValue('name')],
  ];
}
/**
 * Adds a term stanza from the OBO file to the cache for easier lookup.
 *
 * A stanza whose ID is already cached is ignored. A term whose ID prefix
 * differs from the default DB is treated as borrowed: its stanza is replaced
 * by the result of lookupTerm() or, failing that, findEBITerm(). The stanza
 * is then normalized (namespace and db_name defaults, relationship-type flag,
 * trailing {...} modifiers removed, synonyms simplified, duplicate values
 * removed) and stored either in the tripal_obo_temp table or in the
 * in-memory cache, depending on $this->cache_type.
 *
 * @param $stanza
 *   The stanza from the OBO file for the term.
 * @param $type
 *   The stanza type, e.g. 'Term', 'Typedef' or 'Instance'.
 *
 * @throws Exception
 *   If no default CV or DB is set, or if the stanza cannot be written to the
 *   temporary table.
 */
private function cacheTermStanza($stanza, $type) {
  // Make sure we have defaults.
  if (!$this->default_namespace) {
    throw new Exception('Cannot cache terms without a default CV.' . print_r($stanza, TRUE));
  }
  if (!$this->default_db) {
    throw new Exception('Cannot cache terms without a default DB.' . print_r($stanza, TRUE));
  }

  $id = $stanza['id'][0];

  // First check if this term is already in the cache, if so then skip it.
  if ($this->getCachedTermStanza($id)) {
    return;
  }

  // Does this term have a database short name prefix in the ID (accession)?
  // If not then we'll add the default CV as the namespace. If it does and
  // the short name is not the default for this vocabulary then we'll look
  // it up.
  $matches = [];
  if (preg_match('/^(.+):(.+)$/', $id, $matches)) {
    $short_name = $matches[1];
    $accession = $matches[2];

    // If the term is borrowed then let's try to deal with it.
    if ($short_name != $this->default_db) {

      // First try to lookup the term and replace the stanza with the updated
      // details.
      $found = $this->lookupTerm($short_name, $accession);
      if ($found) {
        $stanza = $found;
      }
      // If we can't find the term in the database then do an EBI lookup.
      else {
        $stanza = $this->findEBITerm($id);

        // Make sure the DBs and CVs exist and are added to our cache.
        $this->addDB($stanza['db_name'][0]);
        $this->addCV($stanza['namespace'][0]);
      }
    }
    // If the term belongs to this OBO then let's set the 'db_name'.
    else {
      if (!array_key_exists('namespace', $stanza)) {
        $stanza['namespace'][0] = $this->default_namespace;
      }
      $stanza['db_name'][0] = $short_name;
    }

    // Make sure the db for this term is added to Chado. If it already is
    // then this function won't re-add it.
    $this->addDB($short_name);
  }
  // If there is no DB short name prefix for the id.
  else {
    if (!array_key_exists('namespace', $stanza)) {
      $stanza['namespace'][0] = $this->default_namespace;
    }
    $stanza['db_name'][0] = $this->default_db;
  }

  $stanza['is_relationshiptype'][0] = '';
  if ($type == 'Typedef') {
    $stanza['is_relationshiptype'][0] = 'true';
  }

  // The is_a field can have constructs like this: {is_inferred="true"}
  // We need to remove those if they exist.
  if (array_key_exists('is_a', $stanza)) {
    foreach ($stanza['is_a'] as $index => $is_a) {
      $stanza['is_a'][$index] = trim(preg_replace('/\{.+?\}/', '', $is_a));
    }
  }
  if (array_key_exists('relationship', $stanza)) {
    foreach ($stanza['relationship'] as $index => $relationship) {
      $stanza['relationship'][$index] = trim(preg_replace('/\{.+?\}/', '', $relationship));
    }
  }

  // Clean up any synonym definitions. We only handle the synonym in
  // quotes and the type.
  if (array_key_exists('synonym', $stanza)) {
    foreach ($stanza['synonym'] as $index => $synonym) {
      if (preg_match('/\"(.*?)\".*(EXACT|NARROW|BROAD|RELATED)/', $synonym, $matches)) {
        $stanza['synonym'][$index] = '"' . $matches[1] . '" ' . $matches[2];
      }
    }
  }

  // Now before saving, remove any duplicates. Sometimes the OBOs have
  // the same item duplicated in the stanza multiple times. This will
  // result in duplicate constraint violations in the tables. We can either
  // check on every insert if the record exists increasing loading time or
  // remove duplicates here.
  foreach ($stanza as $key => $values) {
    $stanza[$key] = array_unique($values);
  }

  // If the cache_type is 'table' then the stanza goes into the
  // tripal_obo_temp table instead of the in-memory cache.
  if ($this->cache_type == 'table') {
    // The stanza array is serialized and base64-encoded for storage.
    $values = [
      'id' => $id,
      'stanza' => base64_encode(serialize($stanza)),
      'type' => $type,
    ];
    $success = chado_insert_record('tripal_obo_temp', $values);
    if (!$success) {
      throw new Exception("Cannot insert stanza into temporary table.");
    }
    return;
  }

  // Cache the term stanza. Make sure the counter for this type exists first
  // so that an unexpected stanza type does not increment an undefined index.
  $this->termStanzaCache['ids'][$id] = $stanza;
  if (!isset($this->termStanzaCache['count'][$type])) {
    $this->termStanzaCache['count'][$type] = 0;
  }
  $this->termStanzaCache['count'][$type]++;
  $this->termStanzaCache['types'][$type][] = $id;

  // Cache the term name so we don't have conflicts. Stanzas that were built
  // from a bare ID (e.g. the object of a relationship) may have no name.
  if (isset($stanza['name'][0])) {
    $this->term_names[$stanza['name'][0]] = 1;
  }
}
-
/**
 * Reports how many stanzas of a given type are currently cached.
 *
 * @param $type
 *   The term type: Typedef, Term, etc.
 *
 * @return
 *   The number of cached stanzas of that type. With the 'table' cache type
 *   this is the row count from tripal_obo_temp; otherwise it is the running
 *   counter kept in the in-memory cache.
 */
private function getCacheSize($type) {
  // The in-memory cache maintains its own per-type counter.
  if ($this->cache_type != 'table') {
    return $this->termStanzaCache['count'][$type];
  }

  // Otherwise ask the temporary table how many rows have this type.
  $sql = "
    SELECT count(*) as num_terms
    FROM {tripal_obo_temp}
    WHERE type = :type
  ";
  $args = [':type' => $type];
  $row = chado_query($sql, $args)->fetchObject();
  return $row->num_terms;
}
-
/**
 * Retrieves all cached terms for a given stanza type.
 *
 * @param $type
 *   The stanza type: Typedef, Term, etc.
 *
 * @return
 *   With the 'table' cache type, an iterable database result whose rows
 *   expose `id` and `stanza` (the base64-encoded, serialized stanza as it is
 *   stored in tripal_obo_temp). Otherwise an array of term IDs, which is
 *   empty when nothing of that type has been cached.
 */
private function getCachedTermStanzas($type) {
  if ($this->cache_type == 'table') {
    // Filter on the requested type rather than a fixed one, and return the
    // stanza column too so callers can decode a row without a second query.
    $sql = "SELECT id, stanza FROM {tripal_obo_temp} WHERE type = :type";
    return chado_query($sql, [':type' => $type]);
  }

  if (!isset($this->termStanzaCache['types'][$type])) {
    return [];
  }
  return $this->termStanzaCache['types'][$type];
}
-
/**
 * Empties the term stanza cache.
 *
 * With the 'table' cache type every row of tripal_obo_temp is deleted.
 * Otherwise the in-memory cache is reset to empty ID, counter and type lists
 * for the Typedef, Term and Instance stanza types.
 */
private function clearTermStanzaCache() {
  if ($this->cache_type != 'table') {
    $stanza_types = ['Typedef', 'Term', 'Instance'];
    $this->termStanzaCache = [
      'ids' => [],
      // One zeroed counter per stanza type.
      'count' => array_fill_keys($stanza_types, 0),
      // One empty ID list per stanza type.
      'types' => array_fill_keys($stanza_types, []),
    ];
    return;
  }

  chado_query("DELETE FROM {tripal_obo_temp}");
}
/**
 * Adds a synonym to a term.
 *
 * @param $id
 *   The term ID (e.g. from the OBO stanza). Only used in error messages.
 * @param $cvterm_id
 *   The cvterm_id of the term to which the synonym will be added.
 * @param $synonym
 *   The value of the 'synonym' line of the term stanza.
 *
 * @throws Exception
 *   If the synonym type is not present in $this->syn_types or if the
 *   cvtermsynonym record cannot be inserted.
 *
 * @ingroup tripal_obo_loader
 */
private function addSynonym($id, $cvterm_id, $synonym) {
  $def = $synonym;
  $syn_type = '';

  // Separate out the synonym definition and type (e.g. EXACT).
  $matches = [];
  if (preg_match('/\"(.*?)\".*(EXACT|NARROW|BROAD|RELATED)/', $synonym, $matches)) {
    $def = $matches[1];
    $syn_type = $matches[2];
  }

  // Synonyms without an explicit type are treated as exact.
  if (!$syn_type) {
    $syn_type = 'exact';
  }

  // The type parsed from the stanza is upper case while the default above is
  // lower case, so accept either spelling as the key into syn_types.
  if (!isset($this->syn_types[$syn_type])) {
    $syn_type = strtolower($syn_type);
  }
  if (!isset($this->syn_types[$syn_type])) {
    throw new Exception(t('Cannot add synonym, "!synonym" to term: !id. ' .
      'ERROR: !error.',
      ['!synonym' => $def, '!id' => $id, '!error' => 'unknown synonym type "' . $syn_type . '"']));
  }
  $syn_type_term = $this->syn_types[$syn_type];

  // Now save the new synonym.
  $cvtermsynonym = new ChadoRecord('cvtermsynonym');
  $cvtermsynonym->setValues([
    'cvterm_id' => $cvterm_id,
    'synonym' => $def,
    'type_id' => $syn_type_term->cvterm_id,
  ]);

  // If the insert fails then catch the error and generate a more meaningful
  // message that helps with debugging. The original exception is chained so
  // its stack trace is not lost.
  try {
    $cvtermsynonym->insert();
  }
  catch (Exception $e) {
    throw new Exception(t('Cannot add synonym, "!synonym" to term: !id. ' .
      'ERROR: !error.',
      ['!synonym' => $def, '!id' => $id, '!error' => $e->getMessage()]), 0, $e);
  }
}
/**
 * Parses the OBO file and caches every stanza it contains.
 *
 * Header tag/value pairs (everything before the first "[...]" line) are
 * collected into $header and handed to setDefaults() when the first stanza
 * starts. Every stanza is then passed to cacheTermStanza(). Finally a CV
 * record is ensured for every namespace seen in the file.
 *
 * @param $obo_file
 *   The path on the file system where the ontology can be found.
 * @param $header
 *   An array passed by reference that will be populated with the header
 *   information from the OBO file.
 *
 * @throws Exception
 *   If the file cannot be opened, or if caching a stanza fails.
 *
 * @ingroup tripal_obo_loader
 */
private function parse($obo_file, &$header) {
  // TRUE while we are still in the header lines at the top of the file.
  $in_header = TRUE;
  // Holds the tag => values of the stanza currently being read.
  $stanza = [];
  // Number of bytes read so far; used for progress reporting.
  $num_read = 0;
  // The type of the current stanza: the text inside the [] brackets.
  $type = '';

  $this->setTotalItems(filesize($obo_file));
  $this->setItemsHandled(0);
  $this->setInterval(5);

  $fh = fopen($obo_file, 'r');
  if (!$fh) {
    throw new Exception("Cannot open the OBO file for reading: '$obo_file'");
  }

  try {
    // Compare against FALSE so that a line such as "0" does not end the loop.
    while (($line = fgets($fh)) !== FALSE) {
      // Progress is counted in bytes because the total is the file size.
      $num_read += strlen($line);
      $this->setItemsHandled($num_read);

      // Remove surrounding whitespace, including the newline.
      $line = trim($line);
      // Remove any special characters that may be hiding.
      $line = preg_replace('/[^(\x20-\x7F)]*/', '', $line);
      // Skip empty lines.
      if ((string) $line === '') {
        continue;
      }
      // Remove comments from end of lines. A line that was only a comment
      // has nothing left to parse.
      $line = rtrim(preg_replace('/^(.*?)\!.*$/', '\1', $line));
      if ($line === '') {
        continue;
      }

      // A "[Type]" line starts a new stanza. At the first one we're out of
      // the header.
      if (preg_match('/^\s*\[/', $line)) {

        // After parsing the header we need to get information about this OBO.
        if ($in_header) {
          $this->setDefaults($header);
          $in_header = FALSE;
        }

        // Store the stanza we just finished reading.
        if (sizeof($stanza) > 0) {
          $this->cacheParsedStanza($stanza, $type);
        }

        // Get the stanza type: Term, Typedef or Instance.
        $type = preg_replace('/^\s*\[\s*(.+?)\s*\]\s*$/', '\1', $line);
        // Start fresh with a new array.
        $stanza = [];
        continue;
      }

      // Break the line into tag and value at the first colon that is not
      // escaped. Escaped colons (backslash + colon) are hidden behind a
      // placeholder while splitting and restored afterwards.
      $line = str_replace('\\:', '|-|-|', $line);
      $pair = explode(':', $line, 2);
      $tag = str_replace('|-|-|', '\\:', $pair[0]);
      // A line without any colon has no value part.
      $value = isset($pair[1]) ? str_replace('|-|-|', '\\:', trim($pair[1])) : '';

      if ($in_header) {
        $header[$tag][] = $value;
      }
      else {
        $stanza[$tag][] = $value;
      }
    }
  }
  catch (Exception $e) {
    // Do not leak the file handle when caching a stanza fails.
    fclose($fh);
    throw $e;
  }
  fclose($fh);

  // Now add the last term in the file.
  if (sizeof($stanza) > 0) {
    $this->cacheParsedStanza($stanza, $type);
    $this->setItemsHandled($num_read);
  }

  // Make sure there are CV records for all namespaces.
  $message = t('Found the following namespaces: !namespaces.',
    ['!namespaces' => implode(', ', array_keys($this->obo_namespaces))]);
  foreach ($this->obo_namespaces as $namespace => $cv_id) {
    $this->addCV($namespace);
  }
  $this->logMessage($message);
}

/**
 * Records the namespace of a parsed stanza, makes its name unique and caches it.
 *
 * @param $stanza
 *   The tag => values array of a stanza that has been read completely.
 * @param $type
 *   The stanza type: the text inside the [] brackets.
 *
 * @throws Exception
 */
private function cacheParsedStanza($stanza, $type) {
  // If this term has a namespace then we want to keep track of it. The CV
  // may not be known yet; parse() calls addCV() for every namespace at the
  // end, which fills in the real cv_id.
  if (array_key_exists('namespace', $stanza)) {
    $namespace = $stanza['namespace'][0];
    $this->obo_namespaces[$namespace] = isset($this->all_cvs[$namespace]) ? $this->all_cvs[$namespace]->cv_id : NULL;
  }

  // Before caching this stanza, check the term's name to make sure it
  // doesn't conflict. If it does then add the ID to the name.
  if (isset($stanza['name'][0]) && array_key_exists($stanza['name'][0], $this->term_names)) {
    $stanza['name'][0] = $stanza['name'][0] . '(' . $stanza['id'][0] . ')';
  }

  $this->cacheTermStanza($stanza, $type);
}
-
/**
 * Iterates through all of the cached terms and caches any relationships.
 *
 * For every cached 'Term' stanza, the objects of its is_a lines and both the
 * relationship type and the object of its relationship lines are passed to
 * cacheTermStanza() so that terms referenced but not defined in the file get
 * cached too. Finally an 'is_a' Typedef is cached if there isn't one yet.
 *
 * @throws Exception
 */
private function cacheRelationships() {

  // Now that we have all of the terms parsed and loaded into the cache,
  // lets run through them one more time to cache any terms in relationships
  // as well.
  $terms = $this->getCachedTermStanzas('Term');
  $count = $this->getCacheSize('Term');
  $this->setTotalItems($count);
  $this->setItemsHandled(0);
  $this->setInterval(25);

  // Iterate through the terms.
  $num_handled = 0;
  foreach ($terms as $t) {

    // TODO: it would be better if we had a term iterator so that we
    // don't have to distinguish here between the table vs memory cache type.
    if ($this->cache_type == 'table') {
      // Table rows must carry the base64-encoded, serialized stanza in
      // their `stanza` property.
      $stanza = unserialize(base64_decode($t->stanza));
    }
    else {
      // In memory mode the iterator yields term IDs.
      $stanza = $this->termStanzaCache['ids'][$t];
    }

    // Check if this stanza has an is_a relationship that needs lookup.
    if (array_key_exists('is_a', $stanza)) {
      foreach ($stanza['is_a'] as $object_term) {
        $rstanza = [];
        $rstanza['id'][] = $object_term;
        $this->cacheTermStanza($rstanza, 'Term');
      }
    }

    // Check if this stanza has any additional relationships for lookup.
    if (array_key_exists('relationship', $stanza)) {
      foreach ($stanza['relationship'] as $value) {

        // The value is "<relationship term> <object term>": everything
        // before the first whitespace and everything after it.
        $rel_term = preg_replace('/^(.+?)\s.+?$/', '\1', $value);
        $object_term = preg_replace('/^.+?\s(.+?)$/', '\1', $value);

        $rstanza = [];
        $rstanza['id'][] = $rel_term;
        $this->cacheTermStanza($rstanza, 'Typedef');

        $rstanza = [];
        $rstanza['id'][] = $object_term;
        $this->cacheTermStanza($rstanza, 'Term');
      }
    }

    // Report progress once per term, inside the loop.
    $this->setItemsHandled(++$num_handled);
  }

  // Last of all, we need to add the "is_a" relationship. It's part of the
  // OBO specification as a built-in relationship but not all vocabularies
  // include that term.
  if (!$this->getCachedTermStanza('is_a')) {
    $stanza = [];
    $stanza['id'][0] = 'is_a';
    $stanza['name'][0] = 'is_a';
    $stanza['namespace'][0] = $this->default_namespace;
    $stanza['db_name'][0] = $this->default_db;
    $this->cacheTermStanza($stanza, 'Typedef');
  }
}
-
-
/**
 * Adds a property to the cvterm indicating it belongs to a subset.
 *
 * The property is a cvtermprop record whose type is the term stored under
 * the 'NCIT:C25693' key of $this->used_terms.
 *
 * @param $id
 *   The term ID. Only used in the error message.
 * @param $cvterm_id
 *   The cvterm_id of the term to which the subset will be added.
 * @param $subset
 *   The name of the subset.
 *
 * @throws Exception
 *   If the record cannot be inserted.
 */
private function addSubset($id, $cvterm_id, $subset) {
  $subset_prop = new ChadoRecord('cvtermprop');
  $prop_values = [
    'cvterm_id' => $cvterm_id,
    'type_id' => $this->used_terms['NCIT:C25693'],
    'value' => $subset,
  ];
  $subset_prop->setValues($prop_values);

  // Re-throw a failed insert with a message that names the subset and term,
  // which helps with debugging.
  try {
    $subset_prop->insert();
  }
  catch (Exception $insert_error) {
    $replacements = [
      '!subset' => $subset,
      '!id' => $id,
      '!error' => $insert_error->getMessage(),
    ];
    $message = t('Cannot add subset, "!subset" to term: !id. ERROR: !error.', $replacements);
    throw new Exception($message);
  }
}
-
/**
 * Returns the Chado database with the given name, creating it when needed.
 *
 * @param $dbname
 *   The name of the database to add.
 *
 * @return
 *   A Chado database object.
 */
private function addDB($dbname) {
  // A name missing from the cache is assumed not to exist in Chado yet, so
  // insert it and remember the new record.
  if (!array_key_exists($dbname, $this->all_dbs)) {
    $record = new ChadoRecord('db');
    $record->setValues(['name' => $dbname]);
    $record->insert();
    $this->all_dbs[$dbname] = (object) $record->getValues();
  }

  return $this->all_dbs[$dbname];
}
-
/**
 * Adds a vocabulary to Chado if it doesn't exist.
 *
 * @param $cvname
 *   The name of the vocabulary to add.
 *
 * @return
 *   A Chado cv object.
 */
private function addCV($cvname) {
  // TODO: we need to get the description and title from EBI for these
  // ontology so that we can put something in the proper fields when
  // adding a new CV or DB.

  // A known vocabulary is served from the CV cache (not the DB cache).
  if (array_key_exists($cvname, $this->all_cvs)) {
    return $this->all_cvs[$cvname];
  }

  // If it's not in the cache we can assume it doesn't exist and insert.
  $cv = new ChadoRecord('cv');
  $cv->setValues(['name' => $cvname]);
  $cv->insert();
  $cv = (object) $cv->getValues();

  // Remember the new record and register its namespace for this OBO.
  $this->all_cvs[$cvname] = $cv;
  $this->obo_namespaces[$cvname] = $cv->cv_id;

  return $cv;
}
-
/**
 * Indicates if the term belongs to this OBO or if it was borrowed.
 *
 * @param $stanza
 *   The term stanza. Its first 'namespace' value is inspected.
 *
 * @return
 *   FALSE when the namespace is one of this OBO's namespaces, TRUE otherwise.
 */
private function isTermBorrowed($stanza) {
  $term_namespace = $stanza['namespace'][0];
  return !array_key_exists($term_namespace, $this->obo_namespaces);
}
-
/**
 * Adds an alternative ID to a term.
 *
 * The alternative ID is split at its first colon into a database name and
 * an accession. The database and dbxref are created when missing and the
 * dbxref is then linked to the term through a cvterm_dbxref record.
 *
 * @param $id
 *   The term ID. Not used by this function.
 * @param $cvterm_id
 *   The cvterm_id of the term to which the alternative ID will be added.
 * @param $alt_id
 *   The alternative ID, in the form "DB:accession".
 *
 * @throws Exception
 *   If the alternative ID has no accession.
 *
 * @ingroup tripal_obo_loader
 */
private function addAltID($id, $cvterm_id, $alt_id) {
  // Split "DB:accession"; both parts stay empty when there is no colon.
  $parts = [];
  $has_prefix = preg_match('/^(.+?):(.*)$/', $alt_id, $parts);
  $dbname = $has_prefix ? $parts[1] : '';
  $accession = $has_prefix ? $parts[2] : '';

  if (!$accession) {
    throw new Exception("Cannot add an Alt ID without an accession: '$alt_id'");
  }

  // Add the database if it doesn't exist.
  $db_id = $this->addDB($dbname)->db_id;

  // Reuse the dbxref when it is already there, otherwise create it.
  $alt_dbxref = new ChadoRecord('dbxref');
  $alt_dbxref->setValues([
    'db_id' => $db_id,
    'accession' => $accession,
  ]);
  if (!$alt_dbxref->find()) {
    $alt_dbxref->insert();
  }

  // Link the dbxref to the term.
  $link = new ChadoRecord('cvterm_dbxref');
  $link->setValues([
    'cvterm_id' => $cvterm_id,
    'dbxref_id' => $alt_dbxref->getID(),
  ]);
  $link->insert();
}
/**
 * Adds a database reference to a cvterm.
 *
 * The database name is the text before the first colon of the xref. The
 * accession is the text after that colon, up to the first whitespace, '{',
 * '[' or '"'. Xrefs that are web links (http/https) are skipped.
 *
 * @param $id
 *   The term ID. Not used by this function.
 * @param $cvterm_id
 *   The cvterm_id of the term to which the xref will be added.
 * @param $xref
 *   The cross reference. It should be of the form from the OBO specification.
 *
 * @throws Exception
 *   If the xref has no accession.
 *
 * @ingroup tripal_obo_loader
 */
private function addXref($id, $cvterm_id, $xref) {
  $dbname = preg_replace('/^(.+?):.*$/', '$1', $xref);
  $accession = preg_replace('/^.+?:\s*(.*?)(\{.+$|\[.+$|\s.+$|\".+$|$)/', '$1', $xref);
  if (!$accession) {
    throw new Exception("Cannot add an xref without an accession: '$xref'");
  }

  // If the xref is a web link rather than a database accession then skip
  // it for now.
  if (in_array($dbname, ['http', 'https'], TRUE)) {
    return;
  }

  // Add the database if it doesn't exist.
  $db = $this->addDB($dbname);
  $db_id = $db->db_id;

  // Now add the dbxref if it doesn't already exist.
  $dbxref = new ChadoRecord('dbxref');
  $dbxref->setValues([
    'db_id' => $db_id,
    'accession' => $accession,
  ]);
  if (!$dbxref->find()) {
    $dbxref->insert();
  }

  // Now add the cvterm_dbxref record.
  $cvterm_dbxref = new ChadoRecord('cvterm_dbxref');
  $cvterm_dbxref->setValues([
    'cvterm_id' => $cvterm_id,
    'dbxref_id' => $dbxref->getID(),
  ]);
  $cvterm_dbxref->insert();
}
/**
 * Adds a comment to a cvterm.
 *
 * The comment is stored as a cvtermprop record whose type is the term kept
 * under the 'rdfs:comment' key of $this->used_terms.
 *
 * @param $id
 *   The term ID. Only used in the error message.
 * @param $cvterm_id
 *   A cvterm_id of the term to which the comment will be added.
 * @param $comment
 *   The comment to add to the cvterm.
 * @param $rank
 *   The rank of the comment.
 *
 * @throws Exception
 *   If the record cannot be inserted.
 *
 * @ingroup tripal_obo_loader
 */
private function addComment($id, $cvterm_id, $comment, $rank) {
  $type_id = $this->used_terms['rdfs:comment'];

  $comment_prop = new ChadoRecord('cvtermprop');
  $prop_values = [
    'cvterm_id' => $cvterm_id,
    'type_id' => $type_id,
    'value' => $comment,
    'rank' => $rank,
  ];
  $comment_prop->setValues($prop_values);

  // Re-throw a failed insert with a message that names the comment and
  // term, which helps with debugging.
  try {
    $comment_prop->insert();
  }
  catch (Exception $insert_error) {
    $replacements = [
      '!comment' => $comment,
      '!id' => $id,
      '!error' => $insert_error->getMessage(),
    ];
    $message = t('Cannot add comment, "!comment" to term: !id. ERROR: !error.', $replacements);
    throw new Exception($message);
  }
}
/**
 * API call to Ontology Lookup Service provided by
 * https://www.ebi.ac.uk/ols/docs/api#resources-terms
 *
 * @param $accession
 *   Accession term for query. The part before the first colon, lower-cased
 *   and with whitespace removed, is used as the ontology name.
 * @param $type_of_search
 *   Either ontology, term, query, or query-non-local.
 *
 * @return
 *   The decoded JSON response, or NULL when the search type is not
 *   recognised or the response carries no data.
 *
 * @ingroup tripal_obo_loader
 */
private function oboEbiLookup($accession, $type_of_search) {
  // Grab just the ontology from the $accession.
  $parts = explode(':', $accession);
  $ontology = preg_replace('/\s+/', '', strtolower($parts[0]));

  $base_url = 'http://www.ebi.ac.uk/ols/api/';
  switch ($type_of_search) {
    case 'ontology':
      $full_url = $base_url . 'ontologies/' . $ontology;
      break;

    case 'term':
      // The IRI of the term; this value must be double URL encoded.
      $iri = urlencode(urlencode("http://purl.obolibrary.org/obo/" . str_replace(':', '_', $accession)));
      $full_url = $base_url . 'ontologies/' . $ontology . '/terms/' . $iri;
      break;

    case 'query':
      // The accession is encoded so that characters such as spaces or '&'
      // cannot break the query string.
      $full_url = $base_url . 'search?q=' . urlencode($accession) . '&queryFields=obo_id&local=true';
      break;

    case 'query-non-local':
      $full_url = $base_url . 'search?q=' . urlencode($accession) . '&queryFields=obo_id';
      break;

    default:
      // Unknown search type: there is nothing to request.
      return NULL;
  }

  $response = drupal_http_request($full_url, []);
  if (empty($response) || !isset($response->data)) {
    return NULL;
  }
  return drupal_json_decode($response->data);
}
- }
/**
 * Ajax callback for the OBOImporter::form() function.
 *
 * @param $form
 *   The form array.
 * @param $form_state
 *   The form state. Not used.
 *
 * @return
 *   The 'obo_existing' element of the form.
 */
function tripal_cv_obo_form_ajax_callback($form, $form_state) {
  $existing_element = $form['obo_existing'];
  return $existing_element;
}
|