<?php
/**
 * Loads the OBO file registered in the tripal_cv_obo table under an obo_id.
 *
 * The stored path is tried, in order, as a remote URL, as a path relative to
 * the Drupal installation directory, and finally exactly as stored.
 *
 * @param $obo_id
 *   The obo_id of the tripal_cv_obo record to load.
 * @param $jobid
 *   Optional job identifier that is handed through to the loaders.
 */
function tripal_cv_load_obo_v1_2_id($obo_id, $jobid = NULL) {
  $sql = "SELECT * FROM {tripal_cv_obo} WHERE obo_id = %d";
  $obo = db_fetch_object(db_query($sql, $obo_id));

  // A missing record yields FALSE; stop here rather than read properties
  // from a non-object further down.
  if (!$obo) {
    print "ERROR: could not find an OBO record with obo_id: '$obo_id'\n";
    return;
  }

  // Remote files are downloaded first. The $is_new argument is 0 in every
  // call below because the record being loaded already exists.
  if (preg_match('/^(https?|ftp):\/\//', $obo->path)) {
    tripal_cv_load_obo_v1_2_url($obo->name, $obo->path, $jobid, 0);
    return;
  }

  // First try the path relative to the Drupal installation directory.
  $dfile = $_SERVER['DOCUMENT_ROOT'] . base_path() . $obo->path;
  if (file_exists($dfile)) {
    tripal_cv_load_obo_v1_2_file($obo->name, $dfile, $jobid, 0);
    return;
  }

  // Then try the path exactly as it was stored.
  if (file_exists($obo->path)) {
    tripal_cv_load_obo_v1_2_file($obo->name, $obo->path, $jobid, 0);
    return;
  }

  print "ERROR: could not find OBO file: '$obo->path'\n";
}
/**
 * Loads an OBO file that is available on the local file system.
 *
 * @param $obo_name
 *   The name under which the OBO is registered when $is_new is set.
 * @param $file
 *   Path of the OBO file to load.
 * @param $jobid
 *   Optional job identifier handed through to the loader.
 * @param $is_new
 *   When TRUE a tripal_cv_obo reference record is added for the file.
 */
function tripal_cv_load_obo_v1_2_file($obo_name, $file, $jobid = NULL, $is_new = TRUE) {
  // Passed by reference to the loader, which records the vocabularies it
  // touched as namespace => cv_id.
  $loaded_cvs = array();
  tripal_cv_load_obo_v1_2($file, $jobid, $loaded_cvs);

  // Register the file only when the caller says it is not yet known.
  if ($is_new) {
    tripal_cv_load_obo_add_ref($obo_name, $file);
  }

  // Refresh the cvtermpath table for every vocabulary that was loaded.
  tripal_cv_load_update_cvtermpath($loaded_cvs, $jobid);
  print "Ontology Sucessfully loaded!\n";
}
/**
 * Downloads an OBO file from a URL into a temporary file and loads it.
 *
 * @param $obo_name
 *   The name under which the OBO is registered when $is_new is set.
 * @param $url
 *   The remote location of the OBO file.
 * @param $jobid
 *   Optional job identifier handed through to the loader.
 * @param $is_new
 *   When TRUE a tripal_cv_obo reference record is added for the URL.
 */
function tripal_cv_load_obo_v1_2_url($obo_name, $url, $jobid = NULL, $is_new = TRUE) {
  $newcvs = array();

  $temp = tempnam(sys_get_temp_dir(), 'obo_');
  if ($temp === FALSE) {
    tripal_cv_obo_quiterror("Unable to create a temporary file for downloading $url.");
  }
  print "Downloading URL $url, saving to $temp\n";

  // Check the remote handle before touching the temporary file, and remove
  // the temporary file again on every failure path (quiterror never returns).
  $url_fh = fopen($url, "r");
  if (!$url_fh) {
    unlink($temp);
    tripal_cv_obo_quiterror("Unable to download the remote OBO file at $url. Could a firewall be blocking outgoing connections? ".
      " if you are unable to download the file you may manually download the OBO file and use the web interface to ".
      " specify the location of the file on your server.");
  }
  $obo_fh = fopen($temp, "w");
  if (!$obo_fh) {
    fclose($url_fh);
    unlink($temp);
    tripal_cv_obo_quiterror("Unable to open the temporary file $temp for writing.");
  }

  // Copy the remote file in small chunks. A failed read aborts the load so
  // that a truncated ontology is never parsed (and the loop cannot spin).
  while (!feof($url_fh)) {
    $chunk = fread($url_fh, 255);
    if ($chunk === FALSE) {
      fclose($url_fh);
      fclose($obo_fh);
      unlink($temp);
      tripal_cv_obo_quiterror("Unable to read from the remote OBO file at $url.");
    }
    fwrite($obo_fh, $chunk);
  }
  fclose($url_fh);
  fclose($obo_fh);

  // Parse the downloaded copy, then discard it.
  tripal_cv_load_obo_v1_2($temp, $jobid, $newcvs);
  unlink($temp);

  // Register the URL only when the caller says it is not yet known.
  if ($is_new) {
    tripal_cv_load_obo_add_ref($obo_name, $url);
  }

  // Refresh the cvtermpath table for every vocabulary that was loaded.
  tripal_cv_load_update_cvtermpath($newcvs, $jobid);
  print "Ontology Sucessfully loaded!\n";
}
/**
 * Rebuilds the cvtermpath entries of each vocabulary touched by a load.
 *
 * @param $newcvs
 *   Array of cv_id values keyed by namespace name.
 * @param $jobid
 *   Job identifier handed through to tripal_cv_update_cvtermpath().
 */
function tripal_cv_load_update_cvtermpath($newcvs, $jobid) {
  print "\nUpdating cvtermpath table. This may take a while...\n";
  // The namespace keys are not needed here, only the cv_id values.
  foreach ($newcvs as $cv_id) {
    tripal_cv_update_cvtermpath($cv_id, $jobid);
  }
}
/**
 * Records an OBO name and its location in the tripal_cv_obo table.
 *
 * @param $name
 *   The name of the OBO.
 * @param $path
 *   The file path or URL the OBO was loaded from.
 */
function tripal_cv_load_obo_add_ref($name, $path) {
  db_query("INSERT INTO {tripal_cv_obo} (name,path) VALUES ('%s','%s')", $name, $path);
}
/**
 * Parses an OBO file and adds its typedefs and terms.
 *
 * @param $file
 *   Path of the OBO file to load.
 * @param $jobid
 *   Optional job identifier used for progress reporting.
 * @param $newcvs
 *   Passed by reference; receives cv_id values keyed by namespace name for
 *   the vocabularies that were touched.
 */
function tripal_cv_load_obo_v1_2($file, $jobid = NULL, &$newcvs) {
  // The returned handle is not used in this function; the call is kept so
  // that it still happens before any other work, as before.
  tripal_db_persistent_chado();

  $header = array();
  $obo = array();
  print "Opening File $file\n";

  // Make sure the two databases the loader relies on exist.
  if (!tripal_db_add_db('internal')) {
    tripal_cv_obo_quiterror("Cannot add 'internal' database");
  }
  if (!tripal_db_add_db('_global')) {
    tripal_cv_obo_quiterror("Cannot add '_global' database");
  }

  // Parse the file into the header tags and the stanzas grouped by type.
  $default_db = tripal_cv_obo_parse($file, $obo, $header);

  // The header's default namespace becomes the default vocabulary. A missing
  // header tag is passed on as NULL instead of raising an undefined index.
  $namespace = isset($header['default-namespace'][0]) ? $header['default-namespace'][0] : NULL;
  $defaultcv = tripal_cv_add_cv($namespace, '');
  if (!$defaultcv) {
    tripal_cv_obo_quiterror('Cannot add namespace ' . $namespace);
  }
  $newcvs[$namespace] = $defaultcv->cv_id;

  // Typedefs are added first, flagged as relationship terms. A file without
  // Typedef stanzas simply contributes none.
  $typedefs = isset($obo['Typedef']) ? $obo['Typedef'] : array();
  foreach ($typedefs as $typedef) {
    $t = array();
    $t['id'] = isset($typedef['id'][0]) ? $typedef['id'][0] : NULL;
    $t['name'] = isset($typedef['name'][0]) ? $typedef['name'][0] : NULL;
    $t['def'] = isset($typedef['def'][0]) ? $typedef['def'][0] : NULL;
    $t['subset'] = isset($typedef['subset'][0]) ? $typedef['subset'][0] : NULL;
    tripal_cv_obo_process_term($t, $defaultcv->name, $obo, 1, $newcvs, $default_db);
  }

  // Then the regular terms.
  $terms = isset($obo['Term']) ? $obo['Term'] : array();
  if (!tripal_cv_obo_process_terms($terms, $defaultcv->name, $obo, $jobid, $newcvs, $default_db)) {
    tripal_cv_obo_quiterror('Cannot add terms from this ontology');
  }
  return;
}
/**
 * Logs an error for the OBO loader through watchdog and ends the script.
 *
 * @param $message
 *   The error message to log.
 */
function tripal_cv_obo_quiterror($message) {
  // The message carries no placeholders, so the variables array is empty.
  $no_variables = array();
  watchdog("T_obo_loader", $message, $no_variables, WATCHDOG_ERROR);
  exit();
}
/**
 * Adds every term of an OBO file, reporting job progress along the way.
 *
 * NOTE(review): only the first id, name, def, subset, namespace and
 * is_obsolete values of each stanza are handed to
 * tripal_cv_obo_process_term(); multi-valued tags such as is_a, relationship,
 * synonym or xref are not passed on from here. Confirm this is intended.
 *
 * @param $terms
 *   Array of Term stanzas as produced by tripal_cv_obo_parse().
 * @param $defaultcv
 *   The default vocabulary, handed through to tripal_cv_obo_process_term().
 * @param $obo
 *   The complete parsed OBO array.
 * @param $jobid
 *   Optional job identifier; progress is only reported when it is set.
 * @param $newcvs
 *   Passed by reference; receives cv_id values keyed by namespace name.
 * @param $default_db
 *   The default database name, handed through to the term loader.
 *
 * @return
 *   1 on success. Failures end the script through tripal_cv_obo_quiterror().
 */
function tripal_cv_obo_process_terms($terms, $defaultcv, $obo, $jobid = NULL, &$newcvs, $default_db) {
  if (!is_array($terms)) {
    $terms = array();
  }
  $count = sizeof($terms);

  // Report progress roughly every 1% of the terms, but never with an
  // interval below 1: a zero interval would make the modulo below divide by
  // zero for files with fewer than 100 terms.
  $interval = intval($count * 0.01);
  if ($interval < 1) {
    $interval = 1;
  }

  print "Loading terms...\n";
  $i = 0;
  foreach ($terms as $term) {

    // Update the job status.
    if ($jobid and $i % $interval == 0) {
      $complete = ($i / $count) * 100;
      tripal_job_set_progress($jobid, intval($complete));
      printf("%d of %d records. (%0.2f%%) memory: %d\r", $i, $count, $complete, memory_get_usage());
    }

    // Flatten the stanza to the first value of each supported tag. Tags that
    // are absent become NULL.
    $t = array();
    $t['id'] = isset($term['id'][0]) ? $term['id'][0] : NULL;
    $t['name'] = isset($term['name'][0]) ? $term['name'][0] : NULL;
    $t['def'] = isset($term['def'][0]) ? $term['def'][0] : NULL;
    $t['subset'] = isset($term['subset'][0]) ? $term['subset'][0] : NULL;
    $t['namespace'] = isset($term['namespace'][0]) ? $term['namespace'][0] : NULL;
    $t['is_obsolete'] = isset($term['is_obsolete'][0]) ? $term['is_obsolete'][0] : NULL;
    if (!tripal_cv_obo_process_term($t, $defaultcv, $obo, 0, $newcvs, $default_db)) {
      tripal_cv_obo_quiterror("Failed to process terms from the ontology");
    }
    $i++;
  }

  // Set the final status. An empty term list counts as fully complete.
  if ($jobid) {
    $complete = $count > 0 ? ($i / $count) * 100 : 100;
    tripal_job_set_progress($jobid, intval($complete));
    printf("%d of %d records. (%0.2f%%) memory: %d\r", $i, $count, $complete, memory_get_usage());
  }

  return 1;
}
/**
 * Adds a single term together with the tag values supplied for it.
 *
 * Handled tags: namespace, alt_id, synonym (plus the legacy exact_synonym,
 * narrow_synonym and broad_synonym tags), comment, xref, xref_analog,
 * xref_unk, is_a and relationship. The tags is_anonymous, subset,
 * intersection_of, union_of, disjoint_from, replaced_by, consider, use_term
 * and builtin are recognised by the format but nothing is stored for them.
 *
 * NOTE(review): the callers visible in this file pass a flattened term that
 * only holds id, name, def, subset, namespace and is_obsolete, so the
 * multi-valued branches below only run for callers that supply those tags.
 *
 * @param $term
 *   The term array.
 * @param $defaultcv
 *   The default vocabulary, handed through to tripal_cv_add_cvterm().
 * @param $obo
 *   The complete parsed OBO array, used to look up related terms.
 * @param $is_relationship
 *   Set to 1 when the term is a relationship type (typedef).
 * @param $newcvs
 *   Passed by reference; receives the term's cv_id keyed by its namespace.
 * @param $default_db
 *   The default database name, handed through to tripal_cv_add_cvterm().
 *
 * @return
 *   1 on success. Failures end the script through tripal_cv_obo_quiterror().
 */
function tripal_cv_obo_process_term($term, $defaultcv, $obo, $is_relationship = 0, &$newcvs, $default_db) {

  // Add the cvterm itself.
  $cvterm = tripal_cv_add_cvterm($term, $defaultcv, $is_relationship, 1, $default_db);
  if (!$cvterm) {
    tripal_cv_obo_quiterror("Cannot add the term " . $term['id']);
  }

  // Remember which vocabulary the term's namespace maps to. Typedef callers
  // do not supply a namespace, hence the empty() check instead of a bare read.
  if (!empty($term['namespace'])) {
    $newcvs[$term['namespace']] = $cvterm->cv_id;
  }

  // Alternate ids are stored as database cross references of the term.
  if (isset($term['alt_id'])) {
    foreach ($term['alt_id'] as $alt_id) {
      if (!tripal_cv_obo_add_cvterm_dbxref($cvterm, $alt_id)) {
        tripal_cv_obo_quiterror("Cannot add alternate id $alt_id");
      }
    }
  }

  // Fold the legacy synonym tags into the 'synonym' list by inserting the
  // matching scope keyword after the quoted text, then add all synonyms in a
  // single pass.
  $legacy_scopes = array(
    'exact_synonym' => 'EXACT',
    'narrow_synonym' => 'NARROW',
    'broad_synonym' => 'BROAD',
  );
  foreach ($legacy_scopes as $tag => $scope) {
    if (isset($term[$tag])) {
      foreach ($term[$tag] as $synonym) {
        $term['synonym'][] = preg_replace('/^\s*(\".+?\")(.*?)$/', '$1 ' . $scope . ' $2', $synonym);
      }
    }
  }
  if (isset($term['synonym'])) {
    if (!tripal_cv_obo_add_synonyms($term, $cvterm)) {
      tripal_cv_obo_quiterror("Cannot add/update synonyms");
    }
  }

  // Comments become cvterm properties, ranked in the order they appear.
  if (isset($term['comment'])) {
    foreach (array_values($term['comment']) as $rank => $comment) {
      if (!tripal_cv_obo_add_cvterm_prop($cvterm, 'comment', $comment, $rank)) {
        tripal_cv_obo_quiterror("Cannot add/update cvterm property");
      }
    }
  }

  // All three xref flavours are stored the same way.
  foreach (array('xref', 'xref_analog', 'xref_unk') as $tag) {
    if (isset($term[$tag])) {
      foreach ($term[$tag] as $xref) {
        if (!tripal_cv_obo_add_cvterm_dbxref($cvterm, $xref)) {
          tripal_cv_obo_quiterror("Cannot add/update cvterm database reference (dbxref).");
        }
      }
    }
  }

  // is_a relationships.
  if (isset($term['is_a'])) {
    foreach ($term['is_a'] as $is_a) {
      if (!tripal_cv_obo_add_relationship($cvterm, $defaultcv, $obo, 'is_a', $is_a, $is_relationship)) {
        tripal_cv_obo_quiterror("Cannot add relationship is_a: $is_a");
      }
    }
  }

  // Other relationships: the text up to the first whitespace is the
  // relationship name, the remainder is the object.
  if (isset($term['relationship'])) {
    foreach ($term['relationship'] as $value) {
      $rel = preg_replace('/^(.+?)\s.+?$/', '\1', $value);
      $object = preg_replace('/^.+?\s(.+?)$/', '\1', $value);
      if (!tripal_cv_obo_add_relationship($cvterm, $defaultcv, $obo, $rel, $object, $is_relationship)) {
        tripal_cv_obo_quiterror("Cannot add relationship $rel: $object");
      }
    }
  }

  return 1;
}
/**
 * Adds a cvterm_relationship record between a term and a named object term.
 *
 * @param $cvterm
 *   The subject cvterm object.
 * @param $defaultcv
 *   The default vocabulary, handed through to tripal_cv_add_cvterm().
 * @param $obo
 *   The complete parsed OBO array, searched for the object term.
 * @param $rel
 *   The name of the relationship (e.g. 'is_a').
 * @param $objname
 *   The id of the object term as it appears in the OBO file.
 * @param $object_is_relationship
 *   Set to 1 when the object term is itself a relationship type.
 *
 * @return
 *   TRUE on success. Failures end the script through
 *   tripal_cv_obo_quiterror().
 */
function tripal_cv_obo_add_relationship($cvterm, $defaultcv, $obo, $rel, $objname, $object_is_relationship = 0) {

  // Make sure the relationship cvterm exists.
  $term = array(
    'name' => array($rel),
    'id' => array($rel),
    'definition' => array(''),
    'is_obsolete' => array(0),
  );
  $relcvterm = tripal_cv_add_cvterm($term, $defaultcv, 1, 0);
  if (!$relcvterm) {
    tripal_cv_obo_quiterror("Cannot find or insert the relationship term: $rel\n");
  }

  // Look the object term up in the parsed file and make sure its cvterm
  // exists as well.
  $objterm = tripal_cv_obo_get_term($obo, $objname);
  if (!$objterm) {
    tripal_cv_obo_quiterror("Could not find object term $objname\n");
  }
  $objcvterm = tripal_cv_add_cvterm($objterm, $defaultcv, $object_is_relationship, 1);
  if (!$objcvterm) {
    tripal_cv_obo_quiterror("Cannot add/find cvterm");
  }

  // Insert the relationship only if it is not already present.
  $values = array(
    'type_id' => $relcvterm->cvterm_id,
    'subject_id' => $cvterm->cvterm_id,
    'object_id' => $objcvterm->cvterm_id
  );
  $options = array('statement_name' => 'sel_cvtermrelationship_tysuob');
  $result = tripal_core_chado_select('cvterm_relationship', array('*'), $values, $options);
  if (count($result) == 0) {
    $options = array('statement_name' => 'ins_cvtermrelationship_tysuob');
    $success = tripal_core_chado_insert('cvterm_relationship', $values, $options);
    if (!$success) {
      tripal_cv_obo_quiterror("Cannot add term relationship: '$cvterm->name' $rel '$objcvterm->name'");
    }
  }
  return TRUE;
}
/**
 * Finds the stanza with the given id in a parsed OBO array.
 *
 * @param $obo
 *   The parsed OBO array: stanza type => array of stanzas.
 * @param $id
 *   The id to look for; it is compared with the first 'id' value of each
 *   stanza.
 *
 * @return
 *   The matching stanza array, or FALSE when no stanza has that id.
 */
function tripal_cv_obo_get_term($obo, $id) {
  foreach ($obo as $type) {
    // tripal_cv_obo_parse() stores each stanza under its id, so try a direct
    // lookup before scanning. The id is verified so that a key that merely
    // looks like the id is never trusted.
    if (isset($type[$id]['id'][0]) and strcmp($type[$id]['id'][0], $id) == 0) {
      return $type[$id];
    }

    // Fall back to a scan for arrays that are not keyed by id. Stanzas
    // without an id can never match and are skipped.
    foreach ($type as $term) {
      if (isset($term['id'][0]) and strcmp($term['id'][0], $id) == 0) {
        return $term;
      }
    }
  }
  return FALSE;
}
/**
 * Adds the synonyms of a term to the cvtermsynonym table.
 *
 * @param $term
 *   The term array; only its 'synonym' list is read.
 * @param $cvterm
 *   The cvterm object the synonyms belong to.
 *
 * @return
 *   TRUE on success. Failures end the script through
 *   tripal_cv_obo_quiterror().
 */
function tripal_cv_obo_add_synonyms($term, $cvterm) {

  // Make sure the vocabulary that holds the synonym types exists.
  $syncv = tripal_cv_add_cv('synonym_type');

  if (!isset($term['synonym'])) {
    return TRUE;
  }

  foreach ($term['synonym'] as $synonym) {

    // Split the value into the quoted synonym text and the word that
    // follows it, which is used (lower-cased) as the synonym type.
    $def = preg_replace('/^\s*"(.*)"\s*.*$/', '\1', $synonym);
    $type = drupal_strtolower(preg_replace('/^.*"\s+(.*?)\s+.*$/', '\1', $synonym));

    // Look the synonym type up and create it when it is missing.
    $values = array(
      'name' => $type,
      'cv_id' => array(
        'name' => 'synonym_type',
      ),
    );
    $options = array('statement_name' => 'sel_cvterm_nacv');
    $results = tripal_core_chado_select('cvterm', array('*'), $values, $options);

    if (count($results) == 0) {
      // A separate variable is used so the $term parameter is not clobbered.
      $type_term = array(
        'name' => array($type),
        'id' => array("internal:$type"),
        'definition' => array(''),
        'is_obsolete' => array(0),
      );
      $syntype = tripal_cv_add_cvterm($type_term, $syncv, 0, 1);
      if (!$syntype) {
        tripal_cv_obo_quiterror("Cannot add synonym type: internal:$type");
      }
    }
    else {
      // Reuse the existing type. Without this the type id below would be
      // undefined, or left over from the previous synonym.
      // Assumes the select returns a list of row objects - TODO confirm.
      $syntype = $results[0];
    }

    // Add the synonym unless the term already has it.
    $values = array(
      'cvterm_id' => $cvterm->cvterm_id,
      'synonym' => $def
    );
    $options = array('statement_name' => 'sel_cvtermsynonym_cvsy');
    $results = tripal_core_chado_select('cvtermsynonym', array('*'), $values, $options);
    if (count($results) == 0) {
      $values = array(
        'cvterm_id' => $cvterm->cvterm_id,
        'synonym' => $def,
        'type_id' => $syntype->cvterm_id
      );
      $options = array('statement_name' => 'ins_cvtermsynonym_cvsy');
      $success = tripal_core_chado_insert('cvtermsynonym', $values, $options);
      if (!$success) {
        tripal_cv_obo_quiterror("Failed to insert the synonym for term: $cvterm->name ($def)");
      }
    }
  }
  return TRUE;
}
/**
 * Parses an OBO file into its header tags and its stanzas.
 *
 * @param $obo_file
 *   Path of the OBO file to read.
 * @param $obo
 *   Passed by reference; filled as stanza type => stanza id => array of
 *   tag => list of values. A stanza whose type and id were seen before is
 *   merged into the stored one, with its tags replacing tags of the same
 *   name.
 * @param $header
 *   Passed by reference; filled as tag => list of values for every tag that
 *   appears before the first stanza.
 *
 * @return
 *   The text before the first colon of the last 'id' value that contains a
 *   colon, or '_global' when there is no such id.
 */
function tripal_cv_obo_parse($obo_file, &$obo, &$header) {
  $in_header = 1;
  $stanza = array();
  $type = NULL;
  $default_db = '_global';

  $fh = fopen($obo_file, 'r');
  if (!$fh) {
    tripal_cv_obo_quiterror("Could not open the OBO file: $obo_file");
  }

  // Compare against FALSE explicitly so a line such as "0" cannot end the
  // loop early.
  while (($line = fgets($fh)) !== FALSE) {

    // Remove trailing whitespace and newlines.
    $line = rtrim($line);

    // Drop every character outside the range \x20-\x7F.
    $line = preg_replace('/[^(\x20-\x7F)]*/', '', $line);

    // Skip empty lines.
    if (strcmp($line, '') == 0) {
      continue;
    }

    // Remove comments: everything from the first '!' onwards.
    $line = preg_replace('/^(.*?)\!.*$/', '\1', $line);

    // A line that held nothing but a comment carries no tag.
    if (trim($line) === '') {
      continue;
    }

    // A line starting with '[' opens a new stanza and ends the header.
    if (preg_match('/^\s*\[/', $line)) {
      $in_header = 0;

      // Store the stanza that was being collected, if any.
      _tripal_cv_obo_parse_store_stanza($obo, $type, $stanza);

      // The stanza type is the text between the brackets.
      $type = preg_replace('/^\s*\[\s*(.+?)\s*\]\s*$/', '\1', $line);
      $stanza = array();
      continue;
    }

    // Protect escaped colons so the tag/value split below ignores them, then
    // split on the first real colon.
    $line = str_replace('\\:', '|-|-|', $line);
    $pair = explode(":", $line, 2);
    $tag = $pair[0];
    $value = isset($pair[1]) ? trim($pair[1]) : '';

    // Put the escaped colons back.
    $tag = str_replace('|-|-|', '\\:', $tag);
    $value = str_replace('|-|-|', '\\:', $value);

    // Remember the text before the first colon of the most recent id.
    $matches = array();
    if ($tag == 'id' and preg_match('/^(.+?):.*$/', $value, $matches)) {
      $default_db = $matches[1];
    }

    if ($in_header) {
      if (!isset($header[$tag])) {
        $header[$tag] = array();
      }
      $header[$tag][] = $value;
    }
    else {
      if (!isset($stanza[$tag])) {
        $stanza[$tag] = array();
      }
      $stanza[$tag][] = $value;
    }
  }
  fclose($fh);

  // Store the last stanza of the file.
  _tripal_cv_obo_parse_store_stanza($obo, $type, $stanza);

  return $default_db;
}

/**
 * Stores one collected stanza in the parsed OBO array (parser helper).
 *
 * @param $obo
 *   Passed by reference; the parsed OBO array being built.
 * @param $type
 *   The stanza type the stanza belongs to.
 * @param $stanza
 *   The collected tag => list of values array; an empty array is ignored.
 */
function _tripal_cv_obo_parse_store_stanza(&$obo, $type, $stanza) {
  if (sizeof($stanza) == 0) {
    return;
  }

  // A stanza without an id is stored under the empty key.
  $id = isset($stanza['id'][0]) ? $stanza['id'][0] : '';

  if (!isset($obo[$type])) {
    $obo[$type] = array();
  }
  if (!isset($obo[$type][$id])) {
    $obo[$type][$id] = $stanza;
  }
  else {
    // array_merge() returns the merged array rather than changing its
    // arguments, so the result has to be assigned back.
    $obo[$type][$id] = array_merge($obo[$type][$id], $stanza);
  }
}
/**
 * Links a cvterm to a database cross reference, adding the dbxref if needed.
 *
 * @param $cvterm
 *   The cvterm object to link.
 * @param $xref
 *   The cross reference text, of the form 'DBNAME:ACCESSION', optionally
 *   followed by a quoted description or other trailing text.
 *
 * @return
 *   TRUE on success, FALSE when the xref has no accession. Other failures
 *   end the script through tripal_cv_obo_quiterror().
 */
function tripal_cv_obo_add_cvterm_dbxref($cvterm, $xref) {
  // Everything before the first colon is the database name; the accession
  // follows it and ends at the first whitespace, quote, '{' or '['.
  $dbname = preg_replace('/^(.+?):.*$/', '$1', $xref);
  $accession = preg_replace('/^.+?:\s*(.*?)(\{.+$|\[.+$|\s.+$|\".+$|$)/', '$1', $xref);

  // The description is the quoted text, if there is any. preg_match() is
  // used so that an xref without quotes yields an empty description instead
  // of the whole xref.
  $description = '';
  $matches = array();
  if (preg_match('/^.+?\"(.+?)\".*?$/', $xref, $matches)) {
    $description = $matches[1];
  }

  // Compare explicitly so that an accession of "0" is still accepted. The
  // warning is logged and FALSE returned; the caller decides how to react.
  if ($accession === NULL or $accession === '') {
    watchdog("T_obo_loader", "Cannot add a dbxref without an accession: '$xref'", NULL, WATCHDOG_WARNING);
    return FALSE;
  }

  // An xref that is a web address is stored whole, under the 'URL' database.
  if (strcmp($dbname, 'http') == 0) {
    $accession = $xref;
    $dbname = 'URL';
  }

  // Make sure the database exists.
  $db = tripal_db_add_db($dbname);
  if (!$db) {
    tripal_cv_obo_quiterror("Cannot find database '$dbname' in Chado.");
  }

  // Make sure the dbxref exists.
  $dbxref = tripal_cv_obo_add_dbxref($db->db_id, $accession, '', $description);
  if (!$dbxref) {
    tripal_cv_obo_quiterror("Cannot find or add the database reference (dbxref)");
  }

  // Link the cvterm to the dbxref unless the link already exists.
  $sql = "SELECT * from {cvterm_dbxref} WHERE cvterm_id = %d and dbxref_id = %d";
  if (!db_fetch_object(db_query($sql, $cvterm->cvterm_id, $dbxref->dbxref_id))) {
    $sql = "INSERT INTO {cvterm_dbxref} (cvterm_id,dbxref_id)".
      "VALUES (%d,%d)";
    if (!db_query($sql, $cvterm->cvterm_id, $dbxref->dbxref_id)) {
      tripal_cv_obo_quiterror("Cannot add cvterm_dbxref: $xref");
    }
  }
  return TRUE;
}
/**
 * Adds a property value to a cvterm.
 *
 * @param $cvterm
 *   The cvterm object that receives the property.
 * @param $property
 *   The name of the property type (e.g. 'comment').
 * @param $value
 *   The property value.
 * @param $rank
 *   The rank of the value. Writing rank 0 first removes the term's existing
 *   values of the same property type.
 *
 * @return
 *   TRUE on success. Failures end the script through
 *   tripal_cv_obo_quiterror().
 */
function tripal_cv_obo_add_cvterm_prop($cvterm, $property, $value, $rank) {

  // Make sure the vocabulary that holds the property types exists.
  $cv = tripal_cv_add_cv('cvterm_property_type', '');
  if (!$cv) {
    tripal_cv_obo_quiterror("Cannot add/find cvterm_property_type cvterm");
  }

  // Look the property type up and create it when it is missing.
  $sql = "
    SELECT *
    FROM {cvterm} CVT INNER JOIN {cv} CV on CVT.cv_id = CV.cv_id
    WHERE CVT.name = '%s' and CV.name = '%s'
  ";
  $cvproptype = db_fetch_object(db_query($sql, $property, 'cvterm_property_type'));
  if (!$cvproptype) {
    $term = array(
      'name' => array($property),
      'id' => array("internal:$property"),
      'definition' => array(''),
      'is_obsolete' => array(0),
    );
    $cvproptype = tripal_cv_add_cvterm($term, $cv, 0, 0);
    if (!$cvproptype) {
      tripal_cv_obo_quiterror("Cannot add cvterm property: internal:$property");
    }
  }

  // Rank 0 starts a fresh list of values. Only values of this property type
  // are removed, so properties of other types on the same term are kept.
  if ($rank == 0) {
    $sql = "DELETE FROM {cvtermprop} WHERE cvterm_id = %d AND type_id = %d";
    db_query($sql, $cvterm->cvterm_id, $cvproptype->cvterm_id);
  }

  // Add the new value.
  $sql = "INSERT INTO {cvtermprop} (cvterm_id,type_id,value,rank) ".
    "VALUES (%d, %d, '%s',%d)";
  if (!db_query($sql, $cvterm->cvterm_id, $cvproptype->cvterm_id, $value, $rank)) {
    tripal_cv_obo_quiterror("Could not add property $property for term\n");
  }
  return TRUE;
}
/**
 * Returns the dbxref record for a database and accession, adding it if needed.
 *
 * @param $db_id
 *   The id of the database the accession belongs to.
 * @param $accession
 *   The accession.
 * @param $version
 *   Optional version stored with a newly added record.
 * @param $description
 *   Optional description stored with a newly added record.
 *
 * @return
 *   An object holding the dbxref_id of the existing or newly added record.
 */
function tripal_cv_obo_add_dbxref($db_id, $accession, $version='', $description='') {
  $lookup_sql = "SELECT dbxref_id FROM {dbxref} WHERE db_id = %d and accession = '%s'";

  // Nothing to add when the record is already there.
  $existing = db_fetch_object(db_query($lookup_sql, $db_id, $accession));
  if ($existing) {
    return $existing;
  }

  $insert_sql = "
    INSERT INTO {dbxref} (db_id, accession, version, description)
    VALUES (%d,'%s','%s','%s')
  ";
  $inserted = db_query($insert_sql, $db_id, $accession, $version, $description);
  if (!$inserted) {
    tripal_cv_obo_quiterror("Failed to insert the dbxref record $accession");
  }
  print "Added Dbxref accession: $accession\n";

  // Read the new record back so the caller receives its dbxref_id.
  return db_fetch_object(db_query($lookup_sql, $db_id, $accession));
}
|