|
@@ -127,15 +127,37 @@ function tripal_cv_load_obo_add_ref($name, $path) {
|
|
|
*/
|
|
|
function tripal_cv_load_obo_v1_2($file, $jobid = NULL, &$newcvs) {
|
|
|
|
|
|
- // we need to get a persistent connection. If one exists this function
|
|
|
- // will not recreate it, but if not it will create one and store it in
|
|
|
- // a Drupal variable for reuse later.
|
|
|
- $connection = tripal_db_persistent_chado();
|
|
|
-
|
|
|
$header = array();
|
|
|
- $obo = array();
|
|
|
+
|
|
|
+ // make sure our temporary table exists
|
|
|
+ $ret = array();
|
|
|
+ if (!db_table_exists('tripal_obo_temp')) {
|
|
|
+ $schema = tripal_cv_get_custom_tables('tripal_obo_temp');
|
|
|
+ $success = tripal_core_create_custom_table($ret, 'tripal_obo_temp', $schema['tripal_obo_temp']);
|
|
|
+ if (!$success) {
|
|
|
+ watchdog('T_obo_loader', "Cannot create temporary loading table", array(), WATCHDOG_ERROR);
|
|
|
+ return;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ // empty the temp table
|
|
|
+ $sql = "DELETE FROM tripal_obo_temp";
|
|
|
+ chado_query($sql);
|
|
|
|
|
|
- print "Opening File $file\n";
|
|
|
+ // get a persistent connection
|
|
|
+ $connection = tripal_db_persistent_chado();
|
|
|
+ if (!$connection) {
|
|
|
+ print "A persistant connection was not obtained. Loading will be slow\n";
|
|
|
+ }
|
|
|
+
|
|
|
+ // start the transaction; when a persistent connection is available, tell the user that a failed or interrupted load is rolled back
|
|
|
+ tripal_db_start_transaction();
|
|
|
+ if ($connection) {
|
|
|
+ print "\nNOTE: Loading of this OBO file is performed using a database transaction. \n" .
|
|
|
+ "If the load fails or is terminated prematurely then the entire set of \n" .
|
|
|
+ "insertions/updates is rolled back and will not be found in the database\n\n";
|
|
|
+ }
|
|
|
+
|
|
|
+ print "Step 1: Preloading File $file\n";
|
|
|
|
|
|
// make sure we have an 'internal' and a '_global' database
|
|
|
if (!tripal_db_add_db('internal')) {
|
|
@@ -146,7 +168,7 @@ function tripal_cv_load_obo_v1_2($file, $jobid = NULL, &$newcvs) {
|
|
|
}
|
|
|
|
|
|
// parse the obo file
|
|
|
- $default_db = tripal_cv_obo_parse($file, $obo, $header);
|
|
|
+ $default_db = tripal_cv_obo_parse($file, $header, $jobid);
|
|
|
|
|
|
// add the CV for this ontology to the database
|
|
|
$defaultcv = tripal_cv_add_cv($header['default-namespace'][0], '');
|
|
@@ -156,18 +178,24 @@ function tripal_cv_load_obo_v1_2($file, $jobid = NULL, &$newcvs) {
|
|
|
$newcvs[$header['default-namespace'][0]] = $defaultcv->cv_id;
|
|
|
|
|
|
// add any typedefs to the vocabulary first
|
|
|
-
|
|
|
- $typedefs = $obo['Typedef'];
|
|
|
- foreach ($typedefs as $typedef) {
|
|
|
- tripal_cv_obo_process_term($typedef, $defaultcv->name, $obo, 1, $newcvs, $default_db);
|
|
|
+ $sql = "
|
|
|
+ SELECT * FROM tripal_obo_temp
|
|
|
+ WHERE type = 'Typedef'
|
|
|
+ ";
|
|
|
+ $typedefs = chado_query($sql);
|
|
|
+ while ($typedef = db_fetch_object($typedefs)) {
|
|
|
+ $term = unserialize(base64_decode($typedef->stanza));
|
|
|
+ tripal_cv_obo_process_term($term, $defaultcv->name, 1, $newcvs, $default_db);
|
|
|
}
|
|
|
-
|
|
|
+
|
|
|
// next add terms to the vocabulary
|
|
|
- $terms = $obo['Term'];
|
|
|
- if (!tripal_cv_obo_process_terms($terms, $defaultcv->name, $obo, $jobid, $newcvs, $default_db)) {
|
|
|
+ print "\nStep 2: Loading terms...\n";
|
|
|
+ if (!tripal_cv_obo_process_terms($defaultcv->name, $jobid, $newcvs, $default_db)) {
|
|
|
tripal_cv_obo_quiterror('Cannot add terms from this ontology');
|
|
|
}
|
|
|
|
|
|
+ // transaction is complete
|
|
|
+ tripal_db_commit_transaction();
|
|
|
return;
|
|
|
}
|
|
|
|
|
@@ -186,37 +214,36 @@ function tripal_cv_obo_quiterror($message) {
|
|
|
*
|
|
|
* @ingroup tripal_obo_loader
|
|
|
*/
|
|
|
-function tripal_cv_obo_process_terms($terms, $defaultcv, $obo, $jobid = NULL, &$newcvs, $default_db) {
|
|
|
+function tripal_cv_obo_process_terms($defaultcv, $jobid = NULL, &$newcvs, $default_db) {
|
|
|
|
|
|
$i = 0;
|
|
|
- $count = count($terms);
|
|
|
-
|
|
|
- // if the number of terms is zero then simply return.
|
|
|
- // this can happen when an OBO file simply has typedef
|
|
|
- // entries and no actual terms.
|
|
|
- if($count == 0) {
|
|
|
- return 1;
|
|
|
- }
|
|
|
+
|
|
|
+ // iterate through each term from the OBO file and add it
|
|
|
+ $sql = "
|
|
|
+ SELECT * FROM tripal_obo_temp
|
|
|
+ WHERE type = 'Term'
|
|
|
+ ORDER BY id
|
|
|
+ ";
|
|
|
+ $terms = chado_query($sql);
|
|
|
+ $count = pg_num_rows($terms);
|
|
|
|
|
|
// calculate the interval for updates
|
|
|
$interval = intval($count * 0.0001);
|
|
|
if ($interval < 1) {
|
|
|
$interval = 1;
|
|
|
}
|
|
|
-
|
|
|
- // iterate through each term from the OBO file and add it
|
|
|
- print "Loading terms...\n";
|
|
|
- foreach ($terms as $term) {
|
|
|
-
|
|
|
- // update the job status every 1% terms
|
|
|
+ while($t = db_fetch_object($terms)) {
|
|
|
+ $term = unserialize(base64_decode($t->stanza));
|
|
|
+
|
|
|
+ // update the job status every interval
|
|
|
if ($jobid and $i % $interval == 0) {
|
|
|
- $complete = ($i / $count) * 100;
|
|
|
- tripal_job_set_progress($jobid, intval($complete));
|
|
|
- printf("%d of %d records. (%0.2f%%) memory: %s bytes\r", $i, $count, $complete, number_format(memory_get_usage()));
|
|
|
+ // term loading is the second half of the job (parsing reported 0-50%),
+ // so scale this phase to 0-50 and add the offset to the percentage
+ $complete = ($i / $count) * 50;
|
|
|
+ // the 50 belongs on the progress value, not on the job id
+ tripal_job_set_progress($jobid, intval($complete) + 50);
|
|
|
+ printf("%d of %d records. (%0.2f%%) Memory: %s bytes\r", $i, $count, $complete * 2, number_format(memory_get_usage()));
|
|
|
}
|
|
|
|
|
|
// add/update this term
|
|
|
- if (!tripal_cv_obo_process_term($term, $defaultcv, $obo, 0, $newcvs, $default_db)) {
|
|
|
+ if (!tripal_cv_obo_process_term($term, $defaultcv, 0, $newcvs, $default_db)) {
|
|
|
tripal_cv_obo_quiterror("Failed to process terms from the ontology");
|
|
|
}
|
|
|
|
|
@@ -225,9 +252,9 @@ function tripal_cv_obo_process_terms($terms, $defaultcv, $obo, $jobid = NULL, &$
|
|
|
|
|
|
// set the final status
|
|
|
if ($jobid) {
|
|
|
- $complete = ($i / $count) * 100;
|
|
|
- tripal_job_set_progress($jobid, intval($complete));
|
|
|
- printf("%d of %d records. (%0.2f%%) memory: %d\r", $i, $count, $complete, memory_get_usage());
|
|
|
+ // an OBO file may contain only Typedef stanzas, leaving $count at 0;
+ // avoid dividing by zero and report this phase as finished
+ $complete = $count > 0 ? ($i / $count) * 50 : 50;
|
|
|
+ // the 50 belongs on the progress value, not on the job id
+ tripal_job_set_progress($jobid, intval($complete) + 50);
|
|
|
+ printf("%d of %d records. (%0.2f%%) Memory: %s bytes\r", $i, $count, $complete * 2, number_format(memory_get_usage()));
|
|
|
}
|
|
|
|
|
|
return 1;
|
|
@@ -237,39 +264,41 @@ function tripal_cv_obo_process_terms($terms, $defaultcv, $obo, $jobid = NULL, &$
|
|
|
*
|
|
|
* @ingroup tripal_obo_loader
|
|
|
*/
|
|
|
-function tripal_cv_obo_process_term($term, $defaultcv, $obo, $is_relationship = 0, &$newcvs, $default_db) {
|
|
|
+function tripal_cv_obo_process_term($term, $defaultcv, $is_relationship = 0, &$newcvs, $default_db) {
|
|
|
+
|
|
|
// construct the term array for sending to the tripal_cv_add_cvterm function
|
|
|
// for adding a new cvterm
|
|
|
$t = array();
|
|
|
- $t['id'] = $term['id'][0];
|
|
|
- $t['name'] = $term['name'][0];
|
|
|
- $t['def'] = $term['def'][0];
|
|
|
- if (isset($term['subset'])) {
|
|
|
- $t['subset'] = $term['subset'][0];
|
|
|
+ $t['id'] = $term['id'][0];
|
|
|
+ $t['name'] = $term['name'][0];
|
|
|
+ if (array_key_exists('def', $term)) {
|
|
|
+ $t['def'] = $term['def'][0];
|
|
|
+ }
|
|
|
+ if (array_key_exists('subset', $term)) {
|
|
|
+ $t['subset'] = $term['subset'][0];
|
|
|
}
|
|
|
- if (isset($term['namespace'])) {
|
|
|
- $t['namespace'] = $term['namespace'][0];
|
|
|
+ if (array_key_exists('namespace', $term)) {
|
|
|
+ $t['namespace'] = $term['namespace'][0];
|
|
|
}
|
|
|
- if (isset($term['is_obsolete'])) {
|
|
|
+ if (array_key_exists('is_obsolete', $term)) {
|
|
|
$t['is_obsolete'] = $term['is_obsolete'][0];
|
|
|
}
|
|
|
|
|
|
// add the cvterm
|
|
|
- $cvterm = tripal_cv_add_cvterm($t, $defaultcv, $is_relationship, 1, $default_db);
|
|
|
+ $cvterm = tripal_cv_add_cvterm($t, $defaultcv, $is_relationship, 1, $default_db);
|
|
|
if (!$cvterm) {
|
|
|
// each stanza tag holds an array of values, so report the first id rather than the array itself
tripal_cv_obo_quiterror("Cannot add the term " . $term['id'][0]);
|
|
|
- }
|
|
|
-
|
|
|
-
|
|
|
- if (isset($term['namespace'])) {
|
|
|
+ }
|
|
|
+
|
|
|
+ if (array_key_exists('namespace', $term)) {
|
|
|
$newcvs[$term['namespace'][0]] = $cvterm->cv_id;
|
|
|
}
|
|
|
|
|
|
// now handle other properties
|
|
|
- if (isset($term['is_anonymous'])) {
|
|
|
+ if (array_key_exists('is_anonymous', $term)) {
|
|
|
//print "WARNING: unhandled tag: is_anonymous\n";
|
|
|
}
|
|
|
- if (isset($term['alt_id'])) {
|
|
|
+ if (array_key_exists('alt_id', $term)) {
|
|
|
foreach ($term['alt_id'] as $alt_id) {
|
|
|
if (!tripal_cv_obo_add_cvterm_dbxref($cvterm, $alt_id)) {
|
|
|
tripal_cv_obo_quiterror("Cannot add alternate id $alt_id");
|
|
@@ -277,11 +306,11 @@ function tripal_cv_obo_process_term($term, $defaultcv, $obo, $is_relationship =
|
|
|
}
|
|
|
}
|
|
|
|
|
|
- if (isset($term['subset'])) {
|
|
|
+ if (array_key_exists('subset', $term)) {
|
|
|
//print "WARNING: unhandled tag: subset\n";
|
|
|
}
|
|
|
// add synonyms for this cvterm
|
|
|
- if (isset($term['synonym'])) {
|
|
|
+ if (array_key_exists('synonym', $term)) {
|
|
|
if (!tripal_cv_obo_add_synonyms($term, $cvterm)) {
|
|
|
tripal_cv_obo_quiterror("Cannot add synonyms");
|
|
|
}
|
|
@@ -289,20 +318,20 @@ function tripal_cv_obo_process_term($term, $defaultcv, $obo, $is_relationship =
|
|
|
|
|
|
// reformat the deprecated 'exact_synonym, narrow_synonym, and broad_synonym'
|
|
|
// types to be of the v1.2 standard
|
|
|
- if (isset($term['exact_synonym']) or isset($term['narrow_synonym']) or isset($term['broad_synonym'])) {
|
|
|
- if (isset($term['exact_synonym'])) {
|
|
|
+ if (array_key_exists('exact_synonym', $term) or array_key_exists('narrow_synonym', $term) or array_key_exists('broad_synonym', $term)) {
|
|
|
+ if (array_key_exists('exact_synonym', $term)) {
|
|
|
foreach ($term['exact_synonym'] as $synonym) {
|
|
|
$new = preg_replace('/^\s*(\".+?\")(.*?)$/', '$1 EXACT $2', $synonym);
|
|
|
$term['synonym'][] = $new;
|
|
|
}
|
|
|
}
|
|
|
- if (isset($term['narrow_synonym'])) {
|
|
|
+ if (array_key_exists('narrow_synonym', $term)) {
|
|
|
foreach ($term['narrow_synonym'] as $synonym) {
|
|
|
$new = preg_replace('/^\s*(\".+?\")(.*?)$/', '$1 NARROW $2', $synonym);
|
|
|
$term['synonym'][] = $new;
|
|
|
}
|
|
|
}
|
|
|
- if (isset($term['broad_synonym'])) {
|
|
|
+ if (array_key_exists('broad_synonym', $term)) {
|
|
|
foreach ($term['broad_synonym'] as $synonym) {
|
|
|
$new = preg_replace('/^\s*(\".+?\")(.*?)$/', '$1 BROAD $2', $synonym);
|
|
|
$term['synonym'][] = $new;
|
|
@@ -315,7 +344,7 @@ function tripal_cv_obo_process_term($term, $defaultcv, $obo, $is_relationship =
|
|
|
}
|
|
|
|
|
|
// add the comment to the cvtermprop table
|
|
|
- if (isset($term['comment'])) {
|
|
|
+ if (array_key_exists('comment', $term)) {
|
|
|
$comments = $term['comment'];
|
|
|
$j = 0;
|
|
|
foreach ($comments as $comment) {
|
|
@@ -325,9 +354,9 @@ function tripal_cv_obo_process_term($term, $defaultcv, $obo, $is_relationship =
|
|
|
$j++;
|
|
|
}
|
|
|
}
|
|
|
-
|
|
|
+
|
|
|
// add any other external dbxrefs
|
|
|
- if (isset($term['xref'])) {
|
|
|
+ if (array_key_exists('xref', $term)) {
|
|
|
foreach ($term['xref'] as $xref) {
|
|
|
if (!tripal_cv_obo_add_cvterm_dbxref($cvterm, $xref)) {
|
|
|
tripal_cv_obo_quiterror("Cannot add/update cvterm database reference (dbxref).");
|
|
@@ -335,14 +364,14 @@ function tripal_cv_obo_process_term($term, $defaultcv, $obo, $is_relationship =
|
|
|
}
|
|
|
}
|
|
|
|
|
|
- if (isset($term['xref_analog'])) {
|
|
|
+ if (array_key_exists('xref_analog', $term)) {
|
|
|
foreach ($term['xref_analog'] as $xref) {
|
|
|
if (!tripal_cv_obo_add_cvterm_dbxref($cvterm, $xref)) {
|
|
|
tripal_cv_obo_quiterror("Cannot add/update cvterm database reference (dbxref).");
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
- if (isset($term['xref_unk'])) {
|
|
|
+ if (array_key_exists('xref_unk', $term)) {
|
|
|
foreach ($term['xref_unk'] as $xref) {
|
|
|
if (!tripal_cv_obo_add_cvterm_dbxref($cvterm, $xref)) {
|
|
|
tripal_cv_obo_quiterror("Cannot add/update cvterm database reference (dbxref).");
|
|
@@ -351,42 +380,42 @@ function tripal_cv_obo_process_term($term, $defaultcv, $obo, $is_relationship =
|
|
|
}
|
|
|
|
|
|
// add is_a relationships for this cvterm
|
|
|
- if (isset($term['is_a'])) {
|
|
|
+ if (array_key_exists('is_a', $term)) {
|
|
|
foreach ($term['is_a'] as $is_a) {
|
|
|
- if (!tripal_cv_obo_add_relationship($cvterm, $defaultcv, $obo, 'is_a', $is_a, $is_relationship, $default_db)) {
|
|
|
+ if (!tripal_cv_obo_add_relationship($cvterm, $defaultcv, 'is_a', $is_a, $is_relationship, $default_db)) {
|
|
|
tripal_cv_obo_quiterror("Cannot add relationship is_a: $is_a");
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
-
|
|
|
- if (isset($term['intersection_of'])) {
|
|
|
+
|
|
|
+ if (array_key_exists('intersection_of', $term)) {
|
|
|
//print "WARNING: unhandled tag: intersection_of\n";
|
|
|
}
|
|
|
- if (isset($term['union_of'])) {
|
|
|
+ if (array_key_exists('union_of', $term)) {
|
|
|
//print "WARNING: unhandled tag: union_on\n";
|
|
|
}
|
|
|
- if (isset($term['disjoint_from'])) {
|
|
|
+ if (array_key_exists('disjoint_from', $term)) {
|
|
|
//print "WARNING: unhandled tag: disjoint_from\n";
|
|
|
}
|
|
|
- if (isset($term['relationship'])) {
|
|
|
+ if (array_key_exists('relationship', $term)) {
|
|
|
foreach ($term['relationship'] as $value) {
|
|
|
$rel = preg_replace('/^(.+?)\s.+?$/', '\1', $value);
|
|
|
$object = preg_replace('/^.+?\s(.+?)$/', '\1', $value);
|
|
|
- if (!tripal_cv_obo_add_relationship($cvterm, $defaultcv, $obo, $rel, $object, $is_relationship, $default_db)) {
|
|
|
+ if (!tripal_cv_obo_add_relationship($cvterm, $defaultcv, $rel, $object, $is_relationship, $default_db)) {
|
|
|
tripal_cv_obo_quiterror("Cannot add relationship $rel: $object");
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
- if (isset($term['replaced_by'])) {
|
|
|
+ if (array_key_exists('replaced_by', $term)) {
|
|
|
//print "WARNING: unhandled tag: replaced_by\n";
|
|
|
}
|
|
|
- if (isset($term['consider'])) {
|
|
|
+ if (array_key_exists('consider', $term)) {
|
|
|
//print "WARNING: unhandled tag: consider\n";
|
|
|
}
|
|
|
- if (isset($term['use_term'])) {
|
|
|
+ if (array_key_exists('use_term', $term)) {
|
|
|
//print "WARNING: unhandled tag: user_term\n";
|
|
|
}
|
|
|
- if (isset($term['builtin'])) {
|
|
|
+ if (array_key_exists('builtin', $term)) {
|
|
|
//print "WARNING: unhandled tag: builtin\n";
|
|
|
}
|
|
|
return 1;
|
|
@@ -397,7 +426,7 @@ function tripal_cv_obo_process_term($term, $defaultcv, $obo, $is_relationship =
|
|
|
*
|
|
|
* @ingroup tripal_obo_loader
|
|
|
*/
|
|
|
-function tripal_cv_obo_add_relationship($cvterm, $defaultcv, $obo, $rel,
|
|
|
+function tripal_cv_obo_add_relationship($cvterm, $defaultcv, $rel,
|
|
|
$objname, $object_is_relationship = 0, $default_db = 'OBO_REL') {
|
|
|
|
|
|
// make sure the relationship cvterm exists
|
|
@@ -425,7 +454,7 @@ function tripal_cv_obo_add_relationship($cvterm, $defaultcv, $obo, $rel,
|
|
|
}
|
|
|
|
|
|
// get the object term
|
|
|
- $oterm = tripal_cv_obo_get_term($obo, $objname);
|
|
|
+ $oterm = tripal_cv_obo_get_term($objname);
|
|
|
if (!$oterm) {
|
|
|
tripal_cv_obo_quiterror("Could not find object term $objname\n");
|
|
|
}
|
|
@@ -433,14 +462,16 @@ function tripal_cv_obo_add_relationship($cvterm, $defaultcv, $obo, $rel,
|
|
|
$objterm = array();
|
|
|
$objterm['id'] = $oterm['id'][0];
|
|
|
$objterm['name'] = $oterm['name'][0];
|
|
|
- $objterm['def'] = $oterm['def'][0];
|
|
|
- if (isset($oterm['subset'])) {
|
|
|
+ if (array_key_exists('def', $oterm)) {
|
|
|
+ $objterm['def'] = $oterm['def'][0];
|
|
|
+ }
|
|
|
+ if (array_key_exists('subset', $oterm)) {
|
|
|
$objterm['subset'] = $oterm['subset'][0];
|
|
|
}
|
|
|
- if (isset($oterm['namespace'])) {
|
|
|
+ if (array_key_exists('namespace', $oterm)) {
|
|
|
$objterm['namespace'] = $oterm['namespace'][0];
|
|
|
}
|
|
|
- if (isset($oterm['is_obsolete'])) {
|
|
|
+ if (array_key_exists('is_obsolete', $oterm)) {
|
|
|
$objterm['is_obsolete'] = $oterm['is_obsolete'][0];
|
|
|
}
|
|
|
$objcvterm = tripal_cv_add_cvterm($objterm, $defaultcv, $object_is_relationship, 1, $default_db);
|
|
@@ -474,26 +505,14 @@ function tripal_cv_obo_add_relationship($cvterm, $defaultcv, $obo, $rel,
|
|
|
*
|
|
|
* @ingroup tripal_obo_loader
|
|
|
*/
|
|
|
-function tripal_cv_obo_get_term($obo, $id) {
|
|
|
- /*
|
|
|
- foreach ($obo as $type) {
|
|
|
- foreach ($type as $term) {
|
|
|
- $accession = $term['id'][0];
|
|
|
- if (strcmp($accession, $id)==0) {
|
|
|
- return $term;
|
|
|
- }
|
|
|
- }
|
|
|
- } */
|
|
|
- // iterate through each of the types in the
|
|
|
- // obo file (parsed into memory) and look
|
|
|
- // for this term. If found, return it.
|
|
|
- foreach ($obo as $type) {
|
|
|
- if (array_key_exists($id, $type)) {
|
|
|
- return $type[$id];
|
|
|
- }
|
|
|
+function tripal_cv_obo_get_term($id) {
+  // Look up the stanza stored in the tripal_obo_temp table under this term id.
+  // The parser writes the 'stanza' column as base64_encode(serialize($stanza)),
+  // so the value is decoded and unserialized before being returned.
+  //
+  // Returns the stanza array for $id, or FALSE when no row is found.
+  $values = array('id' => $id);
+  $options = array('statement_name' => 'sel_tripalobotemp_id');
+  $result = tripal_core_chado_select('tripal_obo_temp', array('stanza'), $values, $options);
+
+  // empty() covers both an empty result set and a non-array result;
+  // count() on a non-array would let execution fall through to $result[0].
+  // NOTE(review): assumes a failed select yields FALSE/NULL - confirm against tripal_core_chado_select.
+  if (empty($result)) {
+    return FALSE;
  }
-
-  return FALSE;
+  return unserialize(base64_decode($result[0]->stanza));
}
|
|
|
|
|
|
/**
|
|
@@ -506,7 +525,7 @@ function tripal_cv_obo_add_synonyms($term, $cvterm) {
|
|
|
$syncv = tripal_cv_add_cv('synonym_type', 'A vocabulary added by the Tripal CV module OBO loader for storing synonym types.');
|
|
|
|
|
|
// now add the synonyms
|
|
|
- if (isset($term['synonym'])) {
|
|
|
+ if (array_key_exists('synonym', $term)) {
|
|
|
foreach ($term['synonym'] as $synonym) {
|
|
|
|
|
|
// separate out the synonym definition and the synonym type
|
|
@@ -593,20 +612,41 @@ function tripal_cv_obo_add_synonyms($term, $cvterm) {
|
|
|
*
|
|
|
* @ingroup tripal_obo_loader
|
|
|
*/
|
|
|
-function tripal_cv_obo_parse($obo_file, &$obo, &$header) {
|
|
|
- $i = 0;
|
|
|
+function tripal_cv_obo_parse($obo_file, &$header, $jobid) {
|
|
|
$in_header = 1;
|
|
|
$stanza = array();
|
|
|
$default_db = '_global';
|
|
|
+ $line_num = 0;
|
|
|
+ $num_read = 0;
|
|
|
+ $intv_read = 0;
|
|
|
+
|
|
|
+ $filesize = filesize($obo_file);
|
|
|
+ $interval = intval($filesize * 0.01);
|
|
|
+ if ($interval < 1) {
|
|
|
+ $interval = 1;
|
|
|
+ }
|
|
|
|
|
|
// iterate through the lines in the OBO file and parse the stanzas
|
|
|
$fh = fopen($obo_file, 'r');
|
|
|
while ($line = fgets($fh)) {
|
|
|
- $i++;
|
|
|
-
|
|
|
+
|
|
|
+ $line_num++;
|
|
|
+ $size = drupal_strlen($line);
|
|
|
+ $num_read += $size;
|
|
|
+ $intv_read += $size;
|
|
|
+ $line = trim($line);
|
|
|
+
|
|
|
+ // update the job status each time another 1% of the file (by size) has been read
|
|
|
+ if ($jobid and $intv_read >= $interval) {
|
|
|
+ $percent = sprintf("%.2f", ($num_read / $filesize) * 100);
|
|
|
+ print "Parsing Line $line_num (" . $percent . "%). Memory: " . number_format(memory_get_usage()) . " bytes.\r";
|
|
|
+ tripal_job_set_progress($jobid, intval(($num_read / $filesize) * 50));
|
|
|
+ $intv_read = 0;
|
|
|
+ }
|
|
|
+
|
|
|
// remove newlines
|
|
|
$line = rtrim($line);
|
|
|
-
|
|
|
+
|
|
|
// remove any special characters that may be hiding
|
|
|
$line = preg_replace('/[^(\x20-\x7F)]*/', '', $line);
|
|
|
|
|
@@ -618,19 +658,25 @@ function tripal_cv_obo_parse($obo_file, &$obo, &$header) {
|
|
|
//remove comments from end of lines
|
|
|
$line = preg_replace('/^(.*?)\!.*$/', '\1', $line); // TODO: if the explamation is escaped
|
|
|
|
|
|
- if (preg_match('/^\s*\[/', $line)) { // at the first stanza we're out of header
|
|
|
+ // at the first stanza we're out of header
|
|
|
+ if (preg_match('/^\s*\[/', $line)) {
|
|
|
$in_header = 0;
|
|
|
+
|
|
|
// store the stanza we just finished reading
|
|
|
if (sizeof($stanza) > 0) {
|
|
|
- if (!isset($obo[$type])) {
|
|
|
- $obo[$type] = array();
|
|
|
- }
|
|
|
- if (!isset($obo[$type][$stanza['id'][0]])) {
|
|
|
- $obo[$type][$stanza['id'][0]] = $stanza;
|
|
|
- }
|
|
|
- else {
|
|
|
- array_merge($obo[$type][$stanza['id'][0]], $stanza);
|
|
|
+ // add the term to the temp table
|
|
|
+ $values = array(
|
|
|
+ 'id' => $stanza['id'][0],
|
|
|
+ 'stanza' => base64_encode(serialize($stanza)),
|
|
|
+ 'type' => $type,
|
|
|
+ );
|
|
|
+ $options = array('statement_name' => 'ins_tripalobotemp_all');
|
|
|
+ $success = tripal_core_chado_insert('tripal_obo_temp', $values, $options);
|
|
|
+ if (!$success) {
|
|
|
+ // pass a WATCHDOG_* severity constant, as the table-creation check in this loader does, instead of the string 'error'
+ watchdog('T_obo_loader', "ERROR: Cannot insert stanza into temporary table.", array(), WATCHDOG_ERROR);
|
|
|
+ exit;
|
|
|
}
|
|
|
+
|
|
|
}
|
|
|
// get the stanza type: Term, Typedef or Instance
|
|
|
$type = preg_replace('/^\s*\[\s*(.+?)\s*\]\s*$/', '\1', $line);
|
|
@@ -654,13 +700,13 @@ function tripal_cv_obo_parse($obo_file, &$obo, &$header) {
|
|
|
$tag = preg_replace("/\|-\|-\|/", "\:", $tag); // return the escaped colon
|
|
|
$value = preg_replace("/\|-\|-\|/", "\:", $value);
|
|
|
if ($in_header) {
|
|
|
- if (!isset($header[$tag])) {
|
|
|
+ if (!array_key_exists($tag, $header)) {
|
|
|
$header[$tag] = array();
|
|
|
}
|
|
|
$header[$tag][] = $value;
|
|
|
}
|
|
|
else {
|
|
|
- if (!isset($stanza[$tag])) {
|
|
|
+ if (!array_key_exists($tag, $stanza)) {
|
|
|
$stanza[$tag] = array();
|
|
|
}
|
|
|
$stanza[$tag][] = $value;
|
|
@@ -668,14 +714,16 @@ function tripal_cv_obo_parse($obo_file, &$obo, &$header) {
|
|
|
}
|
|
|
// now add the last term in the file
|
|
|
if (sizeof($stanza) > 0) {
|
|
|
- if (!isset($obo[$type])) {
|
|
|
- $obo[$type] = array();
|
|
|
- }
|
|
|
- if (!isset($obo[$type][$stanza['id'][0]])) {
|
|
|
- $obo[$type][$stanza['id'][0]] = $stanza;
|
|
|
- }
|
|
|
- else {
|
|
|
- array_merge($obo[$type][$stanza['id'][0]], $stanza);
|
|
|
+ $values = array(
|
|
|
+ 'id' => $stanza['id'][0],
|
|
|
+ 'stanza' => base64_encode(serialize($stanza)),
|
|
|
+ 'type' => $type,
|
|
|
+ );
|
|
|
+ $options = array('statement_name' => 'ins_tripalobotemp_all');
|
|
|
+ // keep the result of this insert so the check below tests it, not a stale or undefined $success
+ $success = tripal_core_chado_insert('tripal_obo_temp', $values, $options);
|
|
|
+ if (!$success) {
|
|
|
+ // pass a WATCHDOG_* severity constant instead of the string 'error'
+ watchdog('T_obo_loader', "ERROR: Cannot insert stanza into temporary table.", array(), WATCHDOG_ERROR);
|
|
|
+ exit;
|
|
|
}
|
|
|
}
|
|
|
return $default_db;
|