|
@@ -187,6 +187,16 @@ class GFF3Importer extends TripalImporter {
|
|
*/
|
|
*/
|
|
private $create_organism = FALSE;
|
|
private $create_organism = FALSE;
|
|
|
|
|
|
|
|
+ /**
|
|
|
|
+ * Holds mapping of DB names to DB ids.
|
|
|
|
+ */
|
|
|
|
+ private $db_lookup = [];
|
|
|
|
+
|
|
|
|
+ /**
|
|
|
|
+ * Holds a mapping of Dbxref names to ids.
|
|
|
|
+ */
|
|
|
|
+ private $dbxref_lookup = [];
|
|
|
|
+
|
|
/**
|
|
/**
|
|
* An array that stores CVterms that have been looked up so we don't have
|
|
* An array that stores CVterms that have been looked up so we don't have
|
|
* to do the database query every time.
|
|
* to do the database query every time.
|
|
@@ -624,15 +634,23 @@ class GFF3Importer extends TripalImporter {
|
|
$this->logMessage("Step 4: Loading feature locations... ");
|
|
$this->logMessage("Step 4: Loading feature locations... ");
|
|
$this->loadFeatureLocs();
|
|
$this->loadFeatureLocs();
|
|
|
|
|
|
- $this->logMessage("Step 5: Loading features properties... ");
|
|
|
|
|
|
+ $this->logMessage("Step 5: Loading features Derives_from (gene/CDS relationships)...");
|
|
|
|
+ $this->loadDerivesFroms();
|
|
|
|
+
|
|
|
|
+ $this->logMessage("Step 6: Loading features properties... ");
|
|
$this->loadProperties();
|
|
$this->loadProperties();
|
|
|
|
|
|
- $this->logMessage("Step 6: Loading features synonyms (aliases)... ");
|
|
|
|
|
|
+ $this->logMessage("Step 7: Loading features synonyms (aliases)... ");
|
|
$this->loadAliases();
|
|
$this->loadAliases();
|
|
|
|
|
|
- $this->logMessage("Step 7: Loading features cross references... ");
|
|
|
|
|
|
+ $this->logMessage("Step 8: Loading features cross references... ");
|
|
$this->loadDbxrefs();
|
|
$this->loadDbxrefs();
|
|
|
|
|
|
|
|
+ if (!empty($this->residue_index)) {
|
|
|
|
+ $this->logMessage("Step 9: Loading residues from FASTA... ");
|
|
|
|
+ $this->loadFastas();
|
|
|
|
+ }
|
|
|
|
+
|
|
}
|
|
}
|
|
|
|
|
|
/**
|
|
/**
|
|
@@ -755,6 +773,59 @@ class GFF3Importer extends TripalImporter {
|
|
$this->null_pub = $pub;
|
|
$this->null_pub = $pub;
|
|
}
|
|
}
|
|
|
|
|
|
|
|
/**
 * Makes sure Chado is ready with the necessary Db and Dbxref records.
 *
 * Resolves every database name collected in $this->db_lookup to its
 * db_id and every entry collected in $this->dbxref_lookup to its
 * dbxref_id, inserting records that do not exist yet. After this method
 * runs, both lookup arrays map their keys to numeric ids.
 *
 * @throws Exception
 *   If a missing db or dbxref record cannot be inserted.
 */
private function prepDbxrefs() {
  $db_sql = "
    SELECT db_id
    FROM {db}
    WHERE name = :dbname";

  foreach (array_keys($this->db_lookup) as $dbname) {
    // fetchObject() yields FALSE (not NULL) when no row matches.
    $db = chado_query($db_sql, [':dbname' => $dbname])->fetchObject();
    if (!$db) {
      $db = chado_insert_db(['name' => $dbname]);
      if (!$db) {
        throw new Exception(t('Could not add the database "%dbname" needed for a Dbxref.',
          ['%dbname' => $dbname]));
      }
    }

    // Only the id is needed, so the selected/inserted record is enough.
    $this->db_lookup[$dbname] = $db->db_id;
  }

  $dbxref_sql = "
    SELECT dbxref_id
    FROM {dbxref}
    WHERE db_id = :db_id and accession = :accession";

  foreach ($this->dbxref_lookup as $index => $info) {
    // Entries that were already resolved to an id hold a scalar rather
    // than the db/accession pair; leave those untouched.
    if (!is_array($info)) {
      continue;
    }

    $db_id = $this->db_lookup[$info['db']];
    $dbx = chado_query($dbxref_sql, [
      ':db_id' => $db_id,
      ':accession' => $info['accession'],
    ])->fetchObject();

    if (!$dbx) {
      $dbx = chado_insert_dbxref([
        'db_id' => $db_id,
        'accession' => $info['accession'],
      ]);
      if (!$dbx) {
        throw new Exception(t('Could not add the Dbxref "%dbxref".',
          ['%dbxref' => $index]));
      }
    }

    $this->dbxref_lookup[$index] = $dbx->dbxref_id;
  }
}
|
|
|
|
+
|
|
/**
|
|
/**
|
|
* Parses the current line of the GFF3 file for a feature.
|
|
* Parses the current line of the GFF3 file for a feature.
|
|
*
|
|
*
|
|
@@ -824,6 +895,8 @@ class GFF3Importer extends TripalImporter {
|
|
$attr_parent = '';
|
|
$attr_parent = '';
|
|
$attr_others = [];
|
|
$attr_others = [];
|
|
$attr_aliases = [];
|
|
$attr_aliases = [];
|
|
|
|
+ $attr_dbxref = [];
|
|
|
|
+ $attr_derives = [];
|
|
foreach ($attrs as $attr) {
|
|
foreach ($attrs as $attr) {
|
|
$attr = rtrim($attr);
|
|
$attr = rtrim($attr);
|
|
$attr = ltrim($attr);
|
|
$attr = ltrim($attr);
|
|
@@ -859,6 +932,12 @@ class GFF3Importer extends TripalImporter {
|
|
elseif (strcmp($tag_name, 'Parent') == 0) {
|
|
elseif (strcmp($tag_name, 'Parent') == 0) {
|
|
$attr_parent = urldecode($tag[1]);
|
|
$attr_parent = urldecode($tag[1]);
|
|
}
|
|
}
|
|
|
|
+ elseif (strcmp($tag_name, 'Dbxref') == 0) {
|
|
|
|
+ $attr_dbxref = array_merge($attr_dbxref, $tags[$tag_name]);
|
|
|
|
+ }
|
|
|
|
+ elseif (strcmp($tag_name, 'Derives_from') == 0) {
|
|
|
|
+ $attr_derives = array_merge($attr_derives, $tags[$tag_name]);
|
|
|
|
+ }
|
|
// Get the list of non-reserved attributes.
|
|
// Get the list of non-reserved attributes.
|
|
elseif (strcmp($tag_name, 'Name') !=0 and strcmp($tag_name, 'ID') !=0 and
|
|
elseif (strcmp($tag_name, 'Name') !=0 and strcmp($tag_name, 'ID') !=0 and
|
|
strcmp($tag_name, 'Alias') != 0 and strcmp($tag_name, 'Parent') != 0 and
|
|
strcmp($tag_name, 'Alias') != 0 and strcmp($tag_name, 'Parent') != 0 and
|
|
@@ -883,6 +962,31 @@ class GFF3Importer extends TripalImporter {
|
|
$ret['name'] = $attr_name;
|
|
$ret['name'] = $attr_name;
|
|
$ret['uniquename'] = $attr_uniquename;
|
|
$ret['uniquename'] = $attr_uniquename;
|
|
$ret['synonyms'] = $attr_aliases;
|
|
$ret['synonyms'] = $attr_aliases;
|
|
|
|
+ $ret['dbxrefs'] = [];
|
|
|
|
+ foreach ($attr_dbxref as $key => $dbx) {
|
|
|
|
+ $parts = explode(':', $dbx);
|
|
|
|
+ if (count($parts) != 2) {
|
|
|
|
+ throw new Exception(t('Dbxrefs must be of the format: "Dbxref=<db name>:<accession>". The Dbxref %dbx on line %line_num does not satisfy this format.',
|
|
|
|
+ ['%line_num' => $this->current_line, '%dbx' => $dbx]));
|
|
|
|
+ }
|
|
|
|
+ $ret['dbxrefs']["{$parts[0]}:{$parts[1]}"] = array(
|
|
|
|
+ 'db' => $parts[0],
|
|
|
|
+ 'accession' => $parts[1],
|
|
|
|
+ );
|
|
|
|
+ }
|
|
|
|
+ $ret['dbxrefs']["GFF_source:{$ret['source']}"] = array(
|
|
|
|
+ 'db' => 'GFF_source',
|
|
|
|
+ 'accession' => $ret['source'],
|
|
|
|
+ );
|
|
|
|
+
|
|
|
|
+ $ret['derives_from'] = $attr_derives;
|
|
|
|
+ if (count($ret['derives_from']) > 1) {
|
|
|
|
+ throw new Exception(t('Each feature can only have one "Derives_from" attribute. The feature %uniquename has more than one: %derives',
|
|
|
|
+ [
|
|
|
|
+ '%uniquename' => $ret['uniquename'],
|
|
|
|
+ '%derives' => $ret['derives_from'],
|
|
|
|
+ ]));
|
|
|
|
+ }
|
|
|
|
|
|
// Now add all of the attributes into the return array.
|
|
// Now add all of the attributes into the return array.
|
|
foreach ($tags as $key => $value) {
|
|
foreach ($tags as $key => $value) {
|
|
@@ -912,8 +1016,72 @@ class GFF3Importer extends TripalImporter {
|
|
// Get the ID and the current file pointer and store that for later.
|
|
// Get the ID and the current file pointer and store that for later.
|
|
if (preg_match('/^>/', $line)) {
|
|
if (preg_match('/^>/', $line)) {
|
|
$id = preg_replace('/^>([^\s]+).*$/', '\1', $line);
|
|
$id = preg_replace('/^>([^\s]+).*$/', '\1', $line);
|
|
- $this->residue_index[$id] = ftell($this->gff_file_h);
|
|
|
|
|
|
+ $this->residue_index[trim($id)] = ftell($this->gff_file_h);
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
/**
 * Loads the actual residue information from the FASTA section of the file.
 *
 * For every sequence ID recorded in $this->residue_index, seeks to the
 * stored file offset, reads lines until the next FASTA header (or end of
 * file), and writes the concatenated residues, their length and MD5
 * checksum to the matching feature record. The residues are also kept on
 * the in-memory feature/landmark entry.
 */
private function loadFastas() {

  $num_residues = count($this->residue_index);

  $this->setItemsHandled(0);
  $this->setTotalItems($num_residues);

  $count = 0;

  foreach ($this->residue_index as $uniquename => $offset) {
    $count++;

    // Prefer a regular feature; fall back to a landmark with that name.
    if (!empty($this->features[$uniquename])) {
      $feature = $this->features[$uniquename];
      $is_landmark = FALSE;
    }
    elseif (!empty($this->landmarks[$uniquename])) {
      $feature = $this->landmarks[$uniquename];
      $is_landmark = TRUE;
    }
    else {
      $this->logMessage('Cannot find feature to assign FASTA sequence: %uname.',
        ['%uname' => $uniquename], TRIPAL_WARNING);
      $this->setItemsHandled($count);
      continue;
    }

    $this->ensureFeatureIsLoaded($feature);
    $id = $feature['feature_id'];

    // Read sequence lines up to the next header. Compare against FALSE
    // explicitly so a line such as "0" does not end the loop early.
    $residues = '';
    fseek($this->gff_file_h, $offset);
    while (($line = fgets($this->gff_file_h)) !== FALSE) {
      if (strncmp($line, '>', 1) === 0) {
        break;
      }
      $residues .= trim($line);
    }

    $feature['residues'] = $residues;
    if ($is_landmark) {
      $this->landmarks[$uniquename] = $feature;
    }
    else {
      $this->features[$uniquename] = $feature;
    }

    $updated = chado_update_record('feature', [
      'feature_id' => $id,
    ], [
      'residues' => $residues,
      'seqlen' => strlen($residues),
      'md5checksum' => md5($residues),
    ]);
    if (!$updated) {
      $this->logMessage('Could not save the FASTA sequence for feature: %uname.',
        ['%uname' => $uniquename], TRIPAL_WARNING);
    }

    $this->setItemsHandled($count);
  }
}
|
|
|
|
|
|
@@ -1028,6 +1196,17 @@ class GFF3Importer extends TripalImporter {
|
|
$this->relationships['Child'][$gff_feature['uniquename']] = $gff_feature['Parent'];
|
|
$this->relationships['Child'][$gff_feature['uniquename']] = $gff_feature['Parent'];
|
|
}
|
|
}
|
|
|
|
|
|
|
|
+ // Organize DBs for faster access later on.
|
|
|
|
+ foreach ($gff_feature['dbxrefs'] as $index => $info) {
|
|
|
|
+ if (!array_key_exists($info['db'], $this->db_lookup)) {
|
|
|
|
+ $this->db_lookup[$info['db']] = FALSE;
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ if (!array_key_exists($index, $this->dbxref_lookup)) {
|
|
|
|
+ $this->dbxref_lookup[$index] = $info;
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+
|
|
// Organize the CVterms for faster access later on.
|
|
// Organize the CVterms for faster access later on.
|
|
if (!array_key_exists($gff_feature['type'], $feature_cvterms)) {
|
|
if (!array_key_exists($gff_feature['type'], $feature_cvterms)) {
|
|
$feature_cvterms[$gff_feature['type']] = 0;
|
|
$feature_cvterms[$gff_feature['type']] = 0;
|
|
@@ -1044,6 +1223,8 @@ class GFF3Importer extends TripalImporter {
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
|
|
+ $this->prepDbxrefs();
|
|
|
|
+
|
|
// Iterate through the feature type terms and get a chado object for each.
|
|
// Iterate through the feature type terms and get a chado object for each.
|
|
$feature_cvterm_ids = [];
|
|
$feature_cvterm_ids = [];
|
|
foreach ($feature_cvterms as $name => $counts) {
|
|
foreach ($feature_cvterms as $name => $counts) {
|
|
@@ -1179,7 +1360,9 @@ class GFF3Importer extends TripalImporter {
|
|
$batch_num = 1;
|
|
$batch_num = 1;
|
|
$sql = '';
|
|
$sql = '';
|
|
$args = [];
|
|
$args = [];
|
|
|
|
+ $batch_features = [];
|
|
foreach ($features as $uniquename => $feature) {
|
|
foreach ($features as $uniquename => $feature) {
|
|
|
|
+ $batch_features[$uniquename] = $feature;
|
|
|
|
|
|
// Only do an insert if this feature doesn't already exist in the database.
|
|
// Only do an insert if this feature doesn't already exist in the database.
|
|
if (!$this->doesFeatureAlreadyExist($feature)) {
|
|
if (!$this->doesFeatureAlreadyExist($feature)) {
|
|
@@ -1203,7 +1386,7 @@ class GFF3Importer extends TripalImporter {
|
|
$sql = rtrim($sql, ",\n");
|
|
$sql = rtrim($sql, ",\n");
|
|
$sql = $init_sql . $sql;
|
|
$sql = $init_sql . $sql;
|
|
$last_id = chado_query($sql, $args, ['return' => Database::RETURN_INSERT_ID]);
|
|
$last_id = chado_query($sql, $args, ['return' => Database::RETURN_INSERT_ID]);
|
|
- $this->assignFeatureIDs($start_id, $last_id);
|
|
|
|
|
|
+ $this->assignFeatureIDs($batch_features);
|
|
$this->setItemsHandled($batch_num);
|
|
$this->setItemsHandled($batch_num);
|
|
$batch_num++;
|
|
$batch_num++;
|
|
|
|
|
|
@@ -1212,6 +1395,7 @@ class GFF3Importer extends TripalImporter {
|
|
$i = 0;
|
|
$i = 0;
|
|
$args = [];
|
|
$args = [];
|
|
$start_id = $last_id;
|
|
$start_id = $last_id;
|
|
|
|
+ $batch_features = [];
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
@@ -1221,9 +1405,12 @@ class GFF3Importer extends TripalImporter {
|
|
$sql = rtrim($sql, ",\n");
|
|
$sql = rtrim($sql, ",\n");
|
|
$sql = $init_sql . $sql;
|
|
$sql = $init_sql . $sql;
|
|
$last_id = chado_query($sql, $args, ['return' => Database::RETURN_INSERT_ID]);
|
|
$last_id = chado_query($sql, $args, ['return' => Database::RETURN_INSERT_ID]);
|
|
- $this->assignFeatureIDs($start_id, $last_id);
|
|
|
|
$this->setItemsHandled($batch_num);
|
|
$this->setItemsHandled($batch_num);
|
|
}
|
|
}
|
|
|
|
+
|
|
|
|
+ if (!empty($batch_features)) {
|
|
|
|
+ $this->assignFeatureIDs($batch_features);
|
|
|
|
+ }
|
|
}
|
|
}
|
|
|
|
|
|
/**
|
|
/**
|
|
@@ -1232,20 +1419,25 @@ class GFF3Importer extends TripalImporter {
|
|
* The start and last IDs should correspond to IDs surrounding
|
|
* The start and last IDs should correspond to IDs surrounding
|
|
* a batch insert of features.
|
|
* a batch insert of features.
|
|
*/
|
|
*/
|
|
private function assignFeatureIDs($batch_features) {
  // Looks up the feature_id of every feature in the batch by its
  // uniquename, organism and type, and records it on the matching
  // entries of $this->features and $this->landmarks.
  //
  // $batch_features is an array of feature arrays keyed by uniquename;
  // each must provide 'uniquename', 'organism_id' and 'type'.
  $sql = "
    SELECT feature_id
    FROM {feature} F
    WHERE uniquename = :uniquename and organism_id = :organism_id and type_id = :type_id
  ";

  foreach ($batch_features as $uniquename => $feature) {
    $result = chado_query($sql, [
      ':uniquename' => $feature['uniquename'],
      ':organism_id' => $feature['organism_id'],
      ':type_id' => $this->feature_cvterm_lookup[$feature['type']],
    ])->fetchObject();

    // fetchObject() yields FALSE when no row matches; there is no id to
    // assign in that case, so report it instead of dereferencing FALSE.
    if (!$result) {
      $this->logMessage('Cannot find the feature ID for feature: %uname.',
        ['%uname' => $uniquename], TRIPAL_WARNING);
      continue;
    }

    if (array_key_exists($uniquename, $this->features)) {
      $this->features[$uniquename]['feature_id'] = $result->feature_id;
    }
    if (array_key_exists($uniquename, $this->landmarks)) {
      $this->landmarks[$uniquename]['feature_id'] = $result->feature_id;
    }
  }
}
|
|
@@ -1321,7 +1513,172 @@ class GFF3Importer extends TripalImporter {
|
|
*
|
|
*
|
|
*/
|
|
*/
|
|
private function loadDbxrefs() {
  // Links every feature to its Dbxrefs through the feature_dbxref table.
  // Features are walked in batches of $batch_size; the (feature_id,
  // dbxref_id) pairs that are not yet in the table are collected and
  // written with one multi-row INSERT per batch.
  $batch_size = 1000;
  $num_features = count($this->features);
  $num_batches = (int) ($num_features / $batch_size) + 1;

  $this->setItemsHandled(0);
  $this->setTotalItems($num_batches);

  $count = 0;
  $batch_num = 0;
  $batch_pairs = [];
  $init_fdbx_sql = "INSERT INTO {feature_dbxref} (feature_id, dbxref_id) VALUES \n";
  $check_fdbx_sql = "SELECT feature_dbxref_id FROM {feature_dbxref} WHERE feature_id = :feature_id and dbxref_id = :dbxref_id";

  // Writes the pending pairs (if any), reports progress and resets the
  // batch state.
  $flush = function () use (&$batch_pairs, &$batch_num, &$count, $init_fdbx_sql) {
    $batch_num++;
    if (count($batch_pairs) > 0) {
      $fdbx_sql = $init_fdbx_sql . implode(', ', $batch_pairs);
      chado_query($fdbx_sql, [], ['return' => Database::RETURN_INSERT_ID]);
    }
    $this->setItemsHandled($batch_num);
    $count = 0;
    $batch_pairs = [];
  };

  foreach ($this->features as $uniquename => $feature) {
    $count++;

    $this->ensureFeatureIsLoaded($feature);
    $feature_id = $feature['feature_id'];

    foreach ($feature['dbxrefs'] ?? [] as $index => $info) {
      // The pairs are interpolated into the INSERT without placeholders,
      // so only values that really are numeric ids may get that far.
      $dbx_id = $this->dbxref_lookup[$index] ?? NULL;
      if (!is_numeric($feature_id) or !is_numeric($dbx_id)) {
        $this->logMessage('Cannot add the Dbxref "%dbxref" to feature "%uname": the record ID is unknown.',
          ['%dbxref' => $index, '%uname' => $uniquename], TRIPAL_WARNING);
        continue;
      }

      // Check that this feature_dbxref is not already in the database.
      $existing = chado_query($check_fdbx_sql, [
        ':feature_id' => $feature_id,
        ':dbxref_id' => $dbx_id,
      ])->fetchObject();

      if (!$existing) {
        $batch_pairs[] = '(' . (int) $feature_id . ', ' . (int) $dbx_id . ')';
      }
    }

    if ($count == $batch_size) {
      $flush();
    }
  }

  // Write whatever is left from the final, partial batch.
  if ($count > 0) {
    $flush();
  }
}
|
|
|
|
+
|
|
|
|
/**
 * Loads the "Derives_from" relationships of the features.
 *
 * For every feature that does not already exist and that has a
 * 'derives_from' value, adds a feature_relationship row with the feature
 * as subject, the referenced feature as object and the 'derives_from'
 * term as type. Rows are written with one multi-row INSERT per batch of
 * $batch_size processed features.
 */
private function loadDerivesFroms() {
  $batch_size = 1000;
  $num_features = count($this->features);
  $num_batches = (int) ($num_features / $batch_size) + 1;

  $this->setItemsHandled(0);
  $this->setTotalItems($num_batches);

  $init_sql = "
    INSERT INTO {feature_relationship}
      (subject_id, object_id, type_id, rank)
    VALUES\n";

  $count = 0;
  $batch_num = 0;
  $rows = [];
  $args = [];
  $derives_id = $this->getCvtermID('derives_from');

  // Writes the pending rows (if any), reports progress and resets the
  // batch state.
  $flush = function () use (&$rows, &$args, &$count, &$batch_num, $init_sql) {
    $batch_num++;
    if (count($rows) > 0) {
      $sql = $init_sql . implode(",\n", $rows);
      chado_query($sql, $args, ['return' => Database::RETURN_INSERT_ID]);
    }
    $this->setItemsHandled($batch_num);
    $rows = [];
    $args = [];
    $count = 0;
  };

  foreach ($this->features as $uniquename => $subject) {
    if ($this->doesFeatureAlreadyExist($subject)) {
      continue;
    }
    $count++;

    $this->ensureFeatureIsLoaded($subject);
    if (!empty($subject['derives_from'])) {
      $object_id = $this->getDerivesFromObjectId($subject['derives_from'][0], $subject);
      if ($object_id !== NULL) {
        $rows[] = "(:subject_id_$count, :object_id_$count, $derives_id, 0)";
        $args[":subject_id_$count"] = $subject['feature_id'];
        $args[":object_id_$count"] = $object_id;
      }
    }

    // The batch boundary is checked for every processed feature, whether
    // or not it contributed a row, so a batch can never be skipped.
    if ($count == $batch_size) {
      $flush();
    }
  }

  // Write whatever is left from the final, partial batch.
  if ($count > 0) {
    $flush();
  }
}

/**
 * Finds the feature_id of the object of a "Derives_from" relationship.
 *
 * Looks in the features and landmarks of the current file first and
 * falls back to a database lookup by organism and uniquename.
 *
 * @param string $object
 *   The uniquename given in the Derives_from attribute.
 * @param array $subject
 *   The feature that derives from the object; its 'organism_id' is used
 *   for the database lookup.
 *
 * @return int|null
 *   The feature_id, or NULL (after logging a warning) when no single
 *   matching feature can be found.
 */
private function getDerivesFromObjectId($object, $subject) {
  if (array_key_exists($object, $this->features)) {
    $this->ensureFeatureIsLoaded($this->features[$object]);
    return $this->features[$object]['feature_id'];
  }
  if (array_key_exists($object, $this->landmarks)) {
    $this->ensureFeatureIsLoaded($this->landmarks[$object]);
    return $this->landmarks[$object]['feature_id'];
  }

  // Derives_from cannot be found in either features or landmarks, so
  // we need to get the feature id from the database if it exists.
  $matches = chado_select_record('feature', ['feature_id'], [
    'organism_id' => $subject['organism_id'],
    'uniquename' => $object,
  ]);

  // chado_select_record() gives an array of records, or FALSE on error.
  if (!is_array($matches) or count($matches) == 0) {
    $this->logMessage('Cannot find feature type for "%object" in derives_from relationship.',
      ['%object' => $object], TRIPAL_WARNING);
    return NULL;
  }
  if (count($matches) > 1) {
    $this->logMessage('Cannot find feature type for "%object" in derives_from relationship. Multiple matching features exist with this uniquename.',
      ['%object' => $object], TRIPAL_WARNING);
    return NULL;
  }

  return $matches[0]->feature_id;
}
|
|
|
|
|
|
/**
|
|
/**
|
|
@@ -1367,7 +1724,7 @@ class GFF3Importer extends TripalImporter {
|
|
$args[":srcfeature_id_$i"] = $this->landmarks[$feature['landmark']]['feature_id'];
|
|
$args[":srcfeature_id_$i"] = $this->landmarks[$feature['landmark']]['feature_id'];
|
|
$args[":feature_id_$i"] = $feature['feature_id'];
|
|
$args[":feature_id_$i"] = $feature['feature_id'];
|
|
$args[":fmin_$i"] = $feature['start'];
|
|
$args[":fmin_$i"] = $feature['start'];
|
|
- $args[":fmax_$i"] = $feature['end'];
|
|
|
|
|
|
+ $args[":fmax_$i"] = $feature['stop'];
|
|
$args[":strand_$i"] = $feature['strand'];
|
|
$args[":strand_$i"] = $feature['strand'];
|
|
$args[":phase_$i"] = $feature['phase'] ? $feature['phase'] : NULL;
|
|
$args[":phase_$i"] = $feature['phase'] ? $feature['phase'] : NULL;
|
|
$args[":rank_$i"] = $rank;
|
|
$args[":rank_$i"] = $rank;
|
|
@@ -1379,7 +1736,7 @@ class GFF3Importer extends TripalImporter {
|
|
// Insert the batch.
|
|
// Insert the batch.
|
|
$sql = rtrim($sql, ",\n");
|
|
$sql = rtrim($sql, ",\n");
|
|
$sql = $init_sql . $sql;
|
|
$sql = $init_sql . $sql;
|
|
- //$last_id = chado_query($sql, $args, ['return' => Database::RETURN_INSERT_ID]);
|
|
|
|
|
|
+ $last_id = chado_query($sql, $args, ['return' => Database::RETURN_INSERT_ID]);
|
|
$this->setItemsHandled($batch_num);
|
|
$this->setItemsHandled($batch_num);
|
|
$batch_num++;
|
|
$batch_num++;
|
|
|
|
|
|
@@ -2112,7 +2469,7 @@ class GFF3Importer extends TripalImporter {
|
|
*/
|
|
*/
|
|
private function getCvtermID($type, $cv_id = NULL, $is_prop_type = FALSE) {
|
|
private function getCvtermID($type, $cv_id = NULL, $is_prop_type = FALSE) {
|
|
if (!isset($cv_id)) {
|
|
if (!isset($cv_id)) {
|
|
- $cv_id = $this->sequence_cv_id;
|
|
|
|
|
|
+ $cv_id = $this->feature_cv->getValue('cv_id');
|
|
}
|
|
}
|
|
if ($is_prop_type and array_key_exists($type, $this->featureprop_cvterm_lookup)) {
|
|
if ($is_prop_type and array_key_exists($type, $this->featureprop_cvterm_lookup)) {
|
|
return $this->featureprop_cvterm_lookup[$type];
|
|
return $this->featureprop_cvterm_lookup[$type];
|