|
@@ -137,6 +137,16 @@ function tripal_feature_gff3_load_form() {
|
|
|
'#description' => t('Features present in the GFF file that exist in the database
|
|
|
will be removed rather than imported'),
|
|
|
);
|
|
|
+ $form['import_options']['create_organism']= array(
|
|
|
+ '#type' => 'checkbox',
|
|
|
+ '#title' => t('Create organism'),
|
|
|
+ '#required' => FALSE,
|
|
|
+ '#description' => t('The Tripal GFF loader supports the "organism" attribute. This allows features of a
|
|
|
+ different organism to be aligned to the landmark sequence of another species. The format of the
|
|
|
+ attribute is "organism=[genus]:[species]", where [genus] is the organism\'s genus and [species] is the
|
|
|
+ species name. Check this box to automatically add the organism to the database if it does not already exist.
|
|
|
+ Otherwise lines with an organism attribute where the organism is not present in the database will be skipped.'),
|
|
|
+ );
|
|
|
|
|
|
$form['targets'] = array(
|
|
|
'#type' => 'fieldset',
|
|
@@ -204,6 +214,7 @@ function tripal_feature_gff3_load_form_validate($form, &$form_state) {
|
|
|
$target_organism_id = $form_state['values']['target_organism_id'];
|
|
|
$target_type = trim($form_state['values']['target_type']);
|
|
|
$create_target = $form_state['values']['create_target'];
|
|
|
+ $create_organism = $form_state['values']['create_organism'];
|
|
|
$add_only = $form_state['values']['add_only'];
|
|
|
$update = $form_state['values']['update'];
|
|
|
$refresh = $form_state['values']['refresh'];
|
|
@@ -261,10 +272,13 @@ function tripal_feature_gff3_load_form_submit($form, &$form_state) {
|
|
|
$line_number = trim($form_state['values']['line_number']);
|
|
|
$landmark_type = trim($form_state['values']['landmark_type']);
|
|
|
$alt_id_attr = trim($form_state['values']['alt_id_attr']);
|
|
|
+ $create_organism = $form_state['values']['create_organism'];
|
|
|
+
|
|
|
|
|
|
$args = array($gff_file, $organism_id, $analysis_id, $add_only,
|
|
|
$update, $refresh, $remove, $use_transaction, $target_organism_id,
|
|
|
- $target_type, $create_target, $line_number, $landmark_type, $alt_id_attr);
|
|
|
+ $target_type, $create_target, $line_number, $landmark_type, $alt_id_attr,
|
|
|
+ $create_organism);
|
|
|
|
|
|
$type = '';
|
|
|
if ($add_only) {
|
|
@@ -292,9 +306,10 @@ function tripal_feature_gff3_load_form_submit($form, &$form_state) {
|
|
|
* @ingroup gff3_loader
|
|
|
*/
|
|
|
function tripal_feature_load_gff3($gff_file, $organism_id, $analysis_id,
|
|
|
- $add_only =0, $update = 0, $refresh = 0, $remove = 0, $use_transaction = 1,
|
|
|
+ $add_only = 0, $update = 0, $refresh = 0, $remove = 0, $use_transaction = 1,
|
|
|
$target_organism_id = NULL, $target_type = NULL, $create_target = 0,
|
|
|
- $start_line = 1, $landmark_type = '', $alt_id_attr = '', $job = NULL) {
|
|
|
+ $start_line = 1, $landmark_type = '', $alt_id_attr = '', $create_organism = FALSE,
|
|
|
+ $job = NULL) {
|
|
|
|
|
|
// make sure our temporary table exists
|
|
|
$ret = array();
|
|
@@ -517,8 +532,14 @@ function tripal_feature_load_gff3($gff_file, $organism_id, $analysis_id,
|
|
|
$attr_fmax_partial = 'f';
|
|
|
$attr_is_obsolete = 'f';
|
|
|
$attr_is_analysis = 'f';
|
|
|
- $attr_others = '';
|
|
|
+ $attr_others = '';
|
|
|
$residues = '';
|
|
|
+
|
|
|
+ // the organism to which a feature belongs can be set in the GFF
|
|
|
+ // file using the 'organism' attribute. By default we
|
|
|
+ // set the $feature_organism variable to the default organism for the landmark
|
|
|
+ $attr_organism = '';
|
|
|
+ $feature_organism = $organism;
|
|
|
|
|
|
foreach ($attrs as $attr) {
|
|
|
$attr = rtrim($attr);
|
|
@@ -549,24 +570,67 @@ function tripal_feature_load_gff3($gff_file, $organism_id, $analysis_id,
|
|
|
}
|
|
|
|
|
|
// get the name and ID tags
|
|
|
+ $skip_feature = 0; // if there is a problem with any of the attributes this variable gets set
|
|
|
if (strcmp($tag_name, 'ID') == 0) {
|
|
|
$attr_uniquename = urldecode($tag[1]);
|
|
|
}
|
|
|
elseif (strcmp($tag_name, 'Name') == 0) {
|
|
|
$attr_name = urldecode($tag[1]);
|
|
|
}
|
|
|
+ elseif (strcmp($tag_name, 'organism') == 0) {
|
|
|
+ $attr_organism = urldecode($tag[1]);
|
|
|
+ $org_matches = array();
|
|
|
+ if (preg_match('/^(.*?):(.*?)$/', $attr_organism, $org_matches)) {
|
|
|
+ $values = array(
|
|
|
+ 'genus' => $org_matches[1],
|
|
|
+ 'species' => $org_matches[2],
|
|
|
+ );
|
|
|
+ $options = array('statement_name' => 'sel_organism_gesp');
|
|
|
+ $org = tripal_core_chado_select('organism', array("*"), $values, $options);
|
|
|
+ if (count($org) == 0) {
|
|
|
+ if ($create_organism) {
|
|
|
+ $options = array('statement_name' => 'ins_organism_gesp');
|
|
|
+ $feature_organism = (object) tripal_core_chado_insert('organism', $values, $options);
|
|
|
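+ // a failed insert returns FALSE, but (object) FALSE is still a truthy object, so test for the key instead
+ if (empty($feature_organism->organism_id)) {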
|
|
|
+ watchdog('T_gff3_loader', "Could not add the organism, '%org', from line %line. Skipping this line. ",
|
|
|
+ array('%org' => $attr_organism, '%line' => $line_num), WATCHDOG_ERROR);
|
|
|
+ $skip_feature = 1;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ else {
|
|
|
+ watchdog('T_gff3_loader', "The organism attribute '%org' on line %line does not exist. Skipping this line. ",
|
|
|
+ array('%org' => $attr_organism, '%line' => $line_num), WATCHDOG_ERROR);
|
|
|
+ $skip_feature = 1;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ else {
|
|
|
+ // we found the organism in the database so use it
|
|
|
+ $feature_organism = $org[0];
|
|
|
+ }
|
|
|
+ }
|
|
|
+ else {
|
|
|
+ watchdog('T_gff3_loader', "The organism attribute '%org' on line %line is not properly formatted. It ".
|
|
|
+ "should be of the form: organism=Genus:species. Skipping this line.",
|
|
|
+ array('%org' => $attr_organism, '%line' => $line_num), WATCHDOG_ERROR);
|
|
|
+ $skip_feature = 1;
|
|
|
+ }
|
|
|
+ }
|
|
|
// get the list of non-reserved attributes
|
|
|
- elseif (strcmp($tag_name, 'Alias') !=0 and strcmp($tag_name, 'Parent') !=0 and
|
|
|
- strcmp($tag_name, 'Target') !=0 and strcmp($tag_name, 'Gap') !=0 and
|
|
|
- strcmp($tag_name, 'Derives_from') !=0 and strcmp($tag_name, 'Note') !=0 and
|
|
|
- strcmp($tag_name, 'Dbxref') !=0 and strcmp($tag_name, 'Ontology_term') !=0 and
|
|
|
- strcmp($tag_name, 'Is_circular') !=0 and strcmp($tag_name, 'target_organism') !=0 and
|
|
|
- strcmp($tag_name, 'target_type') != 0) {
|
|
|
+ elseif (strcmp($tag_name, 'Alias') != 0 and strcmp($tag_name, 'Parent') != 0 and
|
|
|
+ strcmp($tag_name, 'Target') != 0 and strcmp($tag_name, 'Gap') != 0 and
|
|
|
+ strcmp($tag_name, 'Derives_from') != 0 and strcmp($tag_name, 'Note') != 0 and
|
|
|
+ strcmp($tag_name, 'Dbxref') != 0 and strcmp($tag_name, 'Ontology_term') != 0 and
|
|
|
+ strcmp($tag_name, 'Is_circular') != 0 and strcmp($tag_name, 'target_organism') != 0 and
|
|
|
+ strcmp($tag_name, 'target_type') != 0 and strcmp($tag_name, 'organism') != 0) {
|
|
|
foreach ($tags[$tag_name] as $value) {
|
|
|
$attr_others[$tag_name][] = $value;
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
+
|
|
|
+ if ($skip_feature) {
|
|
|
+ continue;
|
|
|
+ }
|
|
|
|
|
|
// if neither name nor uniquename are provided then generate one
|
|
|
if (!$attr_uniquename and !$attr_name) {
|
|
@@ -611,7 +675,7 @@ function tripal_feature_load_gff3($gff_file, $organism_id, $analysis_id,
|
|
|
// different.
|
|
|
if (!$remove and !(strcmp($landmark, $attr_uniquename) == 0 or strcmp($landmark, $attr_name) == 0)) {
|
|
|
$select = array(
|
|
|
- 'organism_id' => $organism_id,
|
|
|
+ 'organism_id' => $organism->organism_id,
|
|
|
'uniquename' => $landmark,
|
|
|
);
|
|
|
$columns = array('count(*) as num_landmarks');
|
|
@@ -626,7 +690,7 @@ function tripal_feature_load_gff3($gff_file, $organism_id, $analysis_id,
|
|
|
if (!$count or count($count) == 0 or $count[0]->num_landmarks == 0) {
|
|
|
// now look for the landmark using the name rather than uniquename.
|
|
|
$select = array(
|
|
|
- 'organism_id' => $organism_id,
|
|
|
+ 'organism_id' => $organism->organism_id,
|
|
|
'name' => $landmark,
|
|
|
);
|
|
|
$columns = array('count(*) as num_landmarks');
|
|
@@ -664,7 +728,7 @@ function tripal_feature_load_gff3($gff_file, $organism_id, $analysis_id,
|
|
|
$sql = "DELETE FROM {feature}
|
|
|
WHERE organism_id = %d and uniquename = '%s' and type_id = %d";
|
|
|
$match = array(
|
|
|
- 'organism_id' => $organism->organism_id,
|
|
|
+ 'organism_id' => $feature_organism->organism_id,
|
|
|
'uniquename' => $attr_uniquename,
|
|
|
'type_id' => $cvterm->cvterm_id
|
|
|
);
|
|
@@ -681,7 +745,7 @@ function tripal_feature_load_gff3($gff_file, $organism_id, $analysis_id,
|
|
|
if ($update or $refresh or $add_only) {
|
|
|
|
|
|
// add/update the feature
|
|
|
- $feature = tripal_feature_load_gff3_feature($organism, $analysis_id, $cvterm,
|
|
|
+ $feature = tripal_feature_load_gff3_feature($feature_organism, $analysis_id, $cvterm,
|
|
|
$attr_uniquename, $attr_name, $residues, $attr_is_analysis,
|
|
|
$attr_is_obsolete, $add_only, $score);
|
|
|
|
|
@@ -729,7 +793,7 @@ function tripal_feature_load_gff3($gff_file, $organism_id, $analysis_id,
|
|
|
}
|
|
|
// add parent relationships
|
|
|
if (array_key_exists('Parent', $tags)) {
|
|
|
- tripal_feature_load_gff3_parents($feature, $cvterm, $tags['Parent'], $organism_id, $fmin);
|
|
|
+ tripal_feature_load_gff3_parents($feature, $cvterm, $tags['Parent'], $feature_organism->organism_id, $fmin);
|
|
|
}
|
|
|
// add target relationships
|
|
|
if (array_key_exists('Target', $tags)) {
|
|
@@ -749,7 +813,7 @@ function tripal_feature_load_gff3($gff_file, $organism_id, $analysis_id,
|
|
|
}
|
|
|
// add the Derives_from relationship (e.g. polycistronic genes).
|
|
|
if (array_key_exists('Derives_from', $tags)) {
|
|
|
- tripal_feature_load_gff3_derives_from($feature, $tags['Derives_from'][0], $organism);
|
|
|
+ tripal_feature_load_gff3_derives_from($feature, $tags['Derives_from'][0], $feature_organism);
|
|
|
}
|
|
|
// add in the GFF3_source dbxref so that GBrowse can find the feature using the source column
|
|
|
$source_ref = array('GFF_source:' . $source);
|