@@ -137,6 +137,16 @@ function tripal_feature_gff3_load_form() {
'#description' => t('Features present in the GFF file that exist in the database
will be removed rather than imported'),
+ // Checkbox that lets the loader add organisms named by the GFF "organism"
+ // attribute when they are not yet present in the database.
+ $form['import_options']['create_organism'] = array(
+ '#type' => 'checkbox',
+ '#title' => t('Create organism'),
+ '#required' => FALSE,
+ '#description' => t('The Tripal GFF loader supports the "organism" attribute. This allows features of a
+ different organism to be aligned to the landmark sequence of another species. The format of the
+ attribute is "organism=[genus]:[species]", where [genus] is the organism\'s genus and [species] is the
+ species name. Check this box to automatically add the organism to the database if it does not already exist.
+ Otherwise lines with an organism attribute where the organism is not present in the database will be skipped.'),
+ );
$form['targets'] = array(
'#type' => 'fieldset',
@@ -204,6 +214,7 @@ function tripal_feature_gff3_load_form_validate($form, &$form_state) {
$target_organism_id = $form_state['values']['target_organism_id'];
$target_type = trim($form_state['values']['target_type']);
$create_target = $form_state['values']['create_target'];
+ $create_organism = $form_state['values']['create_organism'];
$add_only = $form_state['values']['add_only'];
$update = $form_state['values']['update'];
$refresh = $form_state['values']['refresh'];
@@ -261,10 +272,13 @@ function tripal_feature_gff3_load_form_submit($form, &$form_state) {
$line_number = trim($form_state['values']['line_number']);
$landmark_type = trim($form_state['values']['landmark_type']);
$alt_id_attr = trim($form_state['values']['alt_id_attr']);
+ $create_organism = $form_state['values']['create_organism'];
$args = array($gff_file, $organism_id, $analysis_id, $add_only,
$update, $refresh, $remove, $use_transaction, $target_organism_id,
- $target_type, $create_target, $line_number, $landmark_type, $alt_id_attr);
+ $target_type, $create_target, $line_number, $landmark_type, $alt_id_attr,
+ $create_organism);
$type = '';
if ($add_only) {
@@ -292,9 +306,10 @@ function tripal_feature_gff3_load_form_submit($form, &$form_state) {
* @ingroup gff3_loader
function tripal_feature_load_gff3($gff_file, $organism_id, $analysis_id,
- $add_only =0, $update = 0, $refresh = 0, $remove = 0, $use_transaction = 1,
+ $add_only = 0, $update = 0, $refresh = 0, $remove = 0, $use_transaction = 1,
$target_organism_id = NULL, $target_type = NULL, $create_target = 0,
- $start_line = 1, $landmark_type = '', $alt_id_attr = '', $job = NULL) {
+ $start_line = 1, $landmark_type = '', $alt_id_attr = '', $create_organism = FALSE,
+ $job = NULL) {
// make sure our temporary table exists
$ret = array();
@@ -517,8 +532,14 @@ function tripal_feature_load_gff3($gff_file, $organism_id, $analysis_id,
$attr_fmax_partial = 'f';
$attr_is_obsolete = 'f';
$attr_is_analysis = 'f';
- $attr_others = '';
+ $attr_others = '';
$residues = '';
+ // the organism to which a feature belongs can be set in the GFF
+ // file using the 'organism' attribute. By default we
+ // set the $feature_organism variable to the default organism for the landmark
+ $attr_organism = '';
+ $feature_organism = $organism;
foreach ($attrs as $attr) {
$attr = rtrim($attr);
@@ -549,24 +570,67 @@ function tripal_feature_load_gff3($gff_file, $organism_id, $analysis_id,
// get the name and ID tags
+ $skip_feature = 0; // set to 1 when an attribute of this line is invalid. NOTE(review): this looks like it runs once per attribute, so a later attribute would clear a flag set by an earlier one -- confirm and hoist above the attribute loop if so
if (strcmp($tag_name, 'ID') == 0) {
$attr_uniquename = urldecode($tag[1]);
elseif (strcmp($tag_name, 'Name') == 0) {
$attr_name = urldecode($tag[1]);
+ elseif (strcmp($tag_name, 'organism') == 0) {
+ // the attribute value must have the form [genus]:[species]
+ $attr_organism = urldecode($tag[1]);
+ $org_matches = array();
+ if (preg_match('/^(.*?):(.*?)$/', $attr_organism, $org_matches)) {
+ $values = array(
+ 'genus' => $org_matches[1],
+ 'species' => $org_matches[2],
+ );
+ $options = array('statement_name' => 'sel_organism_gesp');
+ $org = tripal_core_chado_select('organism', array("*"), $values, $options);
+ // treat a falsy select result like an empty one (same check as used for
+ // the landmark lookup) so count() is never called on a non-array
+ if (!$org or count($org) == 0) {
+ if ($create_organism) {
+ $options = array('statement_name' => 'ins_organism_gesp');
+ // test the raw return value before casting: a failed insert cast to
+ // an object is still truthy, so the failure would go undetected
+ $new_organism = tripal_core_chado_insert('organism', $values, $options);
+ if (!$new_organism) {
+ watchdog('T_gff3_loader', "Could not add the organism, '%org', from line %line. Skipping this line. ",
+ array('%org' => $attr_organism, '%line' => $line_num), WATCHDOG_ERROR);
+ $skip_feature = 1;
+ }
+ else {
+ $feature_organism = (object) $new_organism;
+ }
+ }
+ else {
+ watchdog('T_gff3_loader', "The organism attribute '%org' on line %line does not exist. Skipping this line. ",
+ array('%org' => $attr_organism, '%line' => $line_num), WATCHDOG_ERROR);
+ $skip_feature = 1;
+ }
+ }
+ else {
+ // we found the organism in the database so use it
+ $feature_organism = $org[0];
+ }
+ }
+ else {
+ watchdog('T_gff3_loader', "The organism attribute '%org' on line %line is not properly formated. It ".
+ "should be of the form: organism=Genus:species. Skipping this line.",
+ array('%org' => $attr_organism, '%line' => $line_num), WATCHDOG_ERROR);
+ $skip_feature = 1;
+ }
+ }
// get the list of non-reserved attributes
- elseif (strcmp($tag_name, 'Alias') !=0 and strcmp($tag_name, 'Parent') !=0 and
- strcmp($tag_name, 'Target') !=0 and strcmp($tag_name, 'Gap') !=0 and
- strcmp($tag_name, 'Derives_from') !=0 and strcmp($tag_name, 'Note') !=0 and
- strcmp($tag_name, 'Dbxref') !=0 and strcmp($tag_name, 'Ontology_term') !=0 and
- strcmp($tag_name, 'Is_circular') !=0 and strcmp($tag_name, 'target_organism') !=0 and
- strcmp($tag_name, 'target_type') != 0) {
+ elseif (strcmp($tag_name, 'Alias') != 0 and strcmp($tag_name, 'Parent') != 0 and
+ strcmp($tag_name, 'Target') != 0 and strcmp($tag_name, 'Gap') != 0 and
+ strcmp($tag_name, 'Derives_from') != 0 and strcmp($tag_name, 'Note') != 0 and
+ strcmp($tag_name, 'Dbxref') != 0 and strcmp($tag_name, 'Ontology_term') != 0 and
+ strcmp($tag_name, 'Is_circular') != 0 and strcmp($tag_name, 'target_organism') != 0 and
+ // the "!= 0" belongs outside strcmp(): compare the result, not the literal
+ strcmp($tag_name, 'target_type') != 0 and strcmp($tag_name, 'organism') != 0) {
foreach ($tags[$tag_name] as $value) {
$attr_others[$tag_name][] = $value;
+ // the attribute parsing flags a bad line through $skip_feature; $skip_line
+ // is never assigned, so testing it would never skip anything
+ if ($skip_feature) {
+ continue;
+ }
// if neither name nor uniquename are provided then generate one
if (!$attr_uniquename and !$attr_name) {
@@ -611,7 +675,7 @@ function tripal_feature_load_gff3($gff_file, $organism_id, $analysis_id,
// different.
if (!$remove and !(strcmp($landmark, $attr_uniquename) == 0 or strcmp($landmark, $attr_name) == 0)) {
$select = array(
- 'organism_id' => $organism_id,
+ 'organism_id' => $organism->organism_id,
'uniquename' => $landmark,
$columns = array('count(*) as num_landmarks');
@@ -626,7 +690,7 @@ function tripal_feature_load_gff3($gff_file, $organism_id, $analysis_id,
if (!$count or count($count) == 0 or $count[0]->num_landmarks == 0) {
// now look for the landmark using the name rather than uniquename.
$select = array(
- 'organism_id' => $organism_id,
+ 'organism_id' => $organism->organism_id,
'name' => $landmark,
$columns = array('count(*) as num_landmarks');
@@ -664,7 +728,7 @@ function tripal_feature_load_gff3($gff_file, $organism_id, $analysis_id,
$sql = "DELETE FROM {feature}
WHERE organism_id = %d and uniquename = '%s' and type_id = %d";
$match = array(
- 'organism_id' => $organism->organism_id,
+ 'organism_id' => $feature_organism->organism_id,
'uniquename' => $attr_uniquename,
'type_id' => $cvterm->cvterm_id
@@ -681,7 +745,7 @@ function tripal_feature_load_gff3($gff_file, $organism_id, $analysis_id,
if ($update or $refresh or $add_only) {
// add/update the feature
- $feature = tripal_feature_load_gff3_feature($organism, $analysis_id, $cvterm,
+ $feature = tripal_feature_load_gff3_feature($feature_organism, $analysis_id, $cvterm,
$attr_uniquename, $attr_name, $residues, $attr_is_analysis,
$attr_is_obsolete, $add_only, $score);
@@ -729,7 +793,7 @@ function tripal_feature_load_gff3($gff_file, $organism_id, $analysis_id,
// add parent relationships
if (array_key_exists('Parent', $tags)) {
- tripal_feature_load_gff3_parents($feature, $cvterm, $tags['Parent'], $organism_id, $fmin);
+ tripal_feature_load_gff3_parents($feature, $cvterm, $tags['Parent'], $feature_organism->organism_id, $fmin);
// add target relationships
if (array_key_exists('Target', $tags)) {
@@ -749,7 +813,7 @@ function tripal_feature_load_gff3($gff_file, $organism_id, $analysis_id,
// add the Derives_from relationship (e.g. polycistronic genes).
if (array_key_exists('Derives_from', $tags)) {
- tripal_feature_load_gff3_derives_from($feature, $tags['Derives_from'][0], $organism);
+ tripal_feature_load_gff3_derives_from($feature, $tags['Derives_from'][0], $feature_organism);
// add in the GFF3_source dbxref so that GBrowse can find the feature using the source column
$source_ref = array('GFF_source:' . $source);