feature; // number of bases per line in FASTA format $num_bases = 50; // we don't want to get the sequence for traditionally large types. They are // too big, bog down the web browser, take longer to load and it's not // reasonable to print them on a page. $residues = ''; if (strcmp($feature->type_id->name, 'scaffold') != 0 and strcmp($feature->type_id->name, 'chromosome') != 0 and strcmp($feature->type_id->name, 'supercontig') != 0 and strcmp($feature->type_id->name, 'pseudomolecule') != 0) { $feature = chado_expand_var($feature, 'field', 'feature.residues'); $residues = $feature->residues; } // get the sequence derived from alignments $feature = $variables['node']->feature; $featureloc_sequences = $feature->featureloc_sequences; if ($residues or count($featureloc_sequences) > 0) { $sequences_html = ''; // a variable for holding all sequences HTML text $list_items = []; // a list to be used for theming of content on this page // ADD IN RESIDUES FOR THIS FEATURE // add in the residues if they are present if ($residues) { $list_items[] = '' . $feature->type_id->name . ' sequence'; // format the sequence to break every 50 residues $sequences_html .= ''; $sequences_html .= '

'; $sequences_html .= '

' . $feature->type_id->name . ' sequence

'; $sequences_html .= '

';
    $sequences_html .= '>' . tripal_get_fasta_defline($feature, '', NULL, '', strlen($feature->residues)) . "
";
    $sequences_html .= wordwrap($feature->residues, $num_bases, "
", TRUE);
    $sequences_html .= '

'; $sequences_html .= 'back to top'; $sequences_html .= '

'; } // ADD IN RELATIONSHIP SEQUENCES (e.g. proteins) // see the explanation in the tripal_feature_relationships.tpl.php // template for how the 'all_relationships' is provided. It is this // variable that we use to get the proteins. $all_relationships = $feature->all_relationships; $object_rels = $all_relationships['object']; $has_coding_seq = 0; $coding_seq = ''; foreach ($object_rels as $rel_type => $rels) { foreach ($rels as $subject_type => $subjects) { foreach ($subjects as $subject) { // add in protein sequence if it has residues if ($rel_type == 'derives from' and $subject_type == 'polypeptide') { $protein = $subject->record->subject_id; $protein = chado_expand_var($protein, 'field', 'feature.residues'); if ($protein->residues) { $list_items[] = 'protein sequence'; $sequences_html .= ''; $sequences_html .= '

'; $sequences_html .= '

protein sequence of ' . $protein->name . '

'; $sequences_html .= '

';
            $sequences_html .= '>' . tripal_get_fasta_defline($protein, '', NULL, '', strlen($protein->residues)) . "
";
            $sequences_html .= wordwrap($protein->residues, $num_bases, "
", TRUE);
            $sequences_html .= '

'; $sequences_html .= 'back to top'; $sequences_html .= '

'; } } // If the CDS has sequences then concatenate those. The objects // should be returned in order of rank if ($rel_type == 'part of' and $subject_type == 'CDS') { $cds = $subject->record->subject_id; $cds = chado_expand_var($cds, 'field', 'feature.residues'); if ($cds->residues) { $has_coding_seq = 1; $coding_seq .= $cds->residues; } } // add any other sequences that are related through a relationship // and that have values in the 'residues' column } } } // CODING SEQUENCES FROM RELATIONSHIPS // add in any CDS sequences. if ($has_coding_seq) { $list_items[] = 'coding sequence '; $sequences_html .= ''; $sequences_html .= '

'; $sequences_html .= '

coding sequence

'; $sequences_html .= '

';
    $sequences_html .= wordwrap($coding_seq, $num_bases, "
", TRUE);
    $sequences_html .= '

'; $sequences_html .= 'back to top'; $sequences_html .= '

'; } /* ADD IN ALIGNMENT SEQUENCES FOR THIS FEATURE * For retrieving the sequence from an alignment we would typically make a call to * chado_expand_var function. For example, to retrieve all * of the featurelocs in order to get the sequences needed for this template, the * following function call would be made: * * $feature = chado_expand_var($feature,'table','featureloc'); * * Then all of the sequences would need to be retrieved from the alignments and * formatted for display below. However, to simplify this template, this has already * been done by the tripal_feature module and the sequences are made available in * the variable: * * $feature->featureloc_sequences */ if (count($featureloc_sequences) > 0) { foreach ($featureloc_sequences as $src => $attrs) { // the $attrs array has the following keys // * id: a unique identifier combining the feature id with the cvterm id // * type: the type of sequence (e.g. mRNA, etc) // * location: the alignment location // * defline: the definition line // * formatted_seq: the formatted sequences // * featureloc: the feature object aligned to $list_items[] = '' . $feature->type_id->name . ' from alignment at ' . $attrs['location'] . ""; $sequences_html .= ''; $sequences_html .= '

'; $sequences_html .= '

' . $feature->type_id->name . ' from alignment at ' . $attrs['location'] . '

'; $sequences_html .= $attrs['formatted_seq']; $sequences_html .= 'back to top'; $sequences_html .= '

'; } // check to see if this alignment has any CDS. If so, generate a CDS sequence $cds_sequence = tripal_get_feature_sequences( [ 'feature_id' => $feature->feature_id, 'parent_id' => $attrs['featureloc']->srcfeature_id->feature_id, 'name' => $feature->name, 'featureloc_id' => $attrs['featureloc']->featureloc_id, ], [ 'width' => $num_bases, // FASTA sequence should have $num_bases chars per line 'derive_from_parent' => 1, // CDS are in parent-child relationships so we want to use the sequence from the parent 'aggregate' => 1, // we want to combine all CDS for this feature into a single sequence 'sub_feature_types' => ['CDS'], // we're looking for CDS features 'is_html' => 1, ] ); if (count($cds_sequence) > 0) { // the tripal_get_feature_sequences() function can return multiple sequences // if a feature is aligned to multiple places. In the case of CDSs we expect // that one mRNA is only aligned to a single location on the assembly so we // can access the CDS sequence with index 0. if ($cds_sequence[0]['residues']) { $list_items[] = 'coding sequence from alignment at ' . $attrs['location'] . ""; $sequences_html .= ''; $sequences_html .= '

'; $sequences_html .= '

Coding sequence (CDS) from alignment at ' . $attrs['location'] . '

'; $sequences_html .= '

';
        $sequences_html .= '>' . tripal_get_fasta_defline($feature, '', $attrs['featureloc'], 'CDS', $cds_sequence[0]['length']) . "
";
        $sequences_html .= $cds_sequence[0]['residues'];
        $sequences_html .= '

'; $sequences_html .= 'back to top'; $sequences_html .= '

'; } } } ?>

The following sequences are available for this feature:

'; print theme_item_list([ 'items' => $list_items, 'title' => '', 'type' => 'ul', 'attributes' => [], ]); $message = 'Administrators, sequences will appear on this page if:

For any feature type:

This feature has residues stored in the "residues" field of the feature table of Chado.
This feature is aligned to another feature (e.g. scaffold, or chromosome). In this case, the sequence underlying the alignment will be shown.

For gene models:

This feature has a "polypeptide" (protein) feature associated via the "feature_relationship" table of Chado with a relationship of type "derives from" and the protein feature has residues. Typically, a protein is associated with an mRNA feature and protein sequences will appear on the mRNA page.
This feature has one or more CDS features associated via the "feature_relationship" table of Chado with a relationship of type "part of". If the CDS features have residues then those will be concatenated and presented as a sequence. Typically, CDSs are associated with an mRNA feature and CDS sequences will appear on the mRNA page.
This feature is aligned to another feature (e.g. scaffold, or chromosome) and this feature has one or more CDS features associated. The CDS sequences underlying the alignment will be shown.

'; print tripal_set_message($message, TRIPAL_INFO, ['return_html' => 1]); // now print the sequences print $sequences_html; }