t('Sequences'),
'description' => t('A field for managing nucleotide and protein residues.'),
'default_widget' => 'chado_feature__residues_widget',
'default_formatter' => 'chado_feature__residues_formatter',
'settings' => array(),
'instance_settings' => array('text_processing' => 1, 'display_summary' => 0),
'storage' => array(
'type' => 'field_chado_storage',
'module' => 'tripal_chado',
'active' => TRUE
),
);
}
/**
 * Decides whether this field may be attached to the current bundle.
 *
 * Sets $this->can_attach to TRUE when the 'chado_table' entry of
 * $this->details is 'feature' and to FALSE for every other table.
 *
 * @see TripalField
 */
protected function setCanAttach() {
  // The field maps the 'residues' column, so it only applies to bundles that
  // are built on the feature table.
  $this->can_attach = ($this->details['chado_table'] == 'feature');
}
/**
 * Assigns the machine name of this field.
 *
 * The name is the fixed string 'feature__residues'; it does not depend on
 * any of the bundle details.
 *
 * @see TripalField
 */
protected function setFieldName() {
  $this->field_name = 'feature__residues';
}
/**
 * Builds the field definition for the residues field.
 *
 * @return
 *   An array describing the field (name, type, cardinality, storage backend
 *   and Chado mapping settings), or nothing (NULL) when the field cannot be
 *   attached to the current bundle.
 *
 * @see TripalField
 */
function createInfo() {
  if (!$this->can_attach) {
    return;
  }

  $table_name = $this->details['chado_table'];

  return array(
    'field_name' => $this->field_name,
    'type' => 'chado_feature__residues',
    'cardinality' => 1,
    'locked' => FALSE,
    'storage' => array(
      'type' => 'field_chado_storage',
    ),
    'settings' => array(
      // The Chado table and column this field reads from and writes to.
      'chado_table' => $table_name,
      'chado_column' => 'residues',
      'semantic_web' => tripal_get_chado_semweb_term($table_name, 'residues'),
    ),
  );
}
/**
 * Builds the instance definition that attaches this field to the bundle.
 *
 * @return
 *   An array describing the field instance (bundle, label, widget and
 *   display settings), or nothing (NULL) when the field cannot be attached
 *   to the current bundle.
 *
 * @see TripalField
 */
function createInstanceInfo() {
  if (!$this->can_attach) {
    return;
  }

  return array(
    'field_name' => $this->field_name,
    'entity_type' => $this->entity_type,
    'bundle' => $this->bundle->name,
    'label' => 'Sequences',
    'description' => 'All available sequences for this record.',
    'required' => FALSE,
    'settings' => array(
      'auto_attach' => FALSE,
    ),
    'widget' => array(
      'type' => 'chado_feature__residues_widget',
      'settings' => array(
        'display_label' => 1,
      ),
    ),
    'display' => array(
      // Display settings are keyed by view mode. The key must be spelled
      // 'default' for the formatter below to be picked up.
      'default' => array(
        'label' => 'above',
        'type' => 'chado_feature__residues_formatter',
        'settings' => array(),
      ),
    ),
  );
}
/**
 * Describes the widget offered for the residues field type.
 *
 * @return
 *   An array of widget definitions keyed by widget machine name.
 *
 * @see TripalField::widgetInfo()
 */
public static function widgetInfo() {
  $widgets = array();

  $widgets['chado_feature__residues_widget'] = array(
    'label' => t('Sequence'),
    'field types' => array('chado_feature__residues'),
  );

  return $widgets;
}
/**
 * Describes the formatter offered for the residues field type.
 *
 * @return
 *   An array of formatter definitions keyed by formatter machine name.
 *
 * @see TripalField::formatterInfo()
 */
static function formatterInfo() {
  $formatters = array();

  $formatters['chado_feature__residues_formatter'] = array(
    'label' => t('Sequences'),
    'field types' => array('chado_feature__residues'),
  );

  return $formatters;
}
/**
* Renders each sequence item as a label followed by FASTA-style text.
*
* For every item whose 'value' is an array containing a 'residues' key, a
* markup element is stored in $element under the same delta. The generated
* text is made of the item's label, a definition line prefixed with '>' and
* the residues wrapped at 50 characters per line. Items that do not meet the
* condition are skipped and produce no element.
*
* NOTE(review): the string fragments used to build $content contain only line
* breaks as seen here. If wrapping HTML tags were intended they are not
* present; confirm against the upstream file. The label, definition line and
* residues are concatenated into the markup without escaping.
*
* @param $element
*   The render array to populate, passed by reference and keyed by delta.
* @param $entity_type
*   Not read by this implementation.
* @param $entity
*   The entity being displayed; only its chado_record property is read.
* @param $field
*   Not read by this implementation.
* @param $instance
*   Not read by this implementation.
* @param $langcode
*   Not read by this implementation.
* @param $items
*   The field items; each 'value' is expected to hold 'residues', 'label'
*   and 'defline'.
* @param $display
*   Not read by this implementation.
*
* @see TripalField::formatterView()
*/
static function formatterView(&$element, $entity_type, $entity, $field,
$instance, $langcode, $items, $display) {
// Number of residues printed per line.
$num_bases = 50;
// NOTE(review): $feature is assigned here but never used below.
$feature = $entity->chado_record;
foreach ($items as $delta => $item) {
// If there are no residues then skip this one.
if (!is_array($item['value']) or !array_key_exists('residues', $item['value'])) {
continue;
}
$residues = $item['value']['residues'];
$label = $item['value']['label'];
$defline = $item['value']['defline'];
// The label goes first, surrounded by line breaks.
$content = '
' . $label . '
';
$content .= '
';
// Definition line: a '>' marker followed by the defline text.
$content .= '>' . $defline . "
";
// Hard-wrap the residues (the TRUE flag also cuts strings without spaces).
$content .= wordwrap($residues, $num_bases, "
", TRUE);
$content .= '
';
$element[$delta] = array(
// We create a render array to produce the desired markup,
'#type' => 'markup',
'#markup' => $content,
);
}
}
/**
 * Builds the edit-form widget for the residues field.
 *
 * Two elements are added to $widget: a 'value' element that carries the
 * stored value of the item at $delta (or an empty string when there is no
 * such item), and a 'feature__residues' textarea pre-filled with the current
 * residues.
 *
 * @param $widget
 *   The widget form array to populate, passed by reference.
 * @param $form
 *   The complete form; not read by this implementation.
 * @param $form_state
 *   The form state. When it contains a 'values' key the residues are taken
 *   from it instead of from $items.
 * @param $field
 *   The field definition; only 'field_name' is read.
 * @param $instance
 *   The field instance; not read by this implementation.
 * @param $langcode
 *   The language code; not read by this implementation.
 * @param $items
 *   The stored field items.
 * @param $delta
 *   The index of the item this widget is built for.
 * @param $element
 *   The base element supplying '#title', '#description' and, optionally,
 *   '#weight'.
 *
 * @see TripalField::widgetForm()
 */
public static function widgetForm(&$widget, &$form, &$form_state, $field, $instance,
    $langcode, $items, $delta, $element) {
  $field_name = $field['field_name'];

  // Start from the residues stored on the first item, if there is one.
  $residues = '';
  if (count($items) > 0 and array_key_exists('feature__residues', $items[0])) {
    $residues = $items[0]['feature__residues'];
  }
  // Values already present in the form state override the stored residues.
  if (array_key_exists('values', $form_state)) {
    $residues = tripal_chado_get_field_form_values($field_name, $form_state, 0, 'feature__residues');
  }

  $widget['value'] = array(
    '#type' => 'value',
    '#value' => array_key_exists($delta, $items) ? $items[$delta]['value'] : '',
  );

  $widget['feature__residues'] = array(
    '#type' => 'textarea',
    '#title' => $element['#title'],
    '#description' => $element['#description'],
    '#weight' => isset($element['#weight']) ? $element['#weight'] : 0,
    '#default_value' => $residues,
    '#delta' => $delta,
    // Strips white space from the submitted residues.
    '#element_validate' => array('chado_feature__residues_widget_validate'),
    '#cols' => 30,
  );
}
/**
* Loads every sequence available for a feature into the entity's field items.
*
* Items are written to $entity->{$field_name}['und'] in this order:
* - delta 0: the feature's own residues (or a placeholder for the feature
*   types listed below, whose sequences are not loaded);
* - one item per polypeptide that 'derives_from' the feature and has residues;
* - one item holding the concatenated residues of all CDS features that are
*   'part_of' the feature, when that concatenation is not empty;
* - for each alignment location returned by get_featureloc_sequences(): the
*   aligned sequence, followed by a CDS sequence for that alignment when
*   tripal_get_feature_sequences() returns one with residues.
*
* @param $field
*   The field definition; only 'field_name' is read.
* @param $entity
*   The entity being loaded. It is modified in place.
* @param $details
*   An array that must contain 'record', the Chado feature record.
*
* @see TripalField::load()
*/
static function load($field, $entity, $details = array()) {
$field_name = $field['field_name'];
$feature = $details['record'];
// Running delta of the next item to write.
$num_seqs = 0;
// We don't want to get the sequence for traditionally large types. They are
// too big, bog down the web browser, take longer to load and it's not
// reasonable to print them on a page.
if(strcmp($feature->type_id->name,'scaffold') == 0 or
strcmp($feature->type_id->name,'chromosome') == 0 or
strcmp($feature->type_id->name,'supercontig') == 0 or
strcmp($feature->type_id->name,'pseudomolecule') == 0) {
// NOTE(review): this defline already starts with '>' and formatterView()
// prepends another '>' when rendering -- confirm that is intended.
$entity->{$field_name}['und'][$num_seqs]['value'] = array(
'@type' => 'SO:0000110',
'type' => 'sequence_feature',
'label' => 'Residues',
'defline' => ">This sequence is too large for this display.",
'residues' => '',
);
$entity->{$field_name}['und'][$num_seqs]['feature__residues'] = '';
}
else {
// The residues column is only loaded on request.
$feature = chado_expand_var($feature,'field','feature.residues');
if ($feature->residues) {
$entity->{$field_name}['und'][$num_seqs]['value'] = array(
'@type' => 'SO:0000110',
'type' => 'sequence_feature',
'label' => 'Sequence',
'defline' => tripal_get_fasta_defline($feature, '', NULL, '', strlen($feature->residues)),
'residues' => $feature->residues,
);
$entity->{$field_name}['und'][$num_seqs]['feature__residues'] = $feature->residues;
}
else {
// No residues: keep delta 0 present but empty.
$entity->{$field_name}['und'][$num_seqs]['value'] = array();
$entity->{$field_name}['und'][$num_seqs]['feature__residues'] = '';
}
}
$num_seqs++;
// Add in the protein sequences
$values = array(
'object_id' => $feature->feature_id,
'subject_id' => array(
'type_id' => array(
'name' => 'polypeptide'
),
),
'type_id' => array(
'name' => 'derives_from',
),
);
$options = array(
'return_array' => 1,
'include_fk' => array(
'subject_id' => 1
),
);
$protein_rels = chado_generate_var('feature_relationship', $values, $options);
foreach ($protein_rels as $protein_rel) {
$protein_rel = chado_expand_var($protein_rel, 'field', 'feature.residues');
if ($protein_rel->subject_id->residues) {
$entity->{$field_name}['und'][$num_seqs++]['value'] = array(
'@type' => 'SO:0000104',
'type' => 'polypeptide',
'label' => 'Protein Sequence',
'defline' => tripal_get_fasta_defline($protein_rel->subject_id, '', NULL, '', strlen($protein_rel->subject_id->residues)),
'residues' => $protein_rel->subject_id->residues,
);
}
}
// Add in sequences from alignments.
$options = array(
'return_array' => 1,
'include_fk' => array(
'srcfeature_id' => array(
'type_id' => 1
),
'feature_id' => array(
'type_id' => 1
),
),
);
$feature = chado_expand_var($feature, 'table', 'featureloc', $options);
// The alignment sequences are fetched here but appended to the items only
// after the coding sequence below.
$featureloc_sequences = self::get_featureloc_sequences($feature->feature_id, $feature->featureloc->feature_id);
// Add in the coding sequences.
$values = array(
'object_id' => $feature->feature_id,
'subject_id' => array(
'type_id' => array(
'name' => 'CDS'
),
),
'type_id' => array(
'name' => 'part_of',
),
);
$options = array(
'order_by' => array('rank' => 'ASC'),
'return_array' => 1,
'include_fk' => array(
'subject_id' => 1
),
);
$cds_rels = chado_generate_var('feature_relationship', $values, $options);
// Concatenate the CDS residues in relationship rank order.
$coding_seq = '';
foreach ($cds_rels as $cds_rel) {
$cds_rel = chado_expand_var($cds_rel, 'field', 'feature.residues');
if ($cds_rel->subject_id->residues) {
$coding_seq .= $cds_rel->subject_id->residues;
}
}
if ($coding_seq) {
// NOTE(review): assumes the expanded featureloc list has an element 0
// whenever a coding sequence exists -- confirm.
$entity->{$field_name}['und'][$num_seqs++]['value'] = array(
'@type' => 'SO:0000316',
'type' => 'coding_sequence',
'label' => 'Coding sequence (CDS)',
'defline' => tripal_get_fasta_defline($feature, '', $feature->featureloc->feature_id[0], '', strlen($coding_seq)),
'residues' => $coding_seq,
);
}
foreach($featureloc_sequences as $src => $attrs){
// the $attrs array has the following keys
// * id: a unique identifier combining the source feature id with its
//   type cvterm id
// * type: the type name of the aligned feature (e.g. mRNA, etc)
// * location: the alignment location
// * defline: the definition line
// * featureloc: the featureloc record of the alignment
// * residues: the sequence extracted at that location
$entity->{$field_name}['und'][$num_seqs++]['value'] = array(
'residues' => $attrs['residues'],
'@type' => 'SO:0000110',
'type' => 'sequence_feature',
'defline' => tripal_get_fasta_defline($feature, '', $attrs['featureloc'], 'CDS', strlen($attrs['residues'])),
'label' => 'Sequence from alignment at ' . $attrs['location'],
);
// check to see if this alignment has any CDS. If so, generate a CDS sequence
$cds_sequence = tripal_get_feature_sequences(
array(
'feature_id' => $feature->feature_id,
'parent_id' => $attrs['featureloc']->srcfeature_id->feature_id,
'name' => $feature->name,
'featureloc_id' => $attrs['featureloc']->featureloc_id,
),
array(
'derive_from_parent' => 1, // CDS are in parent-child relationships so we want to use the sequence from the parent
'aggregate' => 1, // we want to combine all CDS for this feature into a single sequence
'sub_feature_types' => array('CDS'), // we're looking for CDS features
'is_html' => 0
)
);
if (count($cds_sequence) > 0) {
// the tripal_get_feature_sequences() function can return multiple sequences
// if a feature is aligned to multiple places. In the case of CDSs we expect
// that one mRNA is only aligned to a single location on the assembly so we
// can access the CDS sequence with index 0.
if ($cds_sequence[0]['residues']) {
$entity->{$field_name}['und'][$num_seqs++]['value'] = array(
'residues' => $cds_sequence[0]['residues'],
'@type' => 'SO:0000316',
'type' => 'coding_sequence',
'defline' => tripal_get_fasta_defline($feature, '', $attrs['featureloc'], 'CDS', $cds_sequence[0]['length']),
'label' => 'Coding sequence (CDS) from alignment at ' . $attrs['location'],
);
}
}
}
}
/**
 * Extracts the sequence of a feature at each location where it is aligned.
 *
 * A location is reported only when its source feature also carries located
 * sub-features, i.e. subjects of relationships in which $feature_id is the
 * object. For such a location the matching part of the source feature's
 * residues is read from Chado and reverse-complemented when the alignment
 * lies on the negative strand.
 *
 * The method is static because it is invoked as self::...() from the static
 * load() method and reads no instance state.
 *
 * @param $feature_id
 *   The feature_id of the feature whose aligned sequences are wanted.
 * @param $featurelocs
 *   The featureloc records of that feature. Each record must expose
 *   srcfeature_id (with feature_id and type_id->cvterm_id), feature_id (with
 *   type_id->name), fmin, fmax and strand.
 *
 * @return
 *   An array keyed by "<source feature id>-<source type cvterm id>". Each
 *   entry holds the keys 'id', 'type', 'location', 'defline', 'featureloc'
 *   and 'residues'. The array is empty when there are no featurelocs, no
 *   relationships, or no location with located sub-features.
 */
private static function get_featureloc_sequences($feature_id, $featurelocs) {
  // If we don't have any featurelocs then there is no point in continuing.
  if (!$featurelocs) {
    return array();
  }

  $relationships = self::get_aggregate_relationships($feature_id);
  if (!$relationships) {
    return array();
  }

  // Collect the located sub-features ("parts") of every source feature.
  // A sub-feature can be located on several sources, hence the $src key.
  $rel_locs = array();
  foreach ($relationships as $rel) {
    $rel_featurelocs = self::get_featurelocs($rel->subject_id, 'as_child', 0);
    foreach ($rel_featurelocs as $rel_featureloc) {
      $src = $rel_featureloc->src_feature_id . "-" . $rel_featureloc->src_cvterm_id;
      $start = $rel_featureloc->fmin;
      $type = $rel->subject_type;
      $rel_locs[$src]['parts'][$start][$type] = array(
        'start' => $start,
        'end' => $rel_featureloc->fmax,
        'type' => $type,
      );
    }
  }

  // Get the sequence for each location at which the feature is found.
  $floc_sequences = array();
  foreach ($featurelocs as $featureloc) {
    // Same key format as above so that locations and parts can be matched.
    $src = $featureloc->srcfeature_id->feature_id . "-" . $featureloc->srcfeature_id->type_id->cvterm_id;

    // The parts only act as a gate: a location without any located
    // sub-feature on its source is not reported.
    if (empty($rel_locs[$src]['parts'])) {
      continue;
    }

    // The start and size are written directly into the SQL text because
    // bound placeholders get quoted, which breaks substring(). Cast the
    // coordinates so that only integers can ever be interpolated.
    $fmin = (int) $featureloc->fmin;
    $fmax = (int) $featureloc->fmax;
    // substring() is 1-based while fmin is 0-based.
    $start = $fmin + 1;
    $size = $fmax - $fmin;
    $sql = "
      SELECT substring(residues from $start for $size) as residues
      FROM {feature}
      WHERE feature_id = :feature_id
    ";
    $args = array(':feature_id' => $featureloc->srcfeature_id->feature_id);
    $sequence = chado_query($sql, $args)->fetchObject();
    // fetchObject() yields FALSE when the source feature row is missing.
    $residues = $sequence ? $sequence->residues : '';
    if ($featureloc->strand < 0) {
      $residues = tripal_reverse_compliment_sequence($residues);
    }

    $floc_sequences[$src] = array(
      'id' => $src,
      'type' => $featureloc->feature_id->type_id->name,
      'location' => tripal_get_location_string($featureloc),
      'defline' => tripal_get_fasta_defline($featureloc->feature_id, '', $featureloc, '', strlen($residues)),
      'featureloc' => $featureloc,
      'residues' => $residues,
    );
  }

  return $floc_sequences;
}
/**
 * Gets the relationships in which a feature is the object.
 *
 * No recursion takes place: the relationships of $feature_id are looked up
 * once with get_relationships(). $substitute and $base_type_id are accepted
 * but not used.
 *
 * The method is static because it is invoked as self::...() from static
 * context and reads no instance state.
 *
 * @param $feature_id
 *   The feature to get relationships for.
 * @param $substitute
 *   Unused.
 * @param $levels
 *   When greater than zero and equal to $depth, NULL is returned instead of
 *   the relationships.
 * @param $base_type_id
 *   Unused.
 * @param $depth
 *   The depth that is compared with $levels.
 *
 * @return
 *   An array of relationship records, or NULL when the $levels limit has
 *   been reached.
 *
 * @ingroup tripal_feature
 */
private static function get_aggregate_relationships($feature_id, $substitute=1,
    $levels=0, $base_type_id=NULL, $depth=0) {
  // A $levels value of 0 means "no limit".
  if ($levels > 0 and $levels == $depth) {
    return NULL;
  }

  return self::get_relationships($feature_id, 'as_object');
}
/**
 * Get the relationships for a feature.
 *
 * The method is static because it is invoked as self::...() from static
 * context and reads no instance state.
 *
 * @param $feature_id
 *   The feature to get relationships for.
 * @param $side
 *   The side of the relationship this feature is on: 'as_subject' or
 *   'as_object'.
 *
 * @return
 *   An array of relationship records ordered by rank. Each record carries
 *   the names, unique names and types of the subject and the object, the
 *   relationship type and rank, and, when a matching row exists in
 *   chado_entity, the properties subject_entity_id and/or object_entity_id.
 *
 * @ingroup tripal_feature
 */
private static function get_relationships($feature_id, $side = 'as_subject') {
  // The same query serves both sides; only the WHERE clause differs.
  $sql = "
    SELECT
      FS.name as subject_name, FS.uniquename as subject_uniquename,
      CVTS.name as subject_type, CVTS.cvterm_id as subject_type_id,
      FR.subject_id, FR.type_id as relationship_type_id, FR.object_id, FR.rank,
      CVT.name as rel_type,
      FO.name as object_name, FO.uniquename as object_uniquename,
      CVTO.name as object_type, CVTO.cvterm_id as object_type_id
    FROM {feature_relationship} FR
      INNER JOIN {cvterm} CVT ON FR.type_id = CVT.cvterm_id
      INNER JOIN {feature} FS ON FS.feature_id = FR.subject_id
      INNER JOIN {feature} FO ON FO.feature_id = FR.object_id
      INNER JOIN {cvterm} CVTO ON FO.type_id = CVTO.cvterm_id
      INNER JOIN {cvterm} CVTS ON FS.type_id = CVTS.cvterm_id
  ";
  if (strcmp($side, 'as_object') == 0) {
    $sql .= " WHERE FR.object_id = :feature_id";
  }
  elseif (strcmp($side, 'as_subject') == 0) {
    $sql .= " WHERE FR.subject_id = :feature_id";
  }
  $sql .= " ORDER BY FR.rank";

  $results = chado_query($sql, array(':feature_id' => $feature_id));

  // Looks up the entity that was published for a feature record, if any.
  $esql = "
    SELECT entity_id
    FROM {chado_entity}
    WHERE data_table = 'feature' AND record_id = :feature_id";

  $relationships = array();
  while ($rel = $results->fetchObject()) {
    // Attach the entity id of each side when one exists.
    $entity = db_query($esql, array(':feature_id' => $rel->subject_id))->fetchObject();
    if ($entity) {
      $rel->subject_entity_id = $entity->entity_id;
    }
    $entity = db_query($esql, array(':feature_id' => $rel->object_id))->fetchObject();
    if ($entity) {
      $rel->object_entity_id = $entity->entity_id;
    }
    $relationships[] = $rel;
  }

  return $relationships;
}
/**
 * Load the locations for a given feature.
 *
 * The method is static because it is invoked as self::...() from static
 * context and reads no instance state.
 *
 * @param $feature_id
 *   The feature to look up locations for.
 * @param $side
 *   Whether the feature is the source feature ('as_parent') or the located
 *   feature ('as_child').
 * @param $aggregate
 *   When true and $side is 'as_parent', the locations of the features
 *   related to $feature_id are appended as well.
 *
 * @return
 *   An array of featureloc records sorted by fmin. Each record carries the
 *   names, ids and types of the located and the source feature, the
 *   location columns, and the properties feid and seid holding the values
 *   returned by tripal_get_chado_entity_id() for the located and the source
 *   feature.
 *
 * @ingroup tripal_feature
 */
private static function get_featurelocs($feature_id, $side = 'as_parent', $aggregate = 1) {
  $sql = "
    SELECT
      F.name, F.feature_id, F.uniquename,
      FS.name as src_name, FS.feature_id as src_feature_id, FS.uniquename as src_uniquename,
      CVT.name as cvname, CVT.cvterm_id,
      CVTS.name as src_cvname, CVTS.cvterm_id as src_cvterm_id,
      FL.fmin, FL.fmax, FL.is_fmin_partial, FL.is_fmax_partial,FL.strand, FL.phase
    FROM {featureloc} FL
      INNER JOIN {feature} F ON FL.feature_id = F.feature_id
      INNER JOIN {feature} FS ON FS.feature_id = FL.srcfeature_id
      INNER JOIN {cvterm} CVT ON F.type_id = CVT.cvterm_id
      INNER JOIN {cvterm} CVTS ON FS.type_id = CVTS.cvterm_id
  ";
  if (strcmp($side, 'as_parent') == 0) {
    $sql .= "WHERE FL.srcfeature_id = :feature_id ";
  }
  elseif (strcmp($side, 'as_child') == 0) {
    $sql .= "WHERE FL.feature_id = :feature_id ";
  }
  $flresults = chado_query($sql, array(':feature_id' => $feature_id));

  // Copy the results into an array, adding the entity ids of both features.
  $featurelocs = array();
  while ($loc = $flresults->fetchObject()) {
    $loc->feid = tripal_get_chado_entity_id('feature', $loc->feature_id);
    $loc->seid = tripal_get_chado_entity_id('feature', $loc->src_feature_id);
    $featurelocs[] = $loc;
  }

  // Add the locations of the related features if aggregate is turned on.
  if ($aggregate and strcmp($side, 'as_parent') == 0) {
    // Use this class's own helpers rather than the functions of the legacy
    // tripal_feature module, which this class otherwise does not rely on.
    $relationships = self::get_aggregate_relationships($feature_id, 0);
    if ($relationships) {
      foreach ($relationships as $rel) {
        $rel_featurelocs = self::get_featurelocs($rel->subject_id, 'as_child', 0);
        foreach ($rel_featurelocs as $rfloc) {
          $featurelocs[] = $rfloc;
        }
      }
    }
  }

  usort($featurelocs, 'chado_feature__residues_sort_locations');
  return $featurelocs;
}
}
/**
 * Element validation callback for the chado_feature__residues_widget.
 *
 * Removes every white-space character from the submitted residues and
 * stores the cleaned value back into the form state. Nothing is changed when
 * the submitted value is empty.
 *
 * @param $element
 *   The form element being validated; the first entry of its '#parents'
 *   is used as the field name.
 * @param $form_state
 *   The form state, passed by reference.
 */
function chado_feature__residues_widget_validate($element, &$form_state) {
  $field_name = $element['#parents'][0];

  $submitted = tripal_chado_get_field_form_values($field_name, $form_state, 0, 'feature__residues');
  if (!$submitted) {
    return;
  }

  // Remove any white spaces.
  $compact = preg_replace('/\s/', '', $submitted);
  tripal_chado_set_field_form_values($field_name, $form_state, $compact, 0, 'feature__residues');
}
/**
 * Comparison callback that orders relationship parts by start position.
 *
 * Each argument maps type names to part records; only the 'start' of the
 * first record of each argument is compared, using natural ordering.
 *
 * @param $a
 *   One part: an array of records keyed by type name.
 * @param $b
 *   The other part, in the same format.
 *
 * @return
 *   The result of strnatcmp() on the two start positions.
 *
 * @ingroup tripal_feature
 */
function chado_feature__residues_sort_rel_parts_by_start($a, $b) {
  // Pick the start of the first record on the left-hand side.
  foreach ($a as $first_record) {
    $astart = $first_record['start'];
    break;
  }
  // And the same for the right-hand side.
  foreach ($b as $first_record) {
    $bstart = $first_record['start'];
    break;
  }
  return strnatcmp($astart, $bstart);
}
/**
 * Comparison callback that orders featureloc records by start position.
 *
 * @param $a
 *   One featureloc record (as an object with an fmin property).
 * @param $b
 *   The other featureloc record (as an object with an fmin property).
 *
 * @return
 *   The result of strnatcmp() on the two fmin values: negative when $a
 *   starts first, positive when $b starts first, zero when they are equal.
 *
 * @ingroup tripal_feature
 */
function chado_feature__residues_sort_locations($a, $b) {
  $left_start = $a->fmin;
  $right_start = $b->fmin;

  // Natural ordering, so that '20' sorts before '100'.
  return strnatcmp($left_start, $right_start);
}