123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617 |
- <?php
/**
 * Tripal field that exposes the sequences associated with a Chado feature.
 *
 * load() collects, for one feature record, the raw residues, the residues of
 * polypeptides derived from it, the concatenated residues of its CDS parts and
 * the sequences taken from the features it is aligned to. formatterView()
 * renders each collected item as a FASTA-like block.
 */
class chado_feature__residues extends TripalField {

  // The default label for this field.
  public static $default_label = 'Sequences';

  // The default description for this field.
  public static $default_description = 'A field for managing nucleotide and protein residues.';

  // Add any default settings elements. If you override the globalSettingsForm()
  // or the instanceSettingsForm() functions then you need to be sure that
  // any settings you want those functions to manage are listed in this
  // array.
  public static $default_settings = array(
    'chado_table' => '',
    'chado_column' => '',
    'base_table' => '',
    'semantic_web' => '',
  );

  // Set this to the name of the storage backend that by default will support
  // this field.
  public static $default_storage = 'field_chado_storage';

  /**
   * Renders every item that carries residues as a labelled FASTA-like block.
   *
   * Items whose 'value' is not an array, or has no 'residues' key, are
   * skipped. All text placed in the markup is HTML-escaped.
   *
   * @see TripalField::formatterView()
   */
  public function formatterView(&$element, $entity_type, $entity, $langcode, $items, $display) {
    // Delta 0 always exists, even when no item below produces any markup.
    $element[0] = array(
      '#type' => 'markup',
      '#markup' => '',
    );

    // Number of residues printed per line.
    $num_bases = 50;

    foreach ($items as $delta => $item) {
      // If there are no residues then skip this one.
      if (!isset($item['value']) || !is_array($item['value']) || !array_key_exists('residues', $item['value'])) {
        continue;
      }
      $value = $item['value'];
      $label = isset($value['label']) ? $value['label'] : '';
      $defline = isset($value['defline']) ? $value['defline'] : '';

      // The '>' marker is added below, so drop one that is already part of
      // the stored defline (load() stores one for over-sized sequences).
      $defline = ltrim($defline, '>');

      // Wrap first and escape each line afterwards so that an escaped entity
      // is never split by a line break.
      $lines = array();
      foreach (explode("\n", wordwrap($value['residues'], $num_bases, "\n", TRUE)) as $line) {
        $lines[] = htmlspecialchars($line, ENT_QUOTES, 'UTF-8');
      }

      $content = '<p>' . htmlspecialchars($label, ENT_QUOTES, 'UTF-8') . '</p>';
      $content .= '<pre class="residues-formatter">';
      $content .= '&gt;' . htmlspecialchars($defline, ENT_QUOTES, 'UTF-8') . '<br>';
      $content .= implode('<br>', $lines);
      $content .= '</pre>';

      $element[$delta] = array(
        '#type' => 'markup',
        '#markup' => $content,
      );
    }
  }

  /**
   * Adds a textarea for editing the residues to the widget.
   *
   * The textarea default is the 'chado-feature__residues' value of the first
   * item; the item's 'value' is carried along unchanged in a 'value' element.
   *
   * @see TripalField::widgetForm()
   */
  public function widgetForm(&$widget, &$form, &$form_state, $langcode, $items, $delta, $element) {
    parent::widgetForm($widget, $form, $form_state, $langcode, $items, $delta, $element);

    // Get the field defaults.
    $residues = '';
    if (isset($items[0]) && array_key_exists('chado-feature__residues', $items[0])) {
      $residues = $items[0]['chado-feature__residues'];
    }

    $widget['value'] = array(
      '#type' => 'value',
      '#value' => array_key_exists($delta, $items) ? $items[$delta]['value'] : '',
    );
    $widget['chado-feature__residues'] = array(
      '#type' => 'textarea',
      '#title' => $element['#title'],
      '#description' => $element['#description'],
      '#weight' => isset($element['#weight']) ? $element['#weight'] : 0,
      '#default_value' => $residues,
      '#delta' => $delta,
      '#cols' => 30,
    );
  }

  /**
   * Strips all white space from the submitted residues.
   *
   * The cleaned value is written back into $form_state so that it is the
   * value seen by later processing of the submitted form.
   *
   * @see TripalField::widgetFormSubmit()
   */
  public function widgetFormSubmit($form, &$form_state, $entity_type, $entity, $langcode, $delta) {
    $field_name = $this->field['field_name'];

    // Nothing was submitted for this delta.
    if (!isset($form_state['values'][$field_name][$langcode][$delta]['chado-feature__residues'])) {
      return;
    }

    // Remove any white spaces.
    $residues = $form_state['values'][$field_name][$langcode][$delta]['chado-feature__residues'];
    if ($residues) {
      $form_state['values'][$field_name][$langcode][$delta]['chado-feature__residues'] = preg_replace('/\s/', '', $residues);
    }
  }

  /**
   * Populates the field on the entity with the sequences of the feature.
   *
   * Delta 0 always holds the feature's own residues (or a placeholder); the
   * following deltas hold, in this order, the polypeptide sequences, the
   * concatenated CDS sequence and the sequences taken from alignments.
   *
   * @param $entity
   *   The entity to which the field values are added.
   * @param $details
   *   An array whose 'record' element is the Chado feature record.
   *
   * @see TripalField::load()
   */
  public function load($entity, $details = array()) {
    $field_name = $this->field['field_name'];
    $feature = $details['record'];
    $num_seqs = 0;

    // We don't want to get the sequence for traditionally large types. They are
    // too big, bog down the web browser, take longer to load and it's not
    // reasonable to print them on a page.
    $large_types = array('scaffold', 'chromosome', 'supercontig', 'pseudomolecule');
    if (in_array($feature->type_id->name, $large_types, TRUE)) {
      $entity->{$field_name}['und'][$num_seqs]['value'] = array(
        '@type' => 'SO:0000110',
        'type' => 'sequence_feature',
        'label' => 'Residues',
        'defline' => ">This sequence is too large for this display.",
        'residues' => '',
      );
      $entity->{$field_name}['und'][$num_seqs]['chado-feature__residues'] = '';
    }
    else {
      $feature = chado_expand_var($feature, 'field', 'feature.residues');
      if ($feature->residues) {
        $entity->{$field_name}['und'][$num_seqs]['value'] = array(
          '@type' => 'SO:0000110',
          'type' => 'sequence_feature',
          'label' => 'Raw Sequence',
          'defline' => tripal_get_fasta_defline($feature, '', NULL, '', strlen($feature->residues)),
          'residues' => $feature->residues,
        );
        $entity->{$field_name}['und'][$num_seqs]['chado-feature__residues'] = $feature->residues;
      }
      else {
        $entity->{$field_name}['und'][$num_seqs]['value'] = array();
        $entity->{$field_name}['und'][$num_seqs]['chado-feature__residues'] = '';
      }
    }
    $num_seqs++;

    // Add in the protein sequences. It's faster to provide the SQL rather than
    // to use chado_generate_var based on the type.
    $sql = "
      SELECT F.*
      FROM {feature_relationship} FR
        INNER JOIN {feature} F on FR.subject_id = F.feature_id
        INNER JOIN {cvterm} CVT on CVT.cvterm_id = F.type_id
        INNER JOIN {cvterm} RCVT on RCVT.cvterm_id = FR.type_id
      WHERE
        FR.object_id = :feature_id and
        CVT.name = 'polypeptide' and
        RCVT.name = 'derives_from'
      ORDER BY FR.rank ASC
    ";
    $results = chado_query($sql, array(':feature_id' => $feature->feature_id));
    while ($protein = $results->fetchObject()) {
      if ($protein->residues) {
        $entity->{$field_name}['und'][$num_seqs++]['value'] = array(
          '@type' => 'SO:0000104',
          'type' => 'polypeptide',
          'label' => 'Protein Sequence',
          'defline' => tripal_get_fasta_defline($protein, '', NULL, '', strlen($protein->residues)),
          'residues' => $protein->residues,
        );
      }
    }

    // Add in sequences from alignments.
    $options = array(
      'return_array' => 1,
      'include_fk' => array(
        'srcfeature_id' => array(
          'type_id' => 1
        ),
        'feature_id' => array(
          'type_id' => 1
        ),
      ),
    );
    $feature = chado_expand_var($feature, 'table', 'featureloc', $options);

    // A feature that is not aligned anywhere has no featureloc records.
    $featurelocs = array();
    if (isset($feature->featureloc->feature_id)) {
      $featurelocs = $feature->featureloc->feature_id;
    }
    $featureloc_sequences = $this->get_featureloc_sequences($feature->feature_id, $featurelocs);

    // Add in the coding sequences. It's faster to provide the SQL rather than
    // to use chado_generate_var based on the type.
    $sql = "
      SELECT F.*
      FROM {feature_relationship} FR
        INNER JOIN {feature} F on FR.subject_id = F.feature_id
        INNER JOIN {cvterm} CVT on CVT.cvterm_id = F.type_id
        INNER JOIN {cvterm} RCVT on RCVT.cvterm_id = FR.type_id
        INNER JOIN {featureloc} FL on FL.feature_id = F.feature_id
      WHERE
        FR.object_id = :feature_id and
        CVT.name = 'CDS' and
        RCVT.name = 'part_of'
      ORDER BY FR.rank ASC
    ";
    $results = chado_query($sql, array(':feature_id' => $feature->feature_id));
    $coding_seq = '';
    while ($CDS = $results->fetchObject()) {
      if ($CDS->residues) {
        $coding_seq .= $CDS->residues;
      }
    }
    if ($coding_seq) {
      $entity->{$field_name}['und'][$num_seqs++]['value'] = array(
        '@type' => 'SO:0000316',
        'type' => 'coding_sequence',
        'label' => 'Coding sequence (CDS)',
        'defline' => tripal_get_fasta_defline($feature, 'CDS', NULL, '', strlen($coding_seq)),
        'residues' => $coding_seq,
      );
    }

    foreach ($featureloc_sequences as $src => $attrs) {
      // the $attrs array has the following keys
      //   * id:  a unique identifier combining the source feature id with its
      //       type cvterm id
      //   * type: the type of sequence (e.g. mRNA, etc)
      //   * location:  the alignment location
      //   * defline: the definition line
      //   * featureloc: the featureloc record of the alignment
      //   * residues: the sequence taken from the source feature
      // NOTE(review): the type passed to the defline here is 'CDS' although
      // the item is stored as 'sequence_feature' - confirm this is intended.
      $entity->{$field_name}['und'][$num_seqs++]['value'] = array(
        'residues' => $attrs['residues'],
        '@type' => 'SO:0000110',
        'type' => 'sequence_feature',
        'defline' => tripal_get_fasta_defline($feature, '', $attrs['featureloc'], 'CDS', strlen($attrs['residues'])),
        'label' => 'Sequence from alignment at ' . $attrs['location'],
      );

      // check to see if this alignment has any CDS. If so, generate a CDS sequence
      $cds_sequence = tripal_get_feature_sequences(
        array(
          'feature_id' => $feature->feature_id,
          'parent_id' => $attrs['featureloc']->srcfeature_id->feature_id,
          'name' => $feature->name,
          'featureloc_id' => $attrs['featureloc']->featureloc_id,
        ),
        array(
          // CDS are in parent-child relationships so we want to use the
          // sequence from the parent.
          'derive_from_parent' => 1,
          // We want to combine all CDS for this feature into a single sequence.
          'aggregate' => 1,
          // We're looking for CDS features.
          'sub_feature_types' => array('CDS'),
          'is_html' => 0
        )
      );

      // the tripal_get_feature_sequences() function can return multiple sequences
      // if a feature is aligned to multiple places. In the case of CDSs we expect
      // that one mRNA is only aligned to a single location on the assembly so we
      // can access the CDS sequence with index 0.
      if (!empty($cds_sequence[0]['residues'])) {
        $entity->{$field_name}['und'][$num_seqs++]['value'] = array(
          'residues' => $cds_sequence[0]['residues'],
          '@type' => 'SO:0000316',
          'type' => 'coding_sequence',
          'defline' => tripal_get_fasta_defline($feature, '', $attrs['featureloc'], 'CDS', $cds_sequence[0]['length']),
          'label' => 'Coding sequence (CDS) from alignment at ' . $attrs['location'],
        );
      }
    }
  }

  /**
   * Gets the sequence of the feature at each location it is aligned to.
   *
   * A location is only reported when at least one feature related to
   * $feature_id (as the object of a relationship) is located on the same
   * source feature.
   *
   * @param $feature_id
   *   The id of the feature whose relationships are inspected.
   * @param $featurelocs
   *   The featureloc records of the feature, with the srcfeature_id and
   *   feature_id foreign keys (and their type_id) expanded.
   *
   * @return
   *   An array keyed by "<source feature id>-<source type cvterm id>". Each
   *   element is an array with the keys 'id', 'type', 'location', 'defline',
   *   'featureloc' and 'residues'. Empty when there is nothing to report.
   */
  private function get_featureloc_sequences($feature_id, $featurelocs) {
    // if we don't have any featurelocs then no point in continuing
    if (!$featurelocs) {
      return array();
    }

    // get the list of relationships (including any aggregators)
    $relationships = $this->get_aggregate_relationships($feature_id);
    if (!$relationships) {
      return array();
    }

    // Remember every source feature on which one of the related features is
    // located. The positions of those parts were previously oriented and
    // sorted for colour-coding the sequence, but nothing consumed the result,
    // so only the presence of parts is tracked here.
    $srcs_with_parts = array();
    foreach ($relationships as $rel) {
      foreach ($this->get_featurelocs($rel->subject_id, 'as_child', 0) as $rel_featureloc) {
        $src = $rel_featureloc->src_feature_id . "-" . $rel_featureloc->src_cvterm_id;
        $srcs_with_parts[$src] = TRUE;
      }
    }

    // the featurelocs array provided to the function contains the locations
    // where this feature is found. We want to get the sequence for each
    // location.
    $floc_sequences = array();
    foreach ($featurelocs as $featureloc) {
      // build the src name so we can match it against the related parts
      $src = $featureloc->srcfeature_id->feature_id . "-" . $featureloc->srcfeature_id->type_id->cvterm_id;
      if (empty($srcs_with_parts[$src])) {
        continue;
      }

      // The $start and $size values are written directly into the SQL
      // statement because the db_query function places quotes around all
      // placeholders (e.g. :start & :size) and screws up the substring
      // function. They are forced to integers so nothing else can end up in
      // the statement. SQL substring() positions start at 1, hence the + 1.
      $start = (int) $featureloc->fmin + 1;
      $size = (int) $featureloc->fmax - (int) $featureloc->fmin;
      $sql = "
        SELECT substring(residues from $start for $size) as residues
        FROM {feature}
        WHERE feature_id = :feature_id
      ";
      $args = array(':feature_id' => $featureloc->srcfeature_id->feature_id);
      $sequence = chado_query($sql, $args)->fetchObject();
      $residues = $sequence ? $sequence->residues : '';
      if ($featureloc->strand < 0) {
        $residues = tripal_reverse_compliment_sequence($residues);
      }

      $floc_sequences[$src]['id'] = $src;
      $floc_sequences[$src]['type'] = $featureloc->feature_id->type_id->name;
      $floc_sequences[$src]['location'] = tripal_get_location_string($featureloc);
      $floc_sequences[$src]['defline'] = tripal_get_fasta_defline($featureloc->feature_id, '', $featureloc, '', strlen($residues));
      $floc_sequences[$src]['featureloc'] = $featureloc;
      $floc_sequences[$src]['residues'] = $residues;
    }
    return $floc_sequences;
  }

  /**
   * Get the features related to the current feature.
   *
   * Despite its signature this method does not recurse: it returns the
   * relationships in which the feature is the object.
   *
   * @param $feature_id
   *   The feature to get relationships for.
   * @param $substitute
   *   Unused.
   * @param $levels
   *   When greater than 0 and equal to $depth, NULL is returned instead of
   *   the relationships.
   * @param $base_type_id
   *   Unused.
   * @param $depth
   *   Compared with $levels, see above.
   *
   * @return
   *   An array of relationship records, or NULL (see $levels).
   *
   * @ingroup tripal_feature
   */
  private function get_aggregate_relationships($feature_id, $substitute=1,
      $levels=0, $base_type_id=NULL, $depth=0) {

    // we only want to go as many levels deep as indicated by the $levels
    // variable, but only if this variable is > 0.
    if ($levels > 0 && $levels == $depth) {
      return NULL;
    }

    // get the relationships for this feature
    return $this->get_relationships($feature_id, 'as_object');
  }

  /**
   * Get the relationships for a feature.
   *
   * @param $feature_id
   *   The feature to get relationships for
   * @param $side
   *   The side of the relationship this feature is (ie: 'as_subject' or
   *   'as_object'). Any other value yields an empty result.
   *
   * @return
   *   An array of relationship records ordered by rank. Where a matching
   *   chado_entity row exists, subject_entity_id and object_entity_id are
   *   added to the record.
   *
   * @ingroup tripal_feature
   */
  private function get_relationships($feature_id, $side = 'as_subject') {
    // The query below is used for both querying the object and subject
    // relationships; only the WHERE clause differs.
    if (strcmp($side, 'as_object') == 0) {
      $where = " WHERE FR.object_id = :feature_id";
    }
    elseif (strcmp($side, 'as_subject') == 0) {
      $where = " WHERE FR.subject_id = :feature_id";
    }
    else {
      // Without a WHERE clause the :feature_id argument would have no
      // placeholder to bind to.
      return array();
    }

    $sql = "
      SELECT
        FS.name as subject_name, FS.uniquename as subject_uniquename,
        CVTS.name as subject_type, CVTS.cvterm_id as subject_type_id,
        FR.subject_id, FR.type_id as relationship_type_id, FR.object_id, FR.rank,
        CVT.name as rel_type,
        FO.name as object_name, FO.uniquename as object_uniquename,
        CVTO.name as object_type, CVTO.cvterm_id as object_type_id
      FROM {feature_relationship} FR
        INNER JOIN {cvterm} CVT ON FR.type_id = CVT.cvterm_id
        INNER JOIN {feature} FS ON FS.feature_id = FR.subject_id
        INNER JOIN {feature} FO ON FO.feature_id = FR.object_id
        INNER JOIN {cvterm} CVTO ON FO.type_id = CVTO.cvterm_id
        INNER JOIN {cvterm} CVTS ON FS.type_id = CVTS.cvterm_id
    ";
    $sql .= $where;
    $sql .= " ORDER BY FR.rank";

    // get the relationships
    $results = chado_query($sql, array(':feature_id' => $feature_id));

    // iterate through the relationships, put these in an array and add
    // in the Drupal entity id if one exists
    $esql = "
      SELECT entity_id
      FROM {chado_entity}
      WHERE data_table = 'feature' AND record_id = :feature_id";
    $relationships = array();
    while ($rel = $results->fetchObject()) {
      $entity = db_query($esql, array(':feature_id' => $rel->subject_id))->fetchObject();
      if ($entity) {
        $rel->subject_entity_id = $entity->entity_id;
      }
      $entity = db_query($esql, array(':feature_id' => $rel->object_id))->fetchObject();
      if ($entity) {
        $rel->object_entity_id = $entity->entity_id;
      }
      $relationships[] = $rel;
    }
    return $relationships;
  }

  /**
   * Load the locations for a given feature
   *
   * @param $feature_id
   *   The feature to look up locations for
   * @param $side
   *   Whether the feature is the srcfeature, 'as_parent', or feature,
   *   'as_child'. Any other value yields an empty result.
   * @param $aggregate
   *   Whether or not to get the locations for related features. Only applies
   *   when $side is 'as_parent'.
   *
   * @return
   *   An array of location records sorted with the
   *   chado_feature__residues_sort_locations() callback. Each record also
   *   carries 'feid' and 'seid', the values returned by
   *   tripal_get_chado_entity_id() for the feature and its source feature.
   *
   * @ingroup tripal_feature
   */
  private function get_featurelocs($feature_id, $side = 'as_parent', $aggregate = 1) {
    if (strcmp($side, 'as_parent') == 0) {
      $where = "WHERE FL.srcfeature_id = :feature_id ";
    }
    elseif (strcmp($side, 'as_child') == 0) {
      $where = "WHERE FL.feature_id = :feature_id ";
    }
    else {
      // Without a WHERE clause the :feature_id argument would have no
      // placeholder to bind to.
      return array();
    }

    $sql = "
      SELECT
        F.name, F.feature_id, F.uniquename,
        FS.name as src_name, FS.feature_id as src_feature_id, FS.uniquename as src_uniquename,
        CVT.name as cvname, CVT.cvterm_id,
        CVTS.name as src_cvname, CVTS.cvterm_id as src_cvterm_id,
        FL.fmin, FL.fmax, FL.is_fmin_partial, FL.is_fmax_partial,FL.strand, FL.phase
      FROM {featureloc} FL
        INNER JOIN {feature} F ON FL.feature_id = F.feature_id
        INNER JOIN {feature} FS ON FS.feature_id = FL.srcfeature_id
        INNER JOIN {cvterm} CVT ON F.type_id = CVT.cvterm_id
        INNER JOIN {cvterm} CVTS ON FS.type_id = CVTS.cvterm_id
    ";
    $sql .= $where;
    $flresults = chado_query($sql, array(':feature_id' => $feature_id));

    // copy the results into an array
    $featurelocs = array();
    while ($loc = $flresults->fetchObject()) {
      // add the ids of the Drupal entities for the feature and for its
      // source feature to the results object
      $loc->feid = tripal_get_chado_entity_id('feature', $loc->feature_id);
      $loc->seid = tripal_get_chado_entity_id('feature', $loc->src_feature_id);
      // add the result to the array
      $featurelocs[] = $loc;
    }

    // Add the relationship feature locs if aggregate is turned on
    if ($aggregate && strcmp($side, 'as_parent') == 0) {
      // get the relationships for this feature without substituting any children
      // for the parent. We want all relationships
      $relationships = $this->get_aggregate_relationships($feature_id, 0);
      if ($relationships) {
        foreach ($relationships as $rel) {
          // get the featurelocs for each of the relationship features
          foreach ($this->get_featurelocs($rel->subject_id, 'as_child', 0) as $rfloc) {
            $featurelocs[] = $rfloc;
          }
        }
      }
    }

    usort($featurelocs, 'chado_feature__residues_sort_locations');
    return $featurelocs;
  }
}
/**
 * Callback function for validating the chado_feature__residues_widget.
 *
 * The body is empty, so no validation is performed and $form_state is left
 * untouched.
 *
 * @param $element
 *   The form element passed to the callback (unused).
 * @param $form_state
 *   The form state, passed by reference (unused).
 */
function chado_feature__residues_widget_validate($element, &$form_state) {
}
/**
 * Used to sort the list of relationship parts by start position
 *
 * Comparison callback for usort(). Only the first entry of each argument is
 * looked at. Numeric start positions are compared as numbers so that negative
 * positions sort correctly; anything else falls back to natural string order.
 *
 * @param $a
 *   An array keyed by type name; each entry is an array with a 'start' key.
 * @param $b
 *   The other array, with the same structure as $a.
 *
 * @return
 *   A value less than, equal to or greater than 0 when the start of $a is
 *   before, equal to or after the start of $b.
 *
 * @ingroup tripal_feature
 */
function chado_feature__residues_sort_rel_parts_by_start($a, $b) {
  // reset() returns FALSE for an empty array, in which case the start
  // defaults to 0 instead of being left undefined.
  $first_a = is_array($a) ? reset($a) : FALSE;
  $first_b = is_array($b) ? reset($b) : FALSE;
  $astart = isset($first_a['start']) ? $first_a['start'] : 0;
  $bstart = isset($first_b['start']) ? $first_b['start'] : 0;

  if (is_numeric($astart) && is_numeric($bstart)) {
    if ($astart == $bstart) {
      return 0;
    }
    return ($astart < $bstart) ? -1 : 1;
  }
  return strnatcmp($astart, $bstart);
}
/**
 * Comparison callback that orders featureloc records by their fmin value.
 *
 * The two fmin values are compared with strnatcmp(), i.e. in natural string
 * order.
 *
 * @param $a
 *   One featureloc record (as an object)
 * @param $b
 *   The other featureloc record (as an object)
 *
 * @return
 *   The strnatcmp() result for the fmin of $a against the fmin of $b: below
 *   0 when $a comes first, above 0 when $b comes first, 0 when they tie.
 *
 * @ingroup tripal_feature
 */
function chado_feature__residues_sort_locations($a, $b) {
  $left_min = $a->fmin;
  $right_min = $b->fmin;
  $order = strnatcmp($left_min, $right_min);
  return $order;
}
|