|
@@ -64,6 +64,18 @@ class data__sequence extends ChadoField {
|
|
|
'searchable' => FALSE,
|
|
|
'type' => 'xs:string',
|
|
|
'readonly' => FALSE,
|
|
|
+ 'elements' => [
|
|
|
+ 'data:2044' => [
|
|
|
+ 'name' => 'sequence',
|
|
|
+ 'searchable' => FALSE,
|
|
|
+ ],
|
|
|
+ // @todo add this cvterm to the db.
|
|
|
+ // https://www.ebi.ac.uk/ols/ontologies/ncit/terms?iri=http%3A%2F%2Fpurl.obolibrary.org%2Fobo%2FNCIT_C47845
|
|
|
+ 'NCIT:C47845' => [
|
|
|
+ 'name' => 'FASTA Format',
|
|
|
+ 'searchable' => FALSE,
|
|
|
+ ],
|
|
|
+ ],
|
|
|
],
|
|
|
];
|
|
|
}
|
|
@@ -73,107 +85,28 @@ class data__sequence extends ChadoField {
|
|
|
*/
|
|
|
public function load($entity) {
|
|
|
$field_name = $this->field['field_name'];
|
|
|
- $feature = $entity->chado_record;
|
|
|
+ $feature = (array) $entity->chado_record;
|
|
|
|
|
|
- $feature = chado_expand_var($feature, 'field', 'feature.residues');
|
|
|
- $entity->{$field_name}['und'][0]['value'] = $feature->residues;
|
|
|
+ // Retrieve all the sequence for the given feature.
|
|
|
+ $sequences = chado_get_feature_sequences($feature, []);
|
|
|
+ dpm($sequences, 'seq');
|
|
|
+ // If there are no direct residues then try to derive from the parent.
|
|
|
+ if (empty($sequences) OR (sizeof($sequences) == 1 AND empty($sequences[0]['residues']))) {
|
|
|
+ $sequences = chado_get_feature_sequences($feature, ['derive_from_parent' => 1]);
|
|
|
+ }
|
|
|
|
|
|
- /* // Add in sequences from alignments.
|
|
|
- $options = array(
|
|
|
- 'return_array' => 1,
|
|
|
- 'include_fk' => array(
|
|
|
- 'srcfeature_id' => array(
|
|
|
- 'type_id' => 1
|
|
|
- ),
|
|
|
- 'feature_id' => array(
|
|
|
- 'type_id' => 1
|
|
|
- ),
|
|
|
- ),
|
|
|
- );
|
|
|
- $feature = chado_expand_var($feature, 'table', 'featureloc', $options);
|
|
|
- $featureloc_sequences = $this->get_featureloc_sequences($feature->feature_id, $feature->featureloc->feature_id);
|
|
|
-
|
|
|
- // Add in the coding sequences. It's faster to provide the SQL rather than
|
|
|
- // to use chado_generate_var based on the type.
|
|
|
- $sql = "
|
|
|
- SELECT F.*
|
|
|
- FROM {feature_relationship} FR
|
|
|
- INNER JOIN {feature} F on FR.subject_id = F.feature_id
|
|
|
- INNER JOIN {cvterm} CVT on CVT.cvterm_id = F.type_id
|
|
|
- INNER JOIN {cvterm} RCVT on RCVT.cvterm_id = FR.type_id
|
|
|
- INNER JOIN {featureloc} FL on FL.feature_id = F.feature_id
|
|
|
- WHERE
|
|
|
- FR.object_id = :feature_id and
|
|
|
- CVT.name = 'CDS' and
|
|
|
- RCVT.name = 'part_of'
|
|
|
- ORDER BY FR.rank ASC
|
|
|
- ";
|
|
|
- $results = chado_query($sql, array(':feature_id' => $feature->feature_id));
|
|
|
- $coding_seq = '';
|
|
|
- while ($CDS = $results->fetchObject()) {
|
|
|
- if ($CDS->residues) {
|
|
|
- $coding_seq .= $CDS->residues;
|
|
|
- }
|
|
|
- }
|
|
|
- if ($coding_seq) {
|
|
|
- $entity->{$field_name}['und'][$num_seqs++]['value'] = array(
|
|
|
- '@type' => 'SO:0000316',
|
|
|
- 'type' => 'coding_sequence',
|
|
|
- 'label' => 'Coding sequence (CDS)',
|
|
|
- 'defline' => chado_get_fasta_defline($feature, 'CDS', NULL, '', strlen($coding_seq)),
|
|
|
- 'residues' => $coding_seq,
|
|
|
- );
|
|
|
- }
|
|
|
-
|
|
|
- foreach($featureloc_sequences as $src => $attrs){
|
|
|
- // the $attrs array has the following keys
|
|
|
- // * id: a unique identifier combining the feature id with the cvterm id
|
|
|
- // * type: the type of sequence (e.g. mRNA, etc)
|
|
|
- // * location: the alignment location
|
|
|
- // * defline: the definition line
|
|
|
- // * formatted_seq: the formatted sequences
|
|
|
- // * featureloc: the feature object aligned to
|
|
|
- $entity->{$field_name}['und'][$num_seqs++]['value'] = array(
|
|
|
- 'residues' => $attrs['residues'],
|
|
|
- '@type' => 'SO:0000110',
|
|
|
- 'type' => 'sequence_feature',
|
|
|
- 'defline' => chado_get_fasta_defline($feature, '', $attrs['featureloc'], 'CDS', strlen($attrs['residues'])),
|
|
|
- 'label' => 'Sequence from alignment at ' . $attrs['location'],
|
|
|
- );
|
|
|
-
|
|
|
-
|
|
|
- // check to see if this alignment has any CDS. If so, generate a CDS sequence
|
|
|
- $cds_sequence = chado_get_feature_sequences(
|
|
|
- array(
|
|
|
- 'feature_id' => $feature->feature_id,
|
|
|
- 'parent_id' => $attrs['featureloc']->srcfeature_id->feature_id,
|
|
|
- 'name' => $feature->name,
|
|
|
- 'featureloc_id' => $attrs['featureloc']->featureloc_id,
|
|
|
- ),
|
|
|
- array(
|
|
|
- 'derive_from_parent' => 1, // CDS are in parent-child relationships so we want to use the sequence from the parent
|
|
|
- 'aggregate' => 1, // we want to combine all CDS for this feature into a single sequence
|
|
|
- 'sub_feature_types' => array('CDS'), // we're looking for CDS features
|
|
|
- 'is_html' => 0
|
|
|
- )
|
|
|
- );
|
|
|
-
|
|
|
- if (count($cds_sequence) > 0) {
|
|
|
- // the chado_get_feature_sequences() function can return multiple sequences
|
|
|
- // if a feature is aligned to multiple places. In the case of CDSs we expect
|
|
|
- // that one mRNA is only aligned to a single location on the assembly so we
|
|
|
- // can access the CDS sequence with index 0.
|
|
|
- if ($cds_sequence[0]['residues']) {
|
|
|
- $entity->{$field_name}['und'][$num_seqs++]['value'] = array(
|
|
|
- 'residues' => $cds_sequence[0]['residues'],
|
|
|
- '@type' => 'SO:0000316',
|
|
|
- 'type' => 'coding_sequence',
|
|
|
- 'defline' => chado_get_fasta_defline($feature, '', $attrs['featureloc'], 'CDS', $cds_sequence[0]['length']),
|
|
|
- 'label' => 'Coding sequence (CDS) from alignment at ' . $attrs['location'],
|
|
|
- );
|
|
|
- }
|
|
|
- }
|
|
|
- } */
|
|
|
+ // Add each sequence to the field as a fasta record.
|
|
|
+ // NOTE: We keep the residues separate in case the formatter wants to
|
|
|
+ // determine it's own fasta record defline.
|
|
|
+ // @debug dpm($sequences, 'sequences');
|
|
|
+ foreach ($sequences as $k => $seq) {
|
|
|
+ if (!empty($seq['residues'])) {
|
|
|
+ $entity->{$field_name}['und'][$k]['value'] = [
|
|
|
+ 'NCIT:C47845' => '>' . $seq['defline'] . "\n" . $seq['residues'],
|
|
|
+ 'data:2044' => $seq['residues'],
|
|
|
+ ];
|
|
|
+ }
|
|
|
+ }
|
|
|
}
|
|
|
}
|
|
|
|