|  | @@ -129,6 +129,7 @@ class GFF3Importer extends TripalImporter {
 | 
	
		
			
				|  |  |     * must be a valid Sequence Ontology (SO) term. Default is NULL
 | 
	
		
			
				|  |  |     */
 | 
	
		
			
				|  |  |    private $target_type = NULL;
 | 
	
		
			
				|  |  | +  private $target_type_id = NULL;
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |    /**
 | 
	
		
			
				|  |  |     * A flag indicating if the target feature should be created. If FALSE
 | 
	
	
		
			
				|  | @@ -221,13 +222,13 @@ class GFF3Importer extends TripalImporter {
 | 
	
		
			
				|  |  |     * Maps parents to their children and contains the ranks of the children.
 | 
	
		
			
				|  |  |     */
 | 
	
		
			
				|  |  |    private $parent_lookup = [];
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  |    /**
 | 
	
		
			
				|  |  |     * An array that stores CVterms that have been looked up so we don't have
 | 
	
		
			
				|  |  |     * to do the database query every time.
 | 
	
		
			
				|  |  |     */
 | 
	
		
			
				|  |  |    private $feature_cvterm_lookup = [];
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  |    /**
 | 
	
		
			
				|  |  |     * An array that stores CVterms that have been looked up so we don't have
 | 
	
		
			
				|  |  |     * to do the database query every time.
 | 
	
	
		
			
				|  | @@ -565,6 +566,20 @@ class GFF3Importer extends TripalImporter {
 | 
	
		
			
				|  |  |        }
 | 
	
		
			
				|  |  |      }
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | +    // If a target type is provided then get the ID.
 | 
	
		
			
				|  |  | +    if ($this->target_type) {
 | 
	
		
			
				|  |  | +      $target_type = new ChadoRecord('cvterm');
 | 
	
		
			
				|  |  | +      $target_type->setValues([
 | 
	
		
			
				|  |  | +        'name' => $this->target_type,
 | 
	
		
			
				|  |  | +        'cv_id' => $this->feature_cv->getID()
 | 
	
		
			
				|  |  | +      ]);
 | 
	
		
			
				|  |  | +      $num_found = $target_type->find();
 | 
	
		
			
				|  |  | +      if ($num_found == 0) {
 | 
	
		
			
				|  |  | +        throw new Exception(t("Cannot find the specified target type, !type.", ['!type' => $this->target_type]));
 | 
	
		
			
				|  |  | +      }
 | 
	
		
			
				|  |  | +      $this->target_type_id = $target_type->getID();
 | 
	
		
			
				|  |  | +    }
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  |      // Create the cache file for storing parsed GFF entries.
 | 
	
		
			
				|  |  |      $this->openCacheFile();
 | 
	
		
			
				|  |  |  
 | 
	
	
		
			
				|  | @@ -632,15 +647,10 @@ class GFF3Importer extends TripalImporter {
 | 
	
		
			
				|  |  |        $this->logMessage("Step 18: Insert 'derives_from' relationships...            ");
 | 
	
		
			
				|  |  |        $this->insertFeatureDerivesFrom();
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | -      $this->logMessage("Step 19: Insert Targets...                                 ");
 | 
	
		
			
				|  |  | -      // TODO: Target (target_organism & target_type)
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | -      $this->logMessage("Step 20: Add any missing proteins...                     ");
 | 
	
		
			
				|  |  | -      // TODO: protein records.
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | -      // TODO: handle is_circular (it may just need to be a property).
 | 
	
		
			
				|  |  | +      $this->logMessage("Step 19: Insert Targets...                               ");
 | 
	
		
			
				|  |  | +      $this->insertFeatureTargets();
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | -      $this->logMessage("Step 21: Associate features with analysis....             ");
 | 
	
		
			
				|  |  | +      $this->logMessage("Step 20: Associate features with analysis....             ");
 | 
	
		
			
				|  |  |        $this->insertFeatureAnalysis();
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |        if (!empty($this->residue_index)) {
 | 
	
	
		
			
				|  | @@ -892,9 +902,9 @@ class GFF3Importer extends TripalImporter {
 | 
	
		
			
				|  |  |        'line' => $this->current_line,
 | 
	
		
			
				|  |  |        'landmark' => $cols[0],
 | 
	
		
			
				|  |  |        'source' => $cols[1],
 | 
	
		
			
				|  |  | -      'type' => $cols[2],
 | 
	
		
			
				|  |  | +      'type' => strtolower($cols[2]),
 | 
	
		
			
				|  |  |        'start' => $cols[3],
 | 
	
		
			
				|  |  | -      'end' => $cols[4],
 | 
	
		
			
				|  |  | +      'stop' => $cols[4],
 | 
	
		
			
				|  |  |        'score' => $cols[5],
 | 
	
		
			
				|  |  |        'strand' => $cols[6],
 | 
	
		
			
				|  |  |        'phase' => $cols[7],
 | 
	
	
		
			
				|  | @@ -905,9 +915,9 @@ class GFF3Importer extends TripalImporter {
 | 
	
		
			
				|  |  |      // to be zero-based, so we substract 1 from the fmin. Also, in case
 | 
	
		
			
				|  |  |      // they are backwards, put them in the right order.
 | 
	
		
			
				|  |  |      $fmin = $ret['start'] - 1;
 | 
	
		
			
				|  |  | -    $fmax = $ret['end'];
 | 
	
		
			
				|  |  | -    if ($ret['end'] < $ret['start']) {
 | 
	
		
			
				|  |  | -      $fmin = $ret['end'] - 1;
 | 
	
		
			
				|  |  | +    $fmax = $ret['stop'];
 | 
	
		
			
				|  |  | +    if ($ret['stop'] < $ret['start']) {
 | 
	
		
			
				|  |  | +      $fmin = $ret['stop'] - 1;
 | 
	
		
			
				|  |  |        $fmax = $ret['start'];
 | 
	
		
			
				|  |  |      }
 | 
	
		
			
				|  |  |      $ret['start'] = $fmin;
 | 
	
	
		
			
				|  | @@ -925,7 +935,7 @@ class GFF3Importer extends TripalImporter {
 | 
	
		
			
				|  |  |        $ret['strand'] = -1;
 | 
	
		
			
				|  |  |      }
 | 
	
		
			
				|  |  |      if (strcmp($ret['phase'], '.') == 0) {
 | 
	
		
			
				|  |  | -      if (strtolower($ret['type']) == 'cds') {
 | 
	
		
			
				|  |  | +      if ($ret['type'] == 'cds') {
 | 
	
		
			
				|  |  |          $ret['phase'] = '0';
 | 
	
		
			
				|  |  |        }
 | 
	
		
			
				|  |  |        else {
 | 
	
	
		
			
				|  | @@ -937,13 +947,14 @@ class GFF3Importer extends TripalImporter {
 | 
	
		
			
				|  |  |      $attr_name = '';
 | 
	
		
			
				|  |  |      $attr_uniquename = '';
 | 
	
		
			
				|  |  |      $attrs = explode(";", $cols[8]);
 | 
	
		
			
				|  |  | -    $attr_organism = [];
 | 
	
		
			
				|  |  | +    $attr_organism = $this->organism_id;
 | 
	
		
			
				|  |  |      $attr_parent = '';
 | 
	
		
			
				|  |  |      $attr_others = [];
 | 
	
		
			
				|  |  |      $attr_aliases = [];
 | 
	
		
			
				|  |  |      $attr_dbxref = [];
 | 
	
		
			
				|  |  |      $attr_derives = [];
 | 
	
		
			
				|  |  |      $attr_terms = [];
 | 
	
		
			
				|  |  | +    $attr_target = [];
 | 
	
		
			
				|  |  |      foreach ($attrs as $attr) {
 | 
	
		
			
				|  |  |        $attr = rtrim($attr);
 | 
	
		
			
				|  |  |        $attr = ltrim($attr);
 | 
	
	
		
			
				|  | @@ -986,18 +997,61 @@ class GFF3Importer extends TripalImporter {
 | 
	
		
			
				|  |  |          $attr_terms = array_merge($attr_terms, $tags[$tag_name]);
 | 
	
		
			
				|  |  |        }
 | 
	
		
			
				|  |  |        elseif (strcmp($tag_name, 'organism') == 0) {
 | 
	
		
			
				|  |  | -        $attr_organism = array_merge($attr_organism, $tags[$tag_name]);
 | 
	
		
			
				|  |  | +        if (count($tags[$tag_name]) > 1) {
 | 
	
		
			
				|  |  | +          throw new Exception(t('Each feature can only have one "organism" attribute. The feature %uniquename has more than one: %organism',
 | 
	
		
			
				|  |  | +            ['%uniquename' => $ret['uniquename'], '%organism' => $ret['organism']]));
 | 
	
		
			
				|  |  | +        }
 | 
	
		
			
				|  |  | +        $attr_organism = $this->findOrganism($tags[$tag_name][0], $this->current_line);
 | 
	
		
			
				|  |  | +      }
 | 
	
		
			
				|  |  | +      elseif (strcmp($tag_name, 'Target') == 0) {
 | 
	
		
			
				|  |  | +        $matches = [];
 | 
	
		
			
				|  |  | +        if (count($tags[$tag_name]) > 1) {
 | 
	
		
			
				|  |  | +          throw new Exception(t('Each feature can only have one "Target" attribute. The feature %uniquename has more than one.',
 | 
	
		
			
				|  |  | +              ['%uniquename' => $ret['uniquename']]));
 | 
	
		
			
				|  |  | +        }
 | 
	
		
			
				|  |  | +        if (preg_match('/^(.*?)\s+(\d+)\s+(\d+)(\s+[\+|\-])*$/', trim($tags[$tag_name][0]), $matches)) {
 | 
	
		
			
				|  |  | +          $attr_target['name'] = $matches[1];
 | 
	
		
			
				|  |  | +          $attr_target['start'] = $matches[2];
 | 
	
		
			
				|  |  | +          $attr_target['stop'] = $matches[3];
 | 
	
		
			
				|  |  | +          $tfmin = $attr_target['start'] - 1;
 | 
	
		
			
				|  |  | +          $tfmax = $attr_target['stop'];
 | 
	
		
			
				|  |  | +          if ($attr_target['stop'] < $attr_target['start']) {
 | 
	
		
			
				|  |  | +            $tfmin = $attr_target['stop'] - 1;
 | 
	
		
			
				|  |  | +            $tfmax = $attr_target['start'];
 | 
	
		
			
				|  |  | +          }
 | 
	
		
			
				|  |  | +          $attr_target['start'] = $tfmin;
 | 
	
		
			
				|  |  | +          $attr_target['stop'] = $tfmax;
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +          $attr_target['phase'] = '';
 | 
	
		
			
				|  |  | +          $attr_target['strand'] = 0;
 | 
	
		
			
				|  |  | +          if (!empty($matches[4])) {
 | 
	
		
			
				|  |  | +            if (preg_match('/^\+$/', trim($matches[4]))) {
 | 
	
		
			
				|  |  | +              $attr_target['strand'] = 1;
 | 
	
		
			
				|  |  | +            }
 | 
	
		
			
				|  |  | +            elseif (preg_match('/^\-$/', trim($matches[4]))) {
 | 
	
		
			
				|  |  | +              $attr_target['strand'] = -1;
 | 
	
		
			
				|  |  | +            }
 | 
	
		
			
				|  |  | +          }
 | 
	
		
			
				|  |  | +          $attr_target['organism_id'] = $this->target_organism_id ? $this->target_organism_id : $this->organism_id;
 | 
	
		
			
				|  |  | +          $attr_target['type_id'] = $this->target_type_id ? $this->target_type_id : NULL;
 | 
	
		
			
				|  |  | +        }
 | 
	
		
			
				|  |  | +      }
 | 
	
		
			
				|  |  | +      elseif (strcmp($tag_name, 'target_organism') == 0) {
 | 
	
		
			
				|  |  | +        $attr_target['organism_id'] = $this->findOrganism($tags[$tag_name][0], $this->current_line);
 | 
	
		
			
				|  |  | +      }
 | 
	
		
			
				|  |  | +      elseif (strcmp($tag_name, 'target_type') == 0) {
 | 
	
		
			
				|  |  | +        $attr_target['type'] = $tags[$tag_name][0];
 | 
	
		
			
				|  |  |        }
 | 
	
		
			
				|  |  |        // Get the list of non-reserved attributes these will get added
 | 
	
		
			
				|  |  | -      // as properties to the featureprop table.  The 'Note' and 'Gap'
 | 
	
		
			
				|  |  | +      // as properties to the featureprop table.  The 'Note', 'Gap', 'Is_Circular',
 | 
	
		
			
				|  |  |        // attributes will go in as a property so those are not in the list
 | 
	
		
			
				|  |  |        // checked below.
 | 
	
		
			
				|  |  |        elseif (strcmp($tag_name, 'Name') !=0 and strcmp($tag_name, 'ID') !=0 and
 | 
	
		
			
				|  |  |                strcmp($tag_name, 'Alias') != 0 and strcmp($tag_name, 'Parent') != 0 and
 | 
	
		
			
				|  |  |                strcmp($tag_name, 'Target') != 0 and strcmp($tag_name, 'Derives_from') != 0 and
 | 
	
		
			
				|  |  |                strcmp($tag_name, 'Dbxref') != 0 and strcmp($tag_name, 'Ontology_term') != 0 and
 | 
	
		
			
				|  |  | -              strcmp($tag_name, 'Is_circular') != 0 and strcmp($tag_name, 'target_organism') != 0 and
 | 
	
		
			
				|  |  | -              strcmp($tag_name, 'target_type') != 0 and strcmp($tag_name, 'organism' != 0)) {
 | 
	
		
			
				|  |  | +              strcmp($tag_name, 'target_organism') != 0 and strcmp($tag_name, 'target_type') != 0 and
 | 
	
		
			
				|  |  | +              strcmp($tag_name, 'organism' != 0)) {
 | 
	
		
			
				|  |  |          foreach ($tags[$tag_name] as $value) {
 | 
	
		
			
				|  |  |            if (!array_key_exists($tag_name, $attr_others)) {
 | 
	
		
			
				|  |  |              $attr_others[$tag_name] = [];
 | 
	
	
		
			
				|  | @@ -1007,6 +1061,9 @@ class GFF3Importer extends TripalImporter {
 | 
	
		
			
				|  |  |        }
 | 
	
		
			
				|  |  |      }
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | +    // A feature may get ignored. But let's default this to FALSE.
 | 
	
		
			
				|  |  | +    $ret['skipped'] = FALSE;
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  |      // If neither name nor uniquename are provided then generate one.
 | 
	
		
			
				|  |  |      $names = $this->getFeatureName($tags, $ret['type'], $ret['landmark'], $fmin, $fmax);
 | 
	
		
			
				|  |  |      $attr_uniquename = $names['uniquename'];
 | 
	
	
		
			
				|  | @@ -1060,19 +1117,20 @@ class GFF3Importer extends TripalImporter {
 | 
	
		
			
				|  |  |        $ret['attrs'][$key] = $value;
 | 
	
		
			
				|  |  |      }
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  |      // Add the organism  entry.
 | 
	
		
			
				|  |  | -    $ret['organism'] = '';
 | 
	
		
			
				|  |  | -    if (count($attr_organism) == 1) {
 | 
	
		
			
				|  |  | -      $ret['organism'] = $attr_organism[0];
 | 
	
		
			
				|  |  | +    $ret['organism'] = $attr_organism;
 | 
	
		
			
				|  |  | +    if (!$ret['organism']) {
 | 
	
		
			
				|  |  | +      $ret['skipped'] = TRUE;
 | 
	
		
			
				|  |  |      }
 | 
	
		
			
				|  |  | -    if (count($attr_organism) > 1) {
 | 
	
		
			
				|  |  | -      throw new Exception(t('Each feature can only have one "organism" attribute. The feature %uniquename has more than one: %organism',
 | 
	
		
			
				|  |  | -        [
 | 
	
		
			
				|  |  | -          '%uniquename' => $ret['uniquename'],
 | 
	
		
			
				|  |  | -          '%organism' => $ret['organism'],
 | 
	
		
			
				|  |  | -        ]));
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    // Add the target. If the type_id is missing then remove it and we'll
 | 
	
		
			
				|  |  | +    // skip it.
 | 
	
		
			
				|  |  | +    $ret['target'] = $attr_target;
 | 
	
		
			
				|  |  | +    if (!$ret['target']['type']) {
 | 
	
		
			
				|  |  | +      $ret['target'] = [];
 | 
	
		
			
				|  |  |      }
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    // Add the properties and parent.
 | 
	
		
			
				|  |  |      $ret['properties'] = $attr_others;
 | 
	
		
			
				|  |  |      $ret['parent'] = $attr_parent;
 | 
	
		
			
				|  |  |  
 | 
	
	
		
			
				|  | @@ -1297,19 +1355,6 @@ class GFF3Importer extends TripalImporter {
 | 
	
		
			
				|  |  |        // Parse this feature from this line of the GFF3 file.
 | 
	
		
			
				|  |  |        $gff_feature = $this->parseFeature($line);
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | -      // A feature may get ignored. But let's default this to FALSE.
 | 
	
		
			
				|  |  | -      $gff_feature['skipped'] = FALSE;
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | -      // Lookup the organism ID if one is requested.
 | 
	
		
			
				|  |  | -      if ($gff_feature['organism']) {
 | 
	
		
			
				|  |  | -        $organism_id = $this->findOrganism($gff_feature['organism'], $line_num);
 | 
	
		
			
				|  |  | -        if ($organism_id) {
 | 
	
		
			
				|  |  | -          $gff_feature['organism'] = $organism_id;
 | 
	
		
			
				|  |  | -        }
 | 
	
		
			
				|  |  | -        elsE {
 | 
	
		
			
				|  |  | -          $gff_feature['skipped'] = TRUE;
 | 
	
		
			
				|  |  | -        }
 | 
	
		
			
				|  |  | -      }
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |        // Add the landmark if it doesn't exist in the landmark list.
 | 
	
		
			
				|  |  |        if (!array_key_exists($gff_feature['landmark'], $this->landmarks)) {
 | 
	
	
		
			
				|  | @@ -1347,6 +1392,13 @@ class GFF3Importer extends TripalImporter {
 | 
	
		
			
				|  |  |        }
 | 
	
		
			
				|  |  |        $feature_cvterms[$gff_feature['type']]++;
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | +      // Add any target feature types to the list as well.
 | 
	
		
			
				|  |  | +      if (array_key_exists('name', $gff_feature['target'])) {
 | 
	
		
			
				|  |  | +        if (!array_key_exists($gff_feature['target']['type'], $feature_cvterms)) {
 | 
	
		
			
				|  |  | +          $feature_cvterms[$gff_feature['target']['type']] = 0;
 | 
	
		
			
				|  |  | +        }
 | 
	
		
			
				|  |  | +        $feature_cvterms[$gff_feature['target']['type']]++;
 | 
	
		
			
				|  |  | +      }
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |        // Organize the feature property types for faster access later on.
 | 
	
		
			
				|  |  |        foreach ($gff_feature['properties'] as $prop_name => $value) {
 | 
	
	
		
			
				|  | @@ -1360,6 +1412,18 @@ class GFF3Importer extends TripalImporter {
 | 
	
		
			
				|  |  |        if ($gff_feature['uniquename'] != $gff_feature['landmark']) {
 | 
	
		
			
				|  |  |          $this->cacheFeature($gff_feature);
 | 
	
		
			
				|  |  |        }
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +      // If this feature has a target then we need to add the target as
 | 
	
		
			
				|  |  | +      // new feature for insertion.
 | 
	
		
			
				|  |  | +      if (array_key_exists('name', $gff_feature['target'])) {
 | 
	
		
			
				|  |  | +        $this->addTargetFeature($gff_feature);
 | 
	
		
			
				|  |  | +      }
 | 
	
		
			
				|  |  | +    }
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    // Make sure we have the protein term in our list.
 | 
	
		
			
				|  |  | +    if (!array_key_exists('protein', $feature_cvterms) and
 | 
	
		
			
				|  |  | +        !array_key_exists('polypeptide', $feature_cvterms)) {
 | 
	
		
			
				|  |  | +      $feature_cvterms['polypeptide'] = 0;
 | 
	
		
			
				|  |  |      }
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |      // Iterate through the feature type terms and get a chado object for each.
 | 
	
	
		
			
				|  | @@ -1372,6 +1436,158 @@ class GFF3Importer extends TripalImporter {
 | 
	
		
			
				|  |  |      foreach (array_keys($featureprop_cvterms) as $name) {
 | 
	
		
			
				|  |  |        $this->getTypeID($name, TRUE);
 | 
	
		
			
				|  |  |      }
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    // Finally, add any protein features that need to be created.
 | 
	
		
			
				|  |  | +    $this->addProteinFeatures();
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +  }
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +  /**
 | 
	
		
			
				|  |  | +   * Checks the features and finds those that need proteins added.
 | 
	
		
			
				|  |  | +   */
 | 
	
		
			
				|  |  | +  private function addProteinFeatures() {
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    // Don't do anything if the user wants to skip creation of non listed
 | 
	
		
			
				|  |  | +    // proteins. Proteins that have actual lines in the GFF will still be
 | 
	
		
			
				|  |  | +    // created.
 | 
	
		
			
				|  |  | +    if ($this->skip_protein) {
 | 
	
		
			
				|  |  | +      $this->logMessage('  Skipping creation of non-specified proteins...');
 | 
	
		
			
				|  |  | +      return;
 | 
	
		
			
				|  |  | +    }
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    $proteins = [];
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    // First, store records for which proteins need to exist. These
 | 
	
		
			
				|  |  | +    // will be for any parent that has a 'CDS' or 'protein' child.
 | 
	
		
			
				|  |  | +    foreach ($this->features as $info) {
 | 
	
		
			
				|  |  | +      $findex = $info['findex'];
 | 
	
		
			
				|  |  | +      $feature = $this->getCachedFeature($findex);
 | 
	
		
			
				|  |  | +      $type = $feature['type'];
 | 
	
		
			
				|  |  | +      if ($type == 'cds' or $type == 'protein' or $type == 'polypeptide') {
 | 
	
		
			
				|  |  | +        $parent_name = $feature['parent'];
 | 
	
		
			
				|  |  | +        if ($parent_name) {
 | 
	
		
			
				|  |  | +          if (!array_key_exists($parent_name, $proteins)) {
 | 
	
		
			
				|  |  | +            $proteins[$parent_name] = [];
 | 
	
		
			
				|  |  | +          }
 | 
	
		
			
				|  |  | +          if ($type == 'cds') {
 | 
	
		
			
				|  |  | +            $proteins[$parent_name]['cds'][] = $findex;
 | 
	
		
			
				|  |  | +          }
 | 
	
		
			
				|  |  | +          if ($type == 'protein' or $type == 'polypeptide') {
 | 
	
		
			
				|  |  | +            $proteins[$parent_name]['protein'] = $findex;
 | 
	
		
			
				|  |  | +          }
 | 
	
		
			
				|  |  | +        }
 | 
	
		
			
				|  |  | +      }
 | 
	
		
			
				|  |  | +    }
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    // Second, iterate through the protein list and for any parents that
 | 
	
		
			
				|  |  | +    // don't already have a protein we need to create one.
 | 
	
		
			
				|  |  | +    foreach ($proteins as $parent_name => $info) {
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +      // Skip addition of any proteins that are already in the GFF file.
 | 
	
		
			
				|  |  | +      if (array_key_exists('protein', $info)) {
 | 
	
		
			
				|  |  | +        continue;
 | 
	
		
			
				|  |  | +      }
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +      // If we don't have a protein
 | 
	
		
			
				|  |  | +      if (array_key_exists('cds', $info)) {
 | 
	
		
			
				|  |  | +        $start = INF;
 | 
	
		
			
				|  |  | +        $stop = -INF;
 | 
	
		
			
				|  |  | +        $start_phase = 0;
 | 
	
		
			
				|  |  | +        $stop_phase = 0;
 | 
	
		
			
				|  |  | +        // Find the starting and end CDS.
 | 
	
		
			
				|  |  | +        foreach ($info['cds'] as $findex) {
 | 
	
		
			
				|  |  | +          $cds = $this->getCachedFeature($findex);
 | 
	
		
			
				|  |  | +          if ($cds['start'] < $start) {
 | 
	
		
			
				|  |  | +            $start = $cds['start'];
 | 
	
		
			
				|  |  | +            $start_phase = $cds['phase'];
 | 
	
		
			
				|  |  | +          }
 | 
	
		
			
				|  |  | +          if ($cds['stop'] > $stop) {
 | 
	
		
			
				|  |  | +            $stop = $cds['stop'];
 | 
	
		
			
				|  |  | +            $stop_phase =  $cds['phase'];
 | 
	
		
			
				|  |  | +          }
 | 
	
		
			
				|  |  | +        }
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +        // Set the start of the protein to be the start of the coding
 | 
	
		
			
				|  |  | +        // sequence minus the phase.
 | 
	
		
			
				|  |  | +        if ($cds['strand'] == '-1') {
 | 
	
		
			
				|  |  | +          $stop -= $stop_phase;
 | 
	
		
			
				|  |  | +        }
 | 
	
		
			
				|  |  | +        else {
 | 
	
		
			
				|  |  | +          $start += $start_phase;
 | 
	
		
			
				|  |  | +        }
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +        // Get the name for the protein
 | 
	
		
			
				|  |  | +        $name = $parent_name;
 | 
	
		
			
				|  |  | +        if ($this->re_mrna and $this->re_protein) {
 | 
	
		
			
				|  |  | +          // We use a regex to generate protein name from parent name
 | 
	
		
			
				|  |  | +          $uname = preg_replace("/$this->re_mrna/", $this->re_protein, $parent_name);
 | 
	
		
			
				|  |  | +        }
 | 
	
		
			
				|  |  | +        else {
 | 
	
		
			
				|  |  | +          // No regex, use the default '-protein' suffix
 | 
	
		
			
				|  |  | +          $uname = $parent_name . '-protein';
 | 
	
		
			
				|  |  | +        }
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +        // Now create the protein feature.
 | 
	
		
			
				|  |  | +        $feature = [
 | 
	
		
			
				|  |  | +          'line' => $cds['line'],
 | 
	
		
			
				|  |  | +          'landmark' => $cds['landmark'],
 | 
	
		
			
				|  |  | +          'source' => $cds['source'],
 | 
	
		
			
				|  |  | +          'type' => 'polypeptide',
 | 
	
		
			
				|  |  | +          'start' => $start,
 | 
	
		
			
				|  |  | +          'stop' => $stop,
 | 
	
		
			
				|  |  | +          'strand' => $cds['strand'],
 | 
	
		
			
				|  |  | +          'phase' => '',
 | 
	
		
			
				|  |  | +          'attr' => [],
 | 
	
		
			
				|  |  | +          'skipped' => FALSE,
 | 
	
		
			
				|  |  | +          'name' => $name,
 | 
	
		
			
				|  |  | +          'uniquename' => $uname,
 | 
	
		
			
				|  |  | +          'synonyms' => [],
 | 
	
		
			
				|  |  | +          'dbxrefs' => [],
 | 
	
		
			
				|  |  | +          'terms' => [],
 | 
	
		
			
				|  |  | +          'derives_from' => NULL,
 | 
	
		
			
				|  |  | +          'organism' => $cds['organism_id'],
 | 
	
		
			
				|  |  | +          'target' => [],
 | 
	
		
			
				|  |  | +          'properties' => [],
 | 
	
		
			
				|  |  | +          'parent' => $cds['parent'],
 | 
	
		
			
				|  |  | +        ];
 | 
	
		
			
				|  |  | +        $this->cacheFeature($feature);
 | 
	
		
			
				|  |  | +      }
 | 
	
		
			
				|  |  | +    }
 | 
	
		
			
				|  |  | +  }
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +  /**
 | 
	
		
			
				|  |  | +   * Adds a new target feature to the feature list.
 | 
	
		
			
				|  |  | +   *
 | 
	
		
			
				|  |  | +   * @param $gff_feature
 | 
	
		
			
				|  |  | +   *   The feature array created by the parseFeature function.
 | 
	
		
			
				|  |  | +   */
 | 
	
		
			
				|  |  | +  private function addTargetFeature($gff_feature) {
 | 
	
		
			
				|  |  | +    if (!array_key_exists($gff_feature['target']['name'], $this->features)) {
 | 
	
		
			
				|  |  | +      $feature = [
 | 
	
		
			
				|  |  | +        'is_target' => TRUE,
 | 
	
		
			
				|  |  | +        'line' => $this->current_line,
 | 
	
		
			
				|  |  | +        'landmark' => $gff_feature['landmark'],
 | 
	
		
			
				|  |  | +        'source' => $gff_feature['source'],
 | 
	
		
			
				|  |  | +        'type' => $gff_feature['target']['type'],
 | 
	
		
			
				|  |  | +        'start' => $gff_feature['target']['start'],
 | 
	
		
			
				|  |  | +        'stop' => $gff_feature['target']['stop'],
 | 
	
		
			
				|  |  | +        'strand' => $gff_feature['target']['strand'],
 | 
	
		
			
				|  |  | +        'phase' => $gff_feature['target']['phase'],
 | 
	
		
			
				|  |  | +        'attr' => [],
 | 
	
		
			
				|  |  | +        'skipped' => FALSE,
 | 
	
		
			
				|  |  | +        'name' => $gff_feature['target']['name'],
 | 
	
		
			
				|  |  | +        'uniquename' => $gff_feature['target']['name'],
 | 
	
		
			
				|  |  | +        'synonyms' => [],
 | 
	
		
			
				|  |  | +        'dbxrefs' => [],
 | 
	
		
			
				|  |  | +        'terms' => [],
 | 
	
		
			
				|  |  | +        'derives_from' => NULL,
 | 
	
		
			
				|  |  | +        'organism' => $gff_feature['target']['organism_id'],
 | 
	
		
			
				|  |  | +        'target' => [],
 | 
	
		
			
				|  |  | +        'properties' => [],
 | 
	
		
			
				|  |  | +        'parent' => '',
 | 
	
		
			
				|  |  | +      ];
 | 
	
		
			
				|  |  | +      $this->cacheFeature($feature);
 | 
	
		
			
				|  |  | +    }
 | 
	
		
			
				|  |  |    }
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |    /**
 | 
	
	
		
			
				|  | @@ -1471,7 +1687,7 @@ class GFF3Importer extends TripalImporter {
 | 
	
		
			
				|  |  |        // Only do an insert if this feature doesn't already exist in the databse.
 | 
	
		
			
				|  |  |        if (!$feature_id and !$feature['skipped']) {
 | 
	
		
			
				|  |  |          $residues = $this->getResidues($feature, FALSE);
 | 
	
		
			
				|  |  | -        $type_id = $this->feature_cvterm_lookup[strtolower($feature['type'])];
 | 
	
		
			
				|  |  | +        $type_id = $this->feature_cvterm_lookup[$feature['type']];
 | 
	
		
			
				|  |  |          $sql .= "(:uniquename_$i, :name_$i, :type_id_$i, :organism_id_$i, :residues_$i, " .
 | 
	
		
			
				|  |  |                 " :md5checksum_$i, :seqlen_$i, FALSE, FALSE),\n";
 | 
	
		
			
				|  |  |          $args[":uniquename_$i"] = $uniquename;
 | 
	
	
		
			
				|  | @@ -1534,7 +1750,7 @@ class GFF3Importer extends TripalImporter {
 | 
	
		
			
				|  |  |            while ($f = $results->fetchObject()) {
 | 
	
		
			
				|  |  |              $matched_findex = $this->features[$f->uniquename]['findex'];
 | 
	
		
			
				|  |  |              $matched_feature = $this->getCachedFeature($matched_findex);
 | 
	
		
			
				|  |  | -            $matched_type_id = $this->feature_cvterm_lookup[strtolower($matched_feature['type'])];
 | 
	
		
			
				|  |  | +            $matched_type_id = $this->feature_cvterm_lookup[$matched_feature['type']];
 | 
	
		
			
				|  |  |              $matched_organism_id = $matched_feature['organism'] ? $matched_feature['organism'] : $this->organism->getID();
 | 
	
		
			
				|  |  |              if ($matched_type_id == $f->type_id and $matched_organism_id == $f->organism_id) {
 | 
	
		
			
				|  |  |                $this->features[$f->uniquename]['feature_id'] = $f->feature_id;
 | 
	
	
		
			
				|  | @@ -1677,8 +1893,9 @@ class GFF3Importer extends TripalImporter {
 | 
	
		
			
				|  |  |      $this->setItemsHandled(0);
 | 
	
		
			
				|  |  |      $this->setTotalItems($num_batches);
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | -    // Get the 'part_of' cvterm
 | 
	
		
			
				|  |  | -    $type_id = $this->getTypeID('part_of', FALSE);
 | 
	
		
			
				|  |  | +    // Get the 'part_of' and 'derives_from cvterm.
 | 
	
		
			
				|  |  | +    $part_of = $this->getTypeID('part_of', FALSE);
 | 
	
		
			
				|  |  | +    $derives_from = $this->getTypeID('derives_from', FALSE);
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |      $init_sql = "INSERT INTO {feature_relationship} (subject_id, object_id, type_id, rank) VALUES\n";
 | 
	
		
			
				|  |  |      $i = 0;
 | 
	
	
		
			
				|  | @@ -1702,6 +1919,10 @@ class GFF3Importer extends TripalImporter {
 | 
	
		
			
				|  |  |            $child_feature = $this->getCachedFeature($child_findex);
 | 
	
		
			
				|  |  |            $child_uniquename = $child_feature['uniquename'];
 | 
	
		
			
				|  |  |            $child_feature_id = $this->features[$child_uniquename]['feature_id'];
 | 
	
		
			
				|  |  | +          $type_id = $part_of;
 | 
	
		
			
				|  |  | +          if ($child_feature['type'] == 'polypeptide' or $child_feature['type'] == 'protein') {
 | 
	
		
			
				|  |  | +            $type_id = $derives_from;
 | 
	
		
			
				|  |  | +          }
 | 
	
		
			
				|  |  |            $sql .= "(:subject_id_$j, :object_id_$j, :type_id_$j, :rank_$j),\n";
 | 
	
		
			
				|  |  |            $args[":subject_id_$j"] = $child_feature_id;
 | 
	
		
			
				|  |  |            $args[":object_id_$j"] = $parent_feature_id;
 | 
	
	
		
			
				|  | @@ -2017,6 +2238,76 @@ class GFF3Importer extends TripalImporter {
 | 
	
		
			
				|  |  |      }
 | 
	
		
			
				|  |  |    }
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | +  /**
 | 
	
		
			
				|  |  | +   *
 | 
	
		
			
				|  |  | +   */
 | 
	
		
			
				|  |  | +  private function insertFeatureTargets() {
 | 
	
		
			
				|  |  | +    $batch_size = 1000;
 | 
	
		
			
				|  |  | +    $num_features = count(array_keys($this->features));
 | 
	
		
			
				|  |  | +    $num_batches = (int) ($num_features / $batch_size) + 1;
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    $this->setItemsHandled(0);
 | 
	
		
			
				|  |  | +    $this->setTotalItems($num_batches);
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    $init_sql = "
 | 
	
		
			
				|  |  | +      INSERT INTO {featureloc}
 | 
	
		
			
				|  |  | +        (srcfeature_id, feature_id, fmin, fmax, strand, phase, rank)
 | 
	
		
			
				|  |  | +      VALUES\n";
 | 
	
		
			
				|  |  | +    $i = 0;
 | 
	
		
			
				|  |  | +    $total = 0;
 | 
	
		
			
				|  |  | +    $batch_num = 1;
 | 
	
		
			
				|  |  | +    $sql = '';
 | 
	
		
			
				|  |  | +    $args = [];
 | 
	
		
			
				|  |  | +    foreach ($this->features as $info) {
 | 
	
		
			
				|  |  | +      $findex = $info['findex'];
 | 
	
		
			
				|  |  | +      $feature_id = $info['feature_id'];
 | 
	
		
			
				|  |  | +      $feature = $this->getCachedFeature($findex);
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +      $total++;
 | 
	
		
			
				|  |  | +      $i++;
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +      // If the feature is not skipped and has a target then insert the
 | 
	
		
			
				|  |  | +      // target alignment.
 | 
	
		
			
				|  |  | +      if (!$feature['skipped'] and array_key_exists('name', $feature['target'])) {
 | 
	
		
			
				|  |  | +        $tname = $feature['target']['name'];
 | 
	
		
			
				|  |  | +        $tfindex = $this->features[$tname]['findex'];
 | 
	
		
			
				|  |  | +        $tfeature_id = $this->features[$tname]['feature_id'];
 | 
	
		
			
				|  |  | +        $target = $this->getCachedFeature($tfindex);
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +        // According to the Chado instructions for rank, the feature aligned
 | 
	
		
			
				|  |  | +        // to the landmark will have a rank of 0.  The feature aligned to the
 | 
	
		
			
				|  |  | +        // target match will have a rank of 1.
 | 
	
		
			
				|  |  | +        $rank = 1;
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +        $sql .= "(:srcfeature_id_$i, :feature_id_$i, :fmin_$i, :fmax_$i," .
 | 
	
		
			
				|  |  | +          " :strand_$i, :phase_$i, :rank_$i),\n";
 | 
	
		
			
				|  |  | +        $args[":srcfeature_id_$i"] = $tfeature_id;
 | 
	
		
			
				|  |  | +        $args[":feature_id_$i"] = $feature_id;
 | 
	
		
			
				|  |  | +        $args[":fmin_$i"] = $target['start'];
 | 
	
		
			
				|  |  | +        $args[":fmax_$i"] = $target['stop'];
 | 
	
		
			
				|  |  | +        $args[":strand_$i"] = $target['strand'];
 | 
	
		
			
				|  |  | +        $args[":phase_$i"] = $target['phase'] ? $target['phase'] : NULL;
 | 
	
		
			
				|  |  | +        $args[":rank_$i"] = $rank;
 | 
	
		
			
				|  |  | +      }
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +      // If we've reached the size of the batch then let's do the insert.
 | 
	
		
			
				|  |  | +      if ($i == $batch_size or $total == $num_features) {
 | 
	
		
			
				|  |  | +        if (count($args) > 0) {
 | 
	
		
			
				|  |  | +          $sql = rtrim($sql, ",\n");
 | 
	
		
			
				|  |  | +          $sql = $init_sql . $sql;
 | 
	
		
			
				|  |  | +          chado_query($sql, $args);
 | 
	
		
			
				|  |  | +        }
 | 
	
		
			
				|  |  | +        $this->setItemsHandled($batch_num);
 | 
	
		
			
				|  |  | +        $batch_num++;
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +        // Now reset all of the varables for the next batch.
 | 
	
		
			
				|  |  | +        $sql = '';
 | 
	
		
			
				|  |  | +        $i = 0;
 | 
	
		
			
				|  |  | +        $args = [];
 | 
	
		
			
				|  |  | +      }
 | 
	
		
			
				|  |  | +    }
 | 
	
		
			
				|  |  | +  }
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  |    /**
 | 
	
		
			
				|  |  |     *
 | 
	
		
			
				|  |  |     */
 | 
	
	
		
			
				|  | @@ -2100,10 +2391,10 @@ class GFF3Importer extends TripalImporter {
 | 
	
		
			
				|  |  |        $feature = $this->getCachedFeature($findex);
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |        $total++;
 | 
	
		
			
				|  |  | +      $i++;
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | -      // If the feature is not skipped
 | 
	
		
			
				|  |  | -      if (!$feature['skipped']) {
 | 
	
		
			
				|  |  | -        $i++;
 | 
	
		
			
				|  |  | +      // If the feature is not skipped and is not a match "target".
 | 
	
		
			
				|  |  | +      if (!$feature['skipped'] and $feature['is_target'] == FALSE) {
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |          // Get the rank of this feature by iterating through all siblings of the
 | 
	
		
			
				|  |  |          // parent and finding where this feature is in terms of start position.
 | 
	
	
		
			
				|  | @@ -2423,10 +2714,11 @@ class GFF3Importer extends TripalImporter {
 | 
	
		
			
				|  |  |        $name = $attrs['Name'][0];
 | 
	
		
			
				|  |  |      }
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | -    // Does this uniquename already exist? This can happen for subfeatures
 | 
	
		
			
				|  |  | -    // (e.g. CDS features) that have multiple components but are really
 | 
	
		
			
				|  |  | -    // all the same thing.
 | 
	
		
			
				|  |  | +    // Does this uniquename already exist?
 | 
	
		
			
				|  |  |      if (array_key_exists($uniquename, $this->features)) {
 | 
	
		
			
				|  |  | +      $prev_feature = $this->getCachedFeature($this->features[$uniquename]['findex']);
 | 
	
		
			
				|  |  | +      // A name can be duplicated for subfeatures (e.g. CDS features)
 | 
	
		
			
				|  |  | +      // that have the same parent but are really all the same thing.
 | 
	
		
			
				|  |  |        if (array_key_exists('Parent', $attrs)) {
 | 
	
		
			
				|  |  |          // Iterate through the list of similar IDs and see how many we have
 | 
	
		
			
				|  |  |          // then add a numeric suffix.
 | 
	
	
		
			
				|  | @@ -2436,6 +2728,13 @@ class GFF3Importer extends TripalImporter {
 | 
	
		
			
				|  |  |          }
 | 
	
		
			
				|  |  |          $uniquename = $uniquename . "_" . $i;
 | 
	
		
			
				|  |  |        }
 | 
	
		
			
				|  |  | +      // A name can be duplicated if there is a target match alignment and
 | 
	
		
			
				|  |  | +      // the feature appears first in the GFF as a target before it appears
 | 
	
		
			
				|  |  | +      // on its own independent line of the gff file.
 | 
	
		
			
				|  |  | +      elseif ($prev_feature['is_target'] == TRUE) {
 | 
	
		
			
				|  |  | +        // Do nothing, the previous feature is a target so we'll overwrite
 | 
	
		
			
				|  |  | +        // it with this record.
 | 
	
		
			
				|  |  | +      }
 | 
	
		
			
				|  |  |        else {
 | 
	
		
			
				|  |  |          throw new Exception(t("A feature with the same ID exists multiple times: !uname", ['!uname' => $uniquename]));
 | 
	
		
			
				|  |  |        }
 | 
	
	
		
			
				|  | @@ -2605,149 +2904,4 @@ class GFF3Importer extends TripalImporter {
 | 
	
		
			
				|  |  |        }
 | 
	
		
			
				|  |  |      }
 | 
	
		
			
				|  |  |    }
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | -  /**
 | 
	
		
			
				|  |  | -   * Load the target attribute of a gff3 record
 | 
	
		
			
				|  |  | -   *
 | 
	
		
			
				|  |  | -   * @param $feature
 | 
	
		
			
				|  |  | -   * @param $tags
 | 
	
		
			
				|  |  | -   * @param $target_organism_id
 | 
	
		
			
				|  |  | -   * @param $target_type
 | 
	
		
			
				|  |  | -   * @param $create_target
 | 
	
		
			
				|  |  | -   * @param $attr_locgroup
 | 
	
		
			
				|  |  | -   *
 | 
	
		
			
				|  |  | -   * @ingroup gff3_loader
 | 
	
		
			
				|  |  | -   */
 | 
	
		
			
				|  |  | -  private function loadTarget($feature, $tags, $target_organism_id, $target_type, $create_target, $attr_locgroup) {
 | 
	
		
			
				|  |  | -    // format is: "target_id start end [strand]", where strand is optional and may be "+" or "-"
 | 
	
		
			
				|  |  | -    $matched = preg_match('/^(.*?)\s+(\d+)\s+(\d+)(\s+[\+|\-])*$/', trim($tags['Target'][0]), $matches);
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | -    // the organism and type of the target may also be specified as an attribute. If so, then get that
 | 
	
		
			
				|  |  | -    // information
 | 
	
		
			
				|  |  | -    $gff_target_organism = array_key_exists('target_organism', $tags) ? $tags['target_organism'][0] : '';
 | 
	
		
			
				|  |  | -    $gff_target_type = array_key_exists('target_type', $tags) ? $tags['target_type'][0] : '';
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | -    // if we have matches and the Target is in the correct format then load the alignment
 | 
	
		
			
				|  |  | -    if ($matched) {
 | 
	
		
			
				|  |  | -      $target_feature = $matches[1];
 | 
	
		
			
				|  |  | -      $start = $matches[2];
 | 
	
		
			
				|  |  | -      $end = $matches[3];
 | 
	
		
			
				|  |  | -      // if we have an optional strand, convert it to a numeric value.
 | 
	
		
			
				|  |  | -      if (!empty($matches[4])) {
 | 
	
		
			
				|  |  | -        if (preg_match('/^\+$/', trim($matches[4]))) {
 | 
	
		
			
				|  |  | -          $target_strand = 1;
 | 
	
		
			
				|  |  | -        }
 | 
	
		
			
				|  |  | -        elseif (preg_match('/^\-$/', trim($matches[4]))) {
 | 
	
		
			
				|  |  | -          $target_strand = -1;
 | 
	
		
			
				|  |  | -        }
 | 
	
		
			
				|  |  | -        else {
 | 
	
		
			
				|  |  | -          $target_strand = 0;
 | 
	
		
			
				|  |  | -        }
 | 
	
		
			
				|  |  | -      }
 | 
	
		
			
				|  |  | -      else {
 | 
	
		
			
				|  |  | -        $target_strand = 0;
 | 
	
		
			
				|  |  | -      }
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | -      $target_fmin = $start - 1;
 | 
	
		
			
				|  |  | -      $target_fmax = $end;
 | 
	
		
			
				|  |  | -      if ($end < $start) {
 | 
	
		
			
				|  |  | -        $target_fmin = $end - 1;
 | 
	
		
			
				|  |  | -        $target_fmax = $start;
 | 
	
		
			
				|  |  | -      }
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | -      // default the target organism to be the value passed into the function, but if the GFF
 | 
	
		
			
				|  |  | -      // file species the target organism then use that instead.
 | 
	
		
			
				|  |  | -      $t_organism_id = $target_organism_id;
 | 
	
		
			
				|  |  | -      if ($gff_target_organism) {
 | 
	
		
			
				|  |  | -        // get the genus and species
 | 
	
		
			
				|  |  | -        $success = preg_match('/^(.*?):(.*?)$/', $gff_target_organism, $matches);
 | 
	
		
			
				|  |  | -        if ($success) {
 | 
	
		
			
				|  |  | -          $values = [
 | 
	
		
			
				|  |  | -            'genus' => $matches[1],
 | 
	
		
			
				|  |  | -            'species' => $matches[2],
 | 
	
		
			
				|  |  | -          ];
 | 
	
		
			
				|  |  | -          $torganism = chado_select_record('organism', ['organism_id'], $values);
 | 
	
		
			
				|  |  | -          if (count($torganism) == 1) {
 | 
	
		
			
				|  |  | -            $t_organism_id = $torganism[0]->organism_id;
 | 
	
		
			
				|  |  | -          }
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | -          else {
 | 
	
		
			
				|  |  | -            $this->logMessage("Cannot find organism for target %target.",
 | 
	
		
			
				|  |  | -              ['%target' => $gff_target_organism], TRIPAL_WARNING);
 | 
	
		
			
				|  |  | -            $t_organism_id = '';
 | 
	
		
			
				|  |  | -          }
 | 
	
		
			
				|  |  | -        }
 | 
	
		
			
				|  |  | -        else {
 | 
	
		
			
				|  |  | -          $this->logMessage("The target_organism attribute is improperly formatted: %target. " .
 | 
	
		
			
				|  |  | -            "It should be target_organism=genus:species.",
 | 
	
		
			
				|  |  | -            ['%target' => $gff_target_organism], TRIPAL_WARNING);
 | 
	
		
			
				|  |  | -          $t_organism_id = '';
 | 
	
		
			
				|  |  | -        }
 | 
	
		
			
				|  |  | -      }
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | -      // default the target type to be the value passed into the function, but if the GFF file
 | 
	
		
			
				|  |  | -      // species the target type then use that instead
 | 
	
		
			
				|  |  | -      $t_type_id = '';
 | 
	
		
			
				|  |  | -      if ($target_type) {
 | 
	
		
			
				|  |  | -        $values = [
 | 
	
		
			
				|  |  | -          'name' => $target_type,
 | 
	
		
			
				|  |  | -          'cv_id' => [
 | 
	
		
			
				|  |  | -            'name' => 'sequence',
 | 
	
		
			
				|  |  | -          ],
 | 
	
		
			
				|  |  | -        ];
 | 
	
		
			
				|  |  | -        $type = chado_select_record('cvterm', ['cvterm_id'], $values);
 | 
	
		
			
				|  |  | -        if (count($type) == 1) {
 | 
	
		
			
				|  |  | -          $t_type_id = $type[0]->cvterm_id;
 | 
	
		
			
				|  |  | -        }
 | 
	
		
			
				|  |  | -        else {
 | 
	
		
			
				|  |  | -          throw new Exception(t("The target type does not exist in the sequence ontology: %type. ",
 | 
	
		
			
				|  |  | -            ['%type' => $target_type]));
 | 
	
		
			
				|  |  | -        }
 | 
	
		
			
				|  |  | -      }
 | 
	
		
			
				|  |  | -      if ($gff_target_type) {
 | 
	
		
			
				|  |  | -        $values = [
 | 
	
		
			
				|  |  | -          'name' => $gff_target_type,
 | 
	
		
			
				|  |  | -          'cv_id' => [
 | 
	
		
			
				|  |  | -            'name' => 'sequence',
 | 
	
		
			
				|  |  | -          ],
 | 
	
		
			
				|  |  | -        ];
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | -        // get the cvterm_id for the target type
 | 
	
		
			
				|  |  | -        $type = chado_select_record('cvterm', ['cvterm_id'], $values);
 | 
	
		
			
				|  |  | -        if (count($type) == 1) {
 | 
	
		
			
				|  |  | -          $t_type_id = $type[0]->cvterm_id;
 | 
	
		
			
				|  |  | -        }
 | 
	
		
			
				|  |  | -        else {
 | 
	
		
			
				|  |  | -          // check to see if this is a synonym
 | 
	
		
			
				|  |  | -          $sql = "
 | 
	
		
			
				|  |  | -            SELECT CVTS.cvterm_id
 | 
	
		
			
				|  |  | -            FROM {cvtermsynonym} CVTS
 | 
	
		
			
				|  |  | -              INNER JOIN {cvterm} CVT ON CVT.cvterm_id = CVTS.cvterm_id
 | 
	
		
			
				|  |  | -              INNER JOIN {cv} CV      ON CV.cv_id = CVT.cv_id
 | 
	
		
			
				|  |  | -            WHERE CV.name = 'sequence' and CVTS.synonym = :synonym
 | 
	
		
			
				|  |  | -            ";
 | 
	
		
			
				|  |  | -          $synonym = chado_query($sql, [':synonym' => $gff_target_type])->fetchObject();
 | 
	
		
			
				|  |  | -          if ($synonym) {
 | 
	
		
			
				|  |  | -            $t_type_id = $synonym->cvterm_id;
 | 
	
		
			
				|  |  | -          }
 | 
	
		
			
				|  |  | -          else {
 | 
	
		
			
				|  |  | -            $this->logMessage("The target_type attribute does not exist in the sequence ontology: %type.",
 | 
	
		
			
				|  |  | -              ['%type' => $gff_target_type], TRIPAL_WARNING);
 | 
	
		
			
				|  |  | -            $t_type_id = '';
 | 
	
		
			
				|  |  | -          }
 | 
	
		
			
				|  |  | -        }
 | 
	
		
			
				|  |  | -      }
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | -      // we want to add a featureloc record that uses the target feature as the srcfeature (landmark)
 | 
	
		
			
				|  |  | -      // and the landmark as the feature.
 | 
	
		
			
				|  |  | -      $this->loadFeatureLoc($feature, NULL, $target_feature, $target_fmin,
 | 
	
		
			
				|  |  | -        $target_fmax, $target_strand, NULL, NULL, NULL, NULL,
 | 
	
		
			
				|  |  | -        $attr_locgroup, $t_type_id, $t_organism_id, $create_target, TRUE);
 | 
	
		
			
				|  |  | -    }
 | 
	
		
			
				|  |  | -    // the target attribute is not correctly formatted
 | 
	
		
			
				|  |  | -    else {
 | 
	
		
			
				|  |  | -      $this->logMessage("Could not add 'Target' alignment as it is improperly formatted:  '%target'",
 | 
	
		
			
				|  |  | -        ['%target' => $tags['Target'][0]], TRIPAL_ERROR);
 | 
	
		
			
				|  |  | -    }
 | 
	
		
			
				|  |  | -  }
 | 
	
		
			
				|  |  |  }
 |