|  | @@ -903,7 +903,7 @@ class GFF3Importer extends TripalImporter {
 | 
	
		
			
				|  |  |     *  An associative array containing the 9 elements of the GFF3 file. The
 | 
	
		
			
				|  |  |     *  9th element is an associative array of the attributes.
 | 
	
		
			
				|  |  |     */
 | 
	
		
			
				|  |  | -  private function parseFeature($line) {
 | 
	
		
			
				|  |  | +  private function parseGFF3Line($line) {
 | 
	
		
			
				|  |  |      $date = getdate();
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |      // get the columns
 | 
	
	
		
			
				|  | @@ -937,12 +937,12 @@ class GFF3Importer extends TripalImporter {
 | 
	
		
			
				|  |  |      $ret['start'] = $fmin;
 | 
	
		
			
				|  |  |      $ret['stop'] = $fmax;
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | -    // Landmark (seqid) validation checks based on GFF3 specifications 
 | 
	
		
			
				|  |  | +    // Landmark (seqid) validation checks based on GFF3 specifications
 | 
	
		
			
				|  |  |      preg_match('/[a-zA-Z0-9\.:\^\*\$@!\+_\?-\|]*/', $ret['landmark'], $matches);
 | 
	
		
			
				|  |  |      if ($matches[0] != $ret['landmark']) {
 | 
	
		
			
				|  |  | -      throw new Exception(t("Landmark/seqid !landmark contains invalid 
 | 
	
		
			
				|  |  | -        characters. Only characters included in this regular expression is 
 | 
	
		
			
				|  |  | -        allowed [a-zA-Z0-9.:^*$@!+_?-|]", 
 | 
	
		
			
				|  |  | +      throw new Exception(t("Landmark/seqid !landmark contains invalid
 | 
	
		
			
				|  |  | +        characters. Only characters included in this regular expression is
 | 
	
		
			
				|  |  | +        allowed [a-zA-Z0-9.:^*$@!+_?-|]",
 | 
	
		
			
				|  |  |          ['!landmark' => $ret['landmark']]));
 | 
	
		
			
				|  |  |      }
 | 
	
		
			
				|  |  |  
 | 
	
	
		
			
				|  | @@ -1007,7 +1007,7 @@ class GFF3Importer extends TripalImporter {
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |        // Break apart each attribute into key/value pairs.
 | 
	
		
			
				|  |  |        $tag = preg_split("/=/", $attr, 2);
 | 
	
		
			
				|  |  | -      
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  |        // Multiple values of an attribute are separated by commas
 | 
	
		
			
				|  |  |        $tag_name = $tag[0];
 | 
	
		
			
				|  |  |        if (!array_key_exists($tag_name, $tags)) {
 | 
	
	
		
			
				|  | @@ -1103,11 +1103,43 @@ class GFF3Importer extends TripalImporter {
 | 
	
		
			
				|  |  |      // A feature may get ignored. But let's default this to FALSE.
 | 
	
		
			
				|  |  |      $ret['skipped'] = FALSE;
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | +    // A line may have more than one feature (e.g. match, EST_match, etc).
 | 
	
		
			
				|  |  | +    // This flag, when TRUE, tells the parseGFF3 function to repeat this line.
 | 
	
		
			
				|  |  | +    $ret['repeat'] = FALSE;
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  |      // If neither name nor uniquename are provided then generate one.
 | 
	
		
			
				|  |  |      $names = $this->getFeatureNames($tags, $ret['type'], $ret['landmark'], $ret['start'], $ret['stop']);
 | 
	
		
			
				|  |  |      $attr_uniquename = $names['uniquename'];
 | 
	
		
			
				|  |  |      $attr_name = $names['name'];
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | +    // If this is a match feature (match, EST_match, cDNA_match, etc), then
 | 
	
		
			
				|  |  | +    // we need to handle this line specially.
 | 
	
		
			
				|  |  | +    if (preg_match('/match$/i', $ret['type'])) {
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +      // If the feature already exists that means we need to add a match_part
 | 
	
		
			
				|  |  | +      // feature.  If not, then we will add a flag to the results to tell
 | 
	
		
			
				|  |  | +      // the parseGFF3 function to repeat this line, as it has two features:
 | 
	
		
			
				|  |  | +      // the match and the match_part.  All other match features with the same
 | 
	
		
			
				|  |  | +      // ID in the GFF3 will just be match_part features.
 | 
	
		
			
				|  |  | +      $parent_check = preg_replace('/_part_\d+/', '', $attr_uniquename);
 | 
	
		
			
				|  |  | +      if (array_key_exists($parent_check, $this->features)) {
 | 
	
		
			
				|  |  | +         // Set the match_part parent
 | 
	
		
			
				|  |  | +         // remove the "_part_X" suffix added by the getFeatureNames to find
 | 
	
		
			
				|  |  | +         // the parent.
 | 
	
		
			
				|  |  | +         $attr_parent = $parent_check;
 | 
	
		
			
				|  |  | +         $ret['type'] = 'match_part';
 | 
	
		
			
				|  |  | +      }
 | 
	
		
			
				|  |  | +      else {
 | 
	
		
			
				|  |  | +        // Unset all attributes as these belong on the match_part
 | 
	
		
			
				|  |  | +        $attr_dbxref = [];
 | 
	
		
			
				|  |  | +        $attr_aliases = [];
 | 
	
		
			
				|  |  | +        $attr_terms = [];
 | 
	
		
			
				|  |  | +        $attr_derives = [];
 | 
	
		
			
				|  |  | +        $attr_others = [];
 | 
	
		
			
				|  |  | +        $ret['repeat'] = TRUE;
 | 
	
		
			
				|  |  | +      }
 | 
	
		
			
				|  |  | +    }
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  |      $ret['name'] = $attr_name;
 | 
	
		
			
				|  |  |      $ret['uniquename'] = $attr_uniquename;
 | 
	
		
			
				|  |  |      $ret['synonyms'] = $attr_aliases;
 | 
	
	
		
			
				|  | @@ -1156,7 +1188,9 @@ class GFF3Importer extends TripalImporter {
 | 
	
		
			
				|  |  |        $ret['attrs'][$key] = $value;
 | 
	
		
			
				|  |  |      }
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | -    // Add the organism  entry.
 | 
	
		
			
				|  |  | +    // Add the organism entry, but if we don't have one for this feature
 | 
	
		
			
				|  |  | +    // (in the case where the target_organism attribute doesn't match
 | 
	
		
			
				|  |  | +    // an organism in the database) then skip this feature.
 | 
	
		
			
				|  |  |      $ret['organism'] = $attr_organism;
 | 
	
		
			
				|  |  |      if (!$ret['organism']) {
 | 
	
		
			
				|  |  |        $ret['skipped'] = TRUE;
 | 
	
	
		
			
				|  | @@ -1177,7 +1211,6 @@ class GFF3Importer extends TripalImporter {
 | 
	
		
			
				|  |  |            ]));
 | 
	
		
			
				|  |  |      }
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  |      // Add the properties and parent.
 | 
	
		
			
				|  |  |      $ret['properties'] = $attr_others;
 | 
	
		
			
				|  |  |      $ret['parent'] = $attr_parent;
 | 
	
	
		
			
				|  | @@ -1395,69 +1428,14 @@ class GFF3Importer extends TripalImporter {
 | 
	
		
			
				|  |  |        }
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |        // Parse this feature from this line of the GFF3 file.
 | 
	
		
			
				|  |  | -      $gff_feature = $this->parseFeature($line);
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | -      // Add the landmark if it doesn't exist in the landmark list.
 | 
	
		
			
				|  |  | -      if (!array_key_exists($gff_feature['landmark'], $this->landmarks)) {
 | 
	
		
			
				|  |  | -        $this->landmarks[$gff_feature['landmark']] = FALSE;
 | 
	
		
			
				|  |  | -      }
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | -      // Organize DBs and DBXrefs for faster access later on.
 | 
	
		
			
				|  |  | -      foreach ($gff_feature['dbxrefs'] as $index => $info) {
 | 
	
		
			
				|  |  | -        if (!array_key_exists($info['db'], $this->db_lookup)) {
 | 
	
		
			
				|  |  | -          $this->db_lookup[$info['db']] = FALSE;
 | 
	
		
			
				|  |  | -        }
 | 
	
		
			
				|  |  | -        if (!array_key_exists($index, $this->dbxref_lookup)) {
 | 
	
		
			
				|  |  | -          $this->dbxref_lookup[$index] = $info;
 | 
	
		
			
				|  |  | -        }
 | 
	
		
			
				|  |  | -      }
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | -      // We want to make sure the Ontology_term attribute dbxrefs are
 | 
	
		
			
				|  |  | -      // also easily looked up... but we do not want to create them
 | 
	
		
			
				|  |  | -      // if they do not exist the precense of the 'cvterm' key will
 | 
	
		
			
				|  |  | -      // tell the loadDbxrefs() function to not create the term.
 | 
	
		
			
				|  |  | -      foreach ($gff_feature['terms'] as $index => $info) {
 | 
	
		
			
				|  |  | -        if (!array_key_exists($info['db'], $this->db_lookup)) {
 | 
	
		
			
				|  |  | -          $this->db_lookup[$info['db']] = FALSE;
 | 
	
		
			
				|  |  | -        }
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | -        if (!array_key_exists($index, $this->dbxref_lookup)) {
 | 
	
		
			
				|  |  | -          $this->dbxref_lookup[$index] = $info;
 | 
	
		
			
				|  |  | -          $this->dbxref_lookup[$index]['cvterm_id'] = NULL;
 | 
	
		
			
				|  |  | -        }
 | 
	
		
			
				|  |  | -      }
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | -      // Organize the CVterms for faster access later on.
 | 
	
		
			
				|  |  | -      if (!array_key_exists($gff_feature['type'], $feature_cvterms)) {
 | 
	
		
			
				|  |  | -        $feature_cvterms[$gff_feature['type']] = 0;
 | 
	
		
			
				|  |  | -      }
 | 
	
		
			
				|  |  | -      $feature_cvterms[$gff_feature['type']]++;
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | -      // Add any target feature types to the list as well.
 | 
	
		
			
				|  |  | -      if (array_key_exists('name', $gff_feature['target'])) {
 | 
	
		
			
				|  |  | -        if (!array_key_exists($gff_feature['target']['type'], $feature_cvterms)) {
 | 
	
		
			
				|  |  | -          $feature_cvterms[$gff_feature['target']['type']] = 0;
 | 
	
		
			
				|  |  | -        }
 | 
	
		
			
				|  |  | -        $feature_cvterms[$gff_feature['target']['type']]++;
 | 
	
		
			
				|  |  | -      }
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | -      // Organize the feature property types for faster access later on.
 | 
	
		
			
				|  |  | -      foreach ($gff_feature['properties'] as $prop_name => $value) {
 | 
	
		
			
				|  |  | -        if (!array_key_exists($prop_name, $featureprop_cvterms)) {
 | 
	
		
			
				|  |  | -          $featureprop_cvterms[$prop_name] = NULL;
 | 
	
		
			
				|  |  | -        }
 | 
	
		
			
				|  |  | -        $featureprop_cvterms[$prop_name]++;
 | 
	
		
			
				|  |  | -      }
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | -      // Cache the GFF feature details for later lookup.
 | 
	
		
			
				|  |  | -      if (strcmp($gff_feature['uniquename'], $gff_feature['landmark']) != 0) {
 | 
	
		
			
				|  |  | -        $this->cacheFeature($gff_feature);
 | 
	
		
			
				|  |  | -      }
 | 
	
		
			
				|  |  | +      $gff_feature = $this->parseGFF3Line($line);
 | 
	
		
			
				|  |  | +      $this->prepareFeature($gff_feature, $feature_cvterms, $featureprop_cvterms);
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | -      // If this feature has a target then we need to add the target as
 | 
	
		
			
				|  |  | -      // new feature for insertion.
 | 
	
		
			
				|  |  | -      if (array_key_exists('name', $gff_feature['target'])) {
 | 
	
		
			
				|  |  | -        $this->addTargetFeature($gff_feature);
 | 
	
		
			
				|  |  | +      // If there is a second feature (in the case of a match) then
 | 
	
		
			
				|  |  | +      // repeat this line (to get the match_part).
 | 
	
		
			
				|  |  | +      if ($gff_feature['repeat'] === TRUE) {
 | 
	
		
			
				|  |  | +        $gff_feature = $this->parseGFF3Line($line);
 | 
	
		
			
				|  |  | +        $this->prepareFeature($gff_feature, $feature_cvterms, $featureprop_cvterms);
 | 
	
		
			
				|  |  |        }
 | 
	
		
			
				|  |  |      }
 | 
	
		
			
				|  |  |  
 | 
	
	
		
			
				|  | @@ -1477,7 +1455,75 @@ class GFF3Importer extends TripalImporter {
 | 
	
		
			
				|  |  |      foreach (array_keys($featureprop_cvterms) as $name) {
 | 
	
		
			
				|  |  |        $this->getTypeID($name, TRUE);
 | 
	
		
			
				|  |  |      }
 | 
	
		
			
				|  |  | +  }
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +  /**
 | 
	
		
			
				|  |  | +   * Registers a parsed GFF3 feature in the importer's lookup structures.
 | 
	
		
			
				|  |  | +   *
 | 
	
		
			
				|  |  | +   * Records the feature's landmark, DBs, dbxrefs and ontology terms in the
 | 
	
		
			
				|  |  | +   * landmarks, db_lookup and dbxref_lookup members (existing entries are
 | 
	
		
			
				|  |  | +   * left untouched), tallies the feature, target and property type names,
 | 
	
		
			
				|  |  | +   * passes the feature to cacheFeature() unless its uniquename equals its
 | 
	
		
			
				|  |  | +   * landmark, and calls addTargetFeature() when the target has a name.
 | 
	
		
			
				|  |  | +   *
 | 
	
		
			
				|  |  | +   * @param array $gff_feature
 | 
	
		
			
				|  |  | +   *   The feature array returned by parseGFF3Line(). The keys read here are
 | 
	
		
			
				|  |  | +   *   landmark, dbxrefs, terms, type, target, properties and uniquename.
 | 
	
		
			
				|  |  | +   * @param array $feature_cvterms
 | 
	
		
			
				|  |  | +   *   Passed by reference. Occurrence counts keyed by feature type name;
 | 
	
		
			
				|  |  | +   *   incremented for the feature's type and for its target's type.
 | 
	
		
			
				|  |  | +   * @param array $featureprop_cvterms
 | 
	
		
			
				|  |  | +   *   Passed by reference. Occurrence counts keyed by property name.
 | 
	
		
			
				|  |  | +   */
 | 
	
		
			
				|  |  | +  private function prepareFeature($gff_feature, &$feature_cvterms, &$featureprop_cvterms) {
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    // Add the landmark if it doesn't exist in the landmark list.
 | 
	
		
			
				|  |  | +    if (!array_key_exists($gff_feature['landmark'], $this->landmarks)) {
 | 
	
		
			
				|  |  | +      $this->landmarks[$gff_feature['landmark']] = FALSE;
 | 
	
		
			
				|  |  | +    }
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | +    // Organize DBs and DBXrefs for faster access later on.
 | 
	
		
			
				|  |  | +    foreach ($gff_feature['dbxrefs'] as $index => $info) {
 | 
	
		
			
				|  |  | +      if (!array_key_exists($info['db'], $this->db_lookup)) {
 | 
	
		
			
				|  |  | +        $this->db_lookup[$info['db']] = FALSE;
 | 
	
		
			
				|  |  | +      }
 | 
	
		
			
				|  |  | +      if (!array_key_exists($index, $this->dbxref_lookup)) {
 | 
	
		
			
				|  |  | +        $this->dbxref_lookup[$index] = $info;
 | 
	
		
			
				|  |  | +      }
 | 
	
		
			
				|  |  | +    }
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    // We want to make sure the Ontology_term attribute dbxrefs are
 | 
	
		
			
				|  |  | +    // also easily looked up... but we do not want to create them
 | 
	
		
			
				|  |  | +    // if they do not exist. The presence of the 'cvterm_id' key is meant to
 | 
	
		
			
				|  |  | +    // tell the loadDbxrefs() function to not create the term.
 | 
	
		
			
				|  |  | +    foreach ($gff_feature['terms'] as $index => $info) {
 | 
	
		
			
				|  |  | +      if (!array_key_exists($info['db'], $this->db_lookup)) {
 | 
	
		
			
				|  |  | +        $this->db_lookup[$info['db']] = FALSE;
 | 
	
		
			
				|  |  | +      }
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +      if (!array_key_exists($index, $this->dbxref_lookup)) {
 | 
	
		
			
				|  |  | +        $this->dbxref_lookup[$index] = $info;
 | 
	
		
			
				|  |  | +        $this->dbxref_lookup[$index]['cvterm_id'] = NULL;
 | 
	
		
			
				|  |  | +      }
 | 
	
		
			
				|  |  | +    }
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    // Organize the CVterms for faster access later on.
 | 
	
		
			
				|  |  | +    if (!array_key_exists($gff_feature['type'], $feature_cvterms)) {
 | 
	
		
			
				|  |  | +      $feature_cvterms[$gff_feature['type']] = 0;
 | 
	
		
			
				|  |  | +    }
 | 
	
		
			
				|  |  | +    $feature_cvterms[$gff_feature['type']]++;
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    // Add any target feature types to the list as well.
 | 
	
		
			
				|  |  | +    if (array_key_exists('name', $gff_feature['target'])) {
 | 
	
		
			
				|  |  | +      if (!array_key_exists($gff_feature['target']['type'], $feature_cvterms)) {
 | 
	
		
			
				|  |  | +        $feature_cvterms[$gff_feature['target']['type']] = 0;
 | 
	
		
			
				|  |  | +      }
 | 
	
		
			
				|  |  | +      $feature_cvterms[$gff_feature['target']['type']]++;
 | 
	
		
			
				|  |  | +    }
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    // Organize the feature property types for faster access later on.
 | 
	
		
			
				|  |  | +    foreach ($gff_feature['properties'] as $prop_name => $value) {
 | 
	
		
			
				|  |  | +      if (!array_key_exists($prop_name, $featureprop_cvterms)) {
 | 
	
		
			
				|  |  | +        $featureprop_cvterms[$prop_name] = NULL;
 | 
	
		
			
				|  |  | +      }
 | 
	
		
			
				|  |  | +      $featureprop_cvterms[$prop_name]++;
 | 
	
		
			
				|  |  | +    }
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    // Cache the GFF feature details for later lookup.
 | 
	
		
			
				|  |  | +    if (strcmp($gff_feature['uniquename'], $gff_feature['landmark']) != 0) {
 | 
	
		
			
				|  |  | +      $this->cacheFeature($gff_feature);
 | 
	
		
			
				|  |  | +    }
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    // If this feature has a target then we need to add the target as
 | 
	
		
			
				|  |  | +    // new feature for insertion.
 | 
	
		
			
				|  |  | +    if (array_key_exists('name', $gff_feature['target'])) {
 | 
	
		
			
				|  |  | +      $this->addTargetFeature($gff_feature);
 | 
	
		
			
				|  |  | +    }
 | 
	
		
			
				|  |  |    }
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |    /**
 | 
	
	
		
			
				|  | @@ -2796,12 +2842,20 @@ class GFF3Importer extends TripalImporter {
 | 
	
		
			
				|  |  |          }
 | 
	
		
			
				|  |  |          $uniquename = $uniquename . "_" . $i;
 | 
	
		
			
				|  |  |        }
 | 
	
		
			
				|  |  | -      // A name can be duplicated if there is a target match alignment and
 | 
	
		
			
				|  |  | -      // the feature appears first in the GFF as a target before it appears
 | 
	
		
			
				|  |  | -      // on it's own independent line of the gff file.
 | 
	
		
			
				|  |  | -      elseif ($prev_feature['is_target'] == TRUE) {
 | 
	
		
			
				|  |  | -        // Do nothing, the previous feature is a target so we'll overwrite
 | 
	
		
			
				|  |  | -        // it with this record.
 | 
	
		
			
				|  |  | +      // If this is a match feature (e.g. match, EST_match, cDNA_match, etc),
 | 
	
		
			
				|  |  | +      // then we can accept a duplicated ID in the GFF3 file.  But we
 | 
	
		
			
				|  |  | +      // must rename it before going into Chado.  For this, we will allow
 | 
	
		
			
				|  |  | +      // the match feature to keep the original ID and we will create a new
 | 
	
		
			
				|  |  | +      // name for the match_part.
 | 
	
		
			
				|  |  | +      elseif (preg_match('/match$/', $type)) {
 | 
	
		
			
				|  |  | +        $i = 1;
 | 
	
		
			
				|  |  | +        $temp_uname = $uniquename;
 | 
	
		
			
				|  |  | +        do {
 | 
	
		
			
				|  |  | +          $temp_uname = $uniquename . "_part_" . $i;
 | 
	
		
			
				|  |  | +          $i++;
 | 
	
		
			
				|  |  | +        }
 | 
	
		
			
				|  |  | +        while (array_key_exists($temp_uname, $this->features));
 | 
	
		
			
				|  |  | +        $uniquename = $temp_uname;
 | 
	
		
			
				|  |  |        }
 | 
	
		
			
				|  |  |        else {
 | 
	
		
			
				|  |  |          throw new Exception(t("A feature with the same ID exists multiple times: !uname", ['!uname' => $uniquename]));
 |