9 лет назад · 2a38ada0be
--- a/tripal/api/tripal.importer.api.inc
+++ b/tripal/api/tripal.importer.api.inc
@@ -110,14 +110,23 @@ function tripal_run_importer($import_id, $job_id = NULL) {
 
				       $loader_name = $class::$name;
			
 
				       $loader = new $class();
			
 
				       try {
			
 
				+
			
 
				+        // begin the transaction
			
 
				+        $transaction = db_transaction();
			
 
				+        print "\nNOTE: Loading of this file is performed using a database transaction. \n" .
			
 
				+            "If the load fails or is terminated prematurely then the entire set of \n" .
			
 
				+            "insertions/updates is rolled back and will not be found in the database\n\n";
			
 
				         $loader->preRun($details);
			
 
				         $loader->run($details, $job_id);
			
 
				         $loader->postRun($details);
			
 
				 
			
 
				         // Check for new fields and notify the user.
			
 
				         tripal_tripal_cron_notification();
			
 
				+
			
 
				+        print "\nDone\n";
			
 
				       }
			
 
				       catch (Exception $e) {
			
 
				+        $transaction->rollback();
			
 
				         throw new Exception('Cannot run loader, ' . $loader_name . ': ' .  $e->getMessage());
			
 
				       }
			
 
				     }
			
--- a/tripal/api/tripal.notice.api.inc
+++ b/tripal/api/tripal.notice.api.inc
@@ -26,6 +26,8 @@ define('TRIPAL_NOTICE',5);
 
				 define('TRIPAL_INFO',6);
			
 
				 define('TRIPAL_DEBUG',7);
			
 
				 
			
 
				+
			
 
				+
			
 
				 /**
			
 
				  * Provide better error notice for Tripal. If the environment variable
			
 
				  * 'TRIPAL_DEBUG' is set to 1 then this function will add backtrace
			
@@ -83,7 +85,7 @@ function tripal_report_error($type, $severity, $message, $variables = array(), $
 
				       break;
			
 
				   }
			
 
				 
			
 
				-  // get the backtrace and include in the error message, but only if the
			
 
				+  // Get the backtrace and include in the error message, but only if the
			
 
				   // TRIPAL_DEBUG environment variable is set.
			
 
				   if (getenv('TRIPAL_DEBUG') == 1) {
			
 
				     $backtrace = debug_backtrace();
			
@@ -123,8 +125,10 @@ function tripal_report_error($type, $severity, $message, $variables = array(), $
 
				 }
			
 
				 
			
 
				 /**
			
 
				- * Display messages to tripal administrators. This can be used instead of
			
 
				- * drupal_set_message when you want to target tripal administrators.
			
 
				+ * Display messages to tripal administrators.
			
 
				+ *
			
 
				+ * This can be used instead of drupal_set_message when you want to target
			
 
				+ * tripal administrators.
			
 
				  *
			
 
				  * @param $message
			
 
				  *   The message to be displayed to the tripal administrators
			
--- a/tripal/includes/TripalImporter.inc
+++ b/tripal/includes/TripalImporter.inc
@@ -84,5 +84,4 @@ class TripalImporter {
 
				   public function postRun($details) {
			
 
				 
			
 
				   }
			
 
				-
			
 
				 }
			
--- a/tripal/includes/TripalJob.inc
+++ b/tripal/includes/TripalJob.inc
@@ -0,0 +1,70 @@
 
				+<?php
			
 
				+
			
 
				+/**
				+ * Skeleton for a Tripal job.
				+ *
				+ * NOTE(review): run(), logError() and logOutput() are still empty stubs; only
				+ * their signatures and documentation are defined here.
				+ */
				+class TripalJob {
				+
				+  /**
				+   * A database connection distinct from the transaction used for the job.
				+   * This allows for updating the job progress without that progress being
				+   * wiped out if the job fails.
				+   */
				+  private $logger;
				+
				+  /**
				+   * Instantiates a new TripalJob object.
				+   */
				+  public function __construct() {
				+    // The original statement was an assignment with no right-hand side (a
				+    // parse error) and targeted a local variable instead of the property.
				+    // TODO: open the dedicated logging connection described on $logger.
				+    $this->logger = NULL;
				+  }
				+
				+  /**
				+   * Executes the job.
				+   *
				+   * Not yet implemented.
				+   */
				+  public function run() {
				+
				+  }
				+
				+  /**
				+   * Provides a message to the user by logging a system message.
				+   *
				+   * This function takes similar arguments as Drupal's watchdog function.
				+   *
				+   * Not yet implemented.
				+   *
				+   * @param $message
				+   *   The message to store in the log. Keep $message translatable by not
				+   *   concatenating dynamic values into it! Variables in the message should
				+   *   be added by using placeholder strings alongside the variables argument
				+   *   to declare the value of the placeholders. See t() for documentation on
				+   *   how $message and $variables interact.
				+   * @param $variables
				+   *   Array of variables to replace in the message on display or NULL if
				+   *   message is already translated or not possible to translate.
				+   * @param $severity
				+   *   The severity of the message; one of the following values:
				+   *     - TRIPAL_CRITICAL: Critical conditions.
				+   *     - TRIPAL_ERROR: Error conditions.
				+   *     - TRIPAL_WARNING: Warning conditions.
				+   *     - TRIPAL_NOTICE: Normal but significant conditions.
				+   *     - TRIPAL_INFO: (default) Informational messages.
				+   *     - TRIPAL_DEBUG: Debug-level messages.
				+   */
				+  public function logError($message, $variables = array(), $severity = TRIPAL_INFO) {
				+
				+  }
				+
				+  /**
				+   * Provides a message to be displayed to the user.
				+   *
				+   * Not yet implemented.
				+   *
				+   * @param $message
				+   *   (optional) The translated message to be displayed to the user. For
				+   *   consistency with other messages, it should begin with a capital letter
				+   *   and end with a period.
				+   * @param $type
				+   *   (optional) The message's type. Defaults to 'status'. These values are
				+   *   supported:
				+   *     - 'status'
				+   *     - 'warning'
				+   *     - 'error'
				+   * @param $repeat
				+   *   (optional) If this is FALSE and the message is already set, then the
				+   *   message won't be repeated. Defaults to FALSE.
				+   */
				+  public function logOutput($message, $type = 'status', $repeat = FALSE) {
				+
				+  }
				+}
			
--- a/tripal_chado/includes/TripalImporter/FASTAImporter.inc
+++ b/tripal_chado/includes/TripalImporter/FASTAImporter.inc
@@ -0,0 +1,968 @@
 
				+<?php
			
 
				+class FASTAImporter extends TripalImporter {
			
 
				+  /**
			
 
				+   * The name of this loader.  This name will be presented to the site
			
 
				+   * user.
			
 
				+   */
			
 
				+  public static $name = 'Chado FASTA Loader';
			
 
				+
			
 
				+  /**
			
 
				+   * The machine name for this loader. This name will be used to construct
			
 
				+   * the URL for the loader.
			
 
				+   */
			
 
				+  public static $machine_name = 'fasta_loader';
			
 
				+
			
 
				+  /**
			
 
				+   * A brief description for this loader.  This description will be
			
 
				+   * presented to the site user.
			
 
				+   */
			
 
				+  public static $description = 'Load sequences from a multi-FASTA file into Chado';
			
 
				+
			
 
				+  /**
			
 
				+   * An array containing the extensions of allowed file types.
			
 
				+   */
			
 
				+  public static $file_types = array('fasta');
			
 
				+
			
 
				+
			
 
				+  /**
			
 
				+   * Provides information to the user about the file upload.  Typically this
			
 
				+   * may include a description of the file types allowed.
			
 
				+   */
			
 
				+  public static $upload_description = 'Please provide the FASTA file. The file must have a .fasta extension.';
			
 
				+
			
 
				+  /**
			
 
				+   * The title that should appear above the upload button.
			
 
				+   */
			
 
				+  public static $upload_title = 'FASTA Upload';
			
 
				+
			
 
				+
			
 
				+  /**
				+   * Builds the form elements specific to the FASTA loader.
				+   *
				+   * @param $form
				+   *   The form array to which the loader's elements are added.
				+   * @param $form_state
				+   *   The form state array, passed by reference.
				+   *
				+   * @return
				+   *   The form array with the organism, sequence type, method, match type,
				+   *   analysis and advanced option elements added.
				+   */
				+  public function form($form, &$form_state) {
				+
				+    // Get the list of organisms.
				+    $sql = "SELECT * FROM {organism} ORDER BY genus, species";
				+    $org_rset = chado_query($sql);
				+    $organisms = array();
				+    $organisms[''] = '';
				+    while ($organism = $org_rset->fetchObject()) {
				+      $organisms[$organism->organism_id] = "$organism->genus $organism->species ($organism->common_name)";
				+    }
				+    // '#type' is a machine name and must never be passed through t():
				+    // a translated value would not match any registered element type.
				+    $form['organism_id'] = array(
				+      '#title' => t('Organism'),
				+      '#type' => 'select',
				+      '#description' => t("Choose the organism to which these sequences are associated"),
				+      '#required' => TRUE,
				+      '#options' => $organisms
				+    );
				+
				+    // Get the sequence ontology CV ID. Guard against the 'sequence'
				+    // vocabulary not being present so no offset is read from an empty result.
				+    $values = array('name' => 'sequence');
				+    $cv = chado_select_record('cv', array('cv_id'), $values);
				+    $cv_id = !empty($cv) ? $cv[0]->cv_id : '';
				+
				+    $form['seqtype'] = array(
				+      '#type' => 'textfield',
				+      '#title' => t('Sequence Type'),
				+      '#required' => TRUE,
				+      '#description' => t('Please enter the Sequence Ontology (SO) term name that describes the sequences in the FASTA file (e.g. gene, mRNA, polypeptide, etc...)'),
				+      '#autocomplete_path' => "admin/tripal/storage/chado/auto_name/cvterm/$cv_id"
				+    );
				+
				+    // Option keys are the implicit indexes 0, 1 and 2; formValidate()
				+    // relies on these numeric values.
				+    $form['method'] = array(
				+      '#type' => 'radios',
				+      '#title' => t('Method'),
				+      '#required' => TRUE,
				+      '#options' => array(
				+        t('Insert only'),
				+        t('Update only'),
				+        t('Insert and update')
				+      ),
				+      '#description' => t('Select how features in the FASTA file are handled.
				+       Select "Insert only" to insert the new features. If a feature already
				+       exists with the same name or unique name and type then it is skipped.
				+       Select "Update only" to only update featues that already exist in the
				+       database.  Select "Insert and Update" to insert features that do
				+       not exist and upate those that do.'),
				+      '#default_value' => 2
				+    );
				+    // Option keys are the implicit indexes 0 (Name) and 1 (Unique name).
				+    $form['match_type'] = array(
				+      '#type' => 'radios',
				+      '#title' => t('Name Match Type'),
				+      '#required' => TRUE,
				+      '#options' => array(
				+        t('Name'),
				+        t('Unique name')
				+      ),
				+      '#description' => t('Used for "updates only" or "insert and update" methods. Not required if method type is "insert".
				+      Feature data is stored in Chado with both a human-readable
				+      name and a unique name. If the features in your FASTA file are uniquely identified using
				+      a human-readable name then select the "Name" button. If your features are
				+      uniquely identified using the unique name then select the "Unique name" button.  If you
				+      loaded your features first using the GFF loader then the unique name of each
				+      features were indicated by the "ID=" attribute and the name by the "Name=" attribute.
				+      By default, the FASTA loader will use the first word (character string
				+      before the first space) as  the name for your feature. If
				+      this does not uniquely identify your feature consider specifying a regular expression in the advanced section below.
				+      Additionally, you may import both a name and a unique name for each sequence using the advanced options.'),
				+      '#default_value' => 1
				+    );
				+
				+    $form['analysis'] = array(
				+      '#type' => 'fieldset',
				+      '#title' => t('Analysis Used to Derive Features'),
				+      '#collapsed' => TRUE
				+    );
				+    $form['analysis']['desc'] = array(
				+      '#markup' => t("Why specify an analysis for a data load?  All data comes
				+       from some place, even if downloaded from Genbank. By specifying
				+       analysis details for all data uploads, it allows an end user to reproduce the
				+       data set, but at least indicates the source of the data.")
				+    );
				+
				+    // Get the list of analyses.
				+    $sql = "SELECT * FROM {analysis} ORDER BY name";
				+    $analysis_rset = chado_query($sql);
				+    $analyses = array();
				+    $analyses[''] = '';
				+    while ($analysis = $analysis_rset->fetchObject()) {
				+      $analyses[$analysis->analysis_id] = "$analysis->name ($analysis->program $analysis->programversion, $analysis->sourcename)";
				+    }
				+    $form['analysis']['analysis_id'] = array(
				+      '#title' => t('Analysis'),
				+      '#type' => 'select',
				+      '#description' => t("Choose the analysis to which these features are associated"),
				+      '#required' => TRUE,
				+      '#options' => $analyses
				+    );
				+
				+    // Advanced Options
				+    $form['advanced'] = array(
				+      '#type' => 'fieldset',
				+      '#title' => t('Advanced Options'),
				+      '#collapsible' => TRUE,
				+      '#collapsed' => TRUE
				+    );
				+    // Drupal 7 'item' elements render '#markup'; the Drupal 6 '#value' key
				+    // is ignored, which would leave this help text invisible.
				+    $form['advanced']['re_help'] = array(
				+      '#type' => 'item',
				+      '#markup' => t('A regular expression is an advanced method for extracting information from a string of text.
				+                   Your FASTA file may contain both a human-readable name and a unique name for each sequence.
				+                   If you want to import
				+                   both the name and unique name for all sequences, then you must provide regular expressions
				+                   so that the loader knows how to separate them.
				+                   Otherwise the name and uniquename will be the same.
				+                   By default, this loader will use the first word in the definition
				+                   lines of the FASTA file
				+                   as the name or unique name of the feature.')
				+    );
				+    $form['advanced']['re_name'] = array(
				+      '#type' => 'textfield',
				+      '#title' => t('Regular expression for the name'),
				+      '#required' => FALSE,
				+      '#description' => t('Enter the regular expression that will extract the
				+       feature name from the FASTA definition line. For example, for a
				+       defintion line with a name and unique name separated by a bar \'|\' (>seqname|uniquename),
				+       the regular expression for the name would be, "^(.*?)\|.*$".  All FASTA
				+       definition lines begin with the ">" symbol.  You do not need to incldue
				+       this symbol in your regular expression.')
				+    );
				+    $form['advanced']['re_uname'] = array(
				+      '#type' => 'textfield',
				+      '#title' => t('Regular expression for the unique name'),
				+      '#required' => FALSE,
				+      '#description' => t('Enter the regular expression that will extract the
				+       feature name from the FASTA definition line. For example, for a
				+       defintion line with a name and unique name separated by a bar \'|\' (>seqname|uniquename),
				+       the regular expression for the unique name would be "^.*?\|(.*)$").  All FASTA
				+       definition lines begin with the ">" symbol.  You do not need to incldue
				+       this symbol in your regular expression.')
				+    );
				+
				+    // Advanced database cross reference options.
				+    $form['advanced']['db'] = array(
				+      '#type' => 'fieldset',
				+      '#title' => t('External Database Reference'),
				+      '#weight' => 6,
				+      '#collapsed' => TRUE
				+    );
				+    $form['advanced']['db']['re_accession'] = array(
				+      '#type' => 'textfield',
				+      '#title' => t('Regular expression for the accession'),
				+      '#required' => FALSE,
				+      '#description' => t('Enter the regular expression that will extract the accession for the external database for each feature from the FASTA definition line.'),
				+      '#weight' => 2
				+    );
				+
				+    // Get the list of databases.
				+    $sql = "SELECT * FROM {db} ORDER BY name";
				+    $db_rset = chado_query($sql);
				+    $dbs = array();
				+    $dbs[''] = '';
				+    while ($db = $db_rset->fetchObject()) {
				+      $dbs[$db->db_id] = "$db->name";
				+    }
				+    $form['advanced']['db']['db_id'] = array(
				+      '#title' => t('External Database'),
				+      '#type' => 'select',
				+      '#description' => t("Plese choose an external database for which these sequences have a cross reference."),
				+      '#required' => FALSE,
				+      '#options' => $dbs,
				+      '#weight' => 1
				+    );
				+
				+    $form['advanced']['relationship'] = array(
				+      '#type' => 'fieldset',
				+      '#title' => t('Relationships'),
				+      '#weight' => 6,
				+      '#collapsed' => TRUE
				+    );
				+    $rels = array();
				+    $rels[''] = '';
				+    $rels['part_of'] = 'part of';
				+    $rels['derives_from'] = 'produced by (derives from)';
				+
				+    // Advanced references options
				+    $form['advanced']['relationship']['rel_type'] = array(
				+      '#title' => t('Relationship Type'),
				+      '#type' => 'select',
				+      '#description' => t("Use this option to create associations, or relationships between the
				+                        features of this FASTA file and existing features in the database. For
				+                        example, to associate a FASTA file of peptides to existing genes or transcript sequence,
				+                        select the type 'produced by'. For a CDS sequences select the type 'part of'"),
				+      '#required' => FALSE,
				+      '#options' => $rels,
				+      '#weight' => 5
				+    );
				+    $form['advanced']['relationship']['re_subject'] = array(
				+      '#type' => 'textfield',
				+      '#title' => t('Regular expression for the parent'),
				+      '#required' => FALSE,
				+      '#description' => t('Enter the regular expression that will extract the unique
				+                         name needed to identify the existing sequence for which the
				+                         relationship type selected above will apply.'),
				+      '#weight' => 6
				+    );
				+    $form['advanced']['relationship']['parent_type'] = array(
				+      '#type' => 'textfield',
				+      '#title' => t('Parent Type'),
				+      '#required' => FALSE,
				+      '#description' => t('Please enter the Sequence Ontology term for the parent.  For example
				+                         if the FASTA file being loaded is a set of proteins that are
				+                         products of genes, then use the SO term \'gene\' or \'transcript\' or equivalent. However,
				+                         this type must match the type for already loaded features.'),
				+      '#weight' => 7
				+    );
				+    return $form;
				+  }
			
 
				+  /**
				+   * @see TripalImporter::formValidate()
				+   *
				+   * Validates the FASTA loader form: checks that the regular expressions
				+   * match the chosen name match type, that the file can be found, that the
				+   * relationship and database options are complete, and that the sequence
				+   * type (and parent type, if a relationship is selected) exist in the
				+   * 'sequence' vocabulary. The resolved file path is stored in
				+   * $form_state['storage']['dfile'].
				+   *
				+   * @param $form
				+   *   The form array.
				+   * @param $form_state
				+   *   The form state array, passed by reference.
				+   */
				+  public function formValidate($form, &$form_state) {
				+    $fasta_file = trim($form_state['values']['fasta_file']);
				+    $type = trim($form_state['values']['seqtype']);
				+    $method = trim($form_state['values']['method']);
				+    $match_type = trim($form_state['values']['match_type']);
				+    $re_name = trim($form_state['values']['re_name']);
				+    $re_uname = trim($form_state['values']['re_uname']);
				+    $re_accession = trim($form_state['values']['re_accession']);
				+    $db_id = $form_state['values']['db_id'];
				+    $rel_type = $form_state['values']['rel_type'];
				+    $re_subject = trim($form_state['values']['re_subject']);
				+    $parent_type = trim($form_state['values']['parent_type']);
				+
				+    // Convert the numeric radio values into their labels. An elseif chain
				+    // is used so that a value that has already been converted to a label is
				+    // never compared against the remaining numbers.
				+    if ($method == 0) {
				+      $method = 'Insert only';
				+    }
				+    elseif ($method == 1) {
				+      $method = 'Update only';
				+    }
				+    elseif ($method == 2) {
				+      $method = 'Insert and update';
				+    }
				+
				+    if ($match_type == 0) {
				+      $match_type = 'Name';
				+    }
				+    elseif ($match_type == 1) {
				+      $match_type = 'Unique name';
				+    }
				+
				+    if ($re_name and !$re_uname and strcmp($match_type, 'Unique name') == 0) {
				+      form_set_error('re_uname', t("You must provide a regular expression to identify the sequence unique name"));
				+    }
				+
				+    if (!$re_name and $re_uname and strcmp($match_type, 'Name') == 0) {
				+      form_set_error('re_name', t("You must provide a regular expression to identify the sequence name"));
				+    }
				+
				+    // Check to see if the file is located local to Drupal.
				+    $dfile = $_SERVER['DOCUMENT_ROOT'] . base_path() . $fasta_file;
				+    if (!file_exists($dfile)) {
				+      // If not local to Drupal, the file must be someplace else, just use
				+      // the full path provided.
				+      $dfile = $fasta_file;
				+    }
				+    if (!file_exists($dfile)) {
				+      form_set_error('fasta_file', t("Cannot find the file on the system. Check that the file exists or that the web server has permissions to read the file."));
				+    }
				+
				+    // Make sure if a relationship is specified that all fields are provided.
				+    if (($rel_type or $parent_type) and !$re_subject) {
				+      form_set_error('re_subject', t("Please provide a regular expression for the parent"));
				+    }
				+    if (($rel_type or $re_subject) and !$parent_type) {
				+      form_set_error('parent_type', t("Please provide a SO term for the parent"));
				+    }
				+    if (($parent_type or $re_subject) and !$rel_type) {
				+      form_set_error('rel_type', t("Please select a relationship type"));
				+    }
				+
				+    // Make sure if a database is specified that all fields are provided.
				+    if ($db_id and !$re_accession) {
				+      form_set_error('re_accession', t("Please provide a regular expression for the accession"));
				+    }
				+    if ($re_accession and !$db_id) {
				+      form_set_error('db_id', t("Please select a database"));
				+    }
				+
				+    // Check to make sure the types exist.
				+    $cvtermsql ="
				+      SELECT CVT.cvterm_id
				+      FROM {cvterm} CVT
				+        INNER JOIN {cv} CV on CVT.cv_id = CV.cv_id
				+        LEFT JOIN {cvtermsynonym} CVTS on CVTS.cvterm_id = CVT.cvterm_id
				+      WHERE cv.name = :cvname and (CVT.name = :name or CVTS.synonym = :synonym)
				+    ";
				+    $cvterm = chado_query($cvtermsql,
				+      array(':cvname' => 'sequence',':name' => $type,':synonym' => $type))->fetchObject();
				+    if (!$cvterm) {
				+      // The form element is named 'seqtype'; flagging 'type' would not
				+      // highlight any field on the form.
				+      form_set_error('seqtype', t("The Sequence Ontology (SO) term selected for the sequence type is not available in the database. Please check spelling or select another."));
				+    }
				+    if ($rel_type) {
				+      $cvterm = chado_query($cvtermsql, array(':cvname' => 'sequence',':name' => $parent_type,
				+        ':synonym' => $parent_type
				+      ))->fetchObject();
				+      if (!$cvterm) {
				+        form_set_error('parent_type', t("The Sequence Ontology (SO) term selected for the parent relationship is not available in the database. Please check spelling or select another."));
				+      }
				+    }
				+
				+    // Remember the resolved file path in the form state.
				+    $form_state['storage']['dfile'] = $dfile;
				+  }
			
 
				+  /**
				+   * @see TripalImporter::run()
				+   *
				+   * Unpacks the loader arguments from the import details and passes them
				+   * to loadFasta().
				+   *
				+   * @param $details
				+   *   The import details object; its 'arguments' array supplies the values
				+   *   read below.
				+   * @param $job_id
				+   *   (optional) Passed through to loadFasta() as its final argument.
				+   */
				+  public function run($details, $job_id = NULL) {
				+    $arguments = $details->arguments;
				+
				+    $dfile = $arguments['file_path'];
				+    $organism_id = $arguments['organism_id'];
				+    $type = $arguments['seqtype'];
				+    $method = $arguments['method'];
				+    $match_type = $arguments['match_type'];
				+    $re_name = $arguments['re_name'];
				+    $re_uname = $arguments['re_uname'];
				+    $re_accession = $arguments['re_accession'];
				+    $db_id = $arguments['db_id'];
				+    $rel_type = $arguments['rel_type'];
				+    $re_subject = $arguments['re_subject'];
				+    $parent_type = $arguments['parent_type'];
				+    $analysis_id = $arguments['analysis_id'];
				+
				+    // $uid was previously passed without ever being defined.
				+    // NOTE(review): presumably the import details carry the submitting
				+    // user's id as ->uid; confirm. Falls back to NULL, which is what the
				+    // undefined variable evaluated to.
				+    $uid = isset($details->uid) ? $details->uid : NULL;
				+
				+    // The loading method of this class is loadFasta(); the previous call
				+    // used the name load(), which does not match it.
				+    $this->loadFasta($dfile, $organism_id, $type, $re_name, $re_uname, $re_accession,
				+      $db_id, $rel_type, $re_subject, $parent_type, $method, $uid, $analysis_id,
				+      $match_type, $job_id);
				+  }
			
 
				+  /**
			
 
				+   * Load a fasta file.
			
 
				+   *
			
 
				+   * @param $dfile
			
 
				+   *   The full path to the fasta file to load.
			
 
				+   * @param $organism_id
			
 
				+   *   The organism_id of the organism these features are from.
			
 
				+   * @param $type
			
 
				+   *   The type of features contained in the fasta file.
			
 
				+   * @param $re_name
			
 
				+   *   The regular expression to extract the feature.name from the fasta header.
			
 
				+   * @param $re_uname
			
 
				+   *   The regular expression to extract the feature.uniquename from the fasta
			
 
				+   *   header.
			
 
				+   * @param $re_accession
			
 
				+   *   The regular expression to extract the accession of the feature.dbxref_id.
			
 
				+   * @param $db_id
			
 
				+   *   The database ID of the above accession.
			
 
				+   * @param $rel_type
			
 
				+   *   The type of relationship when creating a feature_relationship between
			
 
				+   *   this feature (object) and an extracted subject.
			
 
				+   * @param $re_subject
			
 
				+   *   The regular expression to extract the uniquename of the feature to be
			
 
				+   *   the subject of the above specified relationship.
			
 
				+   * @param $parent_type
			
 
				+   *   The type of the parent feature.
			
 
				+   * @param $method
			
 
				+   *   The method of feature adding. (ie: 'Insert only', 'Update only',
			
 
				+   *   'Insert and update').
			
 
				+   * @param $uid
			
 
				+   *   The user id of the user who submitted the job.
			
 
				+   * @param $analysis_id
			
 
				+   *   The analysis_id to associate the features in this fasta file with.
			
 
				+   * @param $match_type
			
 
				+   *  Whether to match existing features based on the 'Name' or 'Unique name'.
			
 
				+   * @param $job
			
 
				+   *  The tripal job
			
 
				+   */
			
 
				+  private function loadFasta($dfile, $organism_id, $type, $re_name, $re_uname, $re_accession,
			
 
				+      $db_id, $rel_type, $re_subject, $parent_type, $method, $uid, $analysis_id, $match_type,
			
 
				+      $job = NULL) {
			
 
				+
			
 
				+    // First get the type for this sequence.
			
 
				+    $cvtermsql = "
			
 
				+    SELECT CVT.cvterm_id
			
 
				+    FROM {cvterm} CVT
			
 
				+      INNER JOIN {cv} CV on CVT.cv_id = CV.cv_id
			
 
				+      LEFT JOIN {cvtermsynonym} CVTS on CVTS.cvterm_id = CVT.cvterm_id
			
 
				+    WHERE cv.name = :cvname and (CVT.name = :name or CVTS.synonym = :synonym)
			
 
				+  ";
			
 
				+    $cvterm = chado_query($cvtermsql, array(':cvname' => 'sequence',':name' => $type,':synonym' => $type))->fetchObject();
			
 
				+    if (!$cvterm) {
			
 
				+      tripal_report_error("T_fasta_loader", TRIPAL_ERROR,
			
 
				+          "Cannot find the term type: '%type'", array('%type' => $type));
			
 
				+      return 0;
			
 
				+    }
			
 
				+
			
 
				+    // Second, if there is a parent type then get that.
			
 
				+    if ($parent_type) {
			
 
				+      $parentcvterm = chado_query($cvtermsql, array(':cvname' => 'sequence', ':name' => $parent_type,':synonym' => $parent_type))->fetchObject();
			
 
				+      if (!$parentcvterm) {
			
 
				+        tripal_report_error("T_fasta_loader", TRIPAL_ERROR, "Cannot find the paretne term type: '%type'", array(
			
 
				+          '%type' => $parentcvterm
			
 
				+        ));
			
 
				+        return 0;
			
 
				+      }
			
 
				+    }
			
 
				+
			
 
				+    // Third, if there is a relationship type then get that.
			
 
				+    if ($rel_type) {
			
 
				+      $relcvterm = chado_query($cvtermsql, array(':cvname' => 'sequence',':name' => $rel_type,':synonym' => $rel_type))->fetchObject();
			
 
				+      if (!$relcvterm) {
			
 
				+        tripal_report_error("T_fasta_loader", TRIPAL_ERROR, "Cannot find the relationship term type: '%type'", array(
			
 
				+          '%type' => $relcvterm
			
 
				+        ));
			
 
				+        return 0;
			
 
				+      }
			
 
				+    }
			
 
				+
			
 
				+    // We need to get the table schema to make sure we don't overrun the
			
 
				+    // size of fields with what our regular expressions retrieve
			
 
				+    $feature_tbl = chado_get_schema('feature');
			
 
				+    $dbxref_tbl = chado_get_schema('dbxref');
			
 
				+
			
 
				+    print "Step 1: finding sequences\n";
			
 
				+    $filesize = filesize($dfile);
			
 
				+    $fh = fopen($dfile, 'r');
			
 
				+    if (!$fh) {
			
 
				+      tripal_report_error('T_fasta_loader', TRIPAL_ERROR, "cannot open file: %dfile", array(
			
 
				+        '%dfile' => $dfile
			
 
				+      ));
			
 
				+      return 0;
			
 
				+    }
			
 
				+
			
 
				+    // Calculate the interval at which we will print to the screen that status.
			
 
				+    $interval = intval($filesize * 0.01);
			
 
				+    if ($interval < 1) {
			
 
				+      $interval = 1;
			
 
				+    }
			
 
				+    $inv_read = 0;
			
 
				+    $num_read = 0;
			
 
				+
			
 
				+    // Iterate through the lines of the file. Keep a record for
			
 
				+    // where in the file each line is at for later import.
			
 
				+    $seqs = array();
			
 
				+    $num_seqs = 0;
			
 
				+    $prev_pos = 0;
			
 
				+    $set_start = FALSE;
			
 
				+    while ($line = fgets($fh)) {
			
 
				+      $num_read += strlen($line);
			
 
				+      $intv_read += strlen($line);
			
 
				+
			
 
				+      // If we encounter a definition line then get the name, uniquename,
			
 
				+      // accession and relationship subject from the definition line.
			
 
				+      if (preg_match('/^>/', $line)) {
			
 
				+
			
 
				+        // Remove the > symbol from the defline.
			
 
				+        $defline = preg_replace("/^>/", '', $line);
			
 
				+
			
 
				+        // Get the feature name if a regular expression is provided.
			
 
				+        if ($re_name) {
			
 
				+          if (!preg_match("/$re_name/", $defline, $matches)) {
			
 
				+            tripal_report_error('trp-fasta', "ERROR: Regular expression for the feature name finds nothing. Line %line.", array(
			
 
				+              '%line' => $i
			
 
				+            ), 'error');
			
 
				+          }
			
 
				+          elseif (strlen($matches[1]) > $feature_tbl['fields']['name']['length']) {
			
 
				+            tripal_report_error('trp-fasta', "WARNING: Regular expression retrieves a value too long for the feature name. Line %line.", array(
			
 
				+              '%line' => $i
			
 
				+            ), 'error');
			
 
				+          }
			
 
				+          else {
			
 
				+            $name = trim($matches[1]);
			
 
				+          }
			
 
				+        }
			
 
				+        // If the match_type is name and no regular expression was provided
			
 
				+        // then use the first word as the name, otherwise we don't set the name.
			
 
				+        elseif (strcmp($match_type, 'Name') == 0) {
			
 
				+          if (preg_match("/^\s*(.*?)[\s\|].*$/", $defline, $matches)) {
			
 
				+            if (strlen($matches[1]) > $feature_tbl['fields']['name']['length']) {
			
 
				+              tripal_report_error('trp-fasta', "WARNING: Regular expression retrieves a feature name too long for the feature name. Line %line.", array(
			
 
				+                '%line' => $i), 'error');
			
 
				+            }
			
 
				+            else {
			
 
				+              $name = trim($matches[1]);
			
 
				+            }
			
 
				+          }
			
 
				+          else {
			
 
				+            tripal_report_error('trp-fasta', "ERROR: Cannot find a feature name. Line %line.", array(
			
 
				+              '%line' => $i), 'error');
			
 
				+          }
			
 
				+        }
			
 
				+
			
 
				+        // Get the feature uniquename if a regular expression is provided.
			
 
				+        if ($re_uname) {
			
 
				+          if (!preg_match("/$re_uname/", $defline, $matches)) {
			
 
				+            tripal_report_error('trp-fasta', "ERROR: Regular expression for the feature unique name finds nothing. Line %line.", array(
			
 
				+              '%line' => $i), 'error');
			
 
				+          }
			
 
				+          $uname = trim($matches[1]);
			
 
				+        }
			
 
				+        // If the match_type is name and no regular expression was provided
			
 
				+        // then use the first word as the name, otherwise, we don't set the
			
 
				+        // unqiuename.
			
 
				+        elseif (strcmp($match_type, 'Unique name') == 0) {
			
 
				+          if (preg_match("/^\s*(.*?)[\s\|].*$/", $defline, $matches)) {
			
 
				+            $uname = trim($matches[1]);
			
 
				+          }
			
 
				+          else {
			
 
				+            tripal_report_error('trp-fasta', "ERROR: Cannot find a feature unique name. Line %line.", array(
			
 
				+              '%line' => $i), 'error');
			
 
				+          }
			
 
				+        }
			
 
				+
			
 
				+        // Get the accession if a regular expression is provided.
			
 
				+        preg_match("/$re_accession/", $defline, $matches);
			
 
				+        if (strlen($matches[1]) > $dbxref_tbl['fields']['accession']['length']) {
			
 
				+          tripal_report_error('trp-fasta', "WARNING: Regular expression retrieves an accession too long for the feature name. " .
			
 
				+              "Cannot add cross reference. Line %line.", array('%line' => $i
			
 
				+              ), 'warning');
			
 
				+        }
			
 
				+        else {
			
 
				+          $accession = trim($matches[1]);
			
 
				+        }
			
 
				+
			
 
				+        // Get the relationship subject
			
 
				+        preg_match("/$re_subject/", $line, $matches);
			
 
				+        $subject = trim($matches[1]);
			
 
				+
			
 
				+        // Add the details to the sequence.
			
 
				+        $seqs[$num_seqs] = array(
			
 
				+          'name' => $name,
			
 
				+          'uname' => $uname,
			
 
				+          'accession' => $accession,
			
 
				+          'subject' => $subject,
			
 
				+          'seq_start' => ftell($fh)
			
 
				+        );
			
 
				+        $set_start = TRUE;
			
 
				+        // If this isn't the first sequence, then we want to specify where
			
 
				+        // the previous sequence ended.
			
 
				+        if ($num_seqs > 0) {
			
 
				+          $seqs[$num_seqs - 1]['seq_end'] = $prev_pos;
			
 
				+        }
			
 
				+        $num_seqs++;
			
 
				+      }
			
 
				+      // Keep the current file position so we can use it to set the sequence
			
 
				+      // ending position
			
 
				+      $prev_pos = ftell($fh);
			
 
				+
			
 
				+      // update the job status every % bytes
			
 
				+      if ($job and $intv_read >= $interval) {
			
 
				+        $intv_read = 0;
			
 
				+        $percent = sprintf("%.2f", ($num_read / $filesize) * 100);
			
 
				+        if ($name) {
			
 
				+          print "Parsing Line $i (" . $percent . "%). Memory: " . number_format(memory_get_usage()) .
			
 
				+          " bytes.\r";
			
 
				+        }
			
 
				+        else {
			
 
				+          print "Parsing Line $i (" . $percent . "%). Memory: " . number_format(memory_get_usage()) .
			
 
				+          " bytes.\r";
			
 
				+        }
			
 
				+        tripal_set_job_progress($job, intval(($num_read / $filesize) * 100));
			
 
				+      }
			
 
				+    }
			
 
				+    $percent = sprintf("%.2f", ($num_read / $filesize) * 100);
			
 
				+    print "Parsing Line $i (" . $percent . "%). Memory: " . number_format(memory_get_usage()) .
			
 
				+    " bytes.\r";
			
 
				+    tripal_set_job_progress($job, 50);
			
 
				+
			
 
				+    // Set the end position for the last sequence.
			
 
				+    $seqs[$num_seqs - 1]['seq_end'] = $num_read - strlen($line);
			
 
				+
			
 
				+    // Now that we know where the sequences are in the file we need to add them.
			
 
				+    print "\nStep 2: Importing sequences\n";
			
 
				+    for ($i = 0; $i < $num_seqs; $i++) {
			
 
				+      $seq = $seqs[$i];
			
 
				+      print "Importing " . ($i + 1) . " of $num_seqs. ";
			
 
				+      if ($name) {
			
 
				+        print "Current feature: " . $seq['name'] . ".\n";
			
 
				+      }
			
 
				+      else {
			
 
				+        print "Current feature: " . $seq['uname'] . ".\n";
			
 
				+      }
			
 
				+
			
 
				+      loadFastaFeature($fh, $seq['name'], $seq['uname'], $db_id, $seq['accession'], $seq['subject'], $rel_type, $parent_type, $analysis_id, $organism_id, $cvterm, $source, $method, $re_name, $match_type, $parentcvterm, $relcvterm, $seq['seq_start'], $seq['seq_end']);
			
 
				+    }
			
 
				+    tripal_set_job_progress($job, 100);
			
 
				+    fclose($fh);
			
 
				+
			
 
				+  }
			
 
				+
			
 
				+  /**
			
 
				+   * A helper function for loadFasta() to load a single feature
			
 
				+   *
			
 
				+   * @ingroup fasta_loader
			
 
				+   */
			
 
				+  private function loadFastaFeature($fh, $name, $uname, $db_id, $accession, $parent,
			
 
				+      $rel_type, $parent_type, $analysis_id, $organism_id, $cvterm, $source, $method, $re_name,
			
 
				+      $match_type, $parentcvterm, $relcvterm, $seq_start, $seq_end) {
			
 
				+
			
 
				+    // Check to see if this feature already exists if the match_type is 'Name'.
			
 
				+    if (strcmp($match_type, 'Name') == 0) {
			
 
				+      $values = array('organism_id' => $organism_id,'name' => $name,'type_id' => $cvterm->cvterm_id
			
 
				+      );
			
 
				+      $results = chado_select_record('feature', array('feature_id'
			
 
				+      ), $values);
			
 
				+      if (count($results) > 1) {
			
 
				+        tripal_report_error('T_fasta_loader', "Multiple features exist with the name '%name' of type
			
 
				+               '%type' for the organism.  skipping", array('%name' => $name,'%type' => $type));
			
 
				+        return 0;
			
 
				+      }
			
 
				+      if (count($results) == 1) {
			
 
				+        $feature = $results[0];
			
 
				+      }
			
 
				+    }
			
 
				+
			
 
				+    // Check if this feature already exists if the match_type is 'Unique Name'.
			
 
				+    if (strcmp($match_type, 'Unique name') == 0) {
			
 
				+      $values = array(
			
 
				+        'organism_id' => $organism_id,
			
 
				+        'uniquename' => $uname,
			
 
				+        'type_id' => $cvterm->cvterm_id
			
 
				+      );
			
 
				+
			
 
				+      $results = chado_select_record('feature', array('feature_id'), $values);
			
 
				+      if (count($results) > 1) {
			
 
				+        tripal_report_error('T_fasta_loader', TRIPAL_WARNING, "Multiple features exist with the name '%name' of type '%type' for the organism.  skipping", array(
			
 
				+          '%name' => $name,'%type' => $type));
			
 
				+        return 0;
			
 
				+      }
			
 
				+      if (count($results) == 1) {
			
 
				+        $feature = $results[0];
			
 
				+      }
			
 
				+
			
 
				+      // If the feature exists but this is an "insert only" then skip.
			
 
				+      if ($feature and (strcmp($method, 'Insert only') == 0)) {
			
 
				+        tripal_report_error('T_fasta_loader', TRIPAL_WARNING, "Feature already exists '%name' ('%uname') while matching on %type. Skipping insert.", array(
			
 
				+          '%name' => $name,'%uname' => $uname,'%type' => drupal_strtolower($match_type)
			
 
				+        ));
			
 
				+        return 0;
			
 
				+      }
			
 
				+    }
			
 
				+
			
 
				+    // If we don't have a feature and we're doing an insert then do the insert.
			
 
				+    $inserted = 0;
			
 
				+    if (!$feature and (strcmp($method, 'Insert only') == 0 or strcmp($method, 'Insert and update') == 0)) {
			
 
				+      // If we have a unique name but not a name then set them to be the same
			
 
				+      if (!$uname) {
			
 
				+        $uname = $name;
			
 
				+      }
			
 
				+      elseif (!$name) {
			
 
				+        $name = $uname;
			
 
				+      }
			
 
				+
			
 
				+      // Insert the feature record.
			
 
				+      $values = array(
			
 
				+        'organism_id' => $organism_id,
			
 
				+        'name' => $name,
			
 
				+        'uniquename' => $uname,
			
 
				+        'type_id' => $cvterm->cvterm_id
			
 
				+      );
			
 
				+      $success = chado_insert_record('feature', $values);
			
 
				+      if (!$success) {
			
 
				+        tripal_report_error('T_fasta_loader', TRIPAL_ERROR, "Failed to insert feature '%name (%uname)'", array(
			
 
				+          '%name' => $name,'%uname' => $numane));
			
 
				+        return 0;
			
 
				+      }
			
 
				+
			
 
				+      // now get the feature we just inserted
			
 
				+      $values = array(
			
 
				+        'organism_id' => $organism_id,
			
 
				+        'uniquename' => $uname,
			
 
				+        'type_id' => $cvterm->cvterm_id
			
 
				+      );
			
 
				+      $results = chado_select_record('feature', array('feature_id'), $values);
			
 
				+      if (count($results) == 1) {
			
 
				+        $inserted = 1;
			
 
				+        $feature = $results[0];
			
 
				+      }
			
 
				+      else {
			
 
				+        tripal_report_error('T_fasta_loader', TRIPAL_ERROR, "Failed to retreive newly inserted feature '%name (%uname)'", array(
			
 
				+          '%name' => $name,'%uname' => $numane));
			
 
				+        return 0;
			
 
				+      }
			
 
				+
			
 
				+      // Add the residues for this feature
			
 
				+      loadFastaResidues($fh, $feature->feature_id, $seq_start, $seq_end);
			
 
				+    }
			
 
				+
			
 
				+    // if we don't have a feature and the user wants to do an update then fail
			
 
				+    if (!$feature and (strcmp($method, 'Update only') == 0 or
			
 
				+        drupal_strcmp($method, 'Insert and update') == 0)) {
			
 
				+          tripal_report_error('T_fasta_loader', TRIPAL_ERROR, "Failed to find feature '%name' ('%uname') while matching on " .
			
 
				+              drupal_strtolower($match_type), array('%name' => $name,'%uname' => $uname));
			
 
				+          return 0;
			
 
				+        }
			
 
				+
			
 
				+        // if we do have a feature and this is an update then proceed with the update
			
 
				+        if ($feature and !$inserted and (strcmp($method, 'Update only') == 0 or
			
 
				+            strcmp($method, 'Insert and update') == 0)) {
			
 
				+
			
 
				+              // if the user wants to match on the Name field
			
 
				+              if (strcmp($match_type, 'Name') == 0) {
			
 
				+
			
 
				+                // if we're matching on the name but do not have a unique name then we
			
 
				+                // don't want to update the uniquename.
			
 
				+                $values = array();
			
 
				+                if ($uname) {
			
 
				+
			
 
				+                  // First check to make sure that by changing the unique name of this
			
 
				+                  // feature that we won't conflict with another existing feature of
			
 
				+                  // the same name
			
 
				+        $values = array(
			
 
				+          'organism_id' => $organism_id,
			
 
				+          'uniquename' => $uname,
			
 
				+          'type_id' => $cvterm->cvterm_id
			
 
				+                  );
			
 
				+                      $results = chado_select_record('feature', array('feature_id'
			
 
				+          ), $values);
			
 
				+          if (count($results) > 0) {
			
 
				+          tripal_report_error('T_fasta_loader', "Cannot update the feature '%name' with a uniquename of '%uname' and type of '%type' as it
			
 
				+            conflicts with an existing feature with the same uniquename and type.", array(
			
 
				+                  '%name' => $name,'%uname' => $uname,'%type' => $type
			
 
				+          ));
			
 
				+          return 0;
			
 
				+                }
			
 
				+
			
 
				+                // the changes to the uniquename don't conflict so proceed with the update
			
 
				+                $values = array('uniquename' => $uname);
			
 
				+                  $match = array(
			
 
				+                  'name' => $name,
			
 
				+                  'organism_id' => $organism_id,
			
 
				+                  'type_id' => $cvterm->cvterm_id
			
 
				+                );
			
 
				+
			
 
				+                // perform the update
			
 
				+                  $success = chado_update_record('feature', $match, $values);
			
 
				+        if (!$success) {
			
 
				+          tripal_report_error('T_fasta_loader', TRIPAL_ERROR, "Failed to update feature '%name' ('%name')", array(
			
 
				+                  '%name' => $name,'%uiname' => $uname
			
 
				+                  ));
			
 
				+                  return 0;
			
 
				+            }
			
 
				+  }
			
 
				+  }
			
 
				+
			
 
				+  // If the user wants to match on the unique name field.
			
 
				+  if (strcmp($match_type, 'Unique name') == 0) {
			
 
				+  // If we're matching on the uniquename and have a new name then
			
 
				+  // we want to update the name.
			
 
				+  $values = array();
			
 
				+  if ($name) {
			
 
				+      $values = array('name' => $name);
			
 
				+        $match = array(
			
 
				+          'uniquename' => $uname,
			
 
				+          'organism_id' => $organism_id,
			
 
				+          'type_id' => $cvterm->cvterm_id
			
 
				+  );
			
 
				+  $success = chado_update_record('feature', $match, $values);
			
 
				+        if (!$success) {
			
 
				+          tripal_report_error('T_fasta_loader', TRIPAL_ERROR, "Failed to update feature '%name' ('%name')", array(
			
 
				+          '%name' => $name,'%uiname' => $uname
			
 
				+          ));
			
 
				+          return 0;
			
 
				+  }
			
 
				+  }
			
 
				+  }
			
 
				+  }
			
 
				+
			
 
				+  // Update the residues for this feature
			
 
				+  loadFastaResidues($fh, $feature->feature_id, $seq_start, $seq_end);
			
 
				+
			
 
				+  // add in the analysis link
			
 
				+  if ($analysis_id) {
			
 
				+  // if the association doens't alredy exist then add one
			
 
				+  $values = array(
			
 
				+      'analysis_id' => $analysis_id,
			
 
				+      'feature_id' => $feature->feature_id
			
 
				+  );
			
 
				+  $results = chado_select_record('analysisfeature', array('analysisfeature_id'), $values);
			
 
				+                  if (count($results) == 0) {
			
 
				+                  $success = chado_insert_record('analysisfeature', $values);
			
 
				+                    if (!$success) {
			
 
				+                        tripal_report_error('T_fasta_loader', TRIPAL_ERROR, "Failed to associate analysis and feature '%name' ('%name')", array(
			
 
				+                        '%name' => $name,'%uname' => $uname
			
 
				+                        ));
			
 
				+                        return 0;
			
 
				+                        }
			
 
				+                        }
			
 
				+                        }
			
 
				+
			
 
				+                        // now add the database cross reference
			
 
				+                        if ($db_id) {
			
 
				+                        // check to see if this accession reference exists, if not add it
			
 
				+                        $values = array(
			
 
				+                        'db_id' => $db_id,
			
 
				+                            'accession' => $accession
			
 
				+                        );
			
 
				+    $results = chado_select_record('dbxref', array('dbxref_id'), $values);
			
 
				+      // if the accession doesn't exist then add it
			
 
				+                            if (count($results) == 0) {
			
 
				+                            $results = chado_insert_record('dbxref', $values);
			
 
				+                            if (!$results) {
			
 
				+                            tripal_report_error('T_fasta_loader', TRIPAL_ERROR, "Failed to add database accession '%accession'", array(
			
 
				+                                '%accession' => $accession));
			
 
				+                        return 0;
			
 
				+                        }
			
 
				+                        $results = chado_select_record('dbxref', array('dbxref_id'), $values);
			
 
				+                        if (count($results) == 1) {
			
 
				+                        $dbxref = $results[0];
			
 
				+                        }
			
 
				+                        else {
			
 
				+        tripal_report_error('T_fasta_loader', TRIPAL_ERROR, "Failed to retreive newly inserted dbxref '%name (%uname)'", array(
			
 
				+          '%name' => $name,'%uname' => $numane));
			
 
				+            return 0;
			
 
				+                        }
			
 
				+                        }
			
 
				+                        else {
			
 
				+      $dbxref = $results[0];
			
 
				+    }
			
 
				+
			
 
				+    // check to see if the feature dbxref record exists if not, then add it
			
 
				+                        $values = array(
			
 
				+                        'feature_id' => $feature->feature_id,
			
 
				+                        'dbxref_id' => $dbxref->dbxref_id
			
 
				+                        );
			
 
				+                        $results = chado_select_record('feature_dbxref', array('feature_dbxref_id'), $values);
			
 
				+                        if (count($results) == 0) {
			
 
				+                        $success = chado_insert_record('feature_dbxref', $values);
			
 
				+      if (!$success) {
			
 
				+        tripal_report_error('T_fasta_loader', TRIPAL_ERROR, "Failed to add associate database accession '%accession' with feature", array(
			
 
				+          '%accession' => $accession
			
 
				+          ));
			
 
				+          return 0;
			
 
				+                        }
			
 
				+                        }
			
 
				+                        }
			
 
				+
			
 
				+                        // now add in the relationship if one exists. If not, then add it
			
 
				+                        if ($rel_type) {
			
 
				+                        $values = array('organism_id' => $organism_id,'uniquename' => $parent,
			
 
				+      'type_id' => $parentcvterm->cvterm_id
			
 
				+                        );
			
 
				+                          $results = chado_select_record('feature', array('feature_id'
			
 
				+                          ), $values);
			
 
				+    if (count($results) != 1) {
			
 
				+      tripal_report_error('T_fasta_loader', "Cannot find a unique fature for the parent '%parent' of type
			
 
				+        '%type' for the feature.", array(
			
 
				+                          '%parent' => $parent,'%type' => $parent_type
			
 
				+                        ));
			
 
				+                          return 0;
			
 
				+                          }
			
 
				+                          $parent_feature = $results[0];
			
 
				+
			
 
				+                          // check to see if the relationship already exists if not then add it
			
 
				+                          $values = array(
			
 
				+                            'subject_id' => $feature->feature_id,
			
 
				+                            'object_id' => $parent_feature->feature_id,
			
 
				+                            'type_id' => $relcvterm->cvterm_id
			
 
				+                          );
			
 
				+                          $results = chado_select_record('feature_relationship', array('feature_relationship_id'), $values);
			
 
				+                          if (count($results) == 0) {
			
 
				+                          $success = chado_insert_record('feature_relationship', $values);
			
 
				+                          if (!$success) {
			
 
				+                          tripal_report_error('T_fasta_loader', TRIPAL_ERROR, "Failed to add associate database accession '%accession' with feature", array(
			
 
				+                          '%accession' => $accession
			
 
				+                          ));
			
 
				+                          return 0;
			
 
				+      }
			
 
				+    }
			
 
				+  }
			
 
				+}
			
 
				+
			
 
				+  /**
			
 
				+   * Adds the residues column to the feature.
			
 
				+   *
			
 
				+   * This function seeks to the proper location in the file for the sequence
			
 
				+   * and reads in chunks of sequence and appends them to the feature.residues
			
 
				+   * column in the database.
			
 
				+   *
			
 
				+   * @param $fh
			
 
				+   * @param $feature_id
			
 
				+   * @param $seq_start
			
 
				+   * @param $seq_end
			
 
				+   */
			
 
				+   private function loadFastaResidues($fh, $feature_id, $seq_start, $seq_end) {
			
 
				+
			
 
				+     // First position the file at the beginning of the sequence
			
 
				+     fseek($fh, $seq_start, SEEK_SET);
			
 
				+     $chunk_size = 100000000;
			
 
				+     $chunk = '';
			
 
				+     $seqlen = ($seq_end - $seq_start) + 1;
			
 
				+
			
 
				+  // Calculate the interval at which we updated the precent complete.
			
 
				+  $interval = intval($seqlen * 0.01);
			
 
				+    if ($interval < 1) {
			
 
				+    $interval = 1;
			
 
				+     }
			
 
				+  // We don't to repeat the update too often or it slows things down, so
			
 
				+    // if the interval is less than 1000 then bring it up to that.
			
 
				+    if ($interval < 100000) {
			
 
				+        $interval = 100000;
			
 
				+     }
			
 
				+     $chunk_intv_read = 0;
			
 
				+     $intv_read = 0;
			
 
				+       $num_read = 0;
			
 
				+       $total_seq_size = 0;
			
 
				+
			
 
				+       // First, make sure we don't have a null in the residues
			
 
				+       $sql = "UPDATE {feature} SET residues = '' WHERE feature_id = :feature_id";
			
 
				+       chado_query($sql, array(':feature_id' => $feature_id
			
 
				+       ));
			
 
				+
			
 
				+       // Read in the lines until we reach the end of the sequence. Once we
			
 
				+         // get a specific bytes read then append the sequence to the one in the
			
 
				+         // database.
			
 
				+           print "Sequence complete: 0%. Memory: " . number_format(memory_get_usage()) . " bytes. \r";
			
 
				+  while ($line = fgets($fh)) {
			
 
				+           $num_read += strlen($line) + 1;
			
 
				+    $chunk_intv_read += strlen($line) + 1;
			
 
				+           $intv_read += strlen($line) + 1;
			
 
				+           $chunk .= trim($line);
			
 
				+
			
 
				+           // If we've read in enough of the sequence then append it to the database.
			
 
				+           if ($chunk_intv_read >= $chunk_size) {
			
 
				+      $sql = "
			
 
				+        UPDATE {feature}
			
 
				+        SET residues = residues || :chunk
			
 
				+        WHERE feature_id = :feature_id
			
 
				+          ";
			
 
				+          $success = chado_query($sql, array(':feature_id' => $feature_id,':chunk' => $chunk
			
 
				+           ));
			
 
				+      if (!$success) {
			
 
				+        return FALSE;
			
 
				+                          }
			
 
				+                          $total_seq_size += strlen($chunk);
			
 
				+        $chunk = '';
			
 
				+        $chunk_intv_read = 0;
			
 
				+      }
			
 
				+      if ($intv_read >= $interval) {
			
 
				+        $percent = sprintf("%.2f", ($total_seq_size / $seqlen) * 100);
			
 
				+        print "Sequence complete: " . $percent . "%. Memory: " . number_format(memory_get_usage()) .
			
 
				+           " bytes. \r";
			
 
				+        $intv_read = 0;
			
 
				+      }
			
 
				+
			
 
				+      // If we've reached the ned of the sequence then break out of the loop
			
 
				+      if (ftell($fh) == $seq_end) {
			
 
				+        break;
			
 
				+      }
			
 
				+    }
			
 
				+
			
 
				+    // write the last bit of sequence if it remains
			
 
				+    if (strlen($chunk) > 0) {
			
 
				+      $sql = "
			
 
				+          UPDATE {feature}
			
 
				+          SET residues = residues || :chunk
			
 
				+          WHERE feature_id = :feature_id
			
 
				+        ";
			
 
				+      $success = chado_query($sql, array(':feature_id' => $feature_id,':chunk' => $chunk
			
 
				+      ));
			
 
				+      if (!$success) {
			
 
				+        return FALSE;
			
 
				+      }
			
 
				+      $total_seq_size += strlen($chunk);
			
 
				+      $chunk = '';
			
 
				+      $chunk_intv_read = 0;
			
 
				+    }
			
 
				+
			
 
				+    // Now update the seqlen and md5checksum fields
			
 
				+    $sql = "UPDATE {feature} SET seqlen = char_length(residues),  md5checksum = md5(residues) WHERE feature_id = :feature_id";
			
 
				+    chado_query($sql, array(':feature_id' => $feature_id
			
 
				+    ));
			
 
				+
			
 
				+    $percent = sprintf("%.2f", ($num_read / $seqlen) * 100);
			
 
				+    print "Sequence complete: " . $percent . "%. Memory: " . number_format(memory_get_usage()) .
			
 
				+       " bytes. \r";
			
 
				+  }
			
 
				+
			
 
				+}
			
--- a/tripal_chado/includes/TripalImporter/GFF3Importer.inc
+++ b/tripal_chado/includes/TripalImporter/GFF3Importer.inc
@@ -257,16 +257,6 @@ class GFF3Importer extends TripalImporter {
 
				 
			
 
				     return $form;
			
 
				   }
			
 
				-  /**
			
 
				-   * @see TripalImporter::formSubmit()
			
 
				-   */
			
 
				-  public function formSubmit($form, &$form_state) {
			
 
				-    global $user;
			
 
				-
			
 
				-
			
 
				-
			
 
				-    return '';
			
 
				-  }
			
 
				   /**
			
 
				    * @see TripalImporter::formValidate()
			
 
				    */