Browse Source

Merge branch '7.x-3.x-jobClass' into 7.x-3.x

Stephen Ficklin 8 years ago
parent
commit
90bbb2d69a

+ 19 - 4
tripal/api/tripal.importer.api.inc

@@ -88,7 +88,7 @@ function tripal_load_include_importer_class($class) {
  *   The ID of the import record.
  * @throws Exception
  */
-function tripal_run_importer($import_id, $job_id = NULL) {
+function tripal_run_importer($import_id, TripalJob $job = NULL) {
   $details = db_select('tripal_import', 'ti')
     ->fields('ti')
     ->condition('ti.import_id', $import_id)
@@ -107,18 +107,33 @@ function tripal_run_importer($import_id, $job_id = NULL) {
     $class = $details->class;
     tripal_load_include_importer_class($class);
     if (class_exists($class)) {
+
       $loader_name = $class::$name;
-      $loader = new $class();
+      $loader = new $class($job);
       try {
+
+        // begin the transaction
+        $transaction = db_transaction();
+        print "\nNOTE: Loading of this file is performed using a database transaction. \n" .
+            "If the load fails or is terminated prematurely then the entire set of \n" .
+            "insertions/updates is rolled back and will not be found in the database\n\n";
         $loader->preRun($details);
-        $loader->run($details, $job_id);
+        $loader->run($details);
         $loader->postRun($details);
+        if ($job) {
+          $job->logMessage("Done");
+        }
+        print "\nDone\n";
 
         // Check for new fields and notify the user.
         tripal_tripal_cron_notification();
+
       }
       catch (Exception $e) {
-        throw new Exception('Cannot run loader, ' . $loader_name . ': ' .  $e->getMessage());
+        $transaction->rollback();
+        if ($job) {
+          $job->logMessage($e->getMessage(), array(), TRIPAL_ERROR);
+        }
       }
     }
   }

+ 149 - 179
tripal/api/tripal.jobs.api.inc

@@ -80,77 +80,41 @@
 function tripal_add_job($job_name, $modulename, $callback, $arguments, $uid,
     $priority = 10, $includes = array()) {
 
-  if (!$job_name) {
-    watchdog('tripal', "Must provide a \$job_name argument to the tripal_add_job() function.");
-    return FALSE;
-  }
-  if (!$modulename) {
-    watchdog('tripal', "Must provide a \$modulename argument to the tripal_add_job() function.");
-    return FALSE;
-  }
-  if (!$callback) {
-    watchdog('tripal', "Must provide a \$callback argument to the tripal_add_job() function.");
-    return FALSE;
-  }
-  foreach ($includes as $include) {
-    require_once($include);
-  }
-  if (!function_exists($callback)) {
-    watchdog('tripal', "Must provide a valid callback function to the tripal_add_job() function.");
-    return FALSE;
-  }
-  if (!is_numeric($uid)) {
-    watchdog('tripal', "Must provide a numeric \$uid argument to the tripal_add_job() function.");
-    return FALSE;
-  }
-  if (!$priority or !is_numeric($priority) or $priority < 1 or $priority > 10) {
-    watchdog('tripal', "Must provide a numeric \$priority argument between 1 and 10 to the tripal_add_job() function.");
-    return FALSE;
-  }
-  if (!is_array($arguments)) {
-    watchdog('tripal', "Must provide an array as the \$arguments argument to the tripal_add_job() function.");
-    return FALSE;
-  }
-
   $user = user_load($uid);
 
-  // convert the arguments into a string for storage in the database
-  $args = array();
-  if (is_array($arguments)) {
-    $args = serialize($arguments);
-  }
-
-  $job_id = db_insert('tripal_jobs')
-  ->fields(array(
-    'job_name' => $job_name,
-    'modulename' => $modulename,
-    'callback' => $callback,
-    'status' => 'Waiting',
-    'submit_date' => time(),
-    'uid' => $uid,
-    # The lower the number the higher the priority.
-    'priority' => $priority,
-    'arguments' => $args,
-    'includes' => serialize($includes),
-  ))
-  ->execute();
-
-  if ($job_id) {
+  try {
+    $job = new TripalJob();
+    $job->create(array(
+      'job_name' => $job_name,
+      'modulename' => $modulename,
+      'callback' => $callback,
+      'arguments' => $arguments,
+      'uid' => $uid,
+      'priority' => $priority,
+      'includes' => $includes,
+    ));
+
+    // If no exceptions were thrown then we know the creation worked.  So
+    // let the user know!
     drupal_set_message(t("Job '%job_name' submitted.", array('%job_name' => $job_name)));
+
+    // If this is the Tripal admin user then give a bit more information
+    // about how to run the job.
     if (user_access('administer tripal')) {
       $jobs_url = url("admin/tripal/tripal_jobs");
       drupal_set_message(t("Check the <a href='!jobs_url'>jobs page</a> for status.",
-        array('!jobs_url' => $jobs_url)));
+          array('!jobs_url' => $jobs_url)));
       drupal_set_message(t("You can execute the job queue manually on the command line " .
-        "using the following Drush command: <br>drush trp-run-jobs --username=%uname --root=%base_path",
-        array('%base_path' => DRUPAL_ROOT, '%uname' => $user->name)));
+          "using the following Drush command: <br>drush trp-run-jobs --username=%uname --root=%base_path",
+          array('%base_path' => DRUPAL_ROOT, '%uname' => $user->name)));
     }
+    return $job->getJobID();
   }
-  else {
-    drupal_set_message(t("Failed to add job %job_name.", array('%job_name' => $job_name)));
+  catch (Exception $e) {
+    tripal_report_error('tripal', TRIPAL_ERROR, $e->getMessage());
+    drupal_set_message($e->getMessage(), 'error');
+    return FALSE;
   }
-
-  return $job_id;
 }
 
 /**
@@ -160,24 +124,22 @@ function tripal_add_job($job_name, $modulename, $callback, $arguments, $uid,
  *   The unique identifier of the job
  *
  * @return
- *   An object describing the job if a job is found or FALSE on failure.
+ *   An object representing a record from the tripal_job table or FALSE on
+ *   failure.
  *
  * @ingroup tripal_jobs_api
  */
 function tripal_get_job($job_id) {
-  if (!$job_id or !is_numeric($job_id)) {
-    watchdog('tripal', "Must provide a numeric \$job_id to the tripal_cancel_job() function.");
+  try {
+    $job = new TripalJob();
+    $job->load($job_id);
+    return $job->getJob();
+  }
+  catch (Exception $e) {
+    tripal_report_error('tripal', TRIPAL_ERROR, $e->getMessage());
+    drupal_set_message($e->getMessage(), 'error');
     return FALSE;
   }
-
-  $job = db_query('SELECT j.* FROM {tripal_jobs} j WHERE j.job_id=:job_id', array(':job_id' => $job_id))
-    ->fetchObject();
-
-  $job->submit_date_string = $job->submit_date ? format_date($job->submit_date) : '';
-  $job->start_time_string = $job->start_time ? format_date($job->start_time): '';
-  $job->end_time_string = $job->end_time ? format_date($job->end_time): '';
-
-  return $job;
 }
 
 /**
@@ -280,24 +242,47 @@ function tripal_rerun_job($job_id, $goto_jobs_page = TRUE) {
 
   $user_id = $user->uid;
 
-  $sql = "SELECT * FROM {tripal_jobs} WHERE job_id = :job_id";
-  $results = db_query($sql, array(':job_id' => $job_id));
-  $job = $results->fetchObject();
-  // arguments for jobs used to be stored as plain string with a double colon
-  // separating them.  But as of Tripal v2.0 the arguments are stored as
-  // a serialized array.  To be backwards compatible, we should check for serialization
-  // and if not then we will use the old style
-  $includes = unserialize($job->includes);
-  $args = unserialize($job->arguments);
-  if (!$args) {
-    $args = explode("::", $job->arguments);
+  $job = new TripalJob();
+  $job->load($job_id);
+
+  $arguments = $job->getArguments();
+  $includes = $job->getIncludes();
+
+  $newJob = new Tripaljob();
+  try {
+    $job->create(array(
+      'job_name' => $job->getJobName(),
+      'modulename' => $job->getModuleName(),
+      'callback' => $job->getCallback(),
+      'arguments' => $arguments,
+      'uid' => $user_id,
+      'priority' => $job->getPriority(),
+      'includes' => $includes
+    ));
+    // If no exceptions were thrown then we know the creation worked.  So
+    // let the user know!
+    drupal_set_message(t("Job '%job_name' submitted.", array('%job_name' => $job->getJobName())));
+
+    // If this is the Tripal admin user then give a bit more information
+    // about how to run the job.
+    if (user_access('administer tripal')) {
+      $jobs_url = url("admin/tripal/tripal_jobs");
+      drupal_set_message(t("Check the <a href='!jobs_url'>jobs page</a> for status.",
+          array('!jobs_url' => $jobs_url)));
+      drupal_set_message(t("You can execute the job queue manually on the command line " .
+          "using the following Drush command: <br>drush trp-run-jobs --username=%uname --root=%base_path",
+          array('%base_path' => DRUPAL_ROOT, '%uname' => $user->name)));
+    }
+    if ($goto_jobs_page) {
+      drupal_goto("admin/tripal/tripal_jobs");
+    }
   }
-  $job_id = tripal_add_job($job->job_name, $job->modulename, $job->callback, $args, $user_id, $job->priority, $includes);
-
-  if ($goto_jobs_page) {
-    drupal_goto("admin/tripal/tripal_jobs");
+  catch (Exception $e){
+    drupal_set_message($e->getMessage(), 'error');
+    tripal_report_error('tripal', TRIPAL_ERROR, $e->getMessage());
   }
-  return $job_id;
+
+
 }
 
 /**
@@ -319,28 +304,21 @@ function tripal_cancel_job($job_id, $redirect = TRUE) {
     return FALSE;
   }
 
-  $sql = "SELECT * FROM {tripal_jobs} WHERE job_id = :job_id";
-  $results = db_query($sql, array(':job_id' => $job_id));
-  $job = $results->fetchObject();
-
-  // set the end time for this job
-  if ($job->start_time == 0) {
-    $record = new stdClass();
-    $record->job_id = $job->job_id;
-    $record->end_time = time();
-    $record->status = 'Cancelled';
-    $record->progress = '0';
-    drupal_write_record('tripal_jobs', $record, 'job_id');
-    drupal_set_message(t("Job #%job_id cancelled", array('%job_id' => $job_id)));
-  }
-  else {
-    drupal_set_message(t("Job %job_id cannot be cancelled. It is in progress or has finished.", array('%job_id' => $job_id)));
-    return FALSE;
+  try {
+    $job = new Tripaljob();
+    $job->load($job_id);
+    $job->cancel();
+
+    drupal_set_message('Job is now cancelled.');
+    drupal_goto("admin/tripal/tripal_jobs");
+    return TRUE;
   }
-  if ($redirect) {
+  catch (Exception $e) {
+    tripal_report_error('tripal', TRIPAL_ERROR, $e->getMessage());
+    drupal_set_message($e->getMessage(), 'error');
     drupal_goto("admin/tripal/tripal_jobs");
+    return FALSE;
   }
-  return TRUE;
 }
 
 /**
@@ -364,9 +342,8 @@ function tripal_cancel_job($job_id, $redirect = TRUE) {
  */
 function tripal_launch_job($do_parallel = 0, $job_id = NULL, $max_jobs = -1, $single = 0) {
 
-  // first check if any jobs are currently running
-  // if they are, don't continue, we don't want to have
-  // more than one job script running at a time
+  // First check if any jobs are currently running. If they are, don't
+  // continue: we don't want more than one job script running at a time.
   if (!$do_parallel and tripal_is_job_running()) {
     print "Jobs are still running. Use the --parallel=1 option with the Drush command to run jobs in parallel.\n";
     return;
@@ -377,55 +354,45 @@ function tripal_launch_job($do_parallel = 0, $job_id = NULL, $max_jobs = -1, $si
     return;
   }
 
-  // get all jobs that have not started and order them such that
-  // they are processed in a FIFO manner.
+  // Get all jobs that have not started and order them such that they are
+  // processed in a FIFO manner.
   if ($job_id) {
-    $sql =  "SELECT * FROM {tripal_jobs} TJ " .
-            "WHERE TJ.start_time IS NULL and TJ.end_time IS NULL and TJ.job_id = :job_id " .
-            "ORDER BY priority ASC,job_id ASC";
-    $job_res = db_query($sql, array(':job_id' => $job_id));
+    $sql =  "
+      SELECT TJ.job_id
+      FROM {tripal_jobs} TJ
+      WHERE
+        TJ.start_time IS NULL AND
+        TJ.end_time IS NULL AND
+        TJ.job_id = :job_id AND
+        TJ.status =! 'Cancelled'
+      ORDER BY priority ASC, job_id ASC
+    ";
+    $jobs = db_query($sql, array(':job_id' => $job_id));
   }
   else {
-    $sql =  "SELECT * FROM {tripal_jobs} TJ " .
-            "WHERE TJ.start_time IS NULL and TJ.end_time IS NULL " .
-            "ORDER BY priority ASC,job_id ASC";
-    $job_res = db_query($sql);
+    $sql =  "
+      SELECT TJ.job_id
+      FROM {tripal_jobs} TJ
+      WHERE
+        TJ.start_time IS NULL AND
+        TJ.end_time IS NULL
+      ORDER BY priority ASC,job_id ASC
+    ";
+    $jobs = db_query($sql);
   }
-  print "There are " . $job_res->rowCount() . " jobs queued.\n";
-  foreach ($job_res as $job) {
+  print "There are " . $jobs->rowCount() . " jobs queued.\n";
+  foreach ($jobs as $jid) {
 
-    // Include the necessary files
-    foreach (unserialize($job->includes) as $path) {
-      if ($path) {
-        require_once $path;
-      }
-    }
-    // set the start time for this job
-    $record = new stdClass();
-    $record->job_id = $job->job_id;
-    $record->start_time = time();
-    $record->status = 'Running';
-    $record->pid = getmypid();
-    drupal_write_record('tripal_jobs', $record, 'job_id');
-
-    // call the function provided in the callback column.
-    // Add the job_id as the last item in the list of arguments. All
-    // callback functions should support this argument.
-    $callback = $job->callback;
-
-    // arguments for jobs used to be stored as plain string with a double colon
-    // separating them.  But as of Tripal v2.0 the arguments are stored as
-    // a serialized array.  To be backwards compatible, we should check for serialization
-    // and if not then we will use the old style
-    $args = unserialize($job->arguments);
-    if (!$args) {
-      $args = explode("::", $job->arguments);
-    }
-    $args[] = $job->job_id;
+    $job_id = $jid->job_id;
+
+    // Create the TripalJob object.
+    $job = new TripalJob();
+    $job->load($job_id);
 
     // We need to do some additional processing for printing since the switch
     // to serialized arrays now allows nested arrays which cause errors when
     // printed using implode alone.
+    $args = $job->getArguments();
     $string_args = array();
     foreach ($args as $k => $a) {
       if (is_array($a)) {
@@ -439,13 +406,16 @@ function tripal_launch_job($do_parallel = 0, $job_id = NULL, $max_jobs = -1, $si
       }
     }
 
-    print "Calling: $callback(" . implode(", ", $string_args) . ")\n";
-    call_user_func_array($callback, $args);
-    // set the end time for this job
-    $record->end_time = time();
-    $record->status = 'Completed';
-    $record->progress = '100';
-    drupal_write_record('tripal_jobs', $record, 'job_id');
+    // Run the job
+    $callback = $job->getCallback();
+    print "Calling Job #$job_id: $callback(" . implode(", ", $string_args) . ")\n";
+    try {
+      $job->run();
+    }
+    catch (Exception $e) {
+      $job->logMessage($e->getMessage(), array(), TRIPAL_ERROR);
+      drupal_set_message($e->getMessage(), 'error');
+    }
 
     if ($single) {
       // Don't start any more jobs
@@ -455,7 +425,7 @@ function tripal_launch_job($do_parallel = 0, $job_id = NULL, $max_jobs = -1, $si
       break;
     }
 
-    // send an email to the user advising that the job has finished
+    // TODO: Send an email to the user advising that the job has finished
   }
 }
 
@@ -474,16 +444,17 @@ function tripal_launch_job($do_parallel = 0, $job_id = NULL, $max_jobs = -1, $si
  */
 function tripal_set_job_progress($job_id, $percentage) {
 
-  if (preg_match("/^(\d+|100)$/", $percentage)) {
-    $record = new stdClass();
-    $record->job_id = $job_id;
-    $record->progress = $percentage;
-    if (drupal_write_record('tripal_jobs', $record, 'job_id')) {
-      return TRUE;
-    }
+  try {
+    $job = new TripalJob();
+    $job->load($job_id);
+    $job->setProgress($percentage);
   }
-
-  return FALSE;
+  catch (Exception $e) {
+    tripal_report_error('tripal', TRIPAL_ERROR, $e->getMessage());
+    drupal_set_message($e->getMessage(), 'error');
+    return FALSE;
+  }
+  return TRUE;
 }
 
 /**
@@ -494,22 +465,21 @@ function tripal_set_job_progress($job_id, $percentage) {
  *
  * @return
  *   A value between 0 and 100 indicating the percentage complete of the job.
+ *   FALSE on failure.
  */
 function tripal_get_job_progress($job_id) {
 
-  if (!$job_id) {
-    watchdog('tripal', "Must provide a \$$job_id argument to the tripal_get_job_progress() function.");
+  try {
+    $job = new TripalJob();
+    $job->load($job_id);
+    $progress = $job->getProgress();
+    return $progress;
+  }
+  catch (Exception $e) {
+    tripal_report_error('tripal', TRIPAL_ERROR, $e->getMessage());
+    drupal_set_message($e->getMessage(), 'error');
     return FALSE;
   }
-
-  // Get the progress.
-  $progress = db_select('tripal_jobs', 'tj')
-    ->fields('tj', array('progress'))
-    ->condition('job_id', $job_id)
-    ->execute()
-    ->fetchField();
-
-  return $progress;
 }
 /**
  * Returns a list of jobs that are active.

+ 11 - 6
tripal/api/tripal.notice.api.inc

@@ -26,10 +26,13 @@ define('TRIPAL_NOTICE',5);
 define('TRIPAL_INFO',6);
 define('TRIPAL_DEBUG',7);
 
+
+
 /**
- * Provide better error notice for Tripal. If the environment variable
- * 'TRIPAL_DEBUG' is set to 1 then this function will add backtrace
- * information to the message.
+ * Provide better error notice for Tripal.
+ *
+ * If the environment variable 'TRIPAL_DEBUG' is set to 1 then this function
+ * will add backtrace information to the message.
  *
  * @param $type
  *   The category to which this message belongs. Can be any string, but the
@@ -83,7 +86,7 @@ function tripal_report_error($type, $severity, $message, $variables = array(), $
       break;
   }
 
-  // get the backtrace and include in the error message, but only if the
+  // Get the backtrace and include in the error message, but only if the
   // TRIPAL_DEBUG environment variable is set.
   if (getenv('TRIPAL_DEBUG') == 1) {
     $backtrace = debug_backtrace();
@@ -123,8 +126,10 @@ function tripal_report_error($type, $severity, $message, $variables = array(), $
 }
 
 /**
- * Display messages to tripal administrators. This can be used instead of
- * drupal_set_message when you want to target tripal administrators.
+ * Display messages to tripal administrators.
+ *
+ * This can be used instead of drupal_set_message when you want to target
+ * tripal administrators.
  *
  * @param $message
  *   The message to be displayed to the tripal administrators

+ 185 - 6
tripal/includes/TripalImporter.inc

@@ -1,5 +1,6 @@
 <?php
 class TripalImporter {
+
   // --------------------------------------------------------------------------
   //                     EDITABLE STATIC CONSTANTS
   //
@@ -43,6 +44,67 @@ class TripalImporter {
    */
   public static $upload_title = 'File Upload';
 
+  /**
+   * If the loader should require an analysis record.  To maintain provenance
+   * we should always indicate where the data we are uploading comes from.
+   * Tripal attempts to do this by associating upload files
+   * with an analysis record.  The analysis record provides the details for
+   * how the file was created or obtained. Set this to FALSE if the loader
+   * should not require an analysis when loading. if $use_analysis is set to
+   * true then the form values will have an 'analysis_id' key in the $form_state
+   * array on submitted forms.
+   */
+  public static $use_analysis = TRUE;
+
+  /**
+   * If the $use_analysis value is set above then this value indicates if the
+   * analysis should be required.
+   */
+  public static $require_analysis = TRUE;
+
+  /**
+   * Text that should appear on the button at the bottom of the importer
+   * form.
+   */
+  public static $button_text = 'Import File';
+
+
+  // --------------------------------------------------------------------------
+  //                  PRIVATE MEMBERS -- DO NOT EDIT
+  // --------------------------------------------------------------------------
+  /**
+   * The job that this importer is associated with.  This is needed for
+   * updating the status of the job.
+   */
+  private $job = NULL;
+
+  /**
+   * The number of items that this importer needs to process. A progress
+   * can be calculated by dividing the number of items processed by this
+   * number.
+   */
+  private $total_items = 0;
+
+  /**
+   * The number of items that have been handled so far.  This must never
+   * be below 0 and never exceed $total_items;
+   */
+  private $num_handled = 0;
+
+  /**
+   * The interval when the job progress should be updated. Updating the job
+   * progress incurs a database write which takes time and if it occurs too
+   * frequently can slow down the loader.  This should be a value between
+   * 0 and 100 to indicate a percent interval (e.g. 1 means update the
+   * progress every time the num_handled increases by 1%).
+   */
+  private $interval = 1;
+
+  /**
+   * Each time the job progress is updated this variable gets set.  It is
+   * used to calculate if the $interval has passed for the next update.
+   */
+  private $prev_update = 0;
 
   // --------------------------------------------------------------------------
   //                          CONSTRUCTORS
@@ -50,10 +112,11 @@ class TripalImporter {
   /**
    * Instantiates a new TripalImporter object.
    *
-   * @param $job_id
+   * @param TripalJob $job
+   *   An optional TripalJob object that this loader is associated with.
    */
-  public function __construct() {
-
+  public function __construct(TripalJob $job = NULL) {
+    $this->job = $job;
   }
 
   // --------------------------------------------------------------------------
@@ -66,7 +129,6 @@ class TripalImporter {
   // --------------------------------------------------------------------------
 
   public function form($form, &$form_state) {
-
     return $form;
   }
   public function formSubmit($form, &$form_state) {
@@ -78,11 +140,128 @@ class TripalImporter {
   public function preRun($details) {
 
   }
-  public function run($details, $job_id = NULL) {
-
+  public function run($details) {
   }
   public function postRun($details) {
 
   }
+  /**
+   * Logs a message for the importer.
+   *
+   * There is no distinction between status messages and error logs.  Any
+   * message that is intended for the user to review the status of the loading
+   * can be provided here.  If this importer is associated with a job then
+   * the logging is passed on to the job for storage.
+   *
+   * Messages that are of severity TRIPAL_CRITICAL or TRIPAL_ERROR
+   * are also logged to the watchdog.
+   *
+   * @param $message
+   *   The message to store in the log. Keep $message translatable by not
+   *   concatenating dynamic values into it! Variables in the message should
+   *   be added by using placeholder strings alongside the variables argument
+   *   to declare the value of the placeholders. See t() for documentation on
+   *   how $message and $variables interact.
+   * @param $variables
+   *   Array of variables to replace in the message on display or NULL if
+   *   message is already translated or not possible to translate.
+   * @param $severity
+   *   The severity of the message; one of the following values:
+   *     - TRIPAL_CRITICAL: Critical conditions.
+   *     - TRIPAL_ERROR: Error conditions.
+   *     - TRIPAL_WARNING: Warning conditions.
+   *     - TRIPAL_NOTICE: Normal but significant conditions.
+   *     - TRIPAL_INFO: (default) Informational messages.
+   *     - TRIPAL_DEBUG: Debug-level messages.
+   */
+  protected function logMessage($message, $variables = array(), $severity = TRIPAL_INFO) {
+    // Translate once; the translated text is what gets shown to people.
+    $translated = t($message, $variables);
+
+    // Echo the message so that command-line users can follow along.
+    print $translated . "\n";
+
+    // When a job is attached it takes over the logging.  It receives the
+    // untranslated message together with its placeholder values.
+    if ($this->job) {
+      $this->job->logMessage($message, $variables, $severity);
+      return;
+    }
+
+    // No job: surface errors and warnings through drupal_set_message().
+    $is_error = ($severity == TRIPAL_CRITICAL or $severity == TRIPAL_ERROR);
+    if ($is_error) {
+      drupal_set_message($translated, 'error');
+    }
+    if ($severity == TRIPAL_WARNING) {
+      drupal_set_message($translated, 'warning');
+    }
+  }
 
+  /**
+   * Sets the total number of items to be processed.
+   *
+   * This should typically be called near the beginning of the loading process
+   * to indicate the number of items that must be processed.
+   *
+   * @param $total_items
+   *   The total number of items to process.
+   */
+  protected function setTotalItems($total_items) {
+     $this->total_items = $total_items;  // Divisor used by setItemsHandled() when computing the percent complete.
+  }
+  /**
+   * Adds to the count of the total number of items that have been handled.
+   *
+   * @param $num_handled
+   *   The number of additional items that have been handled.
+   */
+  protected function addItemsHandled($num_handled) {
+    // Grow the running count first, then hand the new total to
+    // setItemsHandled() so the progress reporting is evaluated.
+    $this->num_handled += $num_handled;
+    $this->setItemsHandled($this->num_handled);
+  }
+  /**
+   * Sets the number of items that have been processed.
+   *
+   * This should be called anytime the loader wants to indicate how many
+   * items have been processed.  The amount of progress will be
+   * calculated using this number.  If the amount of items handled exceeds
+   * the interval specified then the progress is reported to the user.  If
+   * this loader is associated with a job then the job progress is also updated.
+   *
+   * @param $total_handled
+   *   The total number of items that have been processed.
+   */
+  protected function setItemsHandled($total_handled) {
+    // First set the number of items handled.
+    $this->num_handled = $total_handled;
+
+    // Without a positive total there is nothing to compute a percentage
+    // against, and dividing by zero would raise a warning/error.
+    if ($this->total_items <= 0) {
+      return;
+    }
+
+    // Now see if we need to report to the user the percent done.  A message
+    // will be printed on the command-line if the job is run there.
+    $percent = sprintf("%.2f", ($this->num_handled / $this->total_items) * 100);
+    $diff = $percent - $this->prev_update;
+
+    // Compare against the configured interval (the object property, not an
+    // undefined local) so that updates are actually throttled.
+    if ($diff >= $this->interval) {
+      // Remember the percentage at which we last reported, so that the next
+      // difference is measured from this point.
+      $this->prev_update = $percent;
+      $memory = number_format(memory_get_usage());
+      print "Percent complete: " . $percent . "%. Memory: " . $memory . " bytes.\r";
+
+      // If we have a job then update the job progress too.
+      if ($this->job) {
+        $this->job->setProgress($percent);
+      }
+    }
+  }
+
+  /**
+   * Updates the percent interval when the job progress is updated.
+   *
+   * Updating the job
+   * progress incurs a database write which takes time and if it occurs too
+   * frequently can slow down the loader.  This should be a value between
+   * 0 and 100 to indicate a percent interval (e.g. 1 means update the
+   * progress every time the num_handled increases by 1%).
+   *
+   * @param $interval
+   *   A number between 0 and 100.
+   */
+  protected function setInterval($interval) {
+    $this->interval = $interval;  // Stored as given; no range check is applied here.
+  }
 }

+ 468 - 0
tripal/includes/TripalJob.inc

@@ -0,0 +1,468 @@
+<?php
+
+class TripalJob {
+
+  /**
+   * The ID of the job.
+   */
+  private $job_id = NULL;
+
+  /**
+   * Contains the job record for this job.
+   */
+  private $job = NULL;
+
+  /**
+   * Instantiates a new TripalJob object.
+   *
+   * By default the job object is "empty". It must be associated with
+   * job details either by calling the load() function or the
+   * create() function.
+   */
+  public function __construct() {
+    // NOTE(review): this class declares no $log property and nothing else in
+    // the class reads it, so this assignment creates the property
+    // dynamically.  Confirm whether a declaration is missing or whether this
+    // line can be dropped.
+    $this->log = '';
+  }
+
+  /**
+   * Loads a job for this object.
+   *
+   * The job record is read from the tripal_jobs table.  Human readable
+   * versions of the date fields are added to the record, and the includes and
+   * arguments are converted into arrays.
+   *
+   * @param $job_id
+   *   The ID of the job.
+   *
+   * @throws Exception
+   *   If $job_id is empty or not numeric, or if no job with that ID exists.
+   */
+  public function load($job_id) {
+    if (!$job_id or !is_numeric($job_id)) {
+      throw new Exception("Must provide a numeric \$job_id to the TripalJob::load() function.");
+    }
+
+    $sql = 'SELECT j.* FROM {tripal_jobs} j WHERE j.job_id = :job_id';
+    $args = array(':job_id' => $job_id);
+    $this->job = db_query($sql, $args)->fetchObject();
+    if (!$this->job) {
+      throw new Exception("Cannot find a job with this ID provided.");
+    }
+
+    // Fix the date/time fields.
+    $this->job->submit_date_string = $this->job->submit_date ? format_date($this->job->submit_date) : '';
+    $this->job->start_time_string = $this->job->start_time ? format_date($this->job->start_time): '';
+    $this->job->end_time_string = $this->job->end_time ? format_date($this->job->end_time): '';
+
+    // Unserialize the includes.  Fall back to an empty list if the stored
+    // value cannot be unserialized so that callers can always iterate.
+    $includes = unserialize($this->job->includes);
+    $this->job->includes = is_array($includes) ? $includes : array();
+
+    // Arguments for jobs used to be stored as plain string with a double colon
+    // separating them.  But as of Tripal v2.0 the arguments are stored as
+    // a serialized array.  To be backwards compatible, we should check for
+    // serialization and if not then we will use the old style.  The raw
+    // value is kept aside because the old-style split must be applied to the
+    // stored string, not to the failed result of unserialize().
+    $raw_arguments = $this->job->arguments;
+    $arguments = unserialize($raw_arguments);
+    if (!is_array($arguments)) {
+      $arguments = explode("::", $raw_arguments);
+    }
+    $this->job->arguments = $arguments;
+
+  }
+
+  /**
+   * Creates a new job.
+   *
+   * The job is validated, added to the tripal_jobs table with a status of
+   * 'Waiting' and then loaded into this object.
+   *
+   * @param $details
+   *   An associative array of the job details.  The following keys are
+   *   allowed:
+   *   - job_name: The human readable name for the job.
+   *   - modulename: The name of the module adding the job.
+   *   - callback: The name of a function to be called when the job is executed.
+   *   - arguments:  An array of arguments to be passed on to the callback.
+   *   - uid: The uid of the user adding the job
+   *   - priority: The priority at which to run the job where the highest
+   *     priority is 10 and the lowest priority is 1. The default
+   *     priority is 10.
+   *   - includes: An array of paths to files that should be included in order
+   *     to execute the job. Use the module_load_include function to get a path
+   *     for a given file.
+   *
+   * @throws Exception
+   *   On failure an exception is thrown.
+   */
+  public function create($details) {
+
+    // Set some defaults
+    if (!array_key_exists('priority', $details)) {
+      $details['priority'] = 10;
+    }
+    if (!array_key_exists('includes', $details)) {
+      $details['includes'] = array();
+    }
+
+    // Make sure the arguments are correct.  empty() is used so that a missing
+    // key is reported through the exception rather than as a PHP notice.
+    if (empty($details['job_name'])) {
+      throw new Exception("Must provide a \$job_name argument to the tripal_add_job() function.");
+    }
+    if (empty($details['modulename'])) {
+      throw new Exception("Must provide a \$modulename argument to the tripal_add_job() function.");
+    }
+    if (empty($details['callback'])) {
+      throw new Exception("Must provide a \$callback argument to the tripal_add_job() function.");
+    }
+
+    // The include files must be loaded before checking for the callback
+    // because the callback may be defined in one of them.
+    $includes = $details['includes'];
+    foreach ($includes as $include) {
+      require_once($include);
+    }
+    if (!function_exists($details['callback'])) {
+      throw new Exception("Must provide a valid callback function to the tripal_add_job() function.");
+    }
+    if (!isset($details['uid']) or !is_numeric($details['uid'])) {
+     throw new Exception("Must provide a numeric \$uid argument to the tripal_add_job() function.");
+    }
+    $priority = $details['priority'];
+    if (!$priority or !is_numeric($priority) or $priority < 1 or $priority > 10) {
+      throw new Exception("Must provide a numeric \$priority argument between 1 and 10 to the tripal_add_job() function.");
+    }
+    $arguments = isset($details['arguments']) ? $details['arguments'] : NULL;
+    if (!is_array($arguments)) {
+      throw new Exception("Must provide an array as the \$arguments argument to the tripal_add_job() function.");
+    }
+
+    // convert the arguments into a string for storage in the database
+    $args = serialize($arguments);
+
+    try {
+      $job_id = db_insert('tripal_jobs')
+        ->fields(array(
+          'job_name' => $details['job_name'],
+          'modulename' => $details['modulename'],
+          'callback' => $details['callback'],
+          'status' => 'Waiting',
+          'submit_date' => time(),
+          'uid' => $details['uid'],
+          'priority' => $priority,
+          'arguments' => $args,
+          'includes' => serialize($includes),
+        ))
+        ->execute();
+      // Now load the job into this object.
+      $this->load($job_id);
+    }
+    catch (Exception $e) {
+      throw new Exception('Cannot create job: ' .  $e->getMessage());
+    }
+  }
+
+  /**
+   * Cancels the job and prevents it from running.
+   *
+   * A job that is running, completed, in an error state or already cancelled
+   * cannot be cancelled.  Otherwise, if the job has no start time, its
+   * database record is marked as 'Cancelled' with a progress of 0.  The job
+   * record held by this object is not modified.
+   *
+   * @throws Exception
+   *   If no job is associated with this object, if the job is in a state
+   *   that cannot be cancelled, or if the record cannot be written.
+   */
+  public function cancel() {
+
+    if (!$this->job) {
+      throw new Exception("There is no job associated with this object. Cannot cancel");
+    }
+
+    // Each of these states rules out cancellation.  They are checked in the
+    // order listed.
+    $refusals = array(
+      'Running' => "Job Cannot be cancelled it is currently running.",
+      'Completed' => "Job Cannot be cancelled it has already finished.",
+      'Error' => "Job Cannot be cancelled it is in an error state.",
+      'Cancelled' => "Job Cannot be cancelled it is already cancelled.",
+    );
+    foreach ($refusals as $state => $reason) {
+      if ($this->job->status == $state) {
+        throw new Exception($reason);
+      }
+    }
+
+    // Only a job that has no start time is flagged as cancelled.
+    if ($this->job->start_time != 0) {
+      return;
+    }
+
+    try {
+      $cancelled = new stdClass();
+      $cancelled->job_id = $this->job->job_id;
+      $cancelled->status = 'Cancelled';
+      $cancelled->progress = '0';
+      drupal_write_record('tripal_jobs', $cancelled, 'job_id');
+    }
+    catch (Exception $e) {
+      throw new Exception('Cannot cancel job: ' .  $e->getMessage());
+    }
+  }
+
+  /**
+   * Executes the job.
+   *
+   * Loads the include files for the job, marks the job as 'Running', calls
+   * the callback function and then marks the job as 'Completed'.  If an
+   * exception is thrown along the way the job is marked as 'Error' and the
+   * failure is reported with drupal_set_message(); the exception is not
+   * re-thrown.
+   *
+   * @throws Exception
+   *   If no job is associated with this object.
+   */
+  public function run() {
+
+    if (!$this->job) {
+      throw new Exception('Cannot launch job as no job is associated with this object.');
+    }
+
+    try {
+
+      // Include the necessary files needed to run the job.
+      foreach ($this->job->includes as $path) {
+        if ($path) {
+          require_once $path;
+        }
+      }
+
+      // Set the start time for this job.
+      $record = new stdClass();
+      $record->job_id = $this->job->job_id;
+      $record->start_time = time();
+      $record->status = 'Running';
+      $record->pid = getmypid();
+      drupal_write_record('tripal_jobs', $record, 'job_id');
+
+
+      // Callback functions need the job in order to update
+      // progress.  But prior to Tripal v3 the job callback functions
+      // only accepted a $job_id as the final argument.  So, we need
+      // to see if the callback is Tv3 compatible or older.  If older
+      // we want to still support it and pass the job_id.
+      $arguments = $this->job->arguments;
+      $callback = $this->job->callback;
+      $ref = new ReflectionFunction($callback);
+      $refparams = $ref->getParameters();
+      if (count($refparams) > 0) {
+        $lastparam = $refparams[count($refparams)-1];
+        if ($lastparam->getName() == 'job_id') {
+          $arguments[] = $this->job->job_id;
+        }
+        else {
+          $arguments[] = $this;
+        }
+      }
+
+      // Launch the job.
+      call_user_func_array($callback, $arguments);
+
+      // Set the end time for this job.
+      $record = new stdClass();
+      $record->job_id = $this->job->job_id;
+      $record->end_time = time();
+      $record->error_msg = $this->job->error_msg;
+      $record->progress = $this->job->progress;
+      $record->status = 'Completed';
+      $record->pid = '';
+
+      drupal_write_record('tripal_jobs', $record, 'job_id');
+      $this->load($this->job->job_id);
+    }
+    catch (Exception $e) {
+      // Always build a fresh record here.  The exception may have been thrown
+      // before any record was created above (e.g. by an include file), in
+      // which case there would be no record object to write to.
+      $record = new stdClass();
+      $record->job_id = $this->job->job_id;
+      $record->end_time = time();
+      $record->error_msg = $this->job->error_msg;
+      $record->progress = $this->job->progress;
+      $record->status = 'Error';
+      $record->pid = '';
+      drupal_write_record('tripal_jobs', $record, 'job_id');
+      drupal_set_message('Job execution failed: ' . $e->getMessage(), 'error');
+    }
+  }
+
+  /**
+   * Indicates if the job is running.
+   *
+   * The check is made by asking the `ps` command whether a process with the
+   * PID recorded for the job exists.
+   *
+   * @return
+   *   TRUE if the job is running, FALSE otherwise.
+   *
+   * @throws Exception
+   *   If no job is associated with this object.
+   */
+  public function isRunning() {
+    if (!$this->job) {
+      throw new Exception('Cannot check running status as no job is associated with this object.');
+    }
+
+    // Without a recorded PID the job cannot be running, and there is nothing
+    // meaningful to ask `ps` about.
+    if (!$this->job->pid) {
+      return FALSE;
+    }
+
+    $status = shell_exec('ps -p ' . escapeshellarg($this->job->pid) . ' -o pid=');
+    if ($status) {
+      // The job is still running.
+      return TRUE;
+    }
+    // return FALSE to indicate that the job is not currently running.
+    return FALSE;
+  }
+
+  /**
+   * Retrieve the job object as if from a database query.
+   *
+   * @return
+   *   The job record held by this object, or NULL if no job has been
+   *   associated with it yet.
+   */
+  public function getJob(){
+    return $this->job;
+  }
+  /**
+   * Retrieves the job ID.
+   *
+   * A job must already be associated with this object.
+   *
+   * @return
+   *   The job_id value of the job record.
+   */
+  public function getJobID(){
+    return $this->job->job_id;
+  }
+  /**
+   * Retrieves the user ID of the user that submitted the job.
+   *
+   * A job must already be associated with this object.
+   *
+   * @return
+   *   The uid value of the job record.
+   */
+  public function getUID() {
+    return $this->job->uid;
+  }
+  /**
+   * Retrieves the job name.
+   *
+   * A job must already be associated with this object.
+   *
+   * @return
+   *   The job_name value of the job record.
+   */
+  public function getJobName() {
+    return $this->job->job_name;
+  }
+  /**
+   * Retrieves the name of the module that submitted the job.
+   *
+   * A job must already be associated with this object.
+   *
+   * @return
+   *   The modulename value of the job record.
+   */
+  public function getModuleName() {
+    return $this->job->modulename;
+  }
+  /**
+   * Retrieves the callback function for the job.
+   *
+   * A job must already be associated with this object.
+   *
+   * @return
+   *   The callback value of the job record (the name of the function that is
+   *   called when the job is run).
+   */
+  public function getCallback() {
+    return $this->job->callback;
+  }
+  /**
+   * Retrieves the array of arguments for the job.
+   *
+   * A job must already be associated with this object.
+   *
+   * @return
+   *   The arguments of the job record as prepared when the job was loaded.
+   */
+  public function getArguments() {
+    return $this->job->arguments;
+  }
+  /**
+   * Retrieves the current percent complete (i.e. progress) of the job.
+   *
+   * A job must already be associated with this object.
+   *
+   * @return
+   *   The progress value of the job record held by this object.
+   */
+  public function getProgress() {
+    return $this->job->progress;
+  }
+  /**
+   * Sets the current percent complete of a job.
+   *
+   * The value is kept as given on the job record held by this object, and is
+   * written to the tripal_jobs table formatted as an integer.
+   *
+   * @param $percent_done
+   *   A value between 0 and 100 indicating the percentage complete of the job.
+   *
+   * @throws Exception
+   *   If no job is associated with this object.
+   */
+  public function setProgress($percent_done) {
+    if (!$this->job) {
+      throw new Exception('Cannot set progress as no job is associated with this object.');
+    }
+
+    // Keep the value that was passed in on the in-memory record.
+    $this->job->progress = $percent_done;
+
+    // The database receives the value formatted as an integer.
+    $whole_percent = sprintf("%d", $percent_done);
+    $update = db_update('tripal_jobs');
+    $update->fields(array('progress' => $whole_percent));
+    $update->condition('job_id', $this->job->job_id);
+    $update->execute();
+  }
+  /**
+   * Retrieves the status of the job.
+   *
+   * A job must already be associated with this object.
+   *
+   * @return
+   *   The status value of the job record held by this object.
+   */
+  public function getStatus() {
+    return $this->job->status;
+  }
+  /**
+   * Retrieves the time the job was submitted.
+   *
+   * A job must already be associated with this object.
+   *
+   * @return
+   *   The submit_date value of the job record.
+   */
+  public function getSubmitTime() {
+    return $this->job->submit_date;
+  }
+  /**
+   * Retrieves the time the job began execution (i.e. the start time).
+   *
+   * A job must already be associated with this object.
+   *
+   * @return
+   *   The start_time value of the job record.
+   */
+  public function getStartTime() {
+    return $this->job->start_time;
+  }
+  /**
+   * Retrieves the time the job completed execution (i.e. the end time).
+   *
+   * A job must already be associated with this object.
+   *
+   * @return
+   *   The end_time value of the job record.
+   */
+  public function getEndTime() {
+    return $this->job->end_time;
+  }
+  /**
+   * Retrieves the log for the job.
+   *
+   * A job must already be associated with this object.  The log is held in
+   * the error_msg field of the job record.
+   *
+   * @return
+   *   A large string containing the text of the job log.  It contains both
+   *   status updates and errors.
+   */
+  public function getLog() {
+    return $this->job->error_msg;
+  }
+  /**
+   * Retrieves the process ID of the job.
+   *
+   * A job must already be associated with this object.
+   *
+   * @return
+   *   The pid value of the job record.
+   */
+  public function getPID() {
+    return $this->job->pid;
+  }
+  /**
+   * Retrieves the priority that is currently set for the job.
+   *
+   * A job must already be associated with this object.
+   *
+   * @return
+   *   The priority value of the job record.
+   */
+  public function getPriority() {
+    return $this->job->priority;
+  }
+  /**
+   * Get the MLock value of the job.
+   *
+   * The MLock value indicates if no other jobs from a given module
+   * should be executed while this job is running.
+   *
+   * A job must already be associated with this object.
+   *
+   * @return
+   *   The mlock value of the job record.
+   */
+  public function getMLock() {
+    return $this->job->mlock;
+  }
+  /**
+   * Get the lock value of the job.
+   *
+   * The lock value indicates if no other jobs from any module
+   * should be executed while this job is running.
+   *
+   * A job must already be associated with this object.
+   *
+   * @return
+   *   The lock value of the job record.
+   */
+  public function getLock() {
+    return $this->job->lock;
+  }
+  /**
+   * Get the list of files that must be included prior to job execution.
+   *
+   * A job must already be associated with this object.
+   *
+   * @return
+   *   The includes of the job record, as unserialized when the job was
+   *   loaded.
+   */
+  public function getIncludes() {
+    return $this->job->includes;
+  }
+
+  /**
+   * Logs a message for the job.
+   *
+   * There is no distinction between status messages and error logs.  Any
+   * message that is intended for the user to review the status of the job
+   * can be provided here.
+   *
+   * The translated message is appended, on a new line, to the error_msg
+   * field of the job record held by this object.  This function does not
+   * itself write the record to the database.
+   *
+   * Messages that are of severity TRIPAL_CRITICAL or TRIPAL_ERROR
+   * are also passed to tripal_report_error().
+   *
+   * @param $message
+   *   The message to store in the log. Keep $message translatable by not
+   *   concatenating dynamic values into it! Variables in the message should
+   *   be added by using placeholder strings alongside the variables argument
+   *   to declare the value of the placeholders. See t() for documentation on
+   *   how $message and $variables interact.
+   * @param $variables
+   *   Array of variables to replace in the message on display or NULL if
+   *   message is already translated or not possible to translate.
+   * @param $severity
+   *   The severity of the message; one of the following values:
+   *     - TRIPAL_CRITICAL: Critical conditions.
+   *     - TRIPAL_ERROR: Error conditions.
+   *     - TRIPAL_WARNING: Warning conditions.
+   *     - TRIPAL_NOTICE: Normal but significant conditions.
+   *     - TRIPAL_INFO: (default) Informational messages.
+   *     - TRIPAL_DEBUG: Debug-level messages.
+   */
+  public function logMessage($message, $variables = array(), $severity = TRIPAL_INFO) {
+    // Translate the message and append it to the job's log on a new line.
+    $translated = t($message, $variables);
+    $this->job->error_msg = $this->job->error_msg . "\n" . $translated;
+
+    // Only the two most severe levels are reported further.
+    $reported_levels = array(TRIPAL_CRITICAL, TRIPAL_ERROR);
+    if (in_array($severity, $reported_levels)) {
+      tripal_report_error('tripal_job', $severity, $message, $variables);
+    }
+  }
+}

+ 24 - 1
tripal/includes/tripal.importer.inc

@@ -36,13 +36,36 @@ function tripal_get_importer_form($form, &$form_state, $class) {
     '#description'   => t('If the file is local to the Tripal server please provide the full path here.'),
   );
 
+  if ($class::$use_analysis) {
+    // get the list of analyses
+    $sql = "SELECT * FROM {analysis} ORDER BY name";
+    $org_rset = chado_query($sql);
+    $analyses = array();
+    $analyses[''] = '';
+    while ($analysis = $org_rset->fetchObject()) {
+      $analyses[$analysis->analysis_id] = "$analysis->name ($analysis->program $analysis->programversion, $analysis->sourcename)";
+    }
+    $form['analysis_id'] = array(
+      '#title'       => t('Analysis'),
+      '#type'        => t('select'),
+      '#description' => t('Choose the analysis to which the uploaded data will be associated. ' .
+        'Why specify an analysis for a data load?  All data comes from some place, even if ' .
+        'downloaded from a website. By specifying analysis details for all data imports it ' .
+        'provides provenance and helps end user to reproduce the data set if needed. At ' .
+        'a minimum it indicates the source of the data.'),
+      '#required'    => $class::$require_analysis,
+      '#options'     => $analyses,
+    );
+  }
+
+
   $importer = new $class();
   $element = array();
   $form['class_elements'] = $importer->form($element, $form_state);
 
   $form['button'] = array(
     '#type' => 'submit',
-    '#value' => t('Import ' . $class),
+    '#value' => t($class::$button_text),
     '#weight' => 10,
   );
   return $form;

+ 1 - 0
tripal/tripal.module

@@ -29,6 +29,7 @@ require_once "includes/TripalFields/TripalField.inc";
 require_once "includes/TripalFields/TripalFieldWidget.inc";
 require_once "includes/TripalFields/TripalFieldFormatter.inc";
 require_once "includes/TripalFieldQuery.inc";
+require_once "includes/TripalJob.inc";
 require_once "includes/TripalImporter.inc";
 
 /**

+ 3 - 5
tripal/tripal.views.inc

@@ -30,7 +30,7 @@ function tripal_views_data() {
   $data = array();
   // Job Management System
   tripal_views_data_jobs($data);
-  tripal_entity_views_data($data);
+  tripal_views_data_tripal_entity($data);
   tripal_views_data_fields($data);
 
   return $data;
@@ -78,7 +78,7 @@ function tripal_views_data_fields(&$data) {
 /**
  * Integrates the TripalEntity entities with Drupal Views.
  */
-function tripal_entity_views_data(&$data) {
+function tripal_views_data_tripal_entity(&$data) {
 
   // Get the list of all of the bundles (entity types) and add them
   // as "base tables" for views.
@@ -122,7 +122,7 @@ function tripal_entity_views_data(&$data) {
  *
  * @ingroup tripal
  */
-function tripal_views_data_jobs($data) {
+function tripal_views_data_jobs(&$data) {
 
   $data['tripal_jobs']['table']['group'] = t('Tripal Jobs');
   $data['tripal_jobs']['table']['base'] = array(
@@ -371,6 +371,4 @@ function tripal_views_data_jobs($data) {
       'handler' => 'views_handler_sort',
     ),
   );
-
-  return $data;
 }

+ 3 - 0
tripal_chado/api/tripal_chado.mviews.api.inc

@@ -354,6 +354,7 @@ function tripal_populate_mview($mview_id) {
     // execute the query inside a transaction so that it doesn't destroy existing data
     // that may leave parts of the site unfunctional
     $transaction = db_transaction();
+    $previous_db = chado_set_active('chado');  // use chado database
     try {
       $success = chado_query("DELETE FROM {" . $mview->mv_table . "}");
       $success = chado_query("INSERT INTO {" . $mview->mv_table . "} ($mview->query)");
@@ -372,8 +373,10 @@ function tripal_populate_mview($mview_id) {
       else {
         throw new Exception("ERROR populating the materialized view ". $mview->mv_table . ". See Drupal's recent log entries for details.");
       }
+      chado_set_active($previous_db);
     }
     catch (Exception $e) {
+      chado_set_active($previous_db);
       $transaction->rollback();
       // print and save the error message
       $record = new stdClass();

+ 911 - 0
tripal_chado/includes/TripalImporter/FASTAImporter.inc

@@ -0,0 +1,911 @@
+<?php
+class FASTAImporter extends TripalImporter {
+  /**
+   * The name of this loader.  This name will be presented to the site
+   * user.
+   */
+  public static $name = 'Chado FASTA Loader';
+
+  /**
+   * The machine name for this loader. This name will be used to construct
+   * the URL for the loader.
+   */
+  public static $machine_name = 'fasta_loader';
+
+  /**
+   * A brief description for this loader.  This description will be
+   * presented to the site user.
+   */
+  public static $description = 'Load sequences from a multi-FASTA file into Chado';
+
+  /**
+   * An array containing the extensions of allowed file types.
+   */
+  public static $file_types = array('fasta');
+
+
+  /**
+   * Provides information to the user about the file upload.  Typically this
+   * may include a description of the file types allowed.
+   */
+  public static $upload_description = 'Please provide the FASTA file. The file must have a .fasta extension.';
+
+  /**
+   * The title that should appear above the upload button.
+   */
+  public static $upload_title = 'FASTA Upload';
+
+  /**
+   * Text that should appear on the button at the bottom of the importer
+   * form.
+   */
+  public static $button_text = 'Import FASTA file';
+
+  /**
+   * Builds the FASTA specific elements of the importer form.
+   *
+   * Adds elements for the organism, the sequence type, the insert/update
+   * method and the name match type, followed by a collapsed fieldset of
+   * additional options: regular expressions for the name and unique name,
+   * an external database cross reference, and a relationship to existing
+   * features.
+   *
+   * @param $form
+   *   The form array to which the elements are added.
+   * @param $form_state
+   *   The current form state.
+   *
+   * @return
+   *   The form array with the FASTA elements added.
+   *
+   * @see TripalImporter::form()
+   */
+  public function form($form, &$form_state) {
+
+    // get the list of organisms
+    $sql = "SELECT * FROM {organism} ORDER BY genus, species";
+    $org_rset = chado_query($sql);
+    $organisms = array();
+    $organisms[''] = '';
+    while ($organism = $org_rset->fetchObject()) {
+      $organisms[$organism->organism_id] = "$organism->genus $organism->species ($organism->common_name)";
+    }
+    // The '#type' value is a machine name for the element type and therefore
+    // must not be passed through t().
+    $form['organism_id'] = array(
+      '#title' => t('Organism'),
+      '#type' => 'select',
+      '#description' => t("Choose the organism to which these sequences are associated"),
+      '#required' => TRUE,
+      '#options' => $organisms
+    );
+
+    // get the sequence ontology CV ID
+    $values = array('name' => 'sequence');
+    $cv = chado_select_record('cv', array('cv_id'), $values);
+    $cv_id = $cv[0]->cv_id;
+
+    $form['seqtype'] = array(
+      '#type' => 'textfield',
+      '#title' => t('Sequence Type'),
+      '#required' => TRUE,
+      '#description' => t('Please enter the Sequence Ontology (SO) term name that describes the sequences in the FASTA file (e.g. gene, mRNA, polypeptide, etc...)'),
+      '#autocomplete_path' => "admin/tripal/storage/chado/auto_name/cvterm/$cv_id"
+    );
+
+    // The options of the two radio groups below have no explicit keys, so
+    // the submitted values are the numeric positions 0, 1, 2.
+    $form['method'] = array(
+      '#type' => 'radios',
+      '#title' => t('Method'),
+      '#required' => TRUE,
+      '#options' => array(
+        t('Insert only'),
+        t('Update only'),
+        t('Insert and update')
+      ),
+      '#description' => t('Select how features in the FASTA file are handled.
+         Select "Insert only" to insert the new features. If a feature already
+         exists with the same name or unique name and type then it is skipped.
+         Select "Update only" to only update featues that already exist in the
+         database.  Select "Insert and Update" to insert features that do
+         not exist and upate those that do.'),
+      '#default_value' => 2
+    );
+    $form['match_type'] = array(
+      '#type' => 'radios',
+      '#title' => t('Name Match Type'),
+      '#required' => TRUE,
+      '#options' => array(
+        t('Name'),
+        t('Unique name')
+      ),
+      '#description' => t('Used for "updates only" or "insert and update" methods. Not required if method type is "insert".
+        Feature data is stored in Chado with both a human-readable
+        name and a unique name. If the features in your FASTA file are uniquely identified using
+        a human-readable name then select the "Name" button. If your features are
+        uniquely identified using the unique name then select the "Unique name" button.  If you
+        loaded your features first using the GFF loader then the unique name of each
+        features were indicated by the "ID=" attribute and the name by the "Name=" attribute.
+        By default, the FASTA loader will use the first word (character string
+        before the first space) as  the name for your feature. If
+        this does not uniquely identify your feature consider specifying a regular expression in the advanced section below.
+        Additionally, you may import both a name and a unique name for each sequence using the advanced options.'),
+      '#default_value' => 1
+    );
+
+    // Additional Options
+    $form['additional'] = array(
+      '#type' => 'fieldset',
+      '#title' => t('Additional Options'),
+      '#collapsible' => TRUE,
+      '#collapsed' => TRUE
+    );
+
+    // In Drupal 7 an 'item' element displays its '#markup' property.
+    $form['additional']['re_help'] = array(
+      '#type' => 'item',
+      '#markup' => t('A regular expression is an advanced method for extracting information from a string of text.
+         Your FASTA file may contain both a human-readable name and a unique name for each sequence.
+         If you want to import
+         both the name and unique name for all sequences, then you must provide regular expressions
+         so that the loader knows how to separate them.
+         Otherwise the name and uniquename will be the same.
+         By default, this loader will use the first word in the definition
+         lines of the FASTA file
+         as the name or unique name of the feature.')
+    );
+    $form['additional']['re_name'] = array(
+      '#type' => 'textfield',
+      '#title' => t('Regular expression for the name'),
+      '#required' => FALSE,
+      '#description' => t('Enter the regular expression that will extract the
+         feature name from the FASTA definition line. For example, for a
+         defintion line with a name and unique name separated by a bar \'|\' (>seqname|uniquename),
+         the regular expression for the name would be, "^(.*?)\|.*$".  All FASTA
+         definition lines begin with the ">" symbol.  You do not need to incldue
+         this symbol in your regular expression.')
+    );
+    $form['additional']['re_uname'] = array(
+      '#type' => 'textfield',
+      '#title' => t('Regular expression for the unique name'),
+      '#required' => FALSE,
+      '#description' => t('Enter the regular expression that will extract the
+         feature name from the FASTA definition line. For example, for a
+         defintion line with a name and unique name separated by a bar \'|\' (>seqname|uniquename),
+         the regular expression for the unique name would be "^.*?\|(.*)$").  All FASTA
+         definition lines begin with the ">" symbol.  You do not need to incldue
+         this symbol in your regular expression.')
+    );
+
+    // Advanced database cross reference options.
+    $form['additional']['db'] = array(
+      '#type' => 'fieldset',
+      '#title' => t('External Database Reference'),
+      '#weight' => 6,
+      '#collapsed' => TRUE
+    );
+    $form['additional']['db']['re_accession'] = array(
+      '#type' => 'textfield',
+      '#title' => t('Regular expression for the accession'),
+      '#required' => FALSE,
+      '#description' => t('Enter the regular expression that will extract the accession for the external database for each feature from the FASTA definition line.'),
+      '#weight' => 2
+    );
+
+    // get the list of databases
+    $sql = "SELECT * FROM {db} ORDER BY name";
+    $db_rset = chado_query($sql);
+    $dbs = array();
+    $dbs[''] = '';
+    while ($db = $db_rset->fetchObject()) {
+      $dbs[$db->db_id] = "$db->name";
+    }
+    $form['additional']['db']['db_id'] = array(
+      '#title' => t('External Database'),
+      '#type' => 'select',
+      '#description' => t("Plese choose an external database for which these sequences have a cross reference."),
+      '#required' => FALSE,
+      '#options' => $dbs,
+      '#weight' => 1
+    );
+
+    $form['additional']['relationship'] = array(
+      '#type' => 'fieldset',
+      '#title' => t('Relationships'),
+      '#weight' => 6,
+      '#collapsed' => TRUE
+    );
+    $rels = array();
+    $rels[''] = '';
+    $rels['part_of'] = 'part of';
+    $rels['derives_from'] = 'produced by (derives from)';
+
+    // Advanced references options
+    $form['additional']['relationship']['rel_type'] = array(
+      '#title' => t('Relationship Type'),
+      '#type' => 'select',
+      '#description' => t("Use this option to create associations, or relationships between the
+                        features of this FASTA file and existing features in the database. For
+                        example, to associate a FASTA file of peptides to existing genes or transcript sequence,
+                        select the type 'produced by'. For a CDS sequences select the type 'part of'"),
+      '#required' => FALSE,
+      '#options' => $rels,
+      '#weight' => 5
+    );
+    $form['additional']['relationship']['re_subject'] = array(
+      '#type' => 'textfield',
+      '#title' => t('Regular expression for the parent'),
+      '#required' => FALSE,
+      '#description' => t('Enter the regular expression that will extract the unique
+                         name needed to identify the existing sequence for which the
+                         relationship type selected above will apply.'),
+      '#weight' => 6
+    );
+    $form['additional']['relationship']['parent_type'] = array(
+      '#type' => 'textfield',
+      '#title' => t('Parent Type'),
+      '#required' => FALSE,
+      '#description' => t('Please enter the Sequence Ontology term for the parent.  For example
+                         if the FASTA file being loaded is a set of proteins that are
+                         products of genes, then use the SO term \'gene\' or \'transcript\' or equivalent. However,
+                         this type must match the type for already loaded features.'),
+      '#weight' => 7
+    );
+    return $form;
+  }
+  /**
+   * Validates the FASTA specific elements of the importer form.
+   *
+   * Checks that the regular expression needed for the selected name match
+   * type is present, that the relationship fields and the external database
+   * fields are either all provided or all empty, and that the sequence type
+   * (and the parent type, when a relationship is requested) matches a term
+   * or synonym in the 'sequence' vocabulary.
+   *
+   * @param $form
+   *   The form array.
+   * @param $form_state
+   *   The current form state holding the submitted values.
+   *
+   * @see TripalImporter::formValidate()
+   */
+  public function formValidate($form, &$form_state) {
+    $type = trim($form_state['values']['seqtype']);
+    $match_type = trim($form_state['values']['match_type']);
+    $re_name = trim($form_state['values']['re_name']);
+    $re_uname = trim($form_state['values']['re_uname']);
+    $re_accession = trim($form_state['values']['re_accession']);
+    $db_id = $form_state['values']['db_id'];
+    $rel_type = $form_state['values']['rel_type'];
+    $re_subject = trim($form_state['values']['re_subject']);
+    $parent_type = trim($form_state['values']['parent_type']);
+
+    // The match type is submitted as the numeric position of the selected
+    // radio button.  Convert it to its label.
+    if ($match_type == 0) {
+      $match_type = 'Name';
+    }
+    elseif ($match_type == 1) {
+      $match_type = 'Unique name';
+    }
+
+    if ($re_name and !$re_uname and strcmp($match_type, 'Unique name') == 0) {
+      form_set_error('re_uname', t("You must provide a regular expression to identify the sequence unique name"));
+    }
+
+    if (!$re_name and $re_uname and strcmp($match_type, 'Name') == 0) {
+      form_set_error('re_name', t("You must provide a regular expression to identify the sequence name"));
+    }
+
+    // make sure if a relationship is specified that all fields are provided.
+    if (($rel_type or $parent_type) and !$re_subject) {
+      form_set_error('re_subject', t("Please provide a regular expression for the parent"));
+    }
+    if (($rel_type or $re_subject) and !$parent_type) {
+      form_set_error('parent_type', t("Please provide a SO term for the parent"));
+    }
+    if (($parent_type or $re_subject) and !$rel_type) {
+      form_set_error('rel_type', t("Please select a relationship type"));
+    }
+
+    // make sure if a database is specified that all fields are provided
+    if ($db_id and !$re_accession) {
+      form_set_error('re_accession', t("Please provide a regular expression for the accession"));
+    }
+    if ($re_accession and !$db_id) {
+      form_set_error('db_id', t("Please select a database"));
+    }
+
+    // check to make sure the types exists
+    $cvtermsql ="
+      SELECT CVT.cvterm_id
+      FROM {cvterm} CVT
+        INNER JOIN {cv} CV on CVT.cv_id = CV.cv_id
+        LEFT JOIN {cvtermsynonym} CVTS on CVTS.cvterm_id = CVT.cvterm_id
+      WHERE cv.name = :cvname and (CVT.name = :name or CVTS.synonym = :synonym)
+    ";
+    $cvterm = chado_query($cvtermsql,
+      array(':cvname' => 'sequence',':name' => $type,':synonym' => $type))->fetchObject();
+    if (!$cvterm) {
+      // The form element holding the sequence type is named 'seqtype'.
+      form_set_error('seqtype', t("The Sequence Ontology (SO) term selected for the sequence type is not available in the database. Please check spelling or select another."));
+    }
+    if ($rel_type) {
+      $cvterm = chado_query($cvtermsql, array(':cvname' => 'sequence',':name' => $parent_type,
+        ':synonym' => $parent_type
+      ))->fetchObject();
+      if (!$cvterm) {
+        form_set_error('parent_type', t("The Sequence Ontology (SO) term selected for the parent relationship is not available in the database. Please check spelling or select another."));
+      }
+    }
+  }
+  /**
+   * @see TripalImporter::run()
+   *
+   * Reads the importer arguments from the import record, converts the
+   * numeric 'method' and 'match_type' values into the text labels that
+   * loadFasta() compares against, and then loads the FASTA file.
+   *
+   * @param $details
+   *   The import record. Its 'arguments' property must be an array holding
+   *   the keys read below (file_path, organism_id, seqtype, method,
+   *   match_type, re_name, re_uname, re_accession, db_id, rel_type,
+   *   re_subject, parent_type and analysis_id).
+   */
+  public function run($details) {
+    $arguments = $details->arguments;
+
+    $dfile = $arguments['file_path'];
+    $organism_id = $arguments['organism_id'];
+    $type = $arguments['seqtype'];
+    $method = $arguments['method'];
+    $match_type = $arguments['match_type'];
+    $re_name = $arguments['re_name'];
+    $re_uname = $arguments['re_uname'];
+    $re_accession = $arguments['re_accession'];
+    $db_id = $arguments['db_id'];
+    $rel_type = $arguments['rel_type'];
+    $re_subject = $arguments['re_subject'];
+    $parent_type = $arguments['parent_type'];
+    $analysis_id = $arguments['analysis_id'];
+
+    // Translate the numeric method into the label used by the loader. The
+    // comparisons are intentionally loose because the value may arrive as a
+    // string (e.g. "1").
+    if ($method == 0) {
+      $method = 'Insert only';
+    }
+    elseif ($method == 1) {
+      $method = 'Update only';
+    }
+    elseif ($method == 2) {
+      $method = 'Insert and update';
+    }
+
+    // Translate the numeric match type into the label used by the loader.
+    if ($match_type == 0) {
+      $match_type = 'Name';
+    }
+    elseif ($match_type == 1) {
+      $match_type = 'Unique name';
+    }
+
+    $this->loadFasta($dfile, $organism_id, $type, $re_name, $re_uname, $re_accession,
+      $db_id, $rel_type, $re_subject, $parent_type, $method, $analysis_id,
+      $match_type);
+  }
+  /**
+   * Load a fasta file.
+   *
+   * The file is processed in two passes. The first pass scans every line,
+   * parses each definition line (one starting with '>') and records the byte
+   * offsets where the sequence that follows it starts and ends. The second
+   * pass calls loadFastaFeature() once per recorded sequence.
+   *
+   * @param $dfile
+   *   The full path to the fasta file to load.
+   * @param $organism_id
+   *   The organism_id of the organism these features are from.
+   * @param $type
+   *   The type of features contained in the fasta file.
+   * @param $re_name
+   *   The regular expression to extract the feature.name from the fasta header.
+   *   The name is taken from the first capture group.
+   * @param $re_uname
+   *   The regular expression to extract the feature.uniquename from the fasta
+   *   header. The unique name is taken from the first capture group.
+   * @param $re_accession
+   *   The regular expression to extract the accession of the feature.dbxref_id.
+   * @param $db_id
+   *   The database ID of the above accession.
+   * @param $rel_type
+   *   The type of relationship when creating a feature_relationship between
+   *   this feature (object) and an extracted subject.
+   * @param $re_subject
+   *   The regular expression to extract the uniquename of the feature to be
+   *   the subject of the above specified relationship.
+   * @param $parent_type
+   *   The type of the parent feature.
+   * @param $method
+   *   The method of feature adding. (ie: 'Insert only', 'Update only',
+   *   'Insert and update').
+   * @param $analysis_id
+   *   The analysis_id to associate the features in this fasta file with.
+   * @param $match_type
+   *  Whether to match existing features based on the 'Name' or 'Unique name'.
+   *
+   * @return
+   *   0 if one of the required terms cannot be found; nothing otherwise.
+   *
+   * @throws Exception
+   *   If the fasta file cannot be opened.
+   */
+  private function loadFasta($dfile, $organism_id, $type, $re_name, $re_uname, $re_accession,
+      $db_id, $rel_type, $re_subject, $parent_type, $method, $analysis_id, $match_type) {
+
+    // First get the type for this sequence.
+    $cvtermsql = "
+      SELECT CVT.cvterm_id
+      FROM {cvterm} CVT
+        INNER JOIN {cv} CV on CVT.cv_id = CV.cv_id
+        LEFT JOIN {cvtermsynonym} CVTS on CVTS.cvterm_id = CVT.cvterm_id
+      WHERE cv.name = :cvname and (CVT.name = :name or CVTS.synonym = :synonym)
+    ";
+    $cvterm = chado_query($cvtermsql, array(':cvname' => 'sequence',':name' => $type,':synonym' => $type))->fetchObject();
+    if (!$cvterm) {
+      $this->logMessage("Cannot find the term type: '%type'", array('%type' => $type), TRIPAL_ERROR);
+      return 0;
+    }
+
+    // Second, if there is a parent type then get that. The variable is always
+    // defined so that it can be handed to loadFastaFeature() below.
+    $parentcvterm = NULL;
+    if ($parent_type) {
+      $parentcvterm = chado_query($cvtermsql, array(':cvname' => 'sequence', ':name' => $parent_type,':synonym' => $parent_type))->fetchObject();
+      if (!$parentcvterm) {
+        // Report the requested term, not the (empty) lookup result.
+        $this->logMessage("Cannot find the parent term type: '%type'", array(
+          '%type' => $parent_type
+        ), TRIPAL_ERROR);
+        return 0;
+      }
+    }
+
+    // Third, if there is a relationship type then get that.
+    $relcvterm = NULL;
+    if ($rel_type) {
+      $relcvterm = chado_query($cvtermsql, array(':cvname' => 'sequence',':name' => $rel_type,':synonym' => $rel_type))->fetchObject();
+      if (!$relcvterm) {
+        // Report the requested term, not the (empty) lookup result.
+        $this->logMessage("Cannot find the relationship term type: '%type'", array(
+          '%type' => $rel_type
+        ), TRIPAL_ERROR);
+        return 0;
+      }
+    }
+
+    // We need to get the table schema to make sure we don't overrun the
+    // size of fields with what our regular expressions retrieve
+    $feature_tbl = chado_get_schema('feature');
+    $dbxref_tbl = chado_get_schema('dbxref');
+
+    $this->logMessage(t("Step 1: Finding sequences..."));
+    $fh = fopen($dfile, 'r');
+    if (!$fh) {
+      throw new Exception(t("Cannot open file: %dfile", array('%dfile' => $dfile)));
+    }
+
+    // Iterate through the lines of the file. Keep a record for
+    // where in the file each line is at for later import.
+    $seqs = array();
+    $num_seqs = 0;
+    $prev_pos = 0;
+    $line_num = 0;
+
+    // Compare against FALSE explicitly so that a line whose content is falsy
+    // (e.g. a final "0" without a newline) does not end the scan early.
+    while (($line = fgets($fh)) !== FALSE) {
+      $line_num++;
+
+      // If we encounter a definition line then get the name, uniquename,
+      // accession and relationship subject from the definition line.
+      if (preg_match('/^>/', $line)) {
+
+        // Start every sequence with empty values so nothing parsed from a
+        // previous definition line is carried over to this one.
+        $name = NULL;
+        $uname = NULL;
+        $accession = NULL;
+        $subject = NULL;
+
+        // Remove the > symbol from the defline.
+        $defline = preg_replace("/^>/", '', $line);
+
+        // Get the feature name if a regular expression is provided.
+        if ($re_name) {
+          if (!preg_match("/$re_name/", $defline, $matches) or !isset($matches[1])) {
+            $this->logMessage("Regular expression for the feature name finds nothing. Line %line.",
+              array('%line' => $line_num), TRIPAL_ERROR);
+          }
+          elseif (strlen($matches[1]) > $feature_tbl['fields']['name']['length']) {
+            $this->logMessage("Regular expression retrieves a value too long for the feature name. Line %line.",
+              array('%line' => $line_num), TRIPAL_WARNING);
+          }
+          else {
+            $name = trim($matches[1]);
+          }
+        }
+
+        // If the match_type is name and no regular expression was provided
+        // then use the first word as the name, otherwise we don't set the name.
+        elseif (strcmp($match_type, 'Name') == 0) {
+          if (preg_match("/^\s*(.*?)[\s\|].*$/", $defline, $matches)) {
+            if (strlen($matches[1]) > $feature_tbl['fields']['name']['length']) {
+              $this->logMessage("Regular expression retrieves a feature name too long for the feature name. Line %line.",
+                array('%line' => $line_num), TRIPAL_WARNING);
+            }
+            else {
+              $name = trim($matches[1]);
+            }
+          }
+          else {
+            $this->logMessage("Cannot find a feature name. Line %line.", array('%line' => $line_num), TRIPAL_WARNING);
+          }
+        }
+
+        // Get the feature uniquename if a regular expression is provided.
+        if ($re_uname) {
+          if (!preg_match("/$re_uname/", $defline, $matches) or !isset($matches[1])) {
+            $this->logMessage("Regular expression for the feature unique name finds nothing. Line %line.",
+              array('%line' => $line_num), TRIPAL_ERROR);
+          }
+          else {
+            $uname = trim($matches[1]);
+          }
+        }
+        // If the match_type is unique name and no regular expression was
+        // provided then use the first word as the unique name, otherwise, we
+        // don't set the uniquename.
+        elseif (strcmp($match_type, 'Unique name') == 0) {
+          if (preg_match("/^\s*(.*?)[\s\|].*$/", $defline, $matches)) {
+            $uname = trim($matches[1]);
+          }
+          else {
+            $this->logMessage("Cannot find a feature unique name. Line %line.",
+              array('%line' => $line_num), TRIPAL_ERROR);
+          }
+        }
+
+        // Get the accession if a regular expression is provided. An empty
+        // expression would match anything without capturing a value.
+        if ($re_accession) {
+          if (preg_match("/$re_accession/", $defline, $matches) and isset($matches[1])) {
+            if (strlen($matches[1]) > $dbxref_tbl['fields']['accession']['length']) {
+              $this->logMessage("Regular expression retrieves an accession too long for the feature name. " .
+                "Cannot add cross reference. Line %line.", array('%line' => $line_num), TRIPAL_WARNING);
+            }
+            else {
+              $accession = trim($matches[1]);
+            }
+          }
+        }
+
+        // Get the relationship subject if a regular expression is provided.
+        // Note: this expression is applied to the full line (including '>').
+        if ($re_subject) {
+          if (preg_match("/$re_subject/", $line, $matches) and isset($matches[1])) {
+            $subject = trim($matches[1]);
+          }
+        }
+
+        // Add the details to the sequence.
+        $seqs[$num_seqs] = array(
+          'name' => $name,
+          'uname' => $uname,
+          'accession' => $accession,
+          'subject' => $subject,
+          'seq_start' => ftell($fh)
+        );
+        // If this isn't the first sequence, then we want to specify where
+        // the previous sequence ended.
+        if ($num_seqs > 0) {
+          $seqs[$num_seqs - 1]['seq_end'] = $prev_pos;
+        }
+        $num_seqs++;
+      }
+      // Keep the current file position so we can use it to set the sequence
+      // ending position
+      $prev_pos = ftell($fh);
+
+    }
+
+    // Set the end position for the last sequence: the position reached after
+    // the final line was read. Skip this when no sequence was found so that
+    // no bogus entry is added to the list.
+    if ($num_seqs > 0) {
+      $seqs[$num_seqs - 1]['seq_end'] = $prev_pos;
+    }
+
+    // loadFastaFeature() accepts a source argument but no source value exists
+    // for FASTA imports, so pass NULL explicitly.
+    $source = NULL;
+
+    // Now that we know where the sequences are in the file we need to add them.
+    $this->logMessage("Step 2: Importing sequences...");
+    $this->setTotalItems($num_seqs);
+    for ($i = 0; $i < $num_seqs; $i++) {
+      $seq = $seqs[$i];
+      //$this->logMessage("  importing: @name", array('@name' => $seq['name']));
+      $this->loadFastaFeature($fh, $seq['name'], $seq['uname'], $db_id,
+          $seq['accession'], $seq['subject'], $rel_type, $parent_type,
+          $analysis_id, $organism_id, $cvterm, $source, $method, $re_name,
+          $match_type, $parentcvterm, $relcvterm, $seq['seq_start'],
+          $seq['seq_end']);
+      // Sequence $i is done, so $i + 1 sequences have been handled.
+      $this->setItemsHandled($i + 1);
+    }
+    fclose($fh);
+    $this->setItemsHandled($num_seqs);
+  }
+
+  /**
+   * A helper function for loadFasta() to load a single feature
+   *
+   * Looks for an existing feature (by name or unique name, depending on
+   * $match_type), inserts and/or updates it according to $method, loads its
+   * residues from the file and then adds the optional analysis link,
+   * database cross reference and parent relationship.
+   *
+   * @param $fh
+   *   The open file handle of the fasta file.
+   * @param $name
+   *   The feature name extracted from the definition line (may be empty).
+   * @param $uname
+   *   The feature unique name extracted from the definition line (may be
+   *   empty).
+   * @param $db_id
+   *   The database ID for the accession, or an empty value for none.
+   * @param $accession
+   *   The accession extracted from the definition line (may be empty).
+   * @param $parent
+   *   The unique name of the parent feature extracted from the definition
+   *   line (may be empty).
+   * @param $rel_type
+   *   The relationship type; when empty no relationship is added.
+   * @param $parent_type
+   *   The type of the parent feature (used in log messages).
+   * @param $analysis_id
+   *   The analysis to link the feature to, or an empty value for none.
+   * @param $organism_id
+   *   The organism the feature belongs to.
+   * @param $cvterm
+   *   The term object (with a cvterm_id property) for the feature type.
+   * @param $source
+   *   Not used by this function.
+   * @param $method
+   *   'Insert only', 'Update only' or 'Insert and update'.
+   * @param $re_name
+   *   Not used by this function.
+   * @param $match_type
+   *   'Name' or 'Unique name'.
+   * @param $parentcvterm
+   *   The term object for the parent feature type (needed if $rel_type is set).
+   * @param $relcvterm
+   *   The term object for the relationship type (needed if $rel_type is set).
+   * @param $seq_start
+   *   The byte offset in the file where the sequence starts.
+   * @param $seq_end
+   *   The byte offset in the file where the sequence ends.
+   *
+   * @return
+   *   0 if the feature was skipped or an error occurred; nothing otherwise.
+   *
+   * @ingroup fasta_loader
+   */
+  private function loadFastaFeature($fh, $name, $uname, $db_id, $accession, $parent,
+      $rel_type, $parent_type, $analysis_id, $organism_id, $cvterm, $source, $method, $re_name,
+      $match_type, $parentcvterm, $relcvterm, $seq_start, $seq_end) {
+
+    // The matched (or newly inserted) feature record.
+    $feature = NULL;
+
+    // A label for the feature type to use in log messages. Use the term name
+    // if the term object carries one, otherwise fall back to its ID.
+    $type = isset($cvterm->name) ? $cvterm->name : $cvterm->cvterm_id;
+
+    // Check to see if this feature already exists if the match_type is 'Name'.
+    if (strcmp($match_type, 'Name') == 0) {
+      $values = array(
+        'organism_id' => $organism_id,
+        'name' => $name,
+        'type_id' => $cvterm->cvterm_id
+      );
+      $results = chado_select_record('feature', array('feature_id'), $values);
+      if (count($results) > 1) {
+        $this->logMessage("Multiple features exist with the name '%name' of type '%type' for the organism.  skipping",
+          array('%name' => $name,'%type' => $type), TRIPAL_ERROR);
+        return 0;
+      }
+      if (count($results) == 1) {
+        $feature = $results[0];
+      }
+    }
+
+    // Check if this feature already exists if the match_type is 'Unique Name'.
+    if (strcmp($match_type, 'Unique name') == 0) {
+      $values = array(
+        'organism_id' => $organism_id,
+        'uniquename' => $uname,
+        'type_id' => $cvterm->cvterm_id
+      );
+
+      $results = chado_select_record('feature', array('feature_id'), $values);
+      if (count($results) > 1) {
+        $this->logMessage("Multiple features exist with the unique name '%uname' of type '%type' for the organism.  skipping",
+          array('%uname' => $uname,'%type' => $type), TRIPAL_WARNING);
+        return 0;
+      }
+      if (count($results) == 1) {
+        $feature = $results[0];
+      }
+    }
+
+    // If the feature exists but this is an "insert only" then skip. This
+    // applies to both match types; otherwise an existing feature matched by
+    // name would have its residues overwritten further down.
+    if ($feature and (strcmp($method, 'Insert only') == 0)) {
+      $this->logMessage("Feature already exists '%name' ('%uname') while matching on %type. Skipping insert.",
+        array('%name' => $name,'%uname' => $uname,'%type' => drupal_strtolower($match_type)), TRIPAL_WARNING);
+      return 0;
+    }
+
+    // If we don't have a feature and we're doing an insert then do the insert.
+    $inserted = 0;
+    if (!$feature and (strcmp($method, 'Insert only') == 0 or strcmp($method, 'Insert and update') == 0)) {
+      // If only one of the name and unique name is available then use it
+      // for both.
+      if (!$uname) {
+        $uname = $name;
+      }
+      elseif (!$name) {
+        $name = $uname;
+      }
+
+      // Insert the feature record.
+      $values = array(
+        'organism_id' => $organism_id,
+        'name' => $name,
+        'uniquename' => $uname,
+        'type_id' => $cvterm->cvterm_id
+      );
+      $success = chado_insert_record('feature', $values);
+      if (!$success) {
+        $this->logMessage("Failed to insert feature '%name (%uname)'", array(
+          '%name' => $name,'%uname' => $uname), TRIPAL_ERROR);
+        return 0;
+      }
+
+      // now get the feature we just inserted
+      $values = array(
+        'organism_id' => $organism_id,
+        'uniquename' => $uname,
+        'type_id' => $cvterm->cvterm_id
+      );
+      $results = chado_select_record('feature', array('feature_id'), $values);
+      if (count($results) == 1) {
+        $inserted = 1;
+        $feature = $results[0];
+      }
+      else {
+        $this->logMessage("Failed to retreive newly inserted feature '%name (%uname)'", array(
+          '%name' => $name,'%uname' => $uname), TRIPAL_ERROR);
+        return 0;
+      }
+
+      // The residues for the new feature are loaded below, together with
+      // those of updated features, so they are not loaded here as well.
+    }
+
+    // If we still don't have a feature then there is nothing to work with.
+    // When the user wanted an update this is reported as an error.
+    if (!$feature) {
+      if (strcmp($method, 'Update only') == 0 or strcmp($method, 'Insert and update') == 0) {
+        $this->logMessage("Failed to find feature '%name' ('%uname') while matching on " . drupal_strtolower($match_type) . ".",
+          array('%name' => $name,'%uname' => $uname), TRIPAL_ERROR);
+      }
+      return 0;
+    }
+
+    // if we do have a feature and this is an update then proceed with the update
+    if (!$inserted and (strcmp($method, 'Update only') == 0 or strcmp($method, 'Insert and update') == 0)) {
+
+      // if the user wants to match on the Name field
+      if (strcmp($match_type, 'Name') == 0) {
+
+        // if we're matching on the name but do not have a unique name then we
+        // don't want to update the uniquename.
+        if ($uname) {
+
+          // First check to make sure that by changing the unique name of this
+          // feature that we won't conflict with another existing feature of
+          // the same name. The feature being updated is not a conflict with
+          // itself (it may already carry this unique name).
+          $values = array(
+            'organism_id' => $organism_id,
+            'uniquename' => $uname,
+            'type_id' => $cvterm->cvterm_id
+          );
+          $results = chado_select_record('feature', array('feature_id'), $values);
+          if (is_array($results)) {
+            foreach ($results as $existing) {
+              if ($existing->feature_id != $feature->feature_id) {
+                $this->logMessage("Cannot update the feature '%name' with a uniquename of '%uname' and type of '%type' as it " .
+                  "conflicts with an existing feature with the same uniquename and type.",
+                  array('%name' => $name,'%uname' => $uname,'%type' => $type), TRIPAL_ERROR);
+                return 0;
+              }
+            }
+          }
+
+          // the changes to the uniquename don't conflict so proceed with the update
+          $values = array('uniquename' => $uname);
+          $match = array(
+            'name' => $name,
+            'organism_id' => $organism_id,
+            'type_id' => $cvterm->cvterm_id
+          );
+
+          // perform the update
+          $success = chado_update_record('feature', $match, $values);
+          if (!$success) {
+            $this->logMessage("Failed to update feature '%name' ('%uname')",
+              array('%name' => $name,'%uname' => $uname), TRIPAL_ERROR);
+            return 0;
+          }
+        }
+      }
+
+      // If the user wants to match on the unique name field.
+      if (strcmp($match_type, 'Unique name') == 0) {
+        // If we're matching on the uniquename and have a new name then
+        // we want to update the name.
+        if ($name) {
+          $values = array('name' => $name);
+          $match = array(
+            'uniquename' => $uname,
+            'organism_id' => $organism_id,
+            'type_id' => $cvterm->cvterm_id
+          );
+          $success = chado_update_record('feature', $match, $values);
+          if (!$success) {
+            $this->logMessage("Failed to update feature '%name' ('%uname')",
+              array('%name' => $name,'%uname' => $uname), TRIPAL_ERROR);
+            return 0;
+          }
+        }
+      }
+    }
+
+    // Load (or replace) the residues for this feature
+    $this->loadFastaResidues($fh, $feature->feature_id, $seq_start, $seq_end);
+
+    // add in the analysis link
+    if ($analysis_id) {
+      // if the association doesn't already exist then add one
+      $values = array(
+        'analysis_id' => $analysis_id,
+        'feature_id' => $feature->feature_id
+      );
+      $results = chado_select_record('analysisfeature', array('analysisfeature_id'), $values);
+      if (count($results) == 0) {
+        $success = chado_insert_record('analysisfeature', $values);
+        if (!$success) {
+          $this->logMessage("Failed to associate analysis and feature '%name' ('%uname')",
+            array('%name' => $name,'%uname' => $uname), TRIPAL_ERROR);
+          return 0;
+        }
+      }
+    }
+
+    // now add the database cross reference, but only if an accession was
+    // actually extracted for this feature.
+    if ($db_id and strlen((string) $accession) > 0) {
+      // check to see if this accession reference exists, if not add it
+      $values = array(
+        'db_id' => $db_id,
+        'accession' => $accession
+      );
+      $results = chado_select_record('dbxref', array('dbxref_id'), $values);
+
+      // if the accession doesn't exist then add it
+      if (count($results) == 0) {
+        $results = chado_insert_record('dbxref', $values);
+        if (!$results) {
+          $this->logMessage("Failed to add database accession '%accession'",
+            array('%accession' => $accession), TRIPAL_ERROR);
+          return 0;
+        }
+        $results = chado_select_record('dbxref', array('dbxref_id'), $values);
+        if (count($results) == 1) {
+          $dbxref = $results[0];
+        }
+        else {
+          $this->logMessage("Failed to retreive newly inserted dbxref '%name (%uname)'",
+            array('%name' => $name,'%uname' => $uname), TRIPAL_ERROR);
+          return 0;
+        }
+      }
+      else {
+        $dbxref = $results[0];
+      }
+
+      // check to see if the feature dbxref record exists if not, then add it
+      $values = array(
+        'feature_id' => $feature->feature_id,
+        'dbxref_id' => $dbxref->dbxref_id
+      );
+      $results = chado_select_record('feature_dbxref', array('feature_dbxref_id'), $values);
+      if (count($results) == 0) {
+        $success = chado_insert_record('feature_dbxref', $values);
+        if (!$success) {
+          $this->logMessage("Failed to add associate database accession '%accession' with feature",
+            array('%accession' => $accession), TRIPAL_ERROR);
+          return 0;
+        }
+      }
+    }
+
+    // now add in the relationship if one exists. If not, then add it
+    if ($rel_type) {
+      $values = array('organism_id' => $organism_id,'uniquename' => $parent, 'type_id' => $parentcvterm->cvterm_id);
+      $results = chado_select_record('feature', array('feature_id'), $values);
+      if (count($results) != 1) {
+        $this->logMessage("Cannot find a unique fature for the parent '%parent' of type '%type' for the feature.",
+          array('%parent' => $parent,'%type' => $parent_type), TRIPAL_ERROR);
+        return 0;
+      }
+      $parent_feature = $results[0];
+
+      // check to see if the relationship already exists if not then add it
+      $values = array(
+        'subject_id' => $feature->feature_id,
+        'object_id' => $parent_feature->feature_id,
+        'type_id' => $relcvterm->cvterm_id
+      );
+      $results = chado_select_record('feature_relationship', array('feature_relationship_id'), $values);
+      if (count($results) == 0) {
+        $success = chado_insert_record('feature_relationship', $values);
+        if (!$success) {
+          $this->logMessage("Failed to add the relationship between the feature '%name' and its parent '%parent'",
+            array('%name' => $name,'%parent' => $parent), TRIPAL_ERROR);
+          return 0;
+        }
+      }
+    }
+  }
+
+  /**
+   * Adds the residues column to the feature.
+   *
+   * This function seeks to the proper location in the file for the sequence
+   * and reads in chunks of sequence and appends them to the feature.residues
+   * column in the database. Any residues the feature already has are
+   * replaced. Afterwards the seqlen and md5checksum columns are recomputed
+   * from the stored residues.
+   *
+   * @param $fh
+   *   The open file handle of the fasta file.
+   * @param $feature_id
+   *   The feature_id of the feature whose residues are to be set.
+   * @param $seq_start
+   *   The byte offset in the file of the first line of the sequence.
+   * @param $seq_end
+   *   The byte offset in the file just past the last line of the sequence.
+   *
+   * @return
+   *   TRUE on success, FALSE if appending residues to the database failed.
+   */
+  private function loadFastaResidues($fh, $feature_id, $seq_start, $seq_end) {
+
+    // First position the file at the beginning of the sequence
+    fseek($fh, $seq_start, SEEK_SET);
+
+    // The number of bytes to read before the pending residues are flushed
+    // to the database.
+    $chunk_size = 100000000;
+    $chunk = '';
+    $chunk_intv_read = 0;
+
+    // First, make sure we don't have a null in the residues
+    $sql = "UPDATE {feature} SET residues = '' WHERE feature_id = :feature_id";
+    chado_query($sql, array(':feature_id' => $feature_id));
+
+    $append_sql = "
+          UPDATE {feature}
+          SET residues = residues || :chunk
+          WHERE feature_id = :feature_id
+        ";
+
+    // Read in the lines until we reach the end of the sequence. The position
+    // is checked before each read so that an empty sequence (one whose start
+    // and end are the same) never consumes lines belonging to the next
+    // record. The read result is compared against FALSE so that a line with
+    // falsy content does not end the loop early.
+    //
+    // Progress is tracked per sequence by loadFasta() (setTotalItems() and
+    // setItemsHandled()), so byte counts are not added to it here.
+    while (ftell($fh) < $seq_end and ($line = fgets($fh)) !== FALSE) {
+      $chunk_intv_read += strlen($line) + 1;
+      $chunk .= trim($line);
+
+      // If we've read in enough of the sequence then append it to the database.
+      if ($chunk_intv_read >= $chunk_size) {
+        $success = chado_query($append_sql, array(':feature_id' => $feature_id,':chunk' => $chunk));
+        if (!$success) {
+          return FALSE;
+        }
+        $chunk = '';
+        $chunk_intv_read = 0;
+      }
+    }
+
+    // write the last bit of sequence if it remains
+    if (strlen($chunk) > 0) {
+      $success = chado_query($append_sql, array(':feature_id' => $feature_id,':chunk' => $chunk));
+      if (!$success) {
+        return FALSE;
+      }
+    }
+
+    // Now update the seqlen and md5checksum fields
+    $sql = "UPDATE {feature} SET seqlen = char_length(residues),  md5checksum = md5(residues) WHERE feature_id = :feature_id";
+    chado_query($sql, array(':feature_id' => $feature_id));
+
+    return TRUE;
+  }
+}

File diff suppressed because it is too large
+ 516 - 607
tripal_chado/includes/TripalImporter/GFF3Importer.inc


+ 0 - 3
tripal_chado/includes/setup/tripal_chado.setup.inc

@@ -140,9 +140,6 @@ function tripal_chado_prepare_chado() {
     // We want to force the version of Chado to be set properly.
     $real_version = chado_get_version(TRUE);
 
-    // Create the temp table we will use for loading OBO files.
-    tripal_chado_add_tripal_obo_temp_table();
-
     // Import commonly used ontologies if needed.
     tripal_chado_load_ontologies();
 

+ 10 - 10
tripal_chado/tripal_chado.module

@@ -357,16 +357,16 @@ function tripal_chado_menu() {
     'type' => MENU_NORMAL_ITEM,
     'weight' => 6
   );
-  $items['admin/tripal/loaders/fasta_loader'] = array(
-    'title' => 'Chado FASTA File Loader',
-    'description' => 'Load sequences from a multi-FASTA file into Chado',
-    'page callback' => 'drupal_get_form',
-    'page arguments' => array('tripal_feature_fasta_load_form'),
-    'access arguments' => array('administer tripal'),
-    'file' => 'includes/loaders/tripal_chado.fasta_loader.inc',
-    'file path' => drupal_get_path('module', 'tripal_chado'),
-    'type' => MENU_NORMAL_ITEM,
-  );
+//   $items['admin/tripal/loaders/fasta_loader'] = array(
+//     'title' => 'Chado FASTA File Loader',
+//     'description' => 'Load sequences from a multi-FASTA file into Chado',
+//     'page callback' => 'drupal_get_form',
+//     'page arguments' => array('tripal_feature_fasta_load_form'),
+//     'access arguments' => array('administer tripal'),
+//     'file' => 'includes/loaders/tripal_chado.fasta_loader.inc',
+//     'file path' => drupal_get_path('module', 'tripal_chado'),
+//     'type' => MENU_NORMAL_ITEM,
+//   );
 //   $items['admin/tripal/loaders/gff3_load'] = array(
 //     'title' => 'Chado GFF3 File Loader',
 //     'description' => 'Import a GFF3 file into Chado',

Some files were not shown because too many files changed in this diff