Quellcode durchsuchen

Fixed bug in fasta loader and fixed markup elements in forms

Stephen Ficklin vor 11 Jahren
Ursprung
Commit
7e68ffabb9

+ 3 - 5
tripal_feature/includes/tripal_feature.admin.inc

@@ -72,11 +72,10 @@ function tripal_feature_admin() {
   //      $allowedoptions ['allow_feature_browser'] = "Allow loading of the feature browsing through AJAX. For large sites the initial page load will be quick with the feature browser loading afterwards.";
 
     $form['browser']['browser_desc'] = array(
-       '#type'        => 'markup',
-       '#value' => 'A feature browser can be added to an organism page to allow users to quickly ' .
+       '#markup' => t('A feature browser can be added to an organism page to allow users to quickly ' .
           'access a feature.  This will most likely not be the ideal mechanism for accessing feature ' .
           'information, especially for large sites, but it will alow users exploring the site (such ' .
-          'as students) to better understand the data types available on the site.',
+          'as students) to better understand the data types available on the site.'),
 
     );
     $form['browser']['feature_types'] = array(
@@ -309,8 +308,7 @@ function get_tripal_feature_admin_form_title_set(&$form) {
     '#collapsed' => FALSE,
   );
   $form['title']['desc'] = array(
-    '#type'        => 'markup',
-    '#value' => t('Each synced feature must have a unique page title, however, features
+    '#markup' => t('Each synced feature must have a unique page title, however, features
                    may have the same name if they are of different types or from
                    different organisms.  Therefore, we must be sure that the
                    page titles can uniquely identify the feature being viewed.  Select

+ 1 - 2
tripal_feature/includes/tripal_feature.delete.inc

@@ -19,8 +19,7 @@ function tripal_feature_delete_form() {
     $organisms[$organism->organism_id] = "$organism->genus $organism->species ($organism->common_name)";
   }
   $form['desc'] = array(
-    '#type' => 'markup',
-    '#value' => t("Use one or more of the following fields to identify sets of features to be deleted."),
+    '#markup' => t("Use one or more of the following fields to identify sets of features to be deleted."),
   );
 
   $form['feature_names']= array(

+ 158 - 157
tripal_feature/includes/tripal_feature.fasta_loader.inc

@@ -47,11 +47,17 @@ function tripal_feature_fasta_load_form() {
    '#options'     => $organisms,
   );
 
+  // get the sequence ontology CV ID
+  $values = array('name' => 'sequence');
+  $cv = chado_select_record('cv', array('cv_id'), $values);
+  $cv_id = $cv[0]->cv_id;
+  
   $form['seqtype']= array(
     '#type' => 'textfield',
     '#title' => t('Sequence Type'),
     '#required' => TRUE,
     '#description' => t('Please enter the Sequence Ontology (SO) term name that describes the sequences in the FASTA file (e.g. gene, mRNA, protein, etc...)'),
+    '#autocomplete_path' => "admin/tripal/chado/tripal_cv/cvterm/auto_name/$cv_id",
   );
 
 
@@ -120,8 +126,7 @@ function tripal_feature_fasta_load_form() {
     '#collapsed' => TRUE
   );
   $form['analysis']['desc'] = array(
-    '#type' => 'markup',
-    '#value' => t("Why specify an analysis for a data load?  All data comes
+    '#markup' => t("Why specify an analysis for a data load?  All data comes
        from some place, even if downloaded from Genbank. By specifying
        analysis details for all data uploads, it allows an end user to reproduce the
        data set, but at least indicates the source of the data."),
@@ -272,19 +277,18 @@ function tripal_feature_fasta_load_form() {
  * @ingroup fasta_loader
  */
 function tripal_feature_fasta_load_form_validate($form, &$form_state) {
-  $fasta_file = trim($form_state['values']['fasta_file']);
+  $fasta_file   = trim($form_state['values']['fasta_file']);
   $organism_id  = $form_state['values']['organism_id'];
   $type         = trim($form_state['values']['seqtype']);
   $method       = trim($form_state['values']['method']);
   $match_type   = trim($form_state['values']['match_type']);
-  $library_id   = $form_state['values']['library_id'];
   $re_name      = trim($form_state['values']['re_name']);
   $re_uname     = trim($form_state['values']['re_uname']);
   $re_accession = trim($form_state['values']['re_accession']);
   $db_id        = $form_state['values']['db_id'];
   $rel_type     = $form_state['values']['rel_type'];
   $re_subject   = trim($form_state['values']['re_subject']);
-  $parent_type   = trim($form_state['values']['parent_type']);
+  $parent_type  = trim($form_state['values']['parent_type']);
 
   if ($method == 0) {
     $method = 'Insert only';
@@ -379,7 +383,6 @@ function tripal_feature_fasta_load_form_submit($form, &$form_state) {
   $type         = trim($form_state['values']['seqtype']);
   $method       = trim($form_state['values']['method']);
   $match_type   = trim($form_state['values']['match_type']);
-  $library_id   = $form_state['values']['library_id'];
   $re_name      = trim($form_state['values']['re_name']);
   $re_uname     = trim($form_state['values']['re_uname']);
   $re_accession = trim($form_state['values']['re_accession']);
@@ -461,179 +464,177 @@ function tripal_feature_load_fasta($dfile, $organism_id, $type,
   $re_subject, $parent_type, $method, $uid, $analysis_id,
   $match_type, $job = NULL) {
 
-  // begin the transaction
-  $connection = tripal_db_start_transaction();
-
-  // if we cannot get a connection then let the user know the loading will be slow
-  if (!$connection) {
-     print "A persistant connection was not obtained. Loading will be slow\n";
-  }
-  else {
-     print "\nNOTE: Loading of this FASTA file is performed using a database transaction. \n" .
-           "If the load fails or is terminated prematurely then the entire set of \n" .
-           "insertions/updates is rolled back and will not be found in the database\n\n";
-  }
-
-  // first get the type for this sequence
-  $cvtermsql = "SELECT CVT.cvterm_id
-               FROM {cvterm} CVT
-                  INNER JOIN {cv} CV on CVT.cv_id = CV.cv_id
-                  LEFT JOIN {cvtermsynonym} CVTS on CVTS.cvterm_id = CVT.cvterm_id
-               WHERE cv.name = :cvname and (CVT.name = :name or CVTS.synonym = :synonym)";
-  $cvterm = chado_query($cvtermsql, array(':cvname' => 'sequence', ':name' => $type, ':synonym' => $type))->fetchObject();
-  if (!$cvterm) {
-    tripal_report_error("T_fasta_loader", TRIPAL_ERROR, "Cannot find the term type: '%type'", array('%type' => $type));
-    return 0;
-  }
-  if ($parent_type) {
-    $parentcvterm = chado_query($cvtermsql, array(':cvname' => 'sequence', ':name' => $parent_type, ':synonym' => $parent_type))->fetchObject();
-    if (!$parentcvterm) {
-      tripal_report_error("T_fasta_loader", TRIPAL_ERROR, "Cannot find the paretne term type: '%type'", array('%type' => $parentcvterm));
+  $transaction = db_transaction();
+  print "\nNOTE: Loading of this GFF file is performed using a database transaction. \n" .
+       "If the load fails or is terminated prematurely then the entire set of \n" .
+       "insertions/updates is rolled back and will not be found in the database\n\n";
+  try {
+
+    // first get the type for this sequence
+    $cvtermsql = "SELECT CVT.cvterm_id
+                 FROM {cvterm} CVT
+                    INNER JOIN {cv} CV on CVT.cv_id = CV.cv_id
+                    LEFT JOIN {cvtermsynonym} CVTS on CVTS.cvterm_id = CVT.cvterm_id
+                 WHERE cv.name = :cvname and (CVT.name = :name or CVTS.synonym = :synonym)";
+    $cvterm = chado_query($cvtermsql, array(':cvname' => 'sequence', ':name' => $type, ':synonym' => $type))->fetchObject();
+    if (!$cvterm) {
+      tripal_report_error("T_fasta_loader", TRIPAL_ERROR, "Cannot find the term type: '%type'", array('%type' => $type));
       return 0;
     }
-  }
-  if ($rel_type) {
-    $relcvterm = chado_query($cvtermsql, array(':cvname' => 'relationship', ':name' => $rel_type, ':synonym' => $rel_type))->fetchObject();
-    if (!$relcvterm) {
-      tripal_report_error("T_fasta_loader", TRIPAL_ERROR, "Cannot find the relationship term type: '%type'", array('%type' => $relcvterm));
-      return 0;
+    if ($parent_type) {
+      $parentcvterm = chado_query($cvtermsql, array(':cvname' => 'sequence', ':name' => $parent_type, ':synonym' => $parent_type))->fetchObject();
+      if (!$parentcvterm) {
+        tripal_report_error("T_fasta_loader", TRIPAL_ERROR, "Cannot find the paretne term type: '%type'", array('%type' => $parentcvterm));
+        return 0;
+      }
     }
-  }
-
-  print "Opening FASTA file $dfile\n";
-
-  //$lines = file($dfile, FILE_SKIP_EMPTY_LINES);
-  $fh = fopen($dfile, 'r');
-  if (!$fh) {
-    tripal_report_error('T_fasta_loader', TRIPAL_ERROR, "cannot open file: %dfile", array('%dfile' => $dfile));
-    return 0;
-  }
-  $filesize = filesize($dfile);
-  $i = 0;
-
-  $name = '';
-  $uname = '';
-  $residues = '';
-  $interval = intval($filesize * 0.01);
-  if ($interval < 1) {
-    $interval = 1;
-  }
-  $inv_read = 0;
-
-  // we need to get the table schema to make sure we don't overrun the
-  // size of fields with what our regular expressions retrieve
-  $feature_tbl = chado_get_schema('feature');
-  $dbxref_tbl = chado_get_schema('dbxref');
-
-  //foreach ($lines as $line_num => $line) {
-  while ($line = fgets($fh)) {
-    $i++;  // update the line count
-    $num_read += drupal_strlen($line);
-    $intv_read += drupal_strlen($line);
-
-    // if we encounter a definition line then get the name, uniquename,
-    // accession and relationship subject from the definition line
-    if (preg_match('/^>/', $line)) {
-      // if we have a feature name then we are starting a new sequence
-      // so lets handle the previous one before moving on
-      if ($name or $uname) {
-        tripal_feature_fasta_loader_handle_feature($name, $uname, $db_id,
-          $accession, $subject, $rel_type, $parent_type, $analysis_id, $organism_id, $cvterm,
-          $source, $residues, $method, $re_name, $match_type, $parentcvterm, $relcvterm);
-        $residues = '';
-        $name = '';
-        $uname = '';
+    if ($rel_type) {
+      $relcvterm = chado_query($cvtermsql, array(':cvname' => 'relationship', ':name' => $rel_type, ':synonym' => $rel_type))->fetchObject();
+      if (!$relcvterm) {
+        tripal_report_error("T_fasta_loader", TRIPAL_ERROR, "Cannot find the relationship term type: '%type'", array('%type' => $relcvterm));
+        return 0;
       }
-
-      $line = preg_replace("/^>/", '', $line); // remove the > symbol from the defline
-
-      // get the feature name
-      if ($re_name) {
-        if (!preg_match("/$re_name/", $line, $matches)) {
-          tripal_report_error('trp-fasta', "ERROR: Regular expression for the feature name finds nothing. Line %line.", array('%line' => $i), 'error');
+    }
+  
+    print "Opening FASTA file $dfile\n";
+  
+    //$lines = file($dfile, FILE_SKIP_EMPTY_LINES);
+    $fh = fopen($dfile, 'r');
+    if (!$fh) {
+      tripal_report_error('T_fasta_loader', TRIPAL_ERROR, "cannot open file: %dfile", array('%dfile' => $dfile));
+      return 0;
+    }
+    $filesize = filesize($dfile);
+    $i = 0;
+  
+    $name = '';
+    $uname = '';
+    $residues = '';
+    $interval = intval($filesize * 0.01);
+    if ($interval < 1) {
+      $interval = 1;
+    }
+    $inv_read = 0;
+  
+    // we need to get the table schema to make sure we don't overrun the
+    // size of fields with what our regular expressions retrieve
+    $feature_tbl = chado_get_schema('feature');
+    $dbxref_tbl = chado_get_schema('dbxref');
+  
+    //foreach ($lines as $line_num => $line) {
+    while ($line = fgets($fh)) {
+      $i++;  // update the line count
+      $num_read += drupal_strlen($line);
+      $intv_read += drupal_strlen($line);
+  
+      // if we encounter a definition line then get the name, uniquename,
+      // accession and relationship subject from the definition line
+      if (preg_match('/^>/', $line)) {
+        // if we have a feature name then we are starting a new sequence
+        // so lets handle the previous one before moving on
+        if ($name or $uname) {
+          tripal_feature_fasta_loader_handle_feature($name, $uname, $db_id,
+            $accession, $subject, $rel_type, $parent_type, $analysis_id, $organism_id, $cvterm,
+            $source, $residues, $method, $re_name, $match_type, $parentcvterm, $relcvterm);
+          $residues = '';
+          $name = '';
+          $uname = '';
         }
-        elseif (strlen($matches[1]) > $feature_tbl['fields']['name']['length']) {
-          tripal_report_error('trp-fasta', "WARNING: Regular expression retrieves a value too long for the feature name. Line %line.", array('%line' => $i), 'error');
+  
+        $line = preg_replace("/^>/", '', $line); // remove the > symbol from the defline
+  
+        // get the feature name
+        if ($re_name) {
+          if (!preg_match("/$re_name/", $line, $matches)) {
+            tripal_report_error('trp-fasta', "ERROR: Regular expression for the feature name finds nothing. Line %line.", array('%line' => $i), 'error');
+          }
+          elseif (strlen($matches[1]) > $feature_tbl['fields']['name']['length']) {
+            tripal_report_error('trp-fasta', "WARNING: Regular expression retrieves a value too long for the feature name. Line %line.", array('%line' => $i), 'error');
+          }
+          else {
+            $name = trim($matches[1]);
+          }
         }
         else {
-          $name = trim($matches[1]);
-        }
-      }
-      else {
-        // if the match_type is name and no regular expression was provided
-        // then use the first word as the name, otherwise we don't set the name
-        if (strcmp($match_type, 'Name')==0) {
-          if (preg_match("/^\s*(.*?)[\s\|].*$/", $line, $matches)) {
-            if (strlen($matches[1]) > $feature_tbl['fields']['name']['length']) {
-              tripal_report_error('trp-fasta', "WARNING: Regular expression retrieves a feature name too long for the feature name. Line %line.", array('%line' => $i), 'error');
+          // if the match_type is name and no regular expression was provided
+          // then use the first word as the name, otherwise we don't set the name
+          if (strcmp($match_type, 'Name')==0) {
+            if (preg_match("/^\s*(.*?)[\s\|].*$/", $line, $matches)) {
+              if (strlen($matches[1]) > $feature_tbl['fields']['name']['length']) {
+                tripal_report_error('trp-fasta', "WARNING: Regular expression retrieves a feature name too long for the feature name. Line %line.", array('%line' => $i), 'error');
+              }
+              else {
+                $name = trim($matches[1]);
+              }
             }
             else {
-              $name = trim($matches[1]);
+              tripal_report_error('trp-fasta', "ERROR: Cannot find a feature name. Line %line.", array('%line' => $i), 'error');
             }
           }
-          else {
-            tripal_report_error('trp-fasta', "ERROR: Cannot find a feature name. Line %line.", array('%line' => $i), 'error');
+        }
+  
+        // get the feature unique name
+        if ($re_uname) {
+          if (!preg_match("/$re_uname/", $line, $matches)) {
+            tripal_report_error('trp-fasta', "ERROR: Regular expression for the feature unique name finds nothing. Line %line.", array('%line' => $i), 'error');
           }
+          $uname = trim($matches[1]);
         }
-      }
-
-      // get the feature unique name
-      if ($re_uname) {
-        if (!preg_match("/$re_uname/", $line, $matches)) {
-          tripal_report_error('trp-fasta', "ERROR: Regular expression for the feature unique name finds nothing. Line %line.", array('%line' => $i), 'error');
+        else {
+          // if the match_type is unique name and no regular expression was provided
+          // then use the first word as the unique name, otherwise, we don't set the uniquename
+          if (strcmp($match_type, 'Unique name')==0) {
+            if (preg_match("/^\s*(.*?)[\s\|].*$/", $line, $matches)) {
+              $uname = trim($matches[1]);
+            }
+            else {
+              tripal_report_error('trp-fasta', "ERROR: Cannot find a feature unique name. Line %line.", array('%line' => $i), 'error');
+            }
+          }
         }
-        $uname = trim($matches[1]);
+        // get the accession
+        preg_match("/$re_accession/", $line, $matches);
+        if (strlen($matches[1]) > $dbxref_tbl['fields']['accession']['length']) {
+          tripal_report_error('trp-fasta', "WARNING: Regular expression retrieves an accession too long for the feature name. Cannot add cross reference. Line %line.", array('%line' => $i), 'warning');
+        }
+        else {
+          $accession = trim($matches[1]);
+        }
+  
+        // get the relationship subject
+        preg_match("/$re_subject/", $line, $matches);
+        $subject = trim($matches[1]);
       }
       else {
-        // if the match_type is name and no regular expression was provided
-        // then use the first word as the name, otherwise, we don't set the unqiuename
-        if (strcmp($match_type, 'Unique name')==0) {
-          if (preg_match("/^\s*(.*?)[\s\|].*$/", $line, $matches)) {
-            $uname = trim($matches[1]);
+        $residues .= trim($line);
+  
+        // update the job status every % features
+        if ($job and $intv_read >= $interval) {
+          $intv_read = 0;
+          $percent = sprintf("%.2f", ($num_read / $filesize) * 100);
+          if ($name) {
+            print "Parsing Line $i (" . $percent . "%). Memory: " . number_format(memory_get_usage()) . " bytes. Current feature: $name\r";
           }
           else {
-            tripal_report_error('trp-fasta', "ERROR: Cannot find a feature unique name. Line %line.", array('%line' => $i), 'error');
+            print "Parsing Line $i (" . $percent . "%). Memory: " . number_format(memory_get_usage()) . " bytes. Current feature: $uname\r";
           }
+          tripal_set_job_progress($job, intval(($num_read / $filesize) * 100));
         }
       }
-      // get the accession
-      preg_match("/$re_accession/", $line, $matches);
-      if (strlen($matches[1]) > $dbxref_tbl['fields']['accession']['length']) {
-        tripal_report_error('trp-fasta', "WARNING: Regular expression retrieves an accession too long for the feature name. Cannot add cross reference. Line %line.", array('%line' => $i), 'warning');
-      }
-      else {
-        $accession = trim($matches[1]);
-      }
-
-      // get the relationship subject
-      preg_match("/$re_subject/", $line, $matches);
-      $subject = trim($matches[1]);
-    }
-    else {
-      $residues .= trim($line);
-
-      // update the job status every % features
-      if ($job and $intv_read >= $interval) {
-        $intv_read = 0;
-        $percent = sprintf("%.2f", ($num_read / $filesize) * 100);
-        if ($name) {
-          print "Parsing Line $i (" . $percent . "%). Memory: " . number_format(memory_get_usage()) . " bytes. Current feature: $name\r";
-        }
-        else {
-          print "Parsing Line $i (" . $percent . "%). Memory: " . number_format(memory_get_usage()) . " bytes. Current feature: $uname\r";
-        }
-        tripal_set_job_progress($job, intval(($num_read / $filesize) * 100));
-      }
     }
-  }
-
-  // now load the last sequence in the file
-  tripal_feature_fasta_loader_handle_feature($name, $uname, $db_id,
-    $accession, $subject, $rel_type, $parent_type, $analysis_id, $organism_id, $cvterm,
-    $source, $residues, $method, $re_name, $match_type, $parentcvterm, $relcvterm);
-
-  // commit the transaction
-  tripal_db_commit_transaction();
+  
+    // now load the last sequence in the file
+    tripal_feature_fasta_loader_handle_feature($name, $uname, $db_id,
+      $accession, $subject, $rel_type, $parent_type, $analysis_id, $organism_id, $cvterm,
+      $source, $residues, $method, $re_name, $match_type, $parentcvterm, $relcvterm);
+  }
+  catch (Exception $e) {
+    print "\n"; // make sure we start errors on new line
+    watchdog_exception('T_fasta_loader', $e);
+    $transaction->rollback();
+    print "FAILED: Rolling back database changes...\n";
+  }
+  
   print "\nDone\n";
 }
 

+ 5 - 5
tripal_feature/includes/tripal_feature.gff_loader.inc

@@ -122,6 +122,7 @@ function tripal_feature_gff3_load_form() {
     '#description' => t('Existing features will be updated and new features will be added.  Attributes
                          for a feature that are not present in the GFF but which are present in the
                          database will not be altered.'),
+    '#default_value' => 1,
   );
   $form['import_options']['refresh']= array(
     '#type' => 'checkbox',
@@ -154,8 +155,7 @@ function tripal_feature_gff3_load_form() {
     '#collapsed' => TRUE
   );
   $form['targets']['adesc'] = array(
-    '#type' => 'markup',
-    '#value' => t("When alignments are represented in the GFF file (e.g. such as
+    '#markup' => t("When alignments are represented in the GFF file (e.g. such as
        alignments of cDNA sequences to a whole genome, or blast matches), they are
        represented using two feature types: 'match' (or cDNA_match, EST_match, etc.)
        and 'match_part'.  These features may also have a 'Target' attribute to
@@ -375,9 +375,9 @@ function tripal_feature_load_gff3($gff_file, $organism_id, $analysis_id,
     // default is the 'sequence' ontology
     $sql = "SELECT * FROM {cv} WHERE name = :cvname";
     $cv = chado_query($sql, array(':cvname' => 'sequence'))->fetchObject();
-    if (!$tripal_core_report_error) {
-      tripal_report_error('T_gff3_loader', TRIPAL_ERROR, "Cannot find the 'sequence' ontology",
-        array());
+    if (!$cv) {
+      tripal_report_error('T_gff3_loader', TRIPAL_ERROR, 
+        "Cannot find the 'sequence' ontology", array());
       return '';
     }
 

+ 2 - 4
tripal_feature/includes/tripal_feature.seq_extract.inc

@@ -98,8 +98,7 @@ function theme_tripal_feature_seq_extract_form($form) {
 
   $markup .= $table;
   $form['criteria'] = array(
-    '#type' => 'markup',
-    '#value' =>  $markup,
+    '#markup' =>  $markup,
     '#weight' => -10,
   );
   return drupal_render($form);
@@ -167,8 +166,7 @@ function tripal_feature_seq_extract_form(&$form_state = NULL) {
   $form['#method'] = 'POST';
 
   $form['description'] = array(
-    '#type' => 'markup',
-    '#value' => t('Use this form to retrieve sequences in FASTA format.')
+    '#markup' => t('Use this form to retrieve sequences in FASTA format.')
   );
 
   $sql = "

+ 1 - 2
tripal_stock/includes/tripal_stock.admin.inc

@@ -59,8 +59,7 @@ function tripal_stock_admin() {
     '#collapsed' => FALSE,
   );
   $form['title']['desc'] = array(
-    '#type'  => 'markup',
-    '#value' => t('Each synced stock must have a unique page title, however, stocks
+    '#markup' => t('Each synced stock must have a unique page title, however, stocks
       may have the same name if they are of different types or from different
       organisms.  Therefore, we must be sure that the page titles can uniquely
       identify the stock being viewed.  Select an option below that will