| 
					
				 | 
			
			
				@@ -47,11 +47,17 @@ function tripal_feature_fasta_load_form() { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				    '#options'     => $organisms, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				   ); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+  // get the sequence ontology CV ID 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+  $values = array('name' => 'sequence'); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+  $cv = chado_select_record('cv', array('cv_id'), $values); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+  $cv_id = $cv[0]->cv_id; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+   
			 | 
		
	
		
			
				 | 
				 | 
			
			
				   $form['seqtype']= array( 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     '#type' => 'textfield', 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     '#title' => t('Sequence Type'), 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     '#required' => TRUE, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     '#description' => t('Please enter the Sequence Ontology (SO) term name that describes the sequences in the FASTA file (e.g. gene, mRNA, protein, etc...)'), 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    '#autocomplete_path' => "admin/tripal/chado/tripal_cv/cvterm/auto_name/$cv_id", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				   ); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -120,8 +126,7 @@ function tripal_feature_fasta_load_form() { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     '#collapsed' => TRUE 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				   ); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				   $form['analysis']['desc'] = array( 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    '#type' => 'markup', 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    '#value' => t("Why specify an analysis for a data load?  All data comes 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    '#markup' => t("Why specify an analysis for a data load?  All data comes 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				        from some place, even if downloaded from Genbank. By specifying 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				        analysis details for all data uploads, it allows an end user to reproduce the 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				        data set, but at least indicates the source of the data."), 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -272,19 +277,18 @@ function tripal_feature_fasta_load_form() { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  * @ingroup fasta_loader 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  */ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 function tripal_feature_fasta_load_form_validate($form, &$form_state) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-  $fasta_file = trim($form_state['values']['fasta_file']); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+  $fasta_file   = trim($form_state['values']['fasta_file']); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				   $organism_id  = $form_state['values']['organism_id']; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				   $type         = trim($form_state['values']['seqtype']); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				   $method       = trim($form_state['values']['method']); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				   $match_type   = trim($form_state['values']['match_type']); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-  $library_id   = $form_state['values']['library_id']; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				   $re_name      = trim($form_state['values']['re_name']); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				   $re_uname     = trim($form_state['values']['re_uname']); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				   $re_accession = trim($form_state['values']['re_accession']); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				   $db_id        = $form_state['values']['db_id']; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				   $rel_type     = $form_state['values']['rel_type']; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				   $re_subject   = trim($form_state['values']['re_subject']); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-  $parent_type   = trim($form_state['values']['parent_type']); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+  $parent_type  = trim($form_state['values']['parent_type']); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				   if ($method == 0) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     $method = 'Insert only'; 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -379,7 +383,6 @@ function tripal_feature_fasta_load_form_submit($form, &$form_state) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				   $type         = trim($form_state['values']['seqtype']); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				   $method       = trim($form_state['values']['method']); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				   $match_type   = trim($form_state['values']['match_type']); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-  $library_id   = $form_state['values']['library_id']; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				   $re_name      = trim($form_state['values']['re_name']); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				   $re_uname     = trim($form_state['values']['re_uname']); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				   $re_accession = trim($form_state['values']['re_accession']); 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -461,179 +464,177 @@ function tripal_feature_load_fasta($dfile, $organism_id, $type, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				   $re_subject, $parent_type, $method, $uid, $analysis_id, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				   $match_type, $job = NULL) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-  // begin the transaction 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-  $connection = tripal_db_start_transaction(); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-  // if we cannot get a connection then let the user know the loading will be slow 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-  if (!$connection) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-     print "A persistant connection was not obtained. Loading will be slow\n"; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-  } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-  else { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-     print "\nNOTE: Loading of this FASTA file is performed using a database transaction. \n" . 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-           "If the load fails or is terminated prematurely then the entire set of \n" . 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-           "insertions/updates is rolled back and will not be found in the database\n\n"; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-  } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-  // first get the type for this sequence 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-  $cvtermsql = "SELECT CVT.cvterm_id 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-               FROM {cvterm} CVT 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                  INNER JOIN {cv} CV on CVT.cv_id = CV.cv_id 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                  LEFT JOIN {cvtermsynonym} CVTS on CVTS.cvterm_id = CVT.cvterm_id 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-               WHERE cv.name = :cvname and (CVT.name = :name or CVTS.synonym = :synonym)"; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-  $cvterm = chado_query($cvtermsql, array(':cvname' => 'sequence', ':name' => $type, ':synonym' => $type))->fetchObject(); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-  if (!$cvterm) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    tripal_report_error("T_fasta_loader", TRIPAL_ERROR, "Cannot find the term type: '%type'", array('%type' => $type)); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    return 0; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-  } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-  if ($parent_type) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    $parentcvterm = chado_query($cvtermsql, array(':cvname' => 'sequence', ':name' => $parent_type, ':synonym' => $parent_type))->fetchObject(); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    if (!$parentcvterm) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-      tripal_report_error("T_fasta_loader", TRIPAL_ERROR, "Cannot find the paretne term type: '%type'", array('%type' => $parentcvterm)); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+  $transaction = db_transaction(); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+  print "\nNOTE: Loading of this GFF file is performed using a database transaction. \n" . 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+       "If the load fails or is terminated prematurely then the entire set of \n" . 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+       "insertions/updates is rolled back and will not be found in the database\n\n"; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+  try { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    // first get the type for this sequence 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    $cvtermsql = "SELECT CVT.cvterm_id 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                 FROM {cvterm} CVT 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    INNER JOIN {cv} CV on CVT.cv_id = CV.cv_id 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    LEFT JOIN {cvtermsynonym} CVTS on CVTS.cvterm_id = CVT.cvterm_id 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                 WHERE cv.name = :cvname and (CVT.name = :name or CVTS.synonym = :synonym)"; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    $cvterm = chado_query($cvtermsql, array(':cvname' => 'sequence', ':name' => $type, ':synonym' => $type))->fetchObject(); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    if (!$cvterm) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+      tripal_report_error("T_fasta_loader", TRIPAL_ERROR, "Cannot find the term type: '%type'", array('%type' => $type)); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				       return 0; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-  } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-  if ($rel_type) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    $relcvterm = chado_query($cvtermsql, array(':cvname' => 'relationship', ':name' => $rel_type, ':synonym' => $rel_type))->fetchObject(); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    if (!$relcvterm) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-      tripal_report_error("T_fasta_loader", TRIPAL_ERROR, "Cannot find the relationship term type: '%type'", array('%type' => $relcvterm)); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-      return 0; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    if ($parent_type) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+      $parentcvterm = chado_query($cvtermsql, array(':cvname' => 'sequence', ':name' => $parent_type, ':synonym' => $parent_type))->fetchObject(); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+      if (!$parentcvterm) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        tripal_report_error("T_fasta_loader", TRIPAL_ERROR, "Cannot find the paretne term type: '%type'", array('%type' => $parentcvterm)); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        return 0; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+      } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-  } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-  print "Opening FASTA file $dfile\n"; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-  //$lines = file($dfile, FILE_SKIP_EMPTY_LINES); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-  $fh = fopen($dfile, 'r'); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-  if (!$fh) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    tripal_report_error('T_fasta_loader', TRIPAL_ERROR, "cannot open file: %dfile", array('%dfile' => $dfile)); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    return 0; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-  } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-  $filesize = filesize($dfile); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-  $i = 0; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-  $name = ''; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-  $uname = ''; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-  $residues = ''; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-  $interval = intval($filesize * 0.01); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-  if ($interval < 1) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    $interval = 1; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-  } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-  $inv_read = 0; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-  // we need to get the table schema to make sure we don't overrun the 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-  // size of fields with what our regular expressions retrieve 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-  $feature_tbl = chado_get_schema('feature'); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-  $dbxref_tbl = chado_get_schema('dbxref'); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-  //foreach ($lines as $line_num => $line) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-  while ($line = fgets($fh)) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    $i++;  // update the line count 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    $num_read += drupal_strlen($line); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    $intv_read += drupal_strlen($line); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    // if we encounter a definition line then get the name, uniquename, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    // accession and relationship subject from the definition line 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    if (preg_match('/^>/', $line)) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-      // if we have a feature name then we are starting a new sequence 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-      // so lets handle the previous one before moving on 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-      if ($name or $uname) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        tripal_feature_fasta_loader_handle_feature($name, $uname, $db_id, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-          $accession, $subject, $rel_type, $parent_type, $analysis_id, $organism_id, $cvterm, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-          $source, $residues, $method, $re_name, $match_type, $parentcvterm, $relcvterm); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        $residues = ''; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        $name = ''; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        $uname = ''; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    if ($rel_type) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+      $relcvterm = chado_query($cvtermsql, array(':cvname' => 'relationship', ':name' => $rel_type, ':synonym' => $rel_type))->fetchObject(); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+      if (!$relcvterm) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        tripal_report_error("T_fasta_loader", TRIPAL_ERROR, "Cannot find the relationship term type: '%type'", array('%type' => $relcvterm)); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        return 0; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				       } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-      $line = preg_replace("/^>/", '', $line); // remove the > symbol from the defline 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-      // get the feature name 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-      if ($re_name) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        if (!preg_match("/$re_name/", $line, $matches)) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-          tripal_report_error('trp-fasta', "ERROR: Regular expression for the feature name finds nothing. Line %line.", array('%line' => $i), 'error'); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+   
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    print "Opening FASTA file $dfile\n"; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+   
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    //$lines = file($dfile, FILE_SKIP_EMPTY_LINES); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    $fh = fopen($dfile, 'r'); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    if (!$fh) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+      tripal_report_error('T_fasta_loader', TRIPAL_ERROR, "cannot open file: %dfile", array('%dfile' => $dfile)); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+      return 0; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    $filesize = filesize($dfile); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    $i = 0; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+   
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    $name = ''; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    $uname = ''; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    $residues = ''; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    $interval = intval($filesize * 0.01); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    if ($interval < 1) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+      $interval = 1; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    $inv_read = 0; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+   
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    // we need to get the table schema to make sure we don't overrun the 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    // size of fields with what our regular expressions retrieve 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    $feature_tbl = chado_get_schema('feature'); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    $dbxref_tbl = chado_get_schema('dbxref'); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+   
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    //foreach ($lines as $line_num => $line) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    while ($line = fgets($fh)) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+      $i++;  // update the line count 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+      $num_read += drupal_strlen($line); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+      $intv_read += drupal_strlen($line); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+   
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+      // if we encounter a definition line then get the name, uniquename, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+      // accession and relationship subject from the definition line 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+      if (preg_match('/^>/', $line)) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        // if we have a feature name then we are starting a new sequence 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        // so lets handle the previous one before moving on 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        if ($name or $uname) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+          tripal_feature_fasta_loader_handle_feature($name, $uname, $db_id, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            $accession, $subject, $rel_type, $parent_type, $analysis_id, $organism_id, $cvterm, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            $source, $residues, $method, $re_name, $match_type, $parentcvterm, $relcvterm); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+          $residues = ''; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+          $name = ''; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+          $uname = ''; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        elseif (strlen($matches[1]) > $feature_tbl['fields']['name']['length']) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-          tripal_report_error('trp-fasta', "WARNING: Regular expression retrieves a value too long for the feature name. Line %line.", array('%line' => $i), 'error'); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+   
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        $line = preg_replace("/^>/", '', $line); // remove the > symbol from the defline 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+   
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        // get the feature name 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        if ($re_name) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+          if (!preg_match("/$re_name/", $line, $matches)) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            tripal_report_error('trp-fasta', "ERROR: Regular expression for the feature name finds nothing. Line %line.", array('%line' => $i), 'error'); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+          } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+          elseif (strlen($matches[1]) > $feature_tbl['fields']['name']['length']) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            tripal_report_error('trp-fasta', "WARNING: Regular expression retrieves a value too long for the feature name. Line %line.", array('%line' => $i), 'error'); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+          } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+          else { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            $name = trim($matches[1]); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+          } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         else { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-          $name = trim($matches[1]); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-      } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-      else { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        // if the match_type is name and no regular expression was provided 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        // then use the first word as the name, otherwise we don't set the name 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        if (strcmp($match_type, 'Name')==0) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-          if (preg_match("/^\s*(.*?)[\s\|].*$/", $line, $matches)) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            if (strlen($matches[1]) > $feature_tbl['fields']['name']['length']) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-              tripal_report_error('trp-fasta', "WARNING: Regular expression retrieves a feature name too long for the feature name. Line %line.", array('%line' => $i), 'error'); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+          // if the match_type is name and no regular expression was provided 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+          // then use the first word as the name, otherwise we don't set the name 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+          if (strcmp($match_type, 'Name')==0) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            if (preg_match("/^\s*(.*?)[\s\|].*$/", $line, $matches)) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+              if (strlen($matches[1]) > $feature_tbl['fields']['name']['length']) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                tripal_report_error('trp-fasta', "WARNING: Regular expression retrieves a feature name too long for the feature name. Line %line.", array('%line' => $i), 'error'); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+              } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+              else { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                $name = trim($matches[1]); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+              } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             else { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-              $name = trim($matches[1]); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+              tripal_report_error('trp-fasta', "ERROR: Cannot find a feature name. Line %line.", array('%line' => $i), 'error'); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				           } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-          else { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            tripal_report_error('trp-fasta', "ERROR: Cannot find a feature name. Line %line.", array('%line' => $i), 'error'); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+   
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        // get the feature unique name 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        if ($re_uname) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+          if (!preg_match("/$re_uname/", $line, $matches)) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            tripal_report_error('trp-fasta', "ERROR: Regular expression for the feature unique name finds nothing. Line %line.", array('%line' => $i), 'error'); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				           } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+          $uname = trim($matches[1]); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-      } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-      // get the feature unique name 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-      if ($re_uname) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        if (!preg_match("/$re_uname/", $line, $matches)) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-          tripal_report_error('trp-fasta', "ERROR: Regular expression for the feature unique name finds nothing. Line %line.", array('%line' => $i), 'error'); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        else { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+          // if the match_type is name and no regular expression was provided 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+          // then use the first word as the unique name, otherwise, we don't set the uniquename 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+          if (strcmp($match_type, 'Unique name')==0) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            if (preg_match("/^\s*(.*?)[\s\|].*$/", $line, $matches)) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+              $uname = trim($matches[1]); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            else { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+              tripal_report_error('trp-fasta', "ERROR: Cannot find a feature unique name. Line %line.", array('%line' => $i), 'error'); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+          } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        $uname = trim($matches[1]); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        // get the accession 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        preg_match("/$re_accession/", $line, $matches); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        if (strlen($matches[1]) > $dbxref_tbl['fields']['accession']['length']) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+          tripal_report_error('trp-fasta', "WARNING: Regular expression retrieves an accession too long for the feature name. Cannot add cross reference. Line %line.", array('%line' => $i), 'warning'); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        else { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+          $accession = trim($matches[1]); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+   
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        // get the relationship subject 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        preg_match("/$re_subject/", $line, $matches); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        $subject = trim($matches[1]); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				       } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				       else { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        // if the match_type is name and no regular expression was provided 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        // then use the first word as the name, otherwise, we don't set the unqiuename 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        if (strcmp($match_type, 'Unique name')==0) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-          if (preg_match("/^\s*(.*?)[\s\|].*$/", $line, $matches)) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            $uname = trim($matches[1]); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        $residues .= trim($line); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+   
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        // update the job status each time roughly 1% of the file's bytes have been read 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        if ($job and $intv_read >= $interval) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+          $intv_read = 0; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+          $percent = sprintf("%.2f", ($num_read / $filesize) * 100); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+          if ($name) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            print "Parsing Line $i (" . $percent . "%). Memory: " . number_format(memory_get_usage()) . " bytes. Current feature: $name\r"; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				           } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				           else { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            tripal_report_error('trp-fasta', "ERROR: Cannot find a feature unique name. Line %line.", array('%line' => $i), 'error'); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            print "Parsing Line $i (" . $percent . "%). Memory: " . number_format(memory_get_usage()) . " bytes. Current feature: $uname\r"; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				           } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+          tripal_set_job_progress($job, intval(($num_read / $filesize) * 100)); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				       } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-      // get the accession 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-      preg_match("/$re_accession/", $line, $matches); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-      if (strlen($matches[1]) > $dbxref_tbl['fields']['accession']['length']) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        tripal_report_error('trp-fasta', "WARNING: Regular expression retrieves an accession too long for the feature name. Cannot add cross reference. Line %line.", array('%line' => $i), 'warning'); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-      } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-      else { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        $accession = trim($matches[1]); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-      } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-      // get the relationship subject 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-      preg_match("/$re_subject/", $line, $matches); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-      $subject = trim($matches[1]); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    else { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-      $residues .= trim($line); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-      // update the job status every % features 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-      if ($job and $intv_read >= $interval) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        $intv_read = 0; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        $percent = sprintf("%.2f", ($num_read / $filesize) * 100); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        if ($name) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-          print "Parsing Line $i (" . $percent . "%). Memory: " . number_format(memory_get_usage()) . " bytes. Current feature: $name\r"; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        else { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-          print "Parsing Line $i (" . $percent . "%). Memory: " . number_format(memory_get_usage()) . " bytes. Current feature: $uname\r"; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        tripal_set_job_progress($job, intval(($num_read / $filesize) * 100)); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-      } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-  } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-  // now load the last sequence in the file 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-  tripal_feature_fasta_loader_handle_feature($name, $uname, $db_id, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    $accession, $subject, $rel_type, $parent_type, $analysis_id, $organism_id, $cvterm, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    $source, $residues, $method, $re_name, $match_type, $parentcvterm, $relcvterm); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-  // commit the transaction 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-  tripal_db_commit_transaction(); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+   
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    // now load the last sequence in the file 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    tripal_feature_fasta_loader_handle_feature($name, $uname, $db_id, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+      $accession, $subject, $rel_type, $parent_type, $analysis_id, $organism_id, $cvterm, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+      $source, $residues, $method, $re_name, $match_type, $parentcvterm, $relcvterm); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+  } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+  catch (Exception $e) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    print "\n"; // make sure we start errors on new line 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    watchdog_exception('T_fasta_loader', $e); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    $transaction->rollback(); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    print "FAILED: Rolling back database changes...\n"; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+  } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+   
			 | 
		
	
		
			
				 | 
				 | 
			
			
				   print "\nDone\n"; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 |