|  | @@ -47,11 +47,17 @@ function tripal_feature_fasta_load_form() {
 | 
	
		
			
				|  |  |     '#options'     => $organisms,
 | 
	
		
			
				|  |  |    );
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | +  // get the sequence ontology CV ID
 | 
	
		
			
				|  |  | +  $values = array('name' => 'sequence');
 | 
	
		
			
				|  |  | +  $cv = chado_select_record('cv', array('cv_id'), $values);
 | 
	
		
			
				|  |  | +  $cv_id = $cv[0]->cv_id;
 | 
	
		
			
				|  |  | +  
 | 
	
		
			
				|  |  |    $form['seqtype']= array(
 | 
	
		
			
				|  |  |      '#type' => 'textfield',
 | 
	
		
			
				|  |  |      '#title' => t('Sequence Type'),
 | 
	
		
			
				|  |  |      '#required' => TRUE,
 | 
	
		
			
				|  |  |      '#description' => t('Please enter the Sequence Ontology (SO) term name that describes the sequences in the FASTA file (e.g. gene, mRNA, protein, etc...)'),
 | 
	
		
			
				|  |  | +    '#autocomplete_path' => "admin/tripal/chado/tripal_cv/cvterm/auto_name/$cv_id",
 | 
	
		
			
				|  |  |    );
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |  
 | 
	
	
		
			
				|  | @@ -120,8 +126,7 @@ function tripal_feature_fasta_load_form() {
 | 
	
		
			
				|  |  |      '#collapsed' => TRUE
 | 
	
		
			
				|  |  |    );
 | 
	
		
			
				|  |  |    $form['analysis']['desc'] = array(
 | 
	
		
			
				|  |  | -    '#type' => 'markup',
 | 
	
		
			
				|  |  | -    '#value' => t("Why specify an analysis for a data load?  All data comes
 | 
	
		
			
				|  |  | +    '#markup' => t("Why specify an analysis for a data load?  All data comes
 | 
	
		
			
				|  |  |         from some place, even if downloaded from Genbank. By specifying
 | 
	
		
			
				|  |  |         analysis details for all data uploads, it allows an end user to reproduce the
 | 
	
		
			
				|  |  |         data set, but at least indicates the source of the data."),
 | 
	
	
		
			
				|  | @@ -272,19 +277,18 @@ function tripal_feature_fasta_load_form() {
 | 
	
		
			
				|  |  |   * @ingroup fasta_loader
 | 
	
		
			
				|  |  |   */
 | 
	
		
			
				|  |  |  function tripal_feature_fasta_load_form_validate($form, &$form_state) {
 | 
	
		
			
				|  |  | -  $fasta_file = trim($form_state['values']['fasta_file']);
 | 
	
		
			
				|  |  | +  $fasta_file   = trim($form_state['values']['fasta_file']);
 | 
	
		
			
				|  |  |    $organism_id  = $form_state['values']['organism_id'];
 | 
	
		
			
				|  |  |    $type         = trim($form_state['values']['seqtype']);
 | 
	
		
			
				|  |  |    $method       = trim($form_state['values']['method']);
 | 
	
		
			
				|  |  |    $match_type   = trim($form_state['values']['match_type']);
 | 
	
		
			
				|  |  | -  $library_id   = $form_state['values']['library_id'];
 | 
	
		
			
				|  |  |    $re_name      = trim($form_state['values']['re_name']);
 | 
	
		
			
				|  |  |    $re_uname     = trim($form_state['values']['re_uname']);
 | 
	
		
			
				|  |  |    $re_accession = trim($form_state['values']['re_accession']);
 | 
	
		
			
				|  |  |    $db_id        = $form_state['values']['db_id'];
 | 
	
		
			
				|  |  |    $rel_type     = $form_state['values']['rel_type'];
 | 
	
		
			
				|  |  |    $re_subject   = trim($form_state['values']['re_subject']);
 | 
	
		
			
				|  |  | -  $parent_type   = trim($form_state['values']['parent_type']);
 | 
	
		
			
				|  |  | +  $parent_type  = trim($form_state['values']['parent_type']);
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |    if ($method == 0) {
 | 
	
		
			
				|  |  |      $method = 'Insert only';
 | 
	
	
		
			
				|  | @@ -379,7 +383,6 @@ function tripal_feature_fasta_load_form_submit($form, &$form_state) {
 | 
	
		
			
				|  |  |    $type         = trim($form_state['values']['seqtype']);
 | 
	
		
			
				|  |  |    $method       = trim($form_state['values']['method']);
 | 
	
		
			
				|  |  |    $match_type   = trim($form_state['values']['match_type']);
 | 
	
		
			
				|  |  | -  $library_id   = $form_state['values']['library_id'];
 | 
	
		
			
				|  |  |    $re_name      = trim($form_state['values']['re_name']);
 | 
	
		
			
				|  |  |    $re_uname     = trim($form_state['values']['re_uname']);
 | 
	
		
			
				|  |  |    $re_accession = trim($form_state['values']['re_accession']);
 | 
	
	
		
			
				|  | @@ -461,179 +464,177 @@ function tripal_feature_load_fasta($dfile, $organism_id, $type,
 | 
	
		
			
				|  |  |    $re_subject, $parent_type, $method, $uid, $analysis_id,
 | 
	
		
			
				|  |  |    $match_type, $job = NULL) {
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | -  // begin the transaction
 | 
	
		
			
				|  |  | -  $connection = tripal_db_start_transaction();
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | -  // if we cannot get a connection then let the user know the loading will be slow
 | 
	
		
			
				|  |  | -  if (!$connection) {
 | 
	
		
			
				|  |  | -     print "A persistant connection was not obtained. Loading will be slow\n";
 | 
	
		
			
				|  |  | -  }
 | 
	
		
			
				|  |  | -  else {
 | 
	
		
			
				|  |  | -     print "\nNOTE: Loading of this FASTA file is performed using a database transaction. \n" .
 | 
	
		
			
				|  |  | -           "If the load fails or is terminated prematurely then the entire set of \n" .
 | 
	
		
			
				|  |  | -           "insertions/updates is rolled back and will not be found in the database\n\n";
 | 
	
		
			
				|  |  | -  }
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | -  // first get the type for this sequence
 | 
	
		
			
				|  |  | -  $cvtermsql = "SELECT CVT.cvterm_id
 | 
	
		
			
				|  |  | -               FROM {cvterm} CVT
 | 
	
		
			
				|  |  | -                  INNER JOIN {cv} CV on CVT.cv_id = CV.cv_id
 | 
	
		
			
				|  |  | -                  LEFT JOIN {cvtermsynonym} CVTS on CVTS.cvterm_id = CVT.cvterm_id
 | 
	
		
			
				|  |  | -               WHERE cv.name = :cvname and (CVT.name = :name or CVTS.synonym = :synonym)";
 | 
	
		
			
				|  |  | -  $cvterm = chado_query($cvtermsql, array(':cvname' => 'sequence', ':name' => $type, ':synonym' => $type))->fetchObject();
 | 
	
		
			
				|  |  | -  if (!$cvterm) {
 | 
	
		
			
				|  |  | -    tripal_report_error("T_fasta_loader", TRIPAL_ERROR, "Cannot find the term type: '%type'", array('%type' => $type));
 | 
	
		
			
				|  |  | -    return 0;
 | 
	
		
			
				|  |  | -  }
 | 
	
		
			
				|  |  | -  if ($parent_type) {
 | 
	
		
			
				|  |  | -    $parentcvterm = chado_query($cvtermsql, array(':cvname' => 'sequence', ':name' => $parent_type, ':synonym' => $parent_type))->fetchObject();
 | 
	
		
			
				|  |  | -    if (!$parentcvterm) {
 | 
	
		
			
				|  |  | -      tripal_report_error("T_fasta_loader", TRIPAL_ERROR, "Cannot find the paretne term type: '%type'", array('%type' => $parentcvterm));
 | 
	
		
			
				|  |  | +  $transaction = db_transaction();
 | 
	
		
			
				|  |  | +  print "\nNOTE: Loading of this GFF file is performed using a database transaction. \n" .
 | 
	
		
			
				|  |  | +       "If the load fails or is terminated prematurely then the entire set of \n" .
 | 
	
		
			
				|  |  | +       "insertions/updates is rolled back and will not be found in the database\n\n";
 | 
	
		
			
				|  |  | +  try {
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    // first get the type for this sequence
 | 
	
		
			
				|  |  | +    $cvtermsql = "SELECT CVT.cvterm_id
 | 
	
		
			
				|  |  | +                 FROM {cvterm} CVT
 | 
	
		
			
				|  |  | +                    INNER JOIN {cv} CV on CVT.cv_id = CV.cv_id
 | 
	
		
			
				|  |  | +                    LEFT JOIN {cvtermsynonym} CVTS on CVTS.cvterm_id = CVT.cvterm_id
 | 
	
		
			
				|  |  | +                 WHERE cv.name = :cvname and (CVT.name = :name or CVTS.synonym = :synonym)";
 | 
	
		
			
				|  |  | +    $cvterm = chado_query($cvtermsql, array(':cvname' => 'sequence', ':name' => $type, ':synonym' => $type))->fetchObject();
 | 
	
		
			
				|  |  | +    if (!$cvterm) {
 | 
	
		
			
				|  |  | +      tripal_report_error("T_fasta_loader", TRIPAL_ERROR, "Cannot find the term type: '%type'", array('%type' => $type));
 | 
	
		
			
				|  |  |        return 0;
 | 
	
		
			
				|  |  |      }
 | 
	
		
			
				|  |  | -  }
 | 
	
		
			
				|  |  | -  if ($rel_type) {
 | 
	
		
			
				|  |  | -    $relcvterm = chado_query($cvtermsql, array(':cvname' => 'relationship', ':name' => $rel_type, ':synonym' => $rel_type))->fetchObject();
 | 
	
		
			
				|  |  | -    if (!$relcvterm) {
 | 
	
		
			
				|  |  | -      tripal_report_error("T_fasta_loader", TRIPAL_ERROR, "Cannot find the relationship term type: '%type'", array('%type' => $relcvterm));
 | 
	
		
			
				|  |  | -      return 0;
 | 
	
		
			
				|  |  | +    if ($parent_type) {
 | 
	
		
			
				|  |  | +      $parentcvterm = chado_query($cvtermsql, array(':cvname' => 'sequence', ':name' => $parent_type, ':synonym' => $parent_type))->fetchObject();
 | 
	
		
			
				|  |  | +      if (!$parentcvterm) {
 | 
	
		
			
				|  |  | +        tripal_report_error("T_fasta_loader", TRIPAL_ERROR, "Cannot find the paretne term type: '%type'", array('%type' => $parentcvterm));
 | 
	
		
			
				|  |  | +        return 0;
 | 
	
		
			
				|  |  | +      }
 | 
	
		
			
				|  |  |      }
 | 
	
		
			
				|  |  | -  }
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | -  print "Opening FASTA file $dfile\n";
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | -  //$lines = file($dfile, FILE_SKIP_EMPTY_LINES);
 | 
	
		
			
				|  |  | -  $fh = fopen($dfile, 'r');
 | 
	
		
			
				|  |  | -  if (!$fh) {
 | 
	
		
			
				|  |  | -    tripal_report_error('T_fasta_loader', TRIPAL_ERROR, "cannot open file: %dfile", array('%dfile' => $dfile));
 | 
	
		
			
				|  |  | -    return 0;
 | 
	
		
			
				|  |  | -  }
 | 
	
		
			
				|  |  | -  $filesize = filesize($dfile);
 | 
	
		
			
				|  |  | -  $i = 0;
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | -  $name = '';
 | 
	
		
			
				|  |  | -  $uname = '';
 | 
	
		
			
				|  |  | -  $residues = '';
 | 
	
		
			
				|  |  | -  $interval = intval($filesize * 0.01);
 | 
	
		
			
				|  |  | -  if ($interval < 1) {
 | 
	
		
			
				|  |  | -    $interval = 1;
 | 
	
		
			
				|  |  | -  }
 | 
	
		
			
				|  |  | -  $inv_read = 0;
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | -  // we need to get the table schema to make sure we don't overrun the
 | 
	
		
			
				|  |  | -  // size of fields with what our regular expressions retrieve
 | 
	
		
			
				|  |  | -  $feature_tbl = chado_get_schema('feature');
 | 
	
		
			
				|  |  | -  $dbxref_tbl = chado_get_schema('dbxref');
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | -  //foreach ($lines as $line_num => $line) {
 | 
	
		
			
				|  |  | -  while ($line = fgets($fh)) {
 | 
	
		
			
				|  |  | -    $i++;  // update the line count
 | 
	
		
			
				|  |  | -    $num_read += drupal_strlen($line);
 | 
	
		
			
				|  |  | -    $intv_read += drupal_strlen($line);
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | -    // if we encounter a definition line then get the name, uniquename,
 | 
	
		
			
				|  |  | -    // accession and relationship subject from the definition line
 | 
	
		
			
				|  |  | -    if (preg_match('/^>/', $line)) {
 | 
	
		
			
				|  |  | -      // if we have a feature name then we are starting a new sequence
 | 
	
		
			
				|  |  | -      // so lets handle the previous one before moving on
 | 
	
		
			
				|  |  | -      if ($name or $uname) {
 | 
	
		
			
				|  |  | -        tripal_feature_fasta_loader_handle_feature($name, $uname, $db_id,
 | 
	
		
			
				|  |  | -          $accession, $subject, $rel_type, $parent_type, $analysis_id, $organism_id, $cvterm,
 | 
	
		
			
				|  |  | -          $source, $residues, $method, $re_name, $match_type, $parentcvterm, $relcvterm);
 | 
	
		
			
				|  |  | -        $residues = '';
 | 
	
		
			
				|  |  | -        $name = '';
 | 
	
		
			
				|  |  | -        $uname = '';
 | 
	
		
			
				|  |  | +    if ($rel_type) {
 | 
	
		
			
				|  |  | +      $relcvterm = chado_query($cvtermsql, array(':cvname' => 'relationship', ':name' => $rel_type, ':synonym' => $rel_type))->fetchObject();
 | 
	
		
			
				|  |  | +      if (!$relcvterm) {
 | 
	
		
			
				|  |  | +        tripal_report_error("T_fasta_loader", TRIPAL_ERROR, "Cannot find the relationship term type: '%type'", array('%type' => $relcvterm));
 | 
	
		
			
				|  |  | +        return 0;
 | 
	
		
			
				|  |  |        }
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | -      $line = preg_replace("/^>/", '', $line); // remove the > symbol from the defline
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | -      // get the feature name
 | 
	
		
			
				|  |  | -      if ($re_name) {
 | 
	
		
			
				|  |  | -        if (!preg_match("/$re_name/", $line, $matches)) {
 | 
	
		
			
				|  |  | -          tripal_report_error('trp-fasta', "ERROR: Regular expression for the feature name finds nothing. Line %line.", array('%line' => $i), 'error');
 | 
	
		
			
				|  |  | +    }
 | 
	
		
			
				|  |  | +  
 | 
	
		
			
				|  |  | +    print "Opening FASTA file $dfile\n";
 | 
	
		
			
				|  |  | +  
 | 
	
		
			
				|  |  | +    //$lines = file($dfile, FILE_SKIP_EMPTY_LINES);
 | 
	
		
			
				|  |  | +    $fh = fopen($dfile, 'r');
 | 
	
		
			
				|  |  | +    if (!$fh) {
 | 
	
		
			
				|  |  | +      tripal_report_error('T_fasta_loader', TRIPAL_ERROR, "cannot open file: %dfile", array('%dfile' => $dfile));
 | 
	
		
			
				|  |  | +      return 0;
 | 
	
		
			
				|  |  | +    }
 | 
	
		
			
				|  |  | +    $filesize = filesize($dfile);
 | 
	
		
			
				|  |  | +    $i = 0;
 | 
	
		
			
				|  |  | +  
 | 
	
		
			
				|  |  | +    $name = '';
 | 
	
		
			
				|  |  | +    $uname = '';
 | 
	
		
			
				|  |  | +    $residues = '';
 | 
	
		
			
				|  |  | +    $interval = intval($filesize * 0.01);
 | 
	
		
			
				|  |  | +    if ($interval < 1) {
 | 
	
		
			
				|  |  | +      $interval = 1;
 | 
	
		
			
				|  |  | +    }
 | 
	
		
			
				|  |  | +    $inv_read = 0;
 | 
	
		
			
				|  |  | +  
 | 
	
		
			
				|  |  | +    // we need to get the table schema to make sure we don't overrun the
 | 
	
		
			
				|  |  | +    // size of fields with what our regular expressions retrieve
 | 
	
		
			
				|  |  | +    $feature_tbl = chado_get_schema('feature');
 | 
	
		
			
				|  |  | +    $dbxref_tbl = chado_get_schema('dbxref');
 | 
	
		
			
				|  |  | +  
 | 
	
		
			
				|  |  | +    //foreach ($lines as $line_num => $line) {
 | 
	
		
			
				|  |  | +    while ($line = fgets($fh)) {
 | 
	
		
			
				|  |  | +      $i++;  // update the line count
 | 
	
		
			
				|  |  | +      $num_read += drupal_strlen($line);
 | 
	
		
			
				|  |  | +      $intv_read += drupal_strlen($line);
 | 
	
		
			
				|  |  | +  
 | 
	
		
			
				|  |  | +      // if we encounter a definition line then get the name, uniquename,
 | 
	
		
			
				|  |  | +      // accession and relationship subject from the definition line
 | 
	
		
			
				|  |  | +      if (preg_match('/^>/', $line)) {
 | 
	
		
			
				|  |  | +        // if we have a feature name then we are starting a new sequence
 | 
	
		
			
				|  |  | +        // so lets handle the previous one before moving on
 | 
	
		
			
				|  |  | +        if ($name or $uname) {
 | 
	
		
			
				|  |  | +          tripal_feature_fasta_loader_handle_feature($name, $uname, $db_id,
 | 
	
		
			
				|  |  | +            $accession, $subject, $rel_type, $parent_type, $analysis_id, $organism_id, $cvterm,
 | 
	
		
			
				|  |  | +            $source, $residues, $method, $re_name, $match_type, $parentcvterm, $relcvterm);
 | 
	
		
			
				|  |  | +          $residues = '';
 | 
	
		
			
				|  |  | +          $name = '';
 | 
	
		
			
				|  |  | +          $uname = '';
 | 
	
		
			
				|  |  |          }
 | 
	
		
			
				|  |  | -        elseif (strlen($matches[1]) > $feature_tbl['fields']['name']['length']) {
 | 
	
		
			
				|  |  | -          tripal_report_error('trp-fasta', "WARNING: Regular expression retrieves a value too long for the feature name. Line %line.", array('%line' => $i), 'error');
 | 
	
		
			
				|  |  | +  
 | 
	
		
			
				|  |  | +        $line = preg_replace("/^>/", '', $line); // remove the > symbol from the defline
 | 
	
		
			
				|  |  | +  
 | 
	
		
			
				|  |  | +        // get the feature name
 | 
	
		
			
				|  |  | +        if ($re_name) {
 | 
	
		
			
				|  |  | +          if (!preg_match("/$re_name/", $line, $matches)) {
 | 
	
		
			
				|  |  | +            tripal_report_error('trp-fasta', "ERROR: Regular expression for the feature name finds nothing. Line %line.", array('%line' => $i), 'error');
 | 
	
		
			
				|  |  | +          }
 | 
	
		
			
				|  |  | +          elseif (strlen($matches[1]) > $feature_tbl['fields']['name']['length']) {
 | 
	
		
			
				|  |  | +            tripal_report_error('trp-fasta', "WARNING: Regular expression retrieves a value too long for the feature name. Line %line.", array('%line' => $i), 'error');
 | 
	
		
			
				|  |  | +          }
 | 
	
		
			
				|  |  | +          else {
 | 
	
		
			
				|  |  | +            $name = trim($matches[1]);
 | 
	
		
			
				|  |  | +          }
 | 
	
		
			
				|  |  |          }
 | 
	
		
			
				|  |  |          else {
 | 
	
		
			
				|  |  | -          $name = trim($matches[1]);
 | 
	
		
			
				|  |  | -        }
 | 
	
		
			
				|  |  | -      }
 | 
	
		
			
				|  |  | -      else {
 | 
	
		
			
				|  |  | -        // if the match_type is name and no regular expression was provided
 | 
	
		
			
				|  |  | -        // then use the first word as the name, otherwise we don't set the name
 | 
	
		
			
				|  |  | -        if (strcmp($match_type, 'Name')==0) {
 | 
	
		
			
				|  |  | -          if (preg_match("/^\s*(.*?)[\s\|].*$/", $line, $matches)) {
 | 
	
		
			
				|  |  | -            if (strlen($matches[1]) > $feature_tbl['fields']['name']['length']) {
 | 
	
		
			
				|  |  | -              tripal_report_error('trp-fasta', "WARNING: Regular expression retrieves a feature name too long for the feature name. Line %line.", array('%line' => $i), 'error');
 | 
	
		
			
				|  |  | +          // if the match_type is name and no regular expression was provided
 | 
	
		
			
				|  |  | +          // then use the first word as the name, otherwise we don't set the name
 | 
	
		
			
				|  |  | +          if (strcmp($match_type, 'Name')==0) {
 | 
	
		
			
				|  |  | +            if (preg_match("/^\s*(.*?)[\s\|].*$/", $line, $matches)) {
 | 
	
		
			
				|  |  | +              if (strlen($matches[1]) > $feature_tbl['fields']['name']['length']) {
 | 
	
		
			
				|  |  | +                tripal_report_error('trp-fasta', "WARNING: Regular expression retrieves a feature name too long for the feature name. Line %line.", array('%line' => $i), 'error');
 | 
	
		
			
				|  |  | +              }
 | 
	
		
			
				|  |  | +              else {
 | 
	
		
			
				|  |  | +                $name = trim($matches[1]);
 | 
	
		
			
				|  |  | +              }
 | 
	
		
			
				|  |  |              }
 | 
	
		
			
				|  |  |              else {
 | 
	
		
			
				|  |  | -              $name = trim($matches[1]);
 | 
	
		
			
				|  |  | +              tripal_report_error('trp-fasta', "ERROR: Cannot find a feature name. Line %line.", array('%line' => $i), 'error');
 | 
	
		
			
				|  |  |              }
 | 
	
		
			
				|  |  |            }
 | 
	
		
			
				|  |  | -          else {
 | 
	
		
			
				|  |  | -            tripal_report_error('trp-fasta', "ERROR: Cannot find a feature name. Line %line.", array('%line' => $i), 'error');
 | 
	
		
			
				|  |  | +        }
 | 
	
		
			
				|  |  | +  
 | 
	
		
			
				|  |  | +        // get the feature unique name
 | 
	
		
			
				|  |  | +        if ($re_uname) {
 | 
	
		
			
				|  |  | +          if (!preg_match("/$re_uname/", $line, $matches)) {
 | 
	
		
			
				|  |  | +            tripal_report_error('trp-fasta', "ERROR: Regular expression for the feature unique name finds nothing. Line %line.", array('%line' => $i), 'error');
 | 
	
		
			
				|  |  |            }
 | 
	
		
			
				|  |  | +          $uname = trim($matches[1]);
 | 
	
		
			
				|  |  |          }
 | 
	
		
			
				|  |  | -      }
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | -      // get the feature unique name
 | 
	
		
			
				|  |  | -      if ($re_uname) {
 | 
	
		
			
				|  |  | -        if (!preg_match("/$re_uname/", $line, $matches)) {
 | 
	
		
			
				|  |  | -          tripal_report_error('trp-fasta', "ERROR: Regular expression for the feature unique name finds nothing. Line %line.", array('%line' => $i), 'error');
 | 
	
		
			
				|  |  | +        else {
 | 
	
		
			
				|  |  | +          // if the match_type is 'Unique name' and no regular expression was provided
 | 
	
		
			
				|  |  | +          // then use the first word as the unique name; otherwise, we don't set the uniquename
 | 
	
		
			
				|  |  | +          if (strcmp($match_type, 'Unique name')==0) {
 | 
	
		
			
				|  |  | +            if (preg_match("/^\s*(.*?)[\s\|].*$/", $line, $matches)) {
 | 
	
		
			
				|  |  | +              $uname = trim($matches[1]);
 | 
	
		
			
				|  |  | +            }
 | 
	
		
			
				|  |  | +            else {
 | 
	
		
			
				|  |  | +              tripal_report_error('trp-fasta', "ERROR: Cannot find a feature unique name. Line %line.", array('%line' => $i), 'error');
 | 
	
		
			
				|  |  | +            }
 | 
	
		
			
				|  |  | +          }
 | 
	
		
			
				|  |  |          }
 | 
	
		
			
				|  |  | -        $uname = trim($matches[1]);
 | 
	
		
			
				|  |  | +        // get the accession
 | 
	
		
			
				|  |  | +        preg_match("/$re_accession/", $line, $matches);
 | 
	
		
			
				|  |  | +        if (strlen($matches[1]) > $dbxref_tbl['fields']['accession']['length']) {
 | 
	
		
			
				|  |  | +          tripal_report_error('trp-fasta', "WARNING: Regular expression retrieves an accession too long for the feature name. Cannot add cross reference. Line %line.", array('%line' => $i), 'warning');
 | 
	
		
			
				|  |  | +        }
 | 
	
		
			
				|  |  | +        else {
 | 
	
		
			
				|  |  | +          $accession = trim($matches[1]);
 | 
	
		
			
				|  |  | +        }
 | 
	
		
			
				|  |  | +  
 | 
	
		
			
				|  |  | +        // get the relationship subject
 | 
	
		
			
				|  |  | +        preg_match("/$re_subject/", $line, $matches);
 | 
	
		
			
				|  |  | +        $subject = trim($matches[1]);
 | 
	
		
			
				|  |  |        }
 | 
	
		
			
				|  |  |        else {
 | 
	
		
			
				|  |  | -        // if the match_type is name and no regular expression was provided
 | 
	
		
			
				|  |  | -        // then use the first word as the name, otherwise, we don't set the unqiuename
 | 
	
		
			
				|  |  | -        if (strcmp($match_type, 'Unique name')==0) {
 | 
	
		
			
				|  |  | -          if (preg_match("/^\s*(.*?)[\s\|].*$/", $line, $matches)) {
 | 
	
		
			
				|  |  | -            $uname = trim($matches[1]);
 | 
	
		
			
				|  |  | +        $residues .= trim($line);
 | 
	
		
			
				|  |  | +  
 | 
	
		
			
				|  |  | +        // update the job status each time roughly 1% of the file has been read
 | 
	
		
			
				|  |  | +        if ($job and $intv_read >= $interval) {
 | 
	
		
			
				|  |  | +          $intv_read = 0;
 | 
	
		
			
				|  |  | +          $percent = sprintf("%.2f", ($num_read / $filesize) * 100);
 | 
	
		
			
				|  |  | +          if ($name) {
 | 
	
		
			
				|  |  | +            print "Parsing Line $i (" . $percent . "%). Memory: " . number_format(memory_get_usage()) . " bytes. Current feature: $name\r";
 | 
	
		
			
				|  |  |            }
 | 
	
		
			
				|  |  |            else {
 | 
	
		
			
				|  |  | -            tripal_report_error('trp-fasta', "ERROR: Cannot find a feature unique name. Line %line.", array('%line' => $i), 'error');
 | 
	
		
			
				|  |  | +            print "Parsing Line $i (" . $percent . "%). Memory: " . number_format(memory_get_usage()) . " bytes. Current feature: $uname\r";
 | 
	
		
			
				|  |  |            }
 | 
	
		
			
				|  |  | +          tripal_set_job_progress($job, intval(($num_read / $filesize) * 100));
 | 
	
		
			
				|  |  |          }
 | 
	
		
			
				|  |  |        }
 | 
	
		
			
				|  |  | -      // get the accession
 | 
	
		
			
				|  |  | -      preg_match("/$re_accession/", $line, $matches);
 | 
	
		
			
				|  |  | -      if (strlen($matches[1]) > $dbxref_tbl['fields']['accession']['length']) {
 | 
	
		
			
				|  |  | -        tripal_report_error('trp-fasta', "WARNING: Regular expression retrieves an accession too long for the feature name. Cannot add cross reference. Line %line.", array('%line' => $i), 'warning');
 | 
	
		
			
				|  |  | -      }
 | 
	
		
			
				|  |  | -      else {
 | 
	
		
			
				|  |  | -        $accession = trim($matches[1]);
 | 
	
		
			
				|  |  | -      }
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | -      // get the relationship subject
 | 
	
		
			
				|  |  | -      preg_match("/$re_subject/", $line, $matches);
 | 
	
		
			
				|  |  | -      $subject = trim($matches[1]);
 | 
	
		
			
				|  |  | -    }
 | 
	
		
			
				|  |  | -    else {
 | 
	
		
			
				|  |  | -      $residues .= trim($line);
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | -      // update the job status every % features
 | 
	
		
			
				|  |  | -      if ($job and $intv_read >= $interval) {
 | 
	
		
			
				|  |  | -        $intv_read = 0;
 | 
	
		
			
				|  |  | -        $percent = sprintf("%.2f", ($num_read / $filesize) * 100);
 | 
	
		
			
				|  |  | -        if ($name) {
 | 
	
		
			
				|  |  | -          print "Parsing Line $i (" . $percent . "%). Memory: " . number_format(memory_get_usage()) . " bytes. Current feature: $name\r";
 | 
	
		
			
				|  |  | -        }
 | 
	
		
			
				|  |  | -        else {
 | 
	
		
			
				|  |  | -          print "Parsing Line $i (" . $percent . "%). Memory: " . number_format(memory_get_usage()) . " bytes. Current feature: $uname\r";
 | 
	
		
			
				|  |  | -        }
 | 
	
		
			
				|  |  | -        tripal_set_job_progress($job, intval(($num_read / $filesize) * 100));
 | 
	
		
			
				|  |  | -      }
 | 
	
		
			
				|  |  |      }
 | 
	
		
			
				|  |  | -  }
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | -  // now load the last sequence in the file
 | 
	
		
			
				|  |  | -  tripal_feature_fasta_loader_handle_feature($name, $uname, $db_id,
 | 
	
		
			
				|  |  | -    $accession, $subject, $rel_type, $parent_type, $analysis_id, $organism_id, $cvterm,
 | 
	
		
			
				|  |  | -    $source, $residues, $method, $re_name, $match_type, $parentcvterm, $relcvterm);
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | -  // commit the transaction
 | 
	
		
			
				|  |  | -  tripal_db_commit_transaction();
 | 
	
		
			
				|  |  | +  
 | 
	
		
			
				|  |  | +    // now load the last sequence in the file
 | 
	
		
			
				|  |  | +    tripal_feature_fasta_loader_handle_feature($name, $uname, $db_id,
 | 
	
		
			
				|  |  | +      $accession, $subject, $rel_type, $parent_type, $analysis_id, $organism_id, $cvterm,
 | 
	
		
			
				|  |  | +      $source, $residues, $method, $re_name, $match_type, $parentcvterm, $relcvterm);
 | 
	
		
			
				|  |  | +  }
 | 
	
		
			
				|  |  | +  catch (Exception $e) {
 | 
	
		
			
				|  |  | +    print "\n"; // make sure we start errors on new line
 | 
	
		
			
				|  |  | +    watchdog_exception('T_fasta_loader', $e);
 | 
	
		
			
				|  |  | +    $transaction->rollback();
 | 
	
		
			
				|  |  | +    print "FAILED: Rolling back database changes...\n";
 | 
	
		
			
				|  |  | +  }
 | 
	
		
			
				|  |  | +  
 | 
	
		
			
				|  |  |    print "\nDone\n";
 | 
	
		
			
				|  |  |  }
 | 
	
		
			
				|  |  |  
 |